diff --git a/libcorpus2/token.cpp b/libcorpus2/token.cpp
index 7efe33d2a6b6a8c16fee0e6f4d42d996d886e038..dba032254ca1c2e122be2429cd41d4c5ae776a86 100644
--- a/libcorpus2/token.cpp
+++ b/libcorpus2/token.cpp
@@ -15,9 +15,11 @@ or FITNESS FOR A PARTICULAR PURPOSE.
 */
 
 #include <libcorpus2/token.h>
-#include <sstream>
 #include <libpwrutils/foreach.h>
 #include <libcorpus2/tokenmetadata.h>
+#include <algorithm>
+#include <sstream>
+#include <boost/bind.hpp>
 
 namespace Corpus2 {
 
@@ -79,6 +81,33 @@ size_t Token::get_preferred_lexeme_index(const Tagset& tagset) const
 	return std::distance(lexemes_.begin(), pref);
 }
 
+bool Token::has_disamb_lexeme() const
+{
+	// find_if returns end() when no lexeme satisfies is_disamb().
+	return std::find_if(lexemes().begin(), lexemes().end(),
+		boost::bind(&Lexeme::is_disamb, _1)) != lexemes().end();
+}
+
+int Token::count_disamb_lexemes() const
+{
+	// std::count_if yields the iterator difference_type; convert explicitly
+	// to the declared int return type instead of narrowing silently.
+	return static_cast<int>(std::count_if(lexemes().begin(), lexemes().end(),
+		boost::bind(&Lexeme::is_disamb, _1)));
+}
+
+std::pair<Token::lexeme_filter_iterator, Token::lexeme_filter_iterator> Token::disamb_lexemes() const
+{
+	// Build the predicate once and share it between both range ends.
+	const boost::function<bool (const Lexeme&)> disamb_pred =
+		boost::bind(&Lexeme::is_disamb, _1);
+	// First iterator starts at the beginning of the lexemes, second one
+	// sits at the underlying end and serves as the past-the-end marker.
+	return std::make_pair(
+		lexeme_filter_iterator(disamb_pred, lexemes().begin(), lexemes().end()),
+		lexeme_filter_iterator(disamb_pred, lexemes().end(), lexemes().end()));
+}
+
 void Token::make_ign(const Tagset& tagset)
 {
 	lexemes_.clear();
diff --git a/libcorpus2/token.h b/libcorpus2/token.h
index 7246fb2bfd5c7ae5d15ab294bcb95de8c872d4cb..13c623fb8895955913de658f8e87225a336cf190 100644
--- a/libcorpus2/token.h
+++ b/libcorpus2/token.h
@@ -23,6 +23,9 @@ or FITNESS FOR A PARTICULAR PURPOSE.
 #include <libpwrutils/util.h>
 #include <libpwrutils/whitespace.h>
 
+#include <boost/iterator/filter_iterator.hpp>
+#include <boost/function.hpp>
+
 #include <unicode/unistr.h>
 #include <memory>
 #include <string>
@@ -128,6 +131,35 @@ public:
 	 */
 	bool remove_duplicate_lexemes();
 
+	/**
+	 * Check whether any lexeme of this token is marked as disamb.
+	 * @return true if at least one lexeme has the disamb flag set, false
+	 *         otherwise (in particular when the token has no lexemes)
+	 */
+	bool has_disamb_lexeme() const;
+
+	/**
+	 * Count the lexemes of this token that are marked as disamb.
+	 * @return the number of lexemes with the disamb flag set
+	 */
+	int count_disamb_lexemes() const;
+
+	/// Const lexeme iterator that skips lexemes rejected by a predicate;
+	/// this is the iterator type returned by disamb_lexemes().
+	typedef boost::filter_iterator<
+		boost::function<bool (const Lexeme&)>,
+		std::vector<Lexeme>::const_iterator
+	> lexeme_filter_iterator;
+
+	/**
+	 * Helper to iterate through disamb-marked lexemes only (lexemes with
+	 * the disamb flag false are skipped).
+	 * @return a (begin, end) pair of filter iterators; they wrap vector
+	 *         const_iterators taken from lexemes(), so the usual vector
+	 *         iterator invalidation rules apply
+	 */
+	std::pair<lexeme_filter_iterator, lexeme_filter_iterator> disamb_lexemes() const;
+
 	/**
 	 * Check if all the lexemes of the token have the given POS, and and
 	 * the token orth caselessly matches the given orth.