Skip to content
Snippets Groups Projects
Commit 19e77ee0 authored by ilor's avatar ilor
Browse files

convenience functions for token disamb lexemes

parent b244ceb1
No related merge requests found
......@@ -15,9 +15,10 @@ or FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <libcorpus2/token.h>
#include <sstream>
#include <libpwrutils/foreach.h>
#include <libcorpus2/tokenmetadata.h>
#include <sstream>
#include <boost/bind.hpp>
namespace Corpus2 {
......@@ -79,6 +80,25 @@ size_t Token::get_preferred_lexeme_index(const Tagset& tagset) const
return std::distance(lexemes_.begin(), pref);
}
bool Token::has_disamb_lexeme() const
{
return std::find_if(lexemes().begin(), lexemes().end(),
boost::bind(&Lexeme::is_disamb, _1)) != lexemes().end();
}
int Token::count_disamb_lexemes() const
{
return std::count_if(lexemes().begin(), lexemes().end(),
boost::bind(&Lexeme::is_disamb, _1));
}
std::pair<Token::lexeme_filter_iterator, Token::lexeme_filter_iterator> Token::disamb_lexemes() const
{
lexeme_filter_iterator f1(boost::bind(&Lexeme::is_disamb, _1), lexemes().begin(), lexemes().end());
lexeme_filter_iterator f2(boost::bind(&Lexeme::is_disamb, _1), lexemes().end(), lexemes().end());
return std::make_pair(f1, f2);
}
void Token::make_ign(const Tagset& tagset)
{
lexemes_.clear();
......
......@@ -23,6 +23,9 @@ or FITNESS FOR A PARTICULAR PURPOSE.
#include <libpwrutils/util.h>
#include <libpwrutils/whitespace.h>
#include <boost/iterator/filter_iterator.hpp>
#include <boost/function.hpp>
#include <unicode/unistr.h>
#include <memory>
#include <string>
......@@ -128,6 +131,28 @@ public:
*/
bool remove_duplicate_lexemes();
/**
* Check whether this token has at least one disamb-marked lexeme.
*
* @return true if any lexeme of this token reports is_disamb(), false
*         otherwise (including when the token has no lexemes).
*/
bool has_disamb_lexeme() const;
/**
* Count the disamb-marked lexemes in this token.
*
* @return the number of lexemes of this token that report is_disamb();
*         0 when there are none.
*/
int count_disamb_lexemes() const;
/// Helper iterator typedef: a boost::filter_iterator over a constant
/// std::vector<Lexeme> range, driven by a boost::function predicate that
/// takes a const Lexeme& and returns bool. Lexemes for which the
/// predicate returns false are skipped during iteration.
typedef boost::filter_iterator<
boost::function<bool (const Lexeme&)>,
std::vector<Lexeme>::const_iterator
> lexeme_filter_iterator;
/**
* Helper to iterate through disamb-marked lexemes only (lexemes with
* the disamb flag false are skipped).
*
* @return a pair of filter iterators: first is the start of the filtered
*         range, second is its end. Iterate while first != second.
*         The pair is empty (first == second) when no lexeme is disamb.
*/
std::pair<lexeme_filter_iterator, lexeme_filter_iterator> disamb_lexemes() const;
/**
* Check if all the lexemes of the token have the given POS, and
* the token orth caselessly matches the given orth.
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment