From 1e850797d3b9b9e154a050c695d6f9368d3f6fcf Mon Sep 17 00:00:00 2001 From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl> Date: Fri, 15 Jul 2011 13:58:14 +0200 Subject: [PATCH] disambiguation functions --- libcorpus2/tagging.cpp | 34 ++++++++++++++++++++++++++++++++++ libcorpus2/tagging.h | 18 ++++++++++++++++++ swig/tagging.i | 4 ++++ 3 files changed, 56 insertions(+) diff --git a/libcorpus2/tagging.cpp b/libcorpus2/tagging.cpp index 6730461..d009c95 100644 --- a/libcorpus2/tagging.cpp +++ b/libcorpus2/tagging.cpp @@ -91,4 +91,38 @@ void select_singular_tags(const Tagset& tagset, Token* token) } } +bool disambiguate_equal(Token* token, const Tag& mask_where, + const Tag& mask_wanted) +{ + std::vector<Lexeme> wanted; + foreach (const Lexeme& lex, token->lexemes()) { + Tag mask_theirs = lex.tag().get_masked(mask_where); + if (mask_theirs == mask_wanted) { + wanted.push_back(lex); + } + } + if (wanted.empty()) { /* nothing matched; keep token as is */ + return false; + } + token->replace_lexemes(wanted); + return true; +} + +bool disambiguate_subset(Token* token, const Tag& mask_where, + const Tag& mask_wanted) +{ + std::vector<Lexeme> wanted; + foreach (const Lexeme& lex, token->lexemes()) { + Tag mask_theirs = lex.tag().get_masked(mask_where); + if (mask_theirs.get_masked(mask_wanted) == mask_theirs) { + wanted.push_back(lex); + } + } + if (wanted.empty()) { /* nothing matched; keep token as is */ + return false; + } + token->replace_lexemes(wanted); + return true; +} + } /* end ns Corpus2 */ diff --git a/libcorpus2/tagging.h b/libcorpus2/tagging.h index 16ed943..0cbb443 100644 --- a/libcorpus2/tagging.h +++ b/libcorpus2/tagging.h @@ -61,6 +61,24 @@ void expand_unspec_attrs(const Tagset& tagset, Token* token); */ void select_singular_tags(const Tagset& tagset, Token* token); +/** Tries to select only those lexemes whose tags projected onto mask_where + * have exactly the value given in mask_wanted. E.g. pass a whole attribute + * as mask_where and a particular desired value as mask_wanted. 
If no lexeme + * satisfies the constraint, the token is left intact. + * @return true if at least one lexeme matched, false if none did + */ +bool disambiguate_equal(Token* token, const Tag& mask_where, + const Tag& mask_wanted); + +/** Tries to select only those lexemes whose tags projected onto mask_where + * have a subset of the value given in mask_wanted. E.g. pass a noun + gerund + * mask and have both kinds left. NOTE: this may be inconvenient for dealing with + * optional attributes. If no lexeme satisfies the constraint, the token is + * left intact. (Passes when masking the projection by mask_wanted is a no-op.) + * @return true if at least one lexeme matched, false if none did + */ +bool disambiguate_subset(Token* token, const Tag& mask_where, + const Tag& mask_wanted); } /* end ns Corpus2 */ diff --git a/swig/tagging.i b/swig/tagging.i index 5ae141d..96d1bb8 100644 --- a/swig/tagging.i +++ b/swig/tagging.i @@ -27,6 +27,10 @@ void expand_unspec_attrs(const Tagset& tagset, Token* token); void select_singular_tags(const Tagset& tagset, Token* token); +bool disambiguate_equal(Token* token, const Tag& mask_where, const Tag& mask_wanted); + +bool disambiguate_subset(Token* token, const Tag& mask_where, const Tag& mask_wanted); + } using namespace std; -- GitLab