Skip to content
Snippets Groups Projects
Commit 1e850797 authored by Adam Radziszewski's avatar Adam Radziszewski
Browse files

disambiguation functions

parent f7917760
Branches
No related merge requests found
......@@ -91,4 +91,38 @@ void select_singular_tags(const Tagset& tagset, Token* token)
}
}
/* Filters the token's lexemes down to those whose tag, masked with
 * mask_where, compares equal to mask_wanted. When nothing matches, the
 * token is not modified and false is returned. */
bool disambiguate_equal(Token* token, const Tag& mask_where,
		const Tag& mask_wanted)
{
	std::vector<Lexeme> matching;
	foreach (const Lexeme& candidate, token->lexemes()) {
		// Project the candidate's tag onto the inspected part only.
		Tag projected = candidate.tag().get_masked(mask_where);
		if (projected == mask_wanted) {
			matching.push_back(candidate);
		}
	}
	// Only touch the token when at least one lexeme survived the filter.
	const bool any_matched = !matching.empty();
	if (any_matched) {
		token->replace_lexemes(matching);
	}
	return any_matched;
}
/* Filters the token's lexemes down to those whose tag, masked with
 * mask_where, is left unchanged by a further masking with mask_wanted.
 * When nothing matches, the token is not modified and false is returned. */
bool disambiguate_subset(Token* token, const Tag& mask_where,
		const Tag& mask_wanted)
{
	std::vector<Lexeme> kept;
	foreach (const Lexeme& candidate, token->lexemes()) {
		Tag projected = candidate.tag().get_masked(mask_where);
		// Masking with mask_wanted must not remove anything from the
		// projected value; otherwise the candidate is rejected.
		const bool is_subset =
			(projected.get_masked(mask_wanted) == projected);
		if (!is_subset) {
			continue;
		}
		kept.push_back(candidate);
	}
	if (!kept.empty()) {
		token->replace_lexemes(kept);
		return true;
	}
	return false;
}
} /* end ns Corpus2 */
......@@ -61,6 +61,24 @@ void expand_unspec_attrs(const Tagset& tagset, Token* token);
*/
void select_singular_tags(const Tagset& tagset, Token* token);
/** Tries to keep only those lexemes whose tags, projected onto mask_where,
* are exactly equal to mask_wanted. E.g. pass the mask of a whole attribute
* as mask_where and the mask of one particular desired value as mask_wanted.
* If no lexeme satisfies the constraint, the token is left intact.
* @param token the token whose lexemes are to be filtered
* @param mask_where mask selecting the part of each tag that is compared
* @param mask_wanted value that the selected part of a tag must equal
* @return true if at least one lexeme satisfied the constraint (the token's
* lexemes are then replaced with the matching ones), false otherwise
*/
bool disambiguate_equal(Token* token, const Tag& mask_where,
const Tag& mask_wanted);
/** Tries to keep only those lexemes whose tags, projected onto mask_where,
* have a value that is a subset of the value given in mask_wanted. E.g. pass
* a noun + gerund mask and have lexemes of both kinds left. NOTE: this may be
* inconvenient for dealing with optional attributes (presumably because
* a tag with no value under mask_where also counts as a subset -- TODO
* confirm). If no lexeme satisfies the constraint, the token is left intact.
* @param token the token whose lexemes are to be filtered
* @param mask_where mask selecting the part of each tag that is inspected
* @param mask_wanted mask of all values accepted within the selected part
* @return true if at least one lexeme satisfied the constraint (the token's
* lexemes are then replaced with the matching ones), false otherwise
*/
bool disambiguate_subset(Token* token, const Tag& mask_where,
const Tag& mask_wanted);
} /* end ns Corpus2 */
......
......@@ -27,6 +27,10 @@ void expand_unspec_attrs(const Tagset& tagset, Token* token);
void select_singular_tags(const Tagset& tagset, Token* token);
/* Keeps only the lexemes whose tag masked by mask_where equals mask_wanted; returns false and leaves the token unchanged if none match. */
bool disambiguate_equal(Token* token, const Tag& mask_where, const Tag& mask_wanted);
/* Keeps only the lexemes whose tag masked by mask_where is unchanged by further masking with mask_wanted; returns false and leaves the token unchanged if none match. */
bool disambiguate_subset(Token* token, const Tag& mask_where, const Tag& mask_wanted);
}
using namespace std;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment