Skip to content
Snippets Groups Projects
Commit ec560597 authored by Adam Radziszewski's avatar Adam Radziszewski
Browse files

tagging utils

parent 2f02ba57
Branches
No related merge requests found
......@@ -59,8 +59,7 @@ int mask_card(const Tag& mask)
+ PwrNlp::count_bits_set(mask.get_values());
}
bool select_preferred_disamb(const Tagset& tagset,
Token* token)
bool select_preferred_disamb(const Tagset& tagset, Token* token)
{
size_t lex_idx = token->get_preferred_lexeme_index(tagset);
if(!token->lexemes()[lex_idx].is_disamb()) {
......@@ -77,6 +76,18 @@ bool select_preferred_disamb(const Tagset& tagset,
return true;
}
void select_preferred_lexeme(const Tagset& tagset, Token* token)
{
foreach (Lexeme& lex, token->lexemes()) {
lex.set_disamb(true);
}
if (token->lexemes().size() > 1) {
std::vector<Lexeme> one;
one.push_back(token->get_preferred_lexeme(tagset));
token->replace_lexemes(one);
}
}
void expand_optional_attrs(const Tagset& tagset, Token* token)
{
foreach (Lexeme& lex, token->lexemes()) {
......
......@@ -50,6 +50,12 @@ int mask_card(const Tag& mask);
*/
bool select_preferred_disamb(const Tagset& tagset, Token* token);
/** Forces at most one lexeme per token.
* All lexemes of the token are first marked as disamb. If the token then
* holds more than one lexeme, they are replaced by the single lexeme returned
* by Token::get_preferred_lexeme(tagset). The selection is based on tagset
* definition order, disamb markers are not respected.
* The selected lexeme will be set to disamb=True.
* A token holding zero or one lexeme keeps its lexeme list; only the disamb
* flag is set.
*
* @param tagset tagset passed to the preferred-lexeme selection
* @param token  token modified in place
*/
void select_preferred_lexeme(const Tagset& tagset, Token* token);
/** Encodes optional attributes with unspecified values as each value set.
* This is to facilitate safe masking when the value in question is not to be
* skipped.
......
......@@ -23,6 +23,8 @@ int mask_card(const Tag& mask);
bool select_preferred_disamb(const Tagset& tagset, Token* token);
void select_preferred_lexeme(const Tagset& tagset, Token* token);
void expand_optional_attrs(const Tagset& tagset, Token* token);
void select_singular_tags(const Tagset& tagset, Token* token);
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment