Skip to content
Snippets Groups Projects
Commit ec560597 authored by Adam Radziszewski
Browse files

tagging utils

parent 2f02ba57
Branches
No related tags found
No related merge requests found
...@@ -59,8 +59,7 @@ int mask_card(const Tag& mask) ...@@ -59,8 +59,7 @@ int mask_card(const Tag& mask)
+ PwrNlp::count_bits_set(mask.get_values()); + PwrNlp::count_bits_set(mask.get_values());
} }
bool select_preferred_disamb(const Tagset& tagset, bool select_preferred_disamb(const Tagset& tagset, Token* token)
Token* token)
{ {
size_t lex_idx = token->get_preferred_lexeme_index(tagset); size_t lex_idx = token->get_preferred_lexeme_index(tagset);
if(!token->lexemes()[lex_idx].is_disamb()) { if(!token->lexemes()[lex_idx].is_disamb()) {
...@@ -77,6 +76,18 @@ bool select_preferred_disamb(const Tagset& tagset, ...@@ -77,6 +76,18 @@ bool select_preferred_disamb(const Tagset& tagset,
return true; return true;
} }
void select_preferred_lexeme(const Tagset& tagset, Token* token)
{
foreach (Lexeme& lex, token->lexemes()) {
lex.set_disamb(true);
}
if (token->lexemes().size() > 1) {
std::vector<Lexeme> one;
one.push_back(token->get_preferred_lexeme(tagset));
token->replace_lexemes(one);
}
}
void expand_optional_attrs(const Tagset& tagset, Token* token) void expand_optional_attrs(const Tagset& tagset, Token* token)
{ {
foreach (Lexeme& lex, token->lexemes()) { foreach (Lexeme& lex, token->lexemes()) {
......
...@@ -50,6 +50,12 @@ int mask_card(const Tag& mask); ...@@ -50,6 +50,12 @@ int mask_card(const Tag& mask);
*/ */
bool select_preferred_disamb(const Tagset& tagset, Token* token); bool select_preferred_disamb(const Tagset& tagset, Token* token);
/** Forces one lexeme per token. The selection is based on tagset
 * definition order, disamb markers are not respected.
 * The selected lexeme will be set to disamb=True.
 *
 * Every lexeme of the token is first marked as disamb. If the token then
 * holds more than one lexeme, its lexemes are replaced with the single one
 * returned by token->get_preferred_lexeme(tagset). A token with zero or
 * one lexeme keeps its lexeme list.
 *
 * @param tagset tagset passed to the preferred-lexeme selection
 * @param token  token to modify in place
 */
void select_preferred_lexeme(const Tagset& tagset, Token* token);
/** Encodes optional attributes with unspecified values as each value set. /** Encodes optional attributes with unspecified values as each value set.
* This is to facilitate safe masking when the value in question is not to be * This is to facilitate safe masking when the value in question is not to be
* skipped. * skipped.
......
...@@ -23,6 +23,8 @@ int mask_card(const Tag& mask); ...@@ -23,6 +23,8 @@ int mask_card(const Tag& mask);
bool select_preferred_disamb(const Tagset& tagset, Token* token); bool select_preferred_disamb(const Tagset& tagset, Token* token);
void select_preferred_lexeme(const Tagset& tagset, Token* token);
void expand_optional_attrs(const Tagset& tagset, Token* token); void expand_optional_attrs(const Tagset& tagset, Token* token);
void select_singular_tags(const Tagset& tagset, Token* token); void select_singular_tags(const Tagset& tagset, Token* token);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment