Skip to content
Snippets Groups Projects
Commit ec560597 authored by Adam Radziszewski's avatar Adam Radziszewski
Browse files

tagging utils

parent 2f02ba57
No related merge requests found
...@@ -59,8 +59,7 @@ int mask_card(const Tag& mask) ...@@ -59,8 +59,7 @@ int mask_card(const Tag& mask)
+ PwrNlp::count_bits_set(mask.get_values()); + PwrNlp::count_bits_set(mask.get_values());
} }
bool select_preferred_disamb(const Tagset& tagset, bool select_preferred_disamb(const Tagset& tagset, Token* token)
Token* token)
{ {
size_t lex_idx = token->get_preferred_lexeme_index(tagset); size_t lex_idx = token->get_preferred_lexeme_index(tagset);
if(!token->lexemes()[lex_idx].is_disamb()) { if(!token->lexemes()[lex_idx].is_disamb()) {
...@@ -77,6 +76,18 @@ bool select_preferred_disamb(const Tagset& tagset, ...@@ -77,6 +76,18 @@ bool select_preferred_disamb(const Tagset& tagset,
return true; return true;
} }
void select_preferred_lexeme(const Tagset& tagset, Token* token)
{
foreach (Lexeme& lex, token->lexemes()) {
lex.set_disamb(true);
}
if (token->lexemes().size() > 1) {
std::vector<Lexeme> one;
one.push_back(token->get_preferred_lexeme(tagset));
token->replace_lexemes(one);
}
}
void expand_optional_attrs(const Tagset& tagset, Token* token) void expand_optional_attrs(const Tagset& tagset, Token* token)
{ {
foreach (Lexeme& lex, token->lexemes()) { foreach (Lexeme& lex, token->lexemes()) {
......
...@@ -50,6 +50,12 @@ int mask_card(const Tag& mask); ...@@ -50,6 +50,12 @@ int mask_card(const Tag& mask);
*/ */
bool select_preferred_disamb(const Tagset& tagset, Token* token); bool select_preferred_disamb(const Tagset& tagset, Token* token);
/** Forces one lexeme per token. The selection is based on tagset
 * definition order; disamb markers are not respected.
 *
 * Every lexeme of the token is first marked disamb=true. When the token
 * holds more than one lexeme, its lexeme list is then replaced with a list
 * containing only the preferred lexeme, so the selected lexeme always ends
 * up with disamb=true. Tokens with zero or one lexeme keep their list.
 *
 * @param tagset tagset passed to Token::get_preferred_lexeme
 * @param token  token modified in place; it is dereferenced without a
 *               null check
 */
void select_preferred_lexeme(const Tagset& tagset, Token* token);
/** Encodes optional attributes with unspecified values as each value set. /** Encodes optional attributes with unspecified values as each value set.
* This is to facilitate safe masking when the value in question is not to be * This is to facilitate safe masking when the value in question is not to be
* skipped. * skipped.
......
...@@ -23,6 +23,8 @@ int mask_card(const Tag& mask); ...@@ -23,6 +23,8 @@ int mask_card(const Tag& mask);
bool select_preferred_disamb(const Tagset& tagset, Token* token); bool select_preferred_disamb(const Tagset& tagset, Token* token);
void select_preferred_lexeme(const Tagset& tagset, Token* token);
void expand_optional_attrs(const Tagset& tagset, Token* token); void expand_optional_attrs(const Tagset& tagset, Token* token);
void select_singular_tags(const Tagset& tagset, Token* token); void select_singular_tags(const Tagset& tagset, Token* token);
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment