Skip to content
Snippets Groups Projects
Commit e1f1bf11 authored by Adam Radziszewski's avatar Adam Radziszewski
Browse files

tagging functions that distinguish tags only

parent 03614799
Branches
No related merge requests found
......@@ -63,7 +63,7 @@ bool select_preferred_disamb(const Tagset& tagset, Token* token)
{
size_t lex_idx = token->get_preferred_lexeme_index(tagset);
if(!token->lexemes()[lex_idx].is_disamb()) {
return false;
return false; // disamb would've taken precedence => no disamb at all
}
for (size_t other_idx = 0;
......@@ -88,6 +88,38 @@ void select_preferred_lexeme(const Tagset& tagset, Token* token)
}
}
bool select_preferred_disamb_tag(const Tagset& tagset, Token* token)
{
const Corpus2::Lexeme &prototypical = token->get_preferred_lexeme(tagset);
if(!prototypical.is_disamb()) {
return false; // disamb would've taken precedence => no disamb at all
}
foreach (Lexeme& lex, token->lexemes()) {
if (lex.tag() != prototypical.tag()) {
lex.set_disamb(false);
}
}
return true;
}
void select_preferred_tag(const Tagset& tagset, Token* token)
{
foreach (Lexeme& lex, token->lexemes()) {
lex.set_disamb(true);
}
if (token->lexemes().size() > 1) {
const Corpus2::Tag tag_wanted = token->get_preferred_lexeme(tagset).tag();
std::vector<Lexeme> wanted;
foreach (const Lexeme& lex, token->lexemes()) {
if (lex.tag() == tag_wanted) {
wanted.push_back(lex);
}
}
assert(!wanted.empty());
token->replace_lexemes(wanted);
}
}
void expand_optional_attrs(const Tagset& tagset, Token* token)
{
foreach (Lexeme& lex, token->lexemes()) {
......
......@@ -56,6 +56,17 @@ bool select_preferred_disamb(const Tagset& tagset, Token* token);
*/
void select_preferred_lexeme(const Tagset& tagset, Token* token);
/** Forces one DISAMB TAG per token. Works like select_preferred_disamb,
* but multiple disamb lexemes may be left, as long as they differ only
* in base forms. Returns true if any disamb lexeme was found.
*/
bool select_preferred_disamb_tag(const Tagset& tagset, Token* token);
/** Forces one TAG per token. Works like select_preferred_lexeme, but multiple
* lexemes may be left, as long as they differ only in base forms.
*/
void select_preferred_tag(const Tagset& tagset, Token* token);
/** Encodes optional attributes with unspecified values as each value set.
* This is to facilitate safe masking when the value in question is not to be
* skipped.
......
......@@ -25,6 +25,10 @@ bool select_preferred_disamb(const Tagset& tagset, Token* token);
void select_preferred_lexeme(const Tagset& tagset, Token* token);
bool select_preferred_disamb_tag(const Tagset& tagset, Token* token);
void select_preferred_tag(const Tagset& tagset, Token* token);
void expand_optional_attrs(const Tagset& tagset, Token* token);
void select_singular_tags(const Tagset& tagset, Token* token);
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment