diff --git a/libcorpus2/tagging.cpp b/libcorpus2/tagging.cpp index b98aee1c6d5e677eed92002d2209d3c38927d824..b1f4587deff2189e8cc9f962ce0dbc942dfae1b7 100644 --- a/libcorpus2/tagging.cpp +++ b/libcorpus2/tagging.cpp @@ -15,9 +15,10 @@ or FITNESS FOR A PARTICULAR PURPOSE. */ #include <libcorpus2/tagging.h> -#include <libcorpus2/tagsetmanager.h> +#include <libcorpus2/lexeme.h> -// #include <libpwrutils/foreach.h> + +#include <libpwrutils/foreach.h> namespace Corpus2 { @@ -40,5 +41,16 @@ Tag get_attribute_mask(const Tagset& tagset, std::string attr_name) } } +Tag mask_token(const Token& token, const Tag& mask, bool disamb_only) +{ + Tag t; + foreach (const Corpus2::Lexeme& lexeme, token.lexemes()) { + if(lexeme.is_disamb() || !disamb_only) { + t.combine_with(lexeme.tag().get_masked(mask)); + } + } + return t; +} + } /* end ns Corpus2 */ diff --git a/libcorpus2/tagging.h b/libcorpus2/tagging.h index b767a853fdad6d800827e53ed94c643215baf5a5..42e4dd29d6c1376eff539cbca954d81f0bce77ad 100644 --- a/libcorpus2/tagging.h +++ b/libcorpus2/tagging.h @@ -18,6 +18,7 @@ or FITNESS FOR A PARTICULAR PURPOSE. #define LIBCORPUS2_TAGGING_H #include <libcorpus2/tag.h> +#include <libcorpus2/token.h> #include <libcorpus2/tagset.h> namespace Corpus2 { @@ -35,6 +36,12 @@ namespace Corpus2 { Tag get_attribute_mask(const Tagset& tagset, const std::string attr_name); +/** + * Projects the token onto the mask: the result combines the masked tags of + * its lexemes (only disamb ones if disamb_only is set, all of them otherwise). 
+ */ +Tag mask_token(const Token& token, const Tag& mask, bool disamb_only); + } /* end ns Corpus2 */ #endif // LIBCORPUS2_TAGGING_H diff --git a/swig/tagging.i b/swig/tagging.i index 8d2301a690fd17c615467df646776bd40f4f30cf..21c79f1fe484df8a795239711a0ce1c242eb2bd6 100644 --- a/swig/tagging.i +++ b/swig/tagging.i @@ -10,11 +10,15 @@ %include "tag.i" %include "tagset.i" +%include "token.i" namespace Corpus2 { Tag get_attribute_mask(const Tagset& tagset, - const std::string attr_name); + const std::string attr_name); + +Tag mask_token(const Token& token, const Tag& mask, bool disamb_only); + } using namespace std;