From 8aa9c414cb19558650c8deaa91daf47471bc639f Mon Sep 17 00:00:00 2001 From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl> Date: Wed, 13 Jul 2011 14:38:41 +0200 Subject: [PATCH] tagging: token as tag projection --- libcorpus2/tagging.cpp | 16 ++++++++++++++-- libcorpus2/tagging.h | 7 +++++++ swig/tagging.i | 6 +++++- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/libcorpus2/tagging.cpp b/libcorpus2/tagging.cpp index b98aee1..b1f4587 100644 --- a/libcorpus2/tagging.cpp +++ b/libcorpus2/tagging.cpp @@ -15,9 +15,10 @@ or FITNESS FOR A PARTICULAR PURPOSE. */ #include <libcorpus2/tagging.h> -#include <libcorpus2/tagsetmanager.h> +#include <libcorpus2/lexeme.h> -// #include <libpwrutils/foreach.h> + +#include <libpwrutils/foreach.h> namespace Corpus2 { @@ -40,5 +41,16 @@ Tag get_attribute_mask(const Tagset& tagset, std::string attr_name) } } +Tag mask_token(const Token& token, const Tag& mask, bool disamb_only) +{ + Tag t; + foreach (const Corpus2::Lexeme& lexeme, token.lexemes()) { + if(lexeme.is_disamb() || !disamb_only) { + t.combine_with(lexeme.tag().get_masked(mask)); + } + } + return t; +} + } /* end ns Corpus2 */ diff --git a/libcorpus2/tagging.h b/libcorpus2/tagging.h index b767a85..42e4dd2 100644 --- a/libcorpus2/tagging.h +++ b/libcorpus2/tagging.h @@ -18,6 +18,7 @@ or FITNESS FOR A PARTICULAR PURPOSE. #define LIBCORPUS2_TAGGING_H #include <libcorpus2/tag.h> +#include <libcorpus2/token.h> #include <libcorpus2/tagset.h> namespace Corpus2 { @@ -35,6 +36,12 @@ namespace Corpus2 { Tag get_attribute_mask(const Tagset& tagset, const std::string attr_name); +/** + * Projects the token onto the mask. Depending on disamb_only, will consider + * only disamb or all lexemes. + */ +Tag mask_token(const Token& token, const Tag& mask, bool disamb_only); + } /* end ns Corpus2 */ #endif // LIBCORPUS2_TAGGING_H diff --git a/swig/tagging.i b/swig/tagging.i index 8d2301a..21c79f1 100644 --- a/swig/tagging.i +++ b/swig/tagging.i @@ -10,11 +10,15 @@ %include "tag.i" %include "tagset.i" +%include "token.i" namespace Corpus2 { Tag get_attribute_mask(const Tagset& tagset, - const std::string attr_name); + const std::string attr_name); + +Tag mask_token(const Token& token, const Tag& mask, bool disamb_only); + } using namespace std; -- GitLab