Skip to content
Snippets Groups Projects
Commit 8aa9c414 authored by Adam Radziszewski's avatar Adam Radziszewski
Browse files

tagging: token as tag projection

parent 712d1481
Branches
No related merge requests found
...@@ -15,9 +15,10 @@ or FITNESS FOR A PARTICULAR PURPOSE. ...@@ -15,9 +15,10 @@ or FITNESS FOR A PARTICULAR PURPOSE.
*/ */
#include <libcorpus2/tagging.h> #include <libcorpus2/tagging.h>
#include <libcorpus2/tagsetmanager.h> #include <libcorpus2/lexeme.h>
// #include <libpwrutils/foreach.h>
#include <libpwrutils/foreach.h>
namespace Corpus2 { namespace Corpus2 {
...@@ -40,5 +41,16 @@ Tag get_attribute_mask(const Tagset& tagset, std::string attr_name) ...@@ -40,5 +41,16 @@ Tag get_attribute_mask(const Tagset& tagset, std::string attr_name)
} }
} }
/**
 * Builds the projection of a token onto a mask: the masked tags of the
 * token's lexemes are combined into a single resulting tag.
 *
 * @param token        token whose lexemes are iterated
 * @param mask         mask passed to get_masked() for each lexeme's tag
 * @param disamb_only  when true, lexemes for which is_disamb() is false
 *                     are left out; when false, every lexeme contributes
 * @return the tag accumulated from all contributing lexemes (a
 *         default-constructed Tag when no lexeme contributes)
 */
Tag mask_token(const Token& token, const Tag& mask, bool disamb_only)
{
	Tag projection;
	foreach (const Corpus2::Lexeme& lex, token.lexemes()) {
		// Guard: drop non-disamb lexemes when only disamb ones are wanted.
		if (!lex.is_disamb() && disamb_only) {
			continue;
		}
		projection.combine_with(lex.tag().get_masked(mask));
	}
	return projection;
}
} /* end ns Corpus2 */ } /* end ns Corpus2 */
...@@ -18,6 +18,7 @@ or FITNESS FOR A PARTICULAR PURPOSE. ...@@ -18,6 +18,7 @@ or FITNESS FOR A PARTICULAR PURPOSE.
#define LIBCORPUS2_TAGGING_H #define LIBCORPUS2_TAGGING_H
#include <libcorpus2/tag.h> #include <libcorpus2/tag.h>
#include <libcorpus2/token.h>
#include <libcorpus2/tagset.h> #include <libcorpus2/tagset.h>
namespace Corpus2 { namespace Corpus2 {
...@@ -35,6 +36,12 @@ namespace Corpus2 { ...@@ -35,6 +36,12 @@ namespace Corpus2 {
Tag get_attribute_mask(const Tagset& tagset, Tag get_attribute_mask(const Tagset& tagset,
const std::string attr_name); const std::string attr_name);
/**
 * Projects the token onto the mask: the tags of the token's lexemes are
 * masked with the given mask and combined into one resulting tag.
 *
 * @param token the token whose lexemes are examined
 * @param mask the mask applied to each considered lexeme's tag
 * @param disamb_only if true, only lexemes marked as disamb are considered;
 *        if false, all lexemes are considered
 * @return the tag obtained by combining the masked tags of the considered
 *         lexemes
 */
Tag mask_token(const Token& token, const Tag& mask, bool disamb_only);
} /* end ns Corpus2 */ } /* end ns Corpus2 */
#endif // LIBCORPUS2_TAGGING_H #endif // LIBCORPUS2_TAGGING_H
...@@ -10,11 +10,15 @@ ...@@ -10,11 +10,15 @@
%include "tag.i" %include "tag.i"
%include "tagset.i" %include "tagset.i"
%include "token.i"
namespace Corpus2 { namespace Corpus2 {
Tag get_attribute_mask(const Tagset& tagset, Tag get_attribute_mask(const Tagset& tagset,
const std::string attr_name); const std::string attr_name);
// Projects the token onto the mask; disamb_only selects whether only disamb
// lexemes or all lexemes of the token are considered.
Tag mask_token(const Token& token, const Tag& mask, bool disamb_only);
} }
using namespace std; using namespace std;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment