From ec56059728c826b4ed3d597dc6e017c3691fad18 Mon Sep 17 00:00:00 2001
From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl>
Date: Wed, 27 Jul 2011 18:52:21 +0200
Subject: [PATCH] tagging utils

---
 libcorpus2/tagging.cpp | 15 +++++++++++++--
 libcorpus2/tagging.h   |  6 ++++++
 swig/tagging.i         |  2 ++
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/libcorpus2/tagging.cpp b/libcorpus2/tagging.cpp
index 41b0733..1572fa8 100644
--- a/libcorpus2/tagging.cpp
+++ b/libcorpus2/tagging.cpp
@@ -59,8 +59,7 @@ int mask_card(const Tag& mask)
 			+ PwrNlp::count_bits_set(mask.get_values());
 }
 
-bool select_preferred_disamb(const Tagset& tagset,
-							 Token* token)
+bool select_preferred_disamb(const Tagset& tagset, Token* token)
 {
 	size_t lex_idx = token->get_preferred_lexeme_index(tagset);
 	if(!token->lexemes()[lex_idx].is_disamb()) {
@@ -77,6 +76,18 @@ bool select_preferred_disamb(const Tagset& tagset,
 	return true;
 }
 
+void select_preferred_lexeme(const Tagset& tagset, Token* token)
+{
+	foreach (Lexeme& candidate, token->lexemes()) {
+		candidate.set_disamb(true); // every lexeme ends up flagged disamb
+	}
+	if (token->lexemes().size() <= 1) {
+		return; // zero or one lexeme: nothing to prune
+	}
+	std::vector<Lexeme> chosen(1, token->get_preferred_lexeme(tagset));
+	token->replace_lexemes(chosen);
+}
+
 void expand_optional_attrs(const Tagset& tagset, Token* token)
 {
 	foreach (Lexeme& lex, token->lexemes()) {
diff --git a/libcorpus2/tagging.h b/libcorpus2/tagging.h
index 7ae0a39..59fbcd4 100644
--- a/libcorpus2/tagging.h
+++ b/libcorpus2/tagging.h
@@ -50,6 +50,12 @@ int mask_card(const Tag& mask);
   */
 bool select_preferred_disamb(const Tagset& tagset, Token* token);
 
+/** Forces one lexeme per token. The selection is based on tagset
+  * definition order; existing disamb markers are not respected.
+  * The lexeme that remains is marked as disamb.
+  */
+void select_preferred_lexeme(const Tagset& tagset, Token* token);
+
 /** Encodes optional attributes with unspecified values as each value set.
   * This is to facilitate safe masking when the value in question is not to be
   * skipped.
diff --git a/swig/tagging.i b/swig/tagging.i
index c9fdd9c..c09e2a1 100644
--- a/swig/tagging.i
+++ b/swig/tagging.i
@@ -23,6 +23,8 @@ int mask_card(const Tag& mask);
 
 bool select_preferred_disamb(const Tagset& tagset, Token* token);
 
+void select_preferred_lexeme(const Tagset& tagset, Token* token);
+
 void expand_optional_attrs(const Tagset& tagset, Token* token);
 
 void select_singular_tags(const Tagset& tagset, Token* token);
-- 
GitLab