From e1f1bf11afaf56b16610c24512ec06aea2f37f08 Mon Sep 17 00:00:00 2001
From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl>
Date: Wed, 17 Aug 2011 14:14:54 +0200
Subject: [PATCH] tagging functions that distinguish tags only

---
 libcorpus2/tagging.cpp | 34 +++++++++++++++++++++++++++++++++-
 libcorpus2/tagging.h   | 11 +++++++++++
 swig/tagging.i         |  4 ++++
 3 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/libcorpus2/tagging.cpp b/libcorpus2/tagging.cpp
index 1572fa8..b4bc5c3 100644
--- a/libcorpus2/tagging.cpp
+++ b/libcorpus2/tagging.cpp
@@ -63,7 +63,7 @@ bool select_preferred_disamb(const Tagset& tagset, Token* token)
 {
 	size_t lex_idx = token->get_preferred_lexeme_index(tagset);
 	if(!token->lexemes()[lex_idx].is_disamb()) {
-		return false;
+		return false; // disamb would've taken precedence => no disamb at all
 	}
 
 	for (size_t other_idx = 0;
@@ -88,6 +88,38 @@ void select_preferred_lexeme(const Tagset& tagset, Token* token)
 	}
 }
 
+bool select_preferred_disamb_tag(const Tagset& tagset, Token* token)
+{
+	const Corpus2::Lexeme& chosen = token->get_preferred_lexeme(tagset);
+	if (chosen.is_disamb()) {
+		foreach (Lexeme& candidate, token->lexemes()) {
+			if (candidate.tag() != chosen.tag()) { // base forms are ignored
+				candidate.set_disamb(false);
+			}
+		}
+		return true;
+	}
+	return false; // preferred lexeme is not disamb => no disamb at all
+}
+
+void select_preferred_tag(const Tagset& tagset, Token* token)
+{
+	foreach (Lexeme& each, token->lexemes()) {
+		each.set_disamb(true); // every lexeme is flagged before choosing
+	}
+	if (token->lexemes().size() <= 1) {
+		return; // zero or one lexeme: nothing to filter out
+	}
+	// keep just the lexemes that share the preferred lexeme's tag
+	const Corpus2::Tag chosen_tag = token->get_preferred_lexeme(tagset).tag();
+	std::vector<Lexeme> kept;
+	foreach (const Lexeme& each, token->lexemes()) {
+		if (each.tag() == chosen_tag) kept.push_back(each);
+	}
+	assert(!kept.empty()); // expected: the preferred lexeme is among them
+	token->replace_lexemes(kept);
+}
+
 void expand_optional_attrs(const Tagset& tagset, Token* token)
 {
 	foreach (Lexeme& lex, token->lexemes()) {
diff --git a/libcorpus2/tagging.h b/libcorpus2/tagging.h
index 59fbcd4..65c4b80 100644
--- a/libcorpus2/tagging.h
+++ b/libcorpus2/tagging.h
@@ -56,6 +56,17 @@ bool select_preferred_disamb(const Tagset& tagset, Token* token);
   */
 void select_preferred_lexeme(const Tagset& tagset, Token* token);
 
+/** Forces one DISAMB TAG per token. Works as select_preferred_disamb,
+  * but multiple disamb lexemes may be left, as long as they differ only
+  * in base forms. Returns true if any disamb lexeme was found.
+  */
+bool select_preferred_disamb_tag(const Tagset& tagset, Token* token);
+
+/** Forces one TAG per token. Works as select_preferred_lexeme, but multiple
+  * lexemes may be left, as long as they differ only in base forms.
+  */
+void select_preferred_tag(const Tagset& tagset, Token* token);
+
 /** Encodes optional attributes with unspecified values as each value set.
   * This is to facilitate safe masking when the value in question is not to be
   * skipped.
diff --git a/swig/tagging.i b/swig/tagging.i
index c09e2a1..0d65ecf 100644
--- a/swig/tagging.i
+++ b/swig/tagging.i
@@ -25,6 +25,10 @@ bool select_preferred_disamb(const Tagset& tagset, Token* token);
 
 void select_preferred_lexeme(const Tagset& tagset, Token* token);
 
+bool select_preferred_disamb_tag(const Tagset& tagset, Token* token);
+
+void select_preferred_tag(const Tagset& tagset, Token* token);
+
 void expand_optional_attrs(const Tagset& tagset, Token* token);
 
 void select_singular_tags(const Tagset& tagset, Token* token);
-- 
GitLab