From 1e850797d3b9b9e154a050c695d6f9368d3f6fcf Mon Sep 17 00:00:00 2001
From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl>
Date: Fri, 15 Jul 2011 13:58:14 +0200
Subject: [PATCH] disambiguation functions

---
 libcorpus2/tagging.cpp | 34 ++++++++++++++++++++++++++++++++++
 libcorpus2/tagging.h   | 18 ++++++++++++++++++
 swig/tagging.i         |  4 ++++
 3 files changed, 56 insertions(+)

diff --git a/libcorpus2/tagging.cpp b/libcorpus2/tagging.cpp
index 6730461..d009c95 100644
--- a/libcorpus2/tagging.cpp
+++ b/libcorpus2/tagging.cpp
@@ -91,4 +91,38 @@ void select_singular_tags(const Tagset& tagset, Token* token)
 	}
 }
 
+bool disambiguate_equal(Token* token, const Tag& mask_where,
+						const Tag& mask_wanted)
+{
+	// Gather the lexemes whose tag, masked by mask_where, equals mask_wanted.
+	std::vector<Lexeme> kept;
+	foreach (const Lexeme& candidate, token->lexemes()) {
+		if (candidate.tag().get_masked(mask_where) == mask_wanted) {
+			kept.push_back(candidate);
+		}
+	}
+	if (!kept.empty()) {
+		token->replace_lexemes(kept);
+		return true;
+	}
+	return false; // nothing matched: the token is not modified
+}
+
+bool disambiguate_subset(Token* token, const Tag& mask_where,
+						const Tag& mask_wanted)
+{
+	std::vector<Lexeme> kept; // lexemes passing the subset test below
+	foreach (const Lexeme& candidate, token->lexemes()) {
+		Tag projected = candidate.tag().get_masked(mask_where);
+		if (projected.get_masked(mask_wanted) == projected) {
+			kept.push_back(candidate);
+		}
+	}
+	if (!kept.empty()) {
+		token->replace_lexemes(kept);
+		return true;
+	}
+	return false; // nothing matched: the token is not modified
+}
+
 } /* end ns Corpus2 */
diff --git a/libcorpus2/tagging.h b/libcorpus2/tagging.h
index 16ed943..0cbb443 100644
--- a/libcorpus2/tagging.h
+++ b/libcorpus2/tagging.h
@@ -61,6 +61,24 @@ void expand_unspec_attrs(const Tagset& tagset, Token* token);
   */
 void select_singular_tags(const Tagset& tagset, Token* token);
 
+/** Tries to select only those lexemes whose tags projected onto mask_where
+  * have exactly the value given in mask_wanted. E.g. pass a whole attribute
+  * as mask_where and a particular desired value as mask_wanted. If no lexeme
+  * satisfies the constraint, the token is left intact.
+  * @return true if any lexeme matched (the token then keeps only those)
+  */
+bool disambiguate_equal(Token* token, const Tag& mask_where,
+						const Tag& mask_wanted);
+
+/** Tries to select only those lexemes whose tags projected onto mask_where
+  * have a value that is a subset of mask_wanted. E.g. pass a noun + gerund
+  * mask and have both left. NOTE: this may be inconvenient for dealing with
+  * optional attributes. If no lexeme satisfies the constraint, the token is
+  * left intact.
+  * @return true if any lexeme matched (the token then keeps only those)
+  */
+bool disambiguate_subset(Token* token, const Tag& mask_where,
+						const Tag& mask_wanted);
 
 } /* end ns Corpus2 */
 
diff --git a/swig/tagging.i b/swig/tagging.i
index 5ae141d..96d1bb8 100644
--- a/swig/tagging.i
+++ b/swig/tagging.i
@@ -27,6 +27,10 @@ void expand_unspec_attrs(const Tagset& tagset, Token* token);
 
 void select_singular_tags(const Tagset& tagset, Token* token);
 
+bool disambiguate_equal(Token* token, const Tag& mask_where, const Tag& mask_wanted); // mirrors the declaration in libcorpus2/tagging.h
+
+bool disambiguate_subset(Token* token, const Tag& mask_where, const Tag& mask_wanted); // mirrors the declaration in libcorpus2/tagging.h
+
 }
 
 using namespace std;
-- 
GitLab