From 340bbcacb00fc7c6cca5ec343d3b4805334b4e0f Mon Sep 17 00:00:00 2001
From: Adam Wardynski <award@.(win7-laptop)>
Date: Thu, 9 Dec 2010 21:03:00 +0100
Subject: [PATCH] Rework strong agreement operator. Clearer and better
 performing version, also more likely to actually work as expected :)

---
 .../bool/predicates/strongagreement.cpp       | 54 +++++++++++--------
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/libwccl/ops/functions/bool/predicates/strongagreement.cpp b/libwccl/ops/functions/bool/predicates/strongagreement.cpp
index 4081562..a29be8d 100644
--- a/libwccl/ops/functions/bool/predicates/strongagreement.cpp
+++ b/libwccl/ops/functions/bool/predicates/strongagreement.cpp
@@ -52,34 +52,46 @@ StrongAgreement::BaseRetValPtr StrongAgreement::apply_internal(const FunExecCont
 
 	int min_card = attribs->categories_count(tagset_);
 
-	for(int i = abs_left; i <= abs_right; ++i) {
-		bool i_has_matched_tag = false;
-		foreach (const Corpus2::Lexeme& i_lex, sc.at(i)->lexemes()) {
-			const Corpus2::Tag& i_tag = i_lex.tag();
-			if (attribs->matching_categories(i_tag) >= min_card) {
-				i_has_matched_tag = true;
-				for(int j = abs_right; j > i; --j) {
-					bool i_agrees_with_j = false;
-					foreach(const Corpus2::Lexeme& j_lex, sc.at(j)->lexemes()) {
-						Corpus2::Tag intersection = i_tag.get_masked(j_lex.tag());
-						// if the intersection matches enough categories we have agreement
-						if (attribs->matching_categories(intersection) >= min_card) {
-							i_agrees_with_j = true;
-							break;
+	const Corpus2::Token* t1 = sc.at(abs_left);
+	const Corpus2::Token* t2 = sc.at(abs_right);
+	// to optimize a bit, make sure t1 is the one with less lexemes
+	if (t1->lexemes().size() > t2->lexemes().size()) {
+		std::swap(t1, t2);
+	}
+	// check strong agreement between range endpoints
+	// for each possible agreement between them,
+	// check if remaining tokens meet that agreement too
+	// return true if an agreement met by all tokens is found
+	foreach (const Corpus2::Lexeme& t1_lex, t1->lexemes()) {
+		const Corpus2::Tag& t1_tag = t1_lex.tag();
+		// don't bother checking t2 unless current t1_tag matches enough categories
+		if (attribs->matching_categories(t1_tag) >= min_card) {
+			foreach (const Corpus2::Lexeme& t2_lex, t2->lexemes()) {
+				Corpus2::Tag inter = t1_tag.get_masked(t2_lex.tag());
+				// if the intersection matches enough categories we have agreement
+				if (attribs->matching_categories(inter) >= min_card) {
+					// check if selected agreement is met by all remaining tokens
+					bool agreement_met = true;
+					for(int i = abs_left + 1; agreement_met && (i < abs_right); ++i) { // inner tokens only; endpoints already agree on `inter`
+						agreement_met = false; // token i must prove agreement through at least one of its lexemes
+						foreach(const Corpus2::Lexeme& i_lex, sc.at(i)->lexemes()) {
+							Corpus2::Tag i_inter = i_lex.tag().get_masked(inter);
+							if (attribs->matching_categories(i_inter) >= min_card) { // same criterion as the endpoint check above
+								agreement_met = true;
+								break;
+							}
 						}
 					}
-					if (!i_agrees_with_j) {
-						return Predicate::False(context);
+					if (agreement_met) {
+						return Predicate::True(context);
 					}
 				}
 			}
 		}
-		if (!i_has_matched_tag) {
-			return Predicate::False(context);
-		}
 	}
-
-	return Predicate::True(context);
+	// None of possible agreements between endpoints carried over to all tokens
+	// or there was no such possible agreement at all, so return False.
+	return Predicate::False(context);
 }
 
 } /* end ns Wccl */
-- 
GitLab