From 340bbcacb00fc7c6cca5ec343d3b4805334b4e0f Mon Sep 17 00:00:00 2001
From: Adam Wardynski <award@.(win7-laptop)>
Date: Thu, 9 Dec 2010 21:03:00 +0100
Subject: [PATCH] Rework strong agreement operator.

Clearer and better performing version, also more likely to actually
work as expected :)
---
 .../bool/predicates/strongagreement.cpp       | 54 +++++++++++--------
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/libwccl/ops/functions/bool/predicates/strongagreement.cpp b/libwccl/ops/functions/bool/predicates/strongagreement.cpp
index 4081562..a29be8d 100644
--- a/libwccl/ops/functions/bool/predicates/strongagreement.cpp
+++ b/libwccl/ops/functions/bool/predicates/strongagreement.cpp
@@ -52,34 +52,46 @@ StrongAgreement::BaseRetValPtr StrongAgreement::apply_internal(const FunExecCont
 
 	int min_card = attribs->categories_count(tagset_);
 
-	for(int i = abs_left; i <= abs_right; ++i) {
-		bool i_has_matched_tag = false;
-		foreach (const Corpus2::Lexeme& i_lex, sc.at(i)->lexemes()) {
-			const Corpus2::Tag& i_tag = i_lex.tag();
-			if (attribs->matching_categories(i_tag) >= min_card) {
-				i_has_matched_tag = true;
-				for(int j = abs_right; j > i; --j) {
-					bool i_agrees_with_j = false;
-					foreach(const Corpus2::Lexeme& j_lex, sc.at(j)->lexemes()) {
-						Corpus2::Tag intersection = i_tag.get_masked(j_lex.tag());
-						// if the intersection matches enough categories we have agreement
-						if (attribs->matching_categories(intersection) >= min_card) {
-							i_agrees_with_j = true;
-							break;
+	const Corpus2::Token* t1 = sc.at(abs_left);
+	const Corpus2::Token* t2 = sc.at(abs_right);
+	// to optimize a bit, make sure t1 is the one with less lexemes
+	if (t1->lexemes().size() > t2->lexemes().size()) {
+		std::swap(t1, t2);
+	}
+	// check strong agreement between range endpoints
+	// for each possible agreement between them,
+	// check if remaining tokens meet that agreement too
+	// return true if an agreement met by all tokens is found
+	foreach (const Corpus2::Lexeme& t1_lex, t1->lexemes()) {
+		const Corpus2::Tag& t1_tag = t1_lex.tag();
+		// don't bother checking t2 unless current t1_tag matches enough categories
+		if (attribs->matching_categories(t1_tag) >= min_card) {
+			foreach (const Corpus2::Lexeme& t2_lex, t2->lexemes()) {
+				Corpus2::Tag inter = t1_tag.get_masked(t2_lex.tag());
+				// if the intersection matches enough categories we have agreement
+				if (attribs->matching_categories(inter) >= min_card) {
+					// check if selected agreement is met by all remaining tokens (a token meets it when at least one of its lexemes still matches enough categories after masking)
+					bool agreement_met = true;
+					for(int i = abs_left + 1; agreement_met && (i < abs_right); ++i) {
+						agreement_met = false;
+						foreach(const Corpus2::Lexeme& i_lex, sc.at(i)->lexemes()) {
+							Corpus2::Tag i_inter = i_lex.tag().get_masked(inter);
+							if (attribs->matching_categories(i_inter) >= min_card) {
+								agreement_met = true;
+								break;
+							}
 						}
 					}
-					if (!i_agrees_with_j) {
-						return Predicate::False(context);
+					if (agreement_met) {
+						return Predicate::True(context);
 					}
 				}
 			}
 		}
-		if (!i_has_matched_tag) {
-			return Predicate::False(context);
-		}
 	}
-
-	return Predicate::True(context);
+	// None of possible agreements between endpoints carried over to all tokens
+	// or there was no such possible agreement at all, so return False.
+	return Predicate::False(context);
 }
 
 } /* end ns Wccl */
-- 
GitLab