diff --git a/libwccl/ops/functions/bool/predicates/weakagreement.cpp b/libwccl/ops/functions/bool/predicates/weakagreement.cpp index a8a8ce49c8e4c8e986b05c171c14602e06947da4..44bfb9e605a11fb2e0191758b014b7ba77a69cc5 100644 --- a/libwccl/ops/functions/bool/predicates/weakagreement.cpp +++ b/libwccl/ops/functions/bool/predicates/weakagreement.cpp @@ -50,40 +50,56 @@ WeakAgreement::BaseRetValPtr WeakAgreement::apply_internal(const FunExecContext& const boost::shared_ptr<const TSet>& attribs = attribs_expr_->apply(context); int min_card = attribs->categories_count(tagset_); - - for(int i = abs_left; i <= abs_right; ++i) { - bool i_has_matched_tag = false; - foreach (const Corpus2::Lexeme& i_lex, sc.at(i)->lexemes()) { - const Corpus2::Tag& i_tag = i_lex.tag(); - if (attribs->matching_categories(i_tag) >= min_card) { - i_has_matched_tag = true; - for(int j = abs_right; j > i; --j) { - bool i_agrees_with_j = false; - bool j_has_matched_tag = false; - foreach(const Corpus2::Lexeme& j_lex, sc.at(j)->lexemes()) { - const Corpus2::Tag& j_tag = j_lex.tag(); - if (attribs->matching_categories(i_tag) >= min_card) { - j_has_matched_tag = true; - Corpus2::Tag intersection = i_tag.get_masked(j_tag); - // if the intersection matches enough categories we have agreement - if (attribs->matching_categories(intersection) >= min_card) { - i_agrees_with_j = true; - break; + const Corpus2::Token* t1 = sc.at(abs_left); + const Corpus2::Token* t2 = sc.at(abs_right); + // to optimize a bit, make sure t1 is the one with less lexemes + if (t1->lexemes().size() > t2->lexemes().size()) { + std::swap(t1, t2); + } + // check strong agreement between range endpoints + // for each possible agreement between them, + // check if remaining tokens meet that agreement too, + // but take into account only tokens with a tag that + // has enough categories, ignoring the rest (hence "weak" + // agreement). + // return True if an agreement met by all selected tokens is found + foreach (const Corpus2::Lexeme& t1_lex, t1->lexemes()) { + const Corpus2::Tag& t1_tag = t1_lex.tag(); + // don't bother checking t2 unless current t1_tag matches enough categories + if (attribs->matching_categories(t1_tag) >= min_card) { + foreach (const Corpus2::Lexeme& t2_lex, t2->lexemes()) { + Corpus2::Tag inter = t1_tag.get_masked(t2_lex.tag()); + // if the intersection matches enough categories we have agreement + if (attribs->matching_categories(inter) >= min_card) { + // check if selected agreement is met by all remaining tokens + // but take into account only tokens with a tag that has + // proper amount of categories. + bool agreement_met = true; + for(int i = abs_left + 1; agreement_met && (i < abs_right); ++i) { + foreach(const Corpus2::Lexeme& i_lex, sc.at(i)->lexemes()) { + if (attribs->matching_categories(i_lex.tag()) >= min_card) { + // token has a tag matching enough categories, + // so now it has to meet the selected agreement + agreement_met = false; + Corpus2::Tag i_inter = i_lex.tag().get_masked(inter); + if (attribs->matching_categories(i_inter) >= min_card) { + agreement_met = true; + break; + } } } } - if (j_has_matched_tag && !i_agrees_with_j) { - return Predicate::False(context); + if (agreement_met) { + return Predicate::True(context); } } } } - if (!i_has_matched_tag && (i == abs_left || i == abs_right)) { - return Predicate::False(context); - } } - - return Predicate::True(context); + // None of possible agreements between endpoints carried over to all tokens + // that had enough categories specified, or there was no such possible + // agreement at all, so return False. + return Predicate::False(context); } } /* end ns Wccl */