Skip to content
Snippets Groups Projects
Commit 76f1dc70 authored by Adam Wardynski's avatar Adam Wardynski
Browse files

Rework weak agreement in a similar fashion to the strong one.

parent 5e1129d5
Branches
No related merge requests found
......@@ -50,40 +50,56 @@ WeakAgreement::BaseRetValPtr WeakAgreement::apply_internal(const FunExecContext&
const boost::shared_ptr<const TSet>& attribs = attribs_expr_->apply(context);
int min_card = attribs->categories_count(tagset_);
for(int i = abs_left; i <= abs_right; ++i) {
bool i_has_matched_tag = false;
foreach (const Corpus2::Lexeme& i_lex, sc.at(i)->lexemes()) {
const Corpus2::Tag& i_tag = i_lex.tag();
if (attribs->matching_categories(i_tag) >= min_card) {
i_has_matched_tag = true;
for(int j = abs_right; j > i; --j) {
bool i_agrees_with_j = false;
bool j_has_matched_tag = false;
foreach(const Corpus2::Lexeme& j_lex, sc.at(j)->lexemes()) {
const Corpus2::Tag& j_tag = j_lex.tag();
if (attribs->matching_categories(i_tag) >= min_card) {
j_has_matched_tag = true;
Corpus2::Tag intersection = i_tag.get_masked(j_tag);
// if the intersection matches enough categories we have agreement
if (attribs->matching_categories(intersection) >= min_card) {
i_agrees_with_j = true;
break;
const Corpus2::Token* t1 = sc.at(abs_left);
const Corpus2::Token* t2 = sc.at(abs_right);
// to optimize a bit, make sure t1 is the one with fewer lexemes
if (t1->lexemes().size() > t2->lexemes().size()) {
std::swap(t1, t2);
}
// check strong agreement between range endpoints
// for each possible agreement between them,
// check if remaining tokens meet that agreement too,
// but take into account only tokens with a tag that
// has enough categories, ignoring the rest (hence "weak"
// agreement).
// return True if an agreement met by all selected tokens is found
foreach (const Corpus2::Lexeme& t1_lex, t1->lexemes()) {
const Corpus2::Tag& t1_tag = t1_lex.tag();
// don't bother checking t2 unless current t1_tag matches enough categories
if (attribs->matching_categories(t1_tag) >= min_card) {
foreach (const Corpus2::Lexeme& t2_lex, t2->lexemes()) {
Corpus2::Tag inter = t1_tag.get_masked(t2_lex.tag());
// if the intersection matches enough categories we have agreement
if (attribs->matching_categories(inter) >= min_card) {
// check if selected agreement is met by all remaining tokens
// but take into account only tokens with a tag that has
// proper amount of categories.
bool agreement_met = true;
for(int i = abs_left + 1; agreement_met && (i < abs_right); ++i) {
foreach(const Corpus2::Lexeme& i_lex, sc.at(i)->lexemes()) {
if (attribs->matching_categories(i_lex.tag()) >= min_card) {
// token has a tag matching enough categories,
// so now it has to meet the selected agreement
agreement_met = false;
Corpus2::Tag i_inter = i_lex.tag().get_masked(inter);
if (attribs->matching_categories(i_inter) >= min_card) {
agreement_met = true;
break;
}
}
}
}
if (j_has_matched_tag && !i_agrees_with_j) {
return Predicate::False(context);
if (agreement_met) {
return Predicate::True(context);
}
}
}
}
if (!i_has_matched_tag && (i == abs_left || i == abs_right)) {
return Predicate::False(context);
}
}
return Predicate::True(context);
// None of the possible agreements between the endpoints carried over to all
// tokens that had enough categories specified, or there was no such possible
// agreement at all, so return False.
return Predicate::False(context);
}
} /* end ns Wccl */
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment