diff --git a/libwccl/ops/functions/tset/agrfilter.cpp b/libwccl/ops/functions/tset/agrfilter.cpp index ce6911dbc1527afdb33294e02cf79194be703473..21b6661bfd8fb5c390170e30a624ef2b1dc39869 100644 --- a/libwccl/ops/functions/tset/agrfilter.cpp +++ b/libwccl/ops/functions/tset/agrfilter.cpp @@ -50,17 +50,67 @@ AgrFilter::BaseRetValPtr AgrFilter::apply_internal(const FunExecContext& context return detail::DefaultFunction<TSet>()->apply(context); } - const boost::shared_ptr<const TSet>& attribs_tset = attribs_expr_->apply(context); - const boost::shared_ptr<const TSet>& mask_tset = mask_expr_->apply(context); - const Corpus2::Tag& attribs = attribs_tset->get_value(); - const Corpus2::Tag& mask = mask_tset->get_value(); + const boost::shared_ptr<const TSet>& attribs = attribs_expr_->apply(context); + const boost::shared_ptr<const TSet>& mask = mask_expr_->apply(context); - boost::shared_ptr<TSet> tset = boost::make_shared<TSet>(); - // - //@ todo: implement - // - tset->contents().mask_with(mask); - return tset; + int min_card = attribs->categories_count(tagset_); + const Corpus2::Token* t1 = sc.at(abs_left); + const Corpus2::Token* t2 = sc.at(abs_right); + // to optimize a bit, make sure t1 is the one with less lexemes + if (t1->lexemes().size() > t2->lexemes().size()) { + std::swap(t1, t2); + } + + + boost::shared_ptr<TSet> agreements = boost::make_shared<TSet>(); + + // Check strong agreement between range endpoints. + // For each possible agreement between the endpoints, + // check if remaining tokens meet that agreement too, + // but instead of looking for strong agreement i.e. + // matching on exact number of categories, look for + // weak agreement i.e. matching only on those categories + // that are present. + // Specifically, if there is a lexeme that does not + // match any of the categories, that means the token + // does meet the weak agreement. + // For each agreement we take symbols that define the + // agreement, sum them up, apply the filter mask, and return. + foreach (const Corpus2::Lexeme& t1_lex, t1->lexemes()) { + const Corpus2::Tag& t1_tag = t1_lex.tag(); + // don't bother checking t2 unless current t1_tag matches enough categories + if (attribs->matching_categories(t1_tag) >= min_card) { + foreach (const Corpus2::Lexeme& t2_lex, t2->lexemes()) { + Corpus2::Tag inter = t1_tag.get_masked(t2_lex.tag()); + // if the intersection matches enough categories we have agreement + if (attribs->matching_categories(inter) >= min_card) { + // Check if selected agreement is met by all remaining tokens + bool agreement_met = true; + for(int i = abs_left + 1; agreement_met && (i < abs_right); ++i) { + foreach(const Corpus2::Lexeme& i_lex, sc.at(i)->lexemes()) { + // Check if agreement is met, but taking into account + // only categories actually matched in current tag, + // without requirement to match all categories in the + // agreement. + Corpus2::Tag i_inter = i_lex.tag().get_masked(inter); + agreement_met = + (attribs->matching_categories(i_lex.tag()) + == attribs->matching_categories(i_inter)); + if(agreement_met) { + break; + } + } + } + if (agreement_met) { + agreements->combine_with(inter); + } + } + } + } + } + + agreements->contents().mask_with(mask->get_value()); + return agreements; } } /* end ns Wccl */ diff --git a/tests/data/iterations.ccl b/tests/data/iterations.ccl index f50cd485c013ffd404e6627447eb7ee50440dd3a..41764311a8cae589c43385e6d20b317e3e43b1b9 100644 --- a/tests/data/iterations.ccl +++ b/tests/data/iterations.ccl @@ -82,9 +82,3 @@ atleast(9,13,$Loc, in(loc, cas[$Loc]), 4) False Loc=nowhere --- -position=-1 -llook(0,begin,$NomAcc, equal(cas[$NomAcc],{nom,acc})) - -False -NomAcc=nowhere ----