Skip to content
Snippets Groups Projects
Commit 911db340 authored by Adam Wardynski's avatar Adam Wardynski
Browse files

agrfltr implementation.

parent d07a58cf
Branches
No related tags found
No related merge requests found
...@@ -50,17 +50,67 @@ AgrFilter::BaseRetValPtr AgrFilter::apply_internal(const FunExecContext& context ...@@ -50,17 +50,67 @@ AgrFilter::BaseRetValPtr AgrFilter::apply_internal(const FunExecContext& context
return detail::DefaultFunction<TSet>()->apply(context); return detail::DefaultFunction<TSet>()->apply(context);
} }
const boost::shared_ptr<const TSet>& attribs_tset = attribs_expr_->apply(context); const boost::shared_ptr<const TSet>& attribs = attribs_expr_->apply(context);
const boost::shared_ptr<const TSet>& mask_tset = mask_expr_->apply(context); const boost::shared_ptr<const TSet>& mask = mask_expr_->apply(context);
const Corpus2::Tag& attribs = attribs_tset->get_value();
const Corpus2::Tag& mask = mask_tset->get_value();
boost::shared_ptr<TSet> tset = boost::make_shared<TSet>(); int min_card = attribs->categories_count(tagset_);
// const Corpus2::Token* t1 = sc.at(abs_left);
//@ todo: implement const Corpus2::Token* t2 = sc.at(abs_right);
// // to optimize a bit, make sure t1 is the one with less lexemes
tset->contents().mask_with(mask); if (t1->lexemes().size() > t2->lexemes().size()) {
return tset; std::swap(t1, t2);
}
boost::shared_ptr<TSet> agreements = boost::make_shared<TSet>();
// Check strong agreement between range endpoints.
// For each possible agreement between the endpoints,
// check if remaining tokens meet that agreement too,
// but instead of looking for strong agreement i.e.
// matching on exact number of categories, look for
// weak agreement i.e. matching only on those categories
// that are present.
// Specifically, if a token has a lexeme that does not
// match any of the categories at all, that token is
// still considered to meet the weak agreement.
// For each agreement we take symbols that define the
// agreement, sum them up, apply the filter mask, and return.
foreach (const Corpus2::Lexeme& t1_lex, t1->lexemes()) {
const Corpus2::Tag& t1_tag = t1_lex.tag();
// don't bother checking t2 unless current t1_tag matches enough categories
if (attribs->matching_categories(t1_tag) >= min_card) {
foreach (const Corpus2::Lexeme& t2_lex, t2->lexemes()) {
Corpus2::Tag inter = t1_tag.get_masked(t2_lex.tag());
// if the intersection matches enough categories we have agreement
if (attribs->matching_categories(inter) >= min_card) {
// Check if selected agreement is met by all remaining tokens
bool agreement_met = true;
for(int i = abs_left + 1; agreement_met && (i < abs_right); ++i) {
foreach(const Corpus2::Lexeme& i_lex, sc.at(i)->lexemes()) {
// Check if agreement is met, but taking into account
// only categories actually matched in current tag,
// without requirement to match all categories in the
// agreement.
Corpus2::Tag i_inter = i_lex.tag().get_masked(inter);
agreement_met =
(attribs->matching_categories(i_lex.tag())
== attribs->matching_categories(i_inter));
if(agreement_met) {
break;
}
}
}
if (agreement_met) {
agreements->combine_with(inter);
}
}
}
}
}
agreements->contents().mask_with(mask->get_value());
return agreements;
} }
} /* end ns Wccl */ } /* end ns Wccl */
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment