Skip to content
Snippets Groups Projects
Commit 911db340 authored by Adam Wardynski's avatar Adam Wardynski
Browse files

agrfltr implementation.

parent d07a58cf
Branches
No related merge requests found
......@@ -50,17 +50,67 @@ AgrFilter::BaseRetValPtr AgrFilter::apply_internal(const FunExecContext& context
return detail::DefaultFunction<TSet>()->apply(context);
}
const boost::shared_ptr<const TSet>& attribs_tset = attribs_expr_->apply(context);
const boost::shared_ptr<const TSet>& mask_tset = mask_expr_->apply(context);
const Corpus2::Tag& attribs = attribs_tset->get_value();
const Corpus2::Tag& mask = mask_tset->get_value();
const boost::shared_ptr<const TSet>& attribs = attribs_expr_->apply(context);
const boost::shared_ptr<const TSet>& mask = mask_expr_->apply(context);
boost::shared_ptr<TSet> tset = boost::make_shared<TSet>();
//
//@ todo: implement
//
tset->contents().mask_with(mask);
return tset;
int min_card = attribs->categories_count(tagset_);
const Corpus2::Token* t1 = sc.at(abs_left);
const Corpus2::Token* t2 = sc.at(abs_right);
// to optimize a bit, make sure t1 is the one with fewer lexemes
if (t1->lexemes().size() > t2->lexemes().size()) {
std::swap(t1, t2);
}
boost::shared_ptr<TSet> agreements = boost::make_shared<TSet>();
// Check strong agreement between range endpoints.
// For each possible agreement between the endpoints,
// check if remaining tokens meet that agreement too,
// but instead of looking for strong agreement i.e.
// matching on exact number of categories, look for
// weak agreement i.e. matching only on those categories
// that are present.
// Specifically, a lexeme that does not match any of the
// agreement categories trivially satisfies the weak
// agreement, so its token is considered to meet it.
// For each agreement we take symbols that define the
// agreement, sum them up, apply the filter mask, and return.
foreach (const Corpus2::Lexeme& t1_lex, t1->lexemes()) {
const Corpus2::Tag& t1_tag = t1_lex.tag();
// don't bother checking t2 unless current t1_tag matches enough categories
if (attribs->matching_categories(t1_tag) >= min_card) {
foreach (const Corpus2::Lexeme& t2_lex, t2->lexemes()) {
Corpus2::Tag inter = t1_tag.get_masked(t2_lex.tag());
// if the intersection matches enough categories we have agreement
if (attribs->matching_categories(inter) >= min_card) {
// Check if selected agreement is met by all remaining tokens
bool agreement_met = true;
for(int i = abs_left + 1; agreement_met && (i < abs_right); ++i) {
foreach(const Corpus2::Lexeme& i_lex, sc.at(i)->lexemes()) {
// Check if agreement is met, but taking into account
// only categories actually matched in current tag,
// without requirement to match all categories in the
// agreement.
Corpus2::Tag i_inter = i_lex.tag().get_masked(inter);
agreement_met =
(attribs->matching_categories(i_lex.tag())
== attribs->matching_categories(i_inter));
if(agreement_met) {
break;
}
}
}
if (agreement_met) {
agreements->combine_with(inter);
}
}
}
}
}
agreements->contents().mask_with(mask->get_value());
return agreements;
}
} /* end ns Wccl */
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment