#include <libwccl/ops/functions/bool/predicates/weakagreement.h>
#include <libpwrutils/foreach.h>

namespace Wccl {

std::string WeakAgreement::to_string(const Corpus2::Tagset& tagset) const
{
	std::ostringstream ss;
	ss << name(tagset) << "("
		<< left_pos_expr_->to_string(tagset) << ", "
		<< right_pos_expr_->to_string(tagset) << ", "
		<< attribs_expr_->to_string(tagset) << ")";
	return ss.str();
}

std::ostream& WeakAgreement::write_to(std::ostream& os) const
{
	return os << raw_name() << "("
			<< *left_pos_expr_ << ", "
			<< *right_pos_expr_ << ", "
			<< *attribs_expr_ << ")";
}

WeakAgreement::BaseRetValPtr WeakAgreement::apply_internal(const FunExecContext& context) const
{
	const SentenceContext& sc = context.sentence_context();

	const boost::shared_ptr<const Position>& range_left = left_pos_expr_->apply(context);
	if (range_left->get_value() == Position::Nowhere) {
		return Predicate::False(context);
	}
	const boost::shared_ptr<const Position>& range_right = right_pos_expr_->apply(context);
	if (range_right->get_value() == Position::Nowhere) {
		return Predicate::False(context);
	}
	int abs_left, abs_right;
	if (!sc.validate_range(*range_left, *range_right, abs_left, abs_right)) {
		return Predicate::False(context);
	}
	const boost::shared_ptr<const TSet>& attribs = attribs_expr_->apply(context);

	int min_card = attribs->categories_count(tagset_);
	const Corpus2::Token* t1 = sc.at(abs_left);
	const Corpus2::Token* t2 = sc.at(abs_right);
	// to optimize a bit, make sure t1 is the one with less lexemes
	if (t1->lexemes().size() > t2->lexemes().size()) {
		std::swap(t1, t2);
	}
	// Check strong agreement between range endpoints.
	// For each possible agreement between the endpoints,
	// check if remaining tokens meet that agreement too,
	// but instead of looking for strong agreement i.e.
	// matching on exact number of categories, look for
	// weak agreement i.e. matching only on those categories
	// that are present.
	// Specifically, if there is a lexeme that does not
	// match any of the categories, that means the token
	// does meet the weak agreement.
	foreach (const Corpus2::Lexeme& t1_lex, t1->lexemes()) {
		const Corpus2::Tag& t1_tag = t1_lex.tag();
		// don't bother checking t2 unless current t1_tag matches enough categories
		if (attribs->matching_categories(t1_tag) >= min_card) {
			foreach (const Corpus2::Lexeme& t2_lex, t2->lexemes()) {
				Corpus2::Tag inter = t1_tag.get_masked(t2_lex.tag());
				// if the intersection matches enough categories we have agreement
				if (attribs->matching_categories(inter) >= min_card) {
					// Check if selected agreement is met by all remaining tokens
					bool agreement_met = true;
					for(int i = abs_left + 1; agreement_met && (i < abs_right); ++i) {
						foreach(const Corpus2::Lexeme& i_lex, sc.at(i)->lexemes()) {
							// Check if agreement is met, but taking into account
							// only categories actually matched in current tag,
							// without requirement to match all categories in the
							// agreement.
							Corpus2::Tag i_inter = i_lex.tag().get_masked(inter);
							agreement_met =
								(attribs->matching_categories(i_lex.tag())
								 == attribs->matching_categories(i_inter));
							if(agreement_met) {
								break;
							}
						}
					}
					if (agreement_met) {
						return Predicate::True(context);
					}
				}
			}
		}
	}
	// None of possible agreements between endpoints carried over to all tokens
	// that had enough categories specified, or there was no such possible
	// agreement at all, so return False.
	return Predicate::False(context);
}

} /* end ns Wccl */