#include <libwccl/ops/actions/unify.h>
#include <libpwrutils/foreach.h>
#include <sstream>

namespace Wccl {

Bool Unify::execute(const ActionExecContext& context) const
{
	Bool changed(false);
	SentenceContext& sc = context.sentence_context();

	const boost::shared_ptr<const Position>& range_left = pos_begin_->apply(context);
	if (range_left->get_value() == Position::Nowhere) {
		return Bool(false);
	}
	const boost::shared_ptr<const Position>& range_right = pos_end_->apply(context);
	if (range_right->get_value() == Position::Nowhere) {
		return Bool(false);
	}
	int abs_left, abs_right;
	if (!sc.validate_range(*range_left, *range_right, abs_left, abs_right)) {
		return Bool(false);
	}

	const boost::shared_ptr<const TSet>& attribs = attribs_expr_->apply(context);

	std::vector<std::vector<Corpus2::Lexeme> > remainings;
	std::vector<std::vector<Corpus2::Lexeme> > underspecifieds;

	// first pass - cut lexemes that don't match agreement attribs at all
	for (int i = abs_left; i <= abs_right; ++i) {
		Corpus2::Token& curr_tok = *sc.at(i);
		std::vector<Corpus2::Lexeme> remaining;
		std::vector<Corpus2::Lexeme> underspecified;
		foreach(Corpus2::Lexeme& lexeme, curr_tok.lexemes()) {
			if (attribs->matching_categories(lexeme.tag()) > 0) {
				remaining.push_back(lexeme);
			} else {
				underspecified.push_back(lexeme);
			}
		}
		remainings.push_back(remaining);
		underspecifieds.push_back(underspecified);
	}
	// second pass - leave only lexemes that are in agreement
	// with at least one lexeme from previous non-empty-after-1st-pass token
	int previous = remainings.size() - 1;
	// at first, "previous" is the last token not emptied after 1st pass
	while ((previous > 0) && (remainings[previous].empty())) {
		previous--;
	}
	for (size_t i = 0; i < remainings.size(); ++i) {
		// proceed only for tokens that have lexemes that could go into agreement
		// (i.e. are not emptied after 1st pass)
		if (!remainings[i].empty()) {
			const std::vector<Corpus2::Lexeme>& prev_rem = remainings[previous];
			std::vector<Corpus2::Lexeme> curr_rem;
			// get only lexemes that agree with a lexeme from previous token
			foreach(const Corpus2::Lexeme& curr_lex, remainings[i]) {
				foreach(const Corpus2::Lexeme& prev_lex, prev_rem) {
					Corpus2::Tag inter = curr_lex.tag().get_masked(prev_lex.tag());
					if (attribs->matching_categories(inter) == attribs->matching_categories(curr_lex.tag())) {
						curr_rem.push_back(curr_lex);
						break;
					}
				}
			}
			if (curr_rem.empty()) {
				if (underspecifieds[i].empty()) {
					// there is no agreement possible, abort with no changes
					changed.set_value(false);
					return changed;
				}
			} else {
				// some non-underspecified lexemes are left, so
				// next time around, "previous" token is going to be the current one
				previous = i;
			}
			// there were changes only if we actually removed something
			if (curr_rem.size() != remainings[i].size()) {
				changed.set_value(true);
				remainings[i] = curr_rem;
			}
		}
	}
	// finally assign remaining lexemes to original tokens
	if (changed.get_value()) {
		for (size_t i = 0; i < remainings.size(); ++i) {
			std::vector<Corpus2::Lexeme>& lexemes = sc.at(i + abs_left)->lexemes();
			lexemes = remainings[i];
			// underspecced lexemes meet agreement too, so leave them in the end, if any
			foreach (const Corpus2::Lexeme& lex, underspecifieds[i]) {
				lexemes.push_back(lex);
			}
		}
	}
	return changed;
}

std::string Unify::to_string(const Corpus2::Tagset& tagset) const
{
	std::ostringstream os;
	os << name() << "(" << pos_begin_->to_string(tagset) << ", "
			<< pos_end_->to_string(tagset) << ", "
			<< attribs_expr_->to_string(tagset) << ")";
	return os.str();
}

std::ostream& Unify::write_to(std::ostream& os) const
{
	os << name() << "(" << *pos_begin_ << ", " << *pos_end_ << ", " << *attribs_expr_ << ")";
	return os;
}


} /* end ns Wccl */