// [repository web-view residue, not part of the original source file]
// Skip to content
// Snippets Groups Projects
// mwe.cpp 3.56 KiB
// Newer Older
// Bartosz Broda's avatar
// Bartosz Broda committed
#include "mwe.h"
#include <boost/algorithm/string.hpp>
#include <libwccl/values/strset.h>
#include <boost/algorithm/string/predicate.hpp>
// [web-view residue] Bartosz Broda's avatar
// [web-view residue] Bartosz Broda committed

namespace Corpus2{

/**
 * Constructs a lexical unit.
 *
 * @param base      base form of the unit; stored and used in error messages
 * @param condition boolean operator applied to a sentence context in IsHere()
 * @param head_cond boolean operator used in IsHere() to pick the head position
 * @param variables name -> value pairs; each value is converted to a
 *                  Wccl::StrSet (via insert_utf8) and stored under its name
 */
LexicalUnit::LexicalUnit(const std::string &base,
						 LexicalUnit::BoolOpPtr condition,
						 LexicalUnit::BoolOpPtr head_cond,
						 LexicalUnit::strmap variables)
	: condition_(condition),
	  head_cond_(head_cond),
	  base_(base),
	  nowhere_(Wccl::Position())
{
	// wrap every variable value in a single-element string set
	for(strmap::iterator iter = variables.begin();
		iter != variables.end(); ++iter){
		Wccl::StrSet ss;
		ss.insert_utf8(iter->second);
		variables_[iter->first] = ss;
	}
}

/**
 * Checks whether this lexical unit occurs in the given sentence context.
 *
 * Stored variables whose names do not start with '!' are set on the
 * condition operator, which is then applied to sc. When the operator
 * yields true, every operator variable whose name starts with "Pos" is read
 * as a position, translated to an absolute position with
 * sc.get_abs_position() and inserted into out_position. The first such
 * position at which head_cond_ holds is reported through head_pos.
 *
 * @param sc           sentence context the condition is applied to
 * @param out_position receives the absolute positions read from "Pos*"
 *                     variables (may be partially filled if an exception
 *                     is thrown)
 * @param head_pos     receives the absolute position of the head
 * @return false when the condition does not hold; true otherwise
 * @throws Wccl::WcclError when a "Pos*" variable equals nowhere_, or when
 *         no position satisfies head_cond_
 */
bool LexicalUnit::IsHere(const Wccl::SentenceContext &sc,
					std::set<int> &out_position, int &head_pos) const
{
	// set variables, skip vars with names starting with '!'
	for(variables_map::const_iterator ivars = variables_.begin();
		ivars != variables_.end(); ++ivars){
		if(!boost::algorithm::starts_with(ivars->first, "!")){
			condition_->set<Wccl::StrSet>(ivars->first, ivars->second);
		}
	}

	// fire up the operator
	boost::shared_ptr<const Wccl::Bool> pResult = condition_->apply(sc);
	if(!pResult->get_value())
		return false;

	bool found_head = false;

	// separate context over the same sentence, repositioned for head checks
	Wccl::SentenceContext sc2(sc.get_sentence_ptr());

	// fill up positions
	foreach(const std::string&varname, condition_->valid_variable_names()){
		if(boost::algorithm::starts_with(varname, "Pos")){
			Wccl::Position pos = condition_->get<Wccl::Position>(varname);
			if(pos.equals(nowhere_)){
				std::string errmsg("Position for found MWE cannot be zero.");
				errmsg += " Offending unit: " + base_;
				throw Wccl::WcclError(errmsg);
			}
			int abs_pos = sc.get_abs_position(pos);
			out_position.insert( abs_pos );
			// only the first position satisfying head_cond_ becomes the head
			if(!found_head){
				sc2.set_position(abs_pos);
				if(head_cond_->apply(sc2)->get_value()){
					head_pos = abs_pos;
					found_head = true;
				}
			}
		}
	}
	if(!found_head){
		std::string errmsg("MWE found, but no head for it.");
		errmsg += " Offending unit: " + base_;
		throw Wccl::WcclError(errmsg);
	}

	// NOTE(review): the function tail was missing from the reviewed text;
	// returning true here is the restored success path — confirm against VCS.
	return true;
}

////////////////////////////////////////////////////////////////////
// TODO: really needed?

/**
 * Constructs a FixedLU; all arguments are passed unchanged to the
 * LexicalUnit constructor and no further initialisation is performed.
 */
FixedLU::FixedLU(
	const std::string &base,
	boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition,
	boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
	std::map<std::string, std::string> variables)
	: LexicalUnit(base, condition, head_cond, variables)
{
	// nothing beyond base-class construction
}

/**
 * Constructs a FlexLU; all arguments are passed unchanged to the
 * LexicalUnit constructor and no further initialisation is performed.
 */
FlexLU::FlexLU(
	const std::string &base,
	boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition,
	boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
	std::map<std::string, std::string> variables)
	: LexicalUnit(base, condition, head_cond, variables)
{
	// nothing beyond base-class construction
}

////////////////////////////////////////////////////////////////////
/// Creates an index with value-initialised index_ and empty_ members.
MWEIndex::MWEIndex() : index_(), empty_()
{
	// noop
}
/**
 * Registers a lexical unit under each of its potential bases.
 *
 * For every base returned by lu->get_potential_bases() the unit is appended
 * to the list stored for that base; a new list is created when the base has
 * not been seen before.
 *
 * @param lu lexical unit to add
 */
void MWEIndex::add_lexicalunit(LexicalUnit::Ptr lu)
{
	foreach(const std::string& base, lu->get_potential_bases()){
		// operator[] creates an empty list on first use of a base,
		// so both the "new" and "existing" cases are a plain append
		index_[base].push_back(lu);
	}
}

/**
 * Looks up the lexical units registered under the given base.
 *
 * @param base base form to look up
 * @return reference to the list stored for base, or to the empty_ member
 *         when nothing is stored for it
 */
const MWEIndex::luvec& MWEIndex::get_potential_lu(const std::string &base){
	value_type::iterator found = index_.find(base);
	if(found == index_.end()){ // not found -> return empty
		return empty_;
	}

	return found->second;
}

// [web-view residue] Bartosz Broda's avatar
// [web-view residue] Bartosz Broda committed
}//ns Corpus2