#include "mwe.h"
#include <boost/algorithm/string.hpp>
#include <libwccl/values/strset.h>
#include <boost/algorithm/string/predicate.hpp>

namespace Corpus2{

/**
 * Creates a lexical unit (multi-word expression description).
 *
 * @param base       base form of the unit; also used in error messages
 * @param condition  boolean operator that decides whether the unit occurs
 *                   at a given sentence position
 * @param head_cond  boolean operator used to pick the head token among the
 *                   positions matched by @c condition
 * @param variables  variable name -> value map. Every value is recorded as
 *                   a potential base (used for indexing) and stored as a
 *                   single-element Wccl::StrSet under the variable's name.
 */
LexicalUnit::LexicalUnit(const std::string &base,
        LexicalUnit::BoolOpPtr condition,
        LexicalUnit::BoolOpPtr head_cond,
        LexicalUnit::strmap variables)
    : condition_(condition),
      head_cond_(head_cond),
      base_(base),
      nowhere_(Wccl::Position())
{
    for(strmap::const_iterator iter = variables.begin();
            iter != variables.end(); ++iter){
        potential_bases_.insert(iter->second);

        Wccl::StrSet ss;
        ss.insert_utf8(iter->second);
        variables_[iter->first] = ss;
    }
}

/**
 * Checks whether this unit occurs at the current position of @p sc.
 *
 * @param sc            sentence context the condition is applied to
 * @param out_position  receives the absolute position of every condition
 *                      variable whose name starts with "Pos"
 * @param head_pos      receives the absolute position of the first such
 *                      variable (in the order reported by
 *                      valid_variable_names()) for which the head
 *                      condition holds
 * @return false when the condition does not hold (outputs untouched),
 *         true when the unit was found and a head was determined
 * @throws Wccl::WcclError when a "Pos" variable equals the default
 *         ("nowhere") position, or when no matched position satisfies the
 *         head condition. Note that @p out_position may already have been
 *         partially filled when the exception is thrown.
 */
bool LexicalUnit::IsHere(const Wccl::SentenceContext &sc,
        std::set<int> &out_position, int &head_pos) const
{
    // set variables, skip vars with names starting with '!'
    for(variables_map::const_iterator ivars = variables_.begin();
            ivars != variables_.end(); ++ivars){
        if(!boost::starts_with(ivars->first, "!")){
            condition_->set<Wccl::StrSet>(ivars->first, ivars->second);
        }
    }

    // fire up the operator
    boost::shared_ptr<const Wccl::Bool> pResult = condition_->apply(sc);
    if(!pResult->get_value()){
        return false;
    }

    bool found_head = false;
    // separate context on the same sentence, so that the head condition
    // can be evaluated at each matched position without touching sc
    Wccl::SentenceContext sc2(sc.get_sentence_ptr());

    // fill up positions
    foreach(const std::string& varname, condition_->valid_variable_names()){
        if(!boost::algorithm::starts_with(varname, "Pos")){
            continue;
        }

        Wccl::Position pos = condition_->get<Wccl::Position>(varname);
        if(pos.equals(nowhere_)){
            std::string errmsg("Position for found MWE cannot be zero.");
            errmsg += " Offending unit: " + base_;
            throw Wccl::WcclError(errmsg);
        }

        int abs_pos = sc.get_abs_position(pos);
        out_position.insert(abs_pos);

        // only the first position satisfying the head condition is the head
        if(!found_head){
            sc2.set_position(abs_pos);
            if(head_cond_->apply(sc2)->get_value()){
                head_pos = abs_pos;
                found_head = true;
            }
        }
    }

    if(!found_head){
        std::string errmsg("MWE found, but no head for it.");
        errmsg += " Offending unit: " + base_;
        throw Wccl::WcclError(errmsg);
    }

    return true;
}

////////////////////////////////////////////////////////////////////
// TODO really needed?

/**
 * Fixed lexical unit; adds nothing on top of LexicalUnit here and simply
 * forwards all arguments to the base-class constructor.
 */
FixedLU::FixedLU(const std::string &base,
        boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition,
        boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
        std::map<std::string, std::string> variables)
    : LexicalUnit(base, condition, head_cond, variables)
{
}

/**
 * Flexible lexical unit; adds nothing on top of LexicalUnit here and simply
 * forwards all arguments to the base-class constructor.
 */
FlexLU::FlexLU(const std::string &base,
        boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition,
        boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
        std::map<std::string, std::string> variables)
    : LexicalUnit(base, condition, head_cond, variables)
{
}

////////////////////////////////////////////////////////////////////

/** Creates an empty index. */
MWEIndex::MWEIndex()
    : index_(), empty_()
{
    // noop
}

/**
 * Registers @p lu under each of its potential bases, so that it can later
 * be retrieved with get_potential_lu() for any of them.
 */
void MWEIndex::add_lexicalunit(LexicalUnit::Ptr lu)
{
    foreach(const std::string& base, lu->get_potential_bases()){
        // operator[] creates an empty vector for a base seen for the
        // first time, then the unit is appended either way
        index_[base].push_back(lu);
    }
}

/**
 * Returns the lexical units registered under @p base.
 *
 * @return reference to the stored vector, or to an empty vector owned by
 *         the index when nothing was registered under @p base
 */
const MWEIndex::luvec& MWEIndex::get_potential_lu(const std::string &base)
{
    value_type::const_iterator found = index_.find(base);
    if(found == index_.end()){
        // not found -> return empty
        return empty_;
    }
    return found->second;
}

}//ns Corpus2