diff --git a/libmwereader/mwe.cpp b/libmwereader/mwe.cpp index 4fcc72bfcf8aa55d022972ff0a693b1ced5e98eb..9397a4f1f486afc0024c920da88ec60b0179ce33 100644 --- a/libmwereader/mwe.cpp +++ b/libmwereader/mwe.cpp @@ -1,6 +1,7 @@ #include "mwe.h" #include <boost/algorithm/string.hpp> #include <libwccl/values/strset.h> +#include <boost/algorithm/string/predicate.hpp> namespace Corpus2{ @@ -27,11 +28,14 @@ LexicalUnit::LexicalUnit(const std::string &base, bool LexicalUnit::IsHere(const Wccl::SentenceContext &sc, std::set<int> &out_position, int &head_pos) const { - // set variables + // set variables, skip vars with names starting with '!' for(variables_map::const_iterator ivars = variables_.begin(); - ivars != variables_.end(); ++ivars){ - condition_->set<Wccl::StrSet>(ivars->first, ivars->second); - } + ivars != variables_.end(); ++ivars){ + if(!boost::starts_with(ivars->first, "!")){ + std::cout << ivars->first << " " << std::endl; + condition_->set<Wccl::StrSet>(ivars->first, ivars->second); + } + } // fire up the operator boost::shared_ptr<const Wccl::Bool> pResult = condition_->apply(sc); @@ -102,6 +106,7 @@ void MWEIndex::add_lexicalunit(LexicalUnit::Ptr lu) { foreach(const std::string& base, lu->get_potential_bases()){ value_type::iterator find = index_.find(base); + std::cout << "b:"<<base<<std::endl; if(find == index_.end()){ // not found -> create new one luvec v; v.push_back(lu); @@ -113,6 +118,8 @@ void MWEIndex::add_lexicalunit(LexicalUnit::Ptr lu) } const MWEIndex::luvec& MWEIndex::get_potential_lu(const std::string &base){ + std::cout << "index " << index_.size()<< std::endl; + std::cout << "sb:"<<base<<std::endl; value_type::iterator find = index_.find(base); if(find == index_.end()){ // not found -> return empty return empty_; diff --git a/libmwereader/mweparser.cpp b/libmwereader/mweparser.cpp index 1edb9c684334180e359ad987658a2ece34337670..5d7534f8d17356360044bf5d7a33621945bf8373 100644 --- a/libmwereader/mweparser.cpp +++ b/libmwereader/mweparser.cpp @@ -34,9 +34,9 @@ namespace Corpus2 { if(search != where.end()) return search->second; - + //std::cout << " dddddddddddddd "<< cond << std::endl; BoolOpPtr op = parser_.parseBoolOperator(cond); - + //std::cout << " dddddddddddddd $$$$" << cond << std::endl; where[cond] = op; return op; @@ -57,24 +57,28 @@ namespace Corpus2 { void MWEParser::create_mwe() { print_current_mwe(true); + //std::cout << " kupa cond" << std::endl; MWEBuilder::BoolOpPtr main = mwe_builder_->get_mwe_condition( wccl_operator_); + //std::cout << " kupa head" << std::endl; MWEBuilder::BoolOpPtr head = mwe_builder_->get_head_condition( head_cond_); - + //std::cout << " kupa " << std::endl; if(group_type_ == "fix"){ // group_name_ -> lower case - + //std::cout << " kupa fix" << std::endl; mwe_index_.add_lexicalunit( LexicalUnit::Ptr(new FixedLU(mwe_base_, main, head, variables_))); } else if(group_type_ == "flex"){ + //std::cout << " kupa flex" << std::endl; mwe_index_.add_lexicalunit(LexicalUnit::Ptr(new FlexLU(mwe_base_, main, head, variables_))); } else { throw Wccl::WcclError("Unknown type of lexical unit:" + group_type_); } - + //std::cout << " kupa clear" << std::endl; variables_.clear(); + //std::cout << "po kupie " << std::endl; } std::string MWEParser::get_attribute(const AttributeList& attributes, @@ -109,8 +113,9 @@ namespace Corpus2 { void MWEParser::on_start_element(const Glib::ustring &name, const AttributeList& attributes) { + std::cout << "about to check" << std::endl; std::cout << state_ << ": " << name << std::endl; - + std::cout << "done with check" << std::endl; if(state_ == NONE && name == "units_description"){ tagset_ = get_attribute(attributes, "tagset"); mwe_builder_ = boost::shared_ptr<MWEBuilder>(new MWEBuilder(Corpus2::get_named_tagset(tagset_))); diff --git a/libmwereader/mwereader.cpp b/libmwereader/mwereader.cpp index 83739ee7a6d923476dff696ee63a3c0a7dcd48a3..90b3bcdcf25525b224c7bfa394a1f49e0f58b59a 100644 --- a/libmwereader/mwereader.cpp +++ b/libmwereader/mwereader.cpp @@ -47,10 +47,13 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( if(lex.is_disamb()){ std::string base = lex.lemma_utf8(); const MWEIndex::luvec& potential = mwe_index_.get_potential_lu(base); + std::cout << "potential " << potential.size() << std::endl; foreach(LexicalUnit::Ptr pLU, potential){ std::set<int> positions; int head; + //std::cout << " is " << std::endl; bool is_here = pLU->IsHere(sc, positions, head); + //std::cout << " is out" << std::endl; if(is_here){ std::string new_orth_utf8; Corpus2::Sentence::Ptr sent = sc.get_sentence_ptr(); diff --git a/libmwereader/tests/mwefunctional.cpp b/libmwereader/tests/mwefunctional.cpp index 141d3fcabce21841b88c849399d978d65d6b2ceb..5e0b082fe5348bb93b8ff52cf769caeb5da89aa4 100644 --- a/libmwereader/tests/mwefunctional.cpp +++ b/libmwereader/tests/mwefunctional.cpp @@ -31,6 +31,8 @@ struct Fixture{ }; + + BOOST_FIXTURE_TEST_CASE( preferred_lexeme, Fixture) { BOOST_MESSAGE("test: finding preferred lexeme");