From 20edcc90470b4e719da1d544dbe49b410efa5983 Mon Sep 17 00:00:00 2001
From: omekr <roman.kurc@pwr.wroc.pl>
Date: Fri, 5 Aug 2011 13:10:39 +0200
Subject: [PATCH] test against converted multiword units

---
 libmwereader/mwe.cpp                 | 15 +++++++++++----
 libmwereader/mweparser.cpp           | 17 +++++++++++------
 libmwereader/mwereader.cpp           |  3 +++
 libmwereader/tests/mwefunctional.cpp |  2 ++
 4 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/libmwereader/mwe.cpp b/libmwereader/mwe.cpp
index 4fcc72b..9397a4f 100644
--- a/libmwereader/mwe.cpp
+++ b/libmwereader/mwe.cpp
@@ -1,6 +1,7 @@
 #include "mwe.h"
 #include <boost/algorithm/string.hpp>
 #include <libwccl/values/strset.h>
+#include <boost/algorithm/string/predicate.hpp>
 
 namespace Corpus2{
 
@@ -27,11 +28,14 @@ LexicalUnit::LexicalUnit(const std::string &base,
 bool LexicalUnit::IsHere(const Wccl::SentenceContext &sc,
 					std::set<int> &out_position, int &head_pos) const
 {
-	// set variables
+	// Set the condition's variables, skipping any whose name starts with '!'
 	for(variables_map::const_iterator ivars = variables_.begin();
-		ivars != variables_.end(); ++ivars){
-		condition_->set<Wccl::StrSet>(ivars->first, ivars->second);
-	}
+		ivars != variables_.end(); ++ivars){			
+			if(!boost::starts_with(ivars->first, "!")){
+				std::cout << ivars->first << " " << std::endl;
+				condition_->set<Wccl::StrSet>(ivars->first, ivars->second);
+			}
+		}
 
 	// fire up the operator
 	boost::shared_ptr<const Wccl::Bool> pResult = condition_->apply(sc);
@@ -102,6 +106,7 @@ void MWEIndex::add_lexicalunit(LexicalUnit::Ptr lu)
 {
 	foreach(const std::string& base, lu->get_potential_bases()){
 		value_type::iterator find = index_.find(base);
+		std::cout << "b:"<<base<<std::endl;
 		if(find == index_.end()){ // not found -> create new one
 			luvec v;
 			v.push_back(lu);
@@ -113,6 +118,8 @@ void MWEIndex::add_lexicalunit(LexicalUnit::Ptr lu)
 }
 
 const MWEIndex::luvec& MWEIndex::get_potential_lu(const std::string &base){
+	std::cout << "index " << index_.size()<< std::endl;
+	std::cout << "sb:"<<base<<std::endl;
 	value_type::iterator find = index_.find(base);
 	if(find == index_.end()){ // not found -> return empty
 		return empty_;
diff --git a/libmwereader/mweparser.cpp b/libmwereader/mweparser.cpp
index 1edb9c6..5d7534f 100644
--- a/libmwereader/mweparser.cpp
+++ b/libmwereader/mweparser.cpp
@@ -34,9 +34,9 @@ namespace Corpus2 {
 
 		if(search != where.end())
 			return search->second;
-
+		// cond was not found in the map: parse it into a new operator
 		BoolOpPtr op = parser_.parseBoolOperator(cond);
-
+		// store the parsed operator in the map under cond
 		where[cond] = op;
 
 		return op;
@@ -57,24 +57,28 @@ namespace Corpus2 {
 	void MWEParser::create_mwe()
 	{
 		print_current_mwe(true);
+		// operator for the MWE condition, obtained from wccl_operator_
 		MWEBuilder::BoolOpPtr main = mwe_builder_->get_mwe_condition(
 					wccl_operator_);
+		// operator for the head condition, obtained from head_cond_
 		MWEBuilder::BoolOpPtr head = mwe_builder_->get_head_condition(
 					head_cond_);
-
+		// group_type_ selects which LexicalUnit subclass is added to the index
 		if(group_type_ == "fix"){ // group_name_  -> lower case
-
+			// "fix" group: add a FixedLU
 			mwe_index_.add_lexicalunit( LexicalUnit::Ptr(new FixedLU(mwe_base_, main, head,
 											  variables_)));
 		} else if(group_type_ == "flex"){
+			// "flex" group: add a FlexLU
 			mwe_index_.add_lexicalunit(LexicalUnit::Ptr(new FlexLU(mwe_base_, main, head,
 											variables_)));
 		} else {
 			throw Wccl::WcclError("Unknown type of lexical unit:"
 									+ group_type_);
 		}
-
+		// clear variables_ now that the unit has been added to the index
 		variables_.clear();
+		//std::cout << "po kupie " << std::endl;
 	}
 
 	std::string MWEParser::get_attribute(const AttributeList& attributes,
@@ -109,8 +113,9 @@ namespace Corpus2 {
 	void MWEParser::on_start_element(const Glib::ustring &name,
 			const AttributeList& attributes)
 	{
+		std::cout << "about to check" << std::endl;
 		std::cout << state_ << ": " << name << std::endl;
-
+		std::cout << "done with check" << std::endl;
 		if(state_ == NONE && name == "units_description"){
 			tagset_ = get_attribute(attributes, "tagset");
 			mwe_builder_ = boost::shared_ptr<MWEBuilder>(new MWEBuilder(Corpus2::get_named_tagset(tagset_)));
diff --git a/libmwereader/mwereader.cpp b/libmwereader/mwereader.cpp
index 83739ee..90b3bcd 100644
--- a/libmwereader/mwereader.cpp
+++ b/libmwereader/mwereader.cpp
@@ -47,10 +47,13 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
 				if(lex.is_disamb()){
 					std::string base = lex.lemma_utf8();
 					const MWEIndex::luvec& potential = mwe_index_.get_potential_lu(base);
+					std::cout << "potential " << potential.size() << std::endl;
 					foreach(LexicalUnit::Ptr pLU, potential){
 						std::set<int> positions;
 						int head;
+						// check whether this lexical unit matches in the sentence context sc
 						bool is_here = pLU->IsHere(sc, positions, head);
+						//std::cout << " is out" << std::endl;
 						if(is_here){
 							std::string new_orth_utf8;
 							Corpus2::Sentence::Ptr sent = sc.get_sentence_ptr();
diff --git a/libmwereader/tests/mwefunctional.cpp b/libmwereader/tests/mwefunctional.cpp
index 141d3fc..5e0b082 100644
--- a/libmwereader/tests/mwefunctional.cpp
+++ b/libmwereader/tests/mwefunctional.cpp
@@ -31,6 +31,8 @@ struct Fixture{
 
 };
 
+
+
 BOOST_FIXTURE_TEST_CASE( preferred_lexeme, Fixture)
 {
 	BOOST_MESSAGE("test: finding preferred lexeme");
-- 
GitLab