From cc8c48adb0bea22be8d82344c8b8420a97054e40 Mon Sep 17 00:00:00 2001
From: Bartosz Broda <bartosz.broda@gmail.com>
Date: Mon, 13 Jun 2011 23:27:58 +0200
Subject: [PATCH] add add_lexicalunit to index, small improvement in creation
 of mwe in sax parser

---
 libmwereader/mwe.cpp       | 56 +++++++++++++++++++++++++++---------
 libmwereader/mwe.h         | 58 ++++++++++++++++++++++++++++++++------
 libmwereader/mweparser.cpp | 16 ++++++++---
 3 files changed, 104 insertions(+), 26 deletions(-)

diff --git a/libmwereader/mwe.cpp b/libmwereader/mwe.cpp
index e76aaf4..897c80e 100644
--- a/libmwereader/mwe.cpp
+++ b/libmwereader/mwe.cpp
@@ -4,27 +4,21 @@
 namespace Corpus2{
 
 LexicalUnit::LexicalUnit(const std::string &base,
-						 boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition,
-						 boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
-						 std::map<std::string, std::string> variables)
+						 LexicalUnit::BoolOpPtr condition,
+						 LexicalUnit::BoolOpPtr head_cond,
+						 LexicalUnit::strmap variables)
 	: condition_(condition),
 	  head_cond_(head_cond),
 	  variables_(variables),
 	  base_(base),
 	  nowhere_(Wccl::Position())
 {
-	// noop
+	for(strmap::iterator iter = variables_.begin();
+		iter != variables_.end(); ++iter)
+		potential_bases_.insert(iter->second);
 }
 
-FixedLU::FixedLU(const std::string &base,
-				boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition,
-				boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
-				std::map<std::string, std::string> variables)
-		: LexicalUnit(base, condition, head_cond, variables)
-{
-}
-
-bool FixedLU::IsHere(const Wccl::SentenceContext &sc,
+bool LexicalUnit::IsHere(const Wccl::SentenceContext &sc,
 					std::set<size_t> &out_position)
 {
 	boost::shared_ptr<const Wccl::Bool> pResult = condition_->apply(sc);
@@ -45,4 +39,40 @@ bool FixedLU::IsHere(const Wccl::SentenceContext &sc,
 	return true;
 }
 
+
+/** Builds a fixed lexical unit; every argument goes to LexicalUnit. */
+FixedLU::FixedLU(const std::string &base,
+				LexicalUnit::BoolOpPtr condition,
+				LexicalUnit::BoolOpPtr head_cond,
+				LexicalUnit::strmap variables)
+		: LexicalUnit(base, condition, head_cond, variables)
+{}
+
+/** Builds a flexible lexical unit; every argument goes to LexicalUnit. */
+FlexLU::FlexLU(const std::string &base,
+				LexicalUnit::BoolOpPtr condition,
+				LexicalUnit::BoolOpPtr head_cond,
+				LexicalUnit::strmap variables)
+		: LexicalUnit(base, condition, head_cond, variables)
+{}
+
+MWEIndex::MWEIndex()
+{
+	// nothing to do: index_ is default-constructed and starts out empty
+}
+
+/**
+ * Appends lu to the index entry of every base returned by
+ * lu->get_potential_bases().
+ * \param lu lexical unit to register; it is dereferenced, so it must
+ * not be null
+ */
+void MWEIndex::add_lexicalunit(LexicalUnitPtr lu)
+{
+	foreach(const std::string& base, lu->get_potential_bases()){
+		// operator[] default-constructs an empty vector for an unseen base
+		index_[base].push_back(lu);
+	}
+}
+
 }//ns Corpus2
diff --git a/libmwereader/mwe.h b/libmwereader/mwe.h
index f62f78f..0676ad0 100644
--- a/libmwereader/mwe.h
+++ b/libmwereader/mwe.h
@@ -1,6 +1,8 @@
 #ifndef LIBMWEREADER_MWE_H
 #define LIBMWEREADER_MWE_H
 
+#include <boost/unordered_map.hpp>
+
 #include <libcorpus2/io/reader.h>
 #include <libwccl/ops/operator.h>
 
@@ -11,24 +13,48 @@ namespace Corpus2 {
 class LexicalUnit
 {
 public:
-	LexicalUnit(const std::string &base,
-				boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition,
-				boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
-				std::map<std::string, std::string> variables
+	typedef std::map<std::string, std::string> strmap;
+	typedef std::set<std::string> strset;
+	typedef boost::shared_ptr<Wccl::Operator<Wccl::Bool> > BoolOpPtr;
+
+	LexicalUnit(const std::string &base, BoolOpPtr condition,
+				BoolOpPtr head_cond, strmap variables
 				);
 
+	/**
+	  * \param sc SentenceContext whose current position is the one
+	  * to be checked
+	  * \param out_positions receives absolute positions in the
+	  * SentenceContext (obtained with sc.get_abs_position), but only if
+	  * the main condition of this LexicalUnit returns true in the current
+	  * sentence context
+	  * \returns true if this lexical unit was found here
+	  */
 	virtual bool IsHere(const Wccl::SentenceContext& sc,
-						std::set<size_t> &out_position) = 0;
+						std::set<size_t> &out_positions) ;
+
+	const std::string & get_base() const{ return base_;}
+	const strmap & get_variables() const{ return variables_;}
+	const strset& get_potential_bases() const{ return potential_bases_;}
+
+
 
 protected:
+
 	boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition_;
 	boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond_;
-	std::map<std::string, std::string> variables_;
+	strmap variables_;
 	std::string base_;
 
+	strset potential_bases_;
+
 	const Wccl::Position nowhere_;
 };
 
+typedef boost::shared_ptr<LexicalUnit> LexicalUnitPtr;
+
+
+// TODO: will the fix/flex split actually be needed in the code?
 class FixedLU : public LexicalUnit
 {
 public:
@@ -37,18 +63,32 @@ public:
 				boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
 				std::map<std::string, std::string> variables
 				);
-	virtual bool IsHere(const Wccl::SentenceContext& sc,
-						std::set<size_t> &out_position);
-
 };
 
 class FlexLU : public LexicalUnit
 {
+public:
+	FlexLU(const std::string &base,
+				boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition,
+				boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
+				std::map<std::string, std::string> variables
+				);
 
 };
 
 class MWEIndex // lub base -> vector<LexicalUnit>
 {
+public:
+	MWEIndex();
+
+	void add_lexicalunit(LexicalUnitPtr lu);
+
+protected:
+	typedef std::vector<LexicalUnitPtr> luvec;
+	typedef boost::unordered_map<std::string,luvec> value_type;
+
+
+	value_type index_;
 };
 
 }// ns Corpus2
diff --git a/libmwereader/mweparser.cpp b/libmwereader/mweparser.cpp
index 5f4e88d..c8fd91e 100644
--- a/libmwereader/mweparser.cpp
+++ b/libmwereader/mweparser.cpp
@@ -60,10 +60,17 @@ namespace Corpus2 {
 		MWEBuilder::BoolOpPtr head = mwe_builder_->get_head_condition(
 					head_cond_);
 
-		//foreach(const std::string&varname, main->valid_variable_names())
-			//if(boost::algorithm::starts_with(varname, "Pos"))
-				//std::cout << "Pozycja: " << varname << std::endl;
-
+		LexicalUnitPtr lu;
+		if(group_type_ == "fix"){ // group_type_ is lower-cased when parsed
+			lu = LexicalUnitPtr(new FixedLU(mwe_base_, main, head,
+											  variables_));
+		} else if(group_type_ == "flex"){
+			lu = LexicalUnitPtr(new FlexLU(mwe_base_, main, head,
+											variables_));
+		} else {
+			throw Wccl::WcclError("Unknown type of lexical unit:"
+									+ group_type_);
+		}
 	}
 
 	std::string MWEParser::get_attribute(const AttributeList& attributes,
@@ -85,6 +92,7 @@ namespace Corpus2 {
 				group_name_ = a.value;
 			} else if(a.name == "type"){
 				group_type_ = a.value;
+				boost::algorithm::to_lower(group_type_);
 			} else if(a.name == "class"){
 				group_class_ = a.value;
 			}
-- 
GitLab