From 44625c67025676b1d1d73b0490053aef930a9db9 Mon Sep 17 00:00:00 2001
From: Bartosz Broda <bartosz.broda@gmail.com>
Date: Fri, 10 Jun 2011 21:00:23 +0200
Subject: [PATCH] Add MWEBuilder skeleton and pass MWEIndex into MWEParser

---
 libmwereader/mweparser.cpp | 12 ++++++++++--
 libmwereader/mweparser.h   | 18 +++++++++++++++++-
 libmwereader/mwereader.cpp |  2 +-
 libmwereader/mwereader.h   |  2 +-
 libmwereader/mwertest.cpp  |  6 ++++--
 5 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/libmwereader/mweparser.cpp b/libmwereader/mweparser.cpp
index 6cedc52..add215e 100644
--- a/libmwereader/mweparser.cpp
+++ b/libmwereader/mweparser.cpp
@@ -1,6 +1,7 @@
 #include "mweparser.h"
 
 #include <libpwrutils/foreach.h>
+#include <libcorpus2/tagsetmanager.h>
 
 #include <libxml++/libxml++.h>
 #include <libxml2/libxml/parser.h>
@@ -9,8 +10,14 @@
 
 namespace Corpus2 {
 
-	MWEParser::MWEParser()
-		: BasicSaxParser(), state_(NONE)
+	MWEBuilder::MWEBuilder(const Tagset& tagset)
+		: tagset_(tagset)
+	{
+
+	}
+
+	MWEParser::MWEParser(MWEIndex &index)
+		: BasicSaxParser(), state_(NONE), mwe_index_(index)
 	{
 	}
 
@@ -47,6 +54,7 @@ namespace Corpus2 {
 
 		if(state_ == NONE && name == "units_description"){
 			tagset_ = get_attribute(attributes, "tagset");
+			mwe_builder_ = boost::shared_ptr<MWEBuilder>(new MWEBuilder(Corpus2::get_named_tagset(tagset_)));
 			state_ = UNITSDESC;
 		} else if(state_ == UNITSDESC && name == "mwegroup"){
 			parse_mwegroup_attributes(attributes);
diff --git a/libmwereader/mweparser.h b/libmwereader/mweparser.h
index 81313e5..85cde5f 100644
--- a/libmwereader/mweparser.h
+++ b/libmwereader/mweparser.h
@@ -3,18 +3,31 @@
 
 #include <libcorpus2/io/reader.h>
 #include <libcorpus2/io/sax.h>
+#include <boost/unordered_map.hpp>
+
+#include "mwe.h"
 
 namespace Corpus2 {
 
 class MWEBuilder
 {
+public:
+	MWEBuilder(const Tagset& tagset); ///< binds tagset_ to the given tagset (reference is stored, no copy)
+	typedef boost::unordered_map<std::string, std::string> value_type; ///< string -> string map type shared by both condition tables
+
+private:
+	const Tagset& tagset_; ///< not owned; the referenced Tagset must outlive this builder
+	/// main conditions: str -> ccl operator (NOTE(review): comment used to say "ptr", but value_type maps to std::string -- confirm intent)
+	value_type main_conditions_;
+	/// head conditions: str -> ccl operator (NOTE(review): same std::string mapped type as main_conditions_ -- confirm intent)
+	value_type head_conditions_;
 
 };
 
 class MWEParser : public BasicSaxParser
 {
 public:
-	MWEParser();
+	MWEParser(MWEIndex &index);
 
 protected:
 	typedef std::map<std::string, std::string> str_map;
@@ -56,6 +69,9 @@ protected:
 	std::string group_type_;
 	std::string group_class_;
 	std::string head_cond_;
+
+	MWEIndex &mwe_index_;
+	boost::shared_ptr<MWEBuilder> mwe_builder_;
 };
 
 } // ns Corpus2
diff --git a/libmwereader/mwereader.cpp b/libmwereader/mwereader.cpp
index b8e0909..d7b38c7 100644
--- a/libmwereader/mwereader.cpp
+++ b/libmwereader/mwereader.cpp
@@ -73,7 +73,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
 
 	void MWEReader::load_mwes(const std::string &filename)
 	{
-		MWEParser parser;
+		MWEParser parser(mwe_index_);
 		parser.parse_file(filename);
 	}
 
diff --git a/libmwereader/mwereader.h b/libmwereader/mwereader.h
index 7b8b4d9..4c65c2e 100644
--- a/libmwereader/mwereader.h
+++ b/libmwereader/mwereader.h
@@ -49,7 +49,7 @@ public:
 private:
 	void load_mwes(const std::string& filename);
 
-	//MWEIndex mwe_index_;
+	MWEIndex mwe_index_;
 	/// ptr to inner reader doing the real work of reading a corpus
 	TokenReaderPtr inner_reader_;
 	/// path for inner reader
diff --git a/libmwereader/mwertest.cpp b/libmwereader/mwertest.cpp
index cb78a83..c5b9ceb 100644
--- a/libmwereader/mwertest.cpp
+++ b/libmwereader/mwertest.cpp
@@ -5,8 +5,10 @@
 int main(int ac, char**av)
 {
 	using namespace Corpus2;
-	std::cout << "Starting tests" << std::endl;
+	std::cout << "Starting tests... " << ac<< std::endl;
 
-	MWEParser parser;
+	MWEIndex temp_index;
+
+	MWEParser parser(temp_index);
 	parser.parse_file(av[1]);
 }
-- 
GitLab