From e43767e121505804109de07dc1651a149a059cc1 Mon Sep 17 00:00:00 2001
From: Lukasz Bilenkij <lukasz.bilenkij@gmail.com>
Date: Mon, 22 Aug 2011 16:40:36 +0200
Subject: [PATCH] mwefile-list handling

---
 libmwereader/mwereader.cpp | 31 +++++++++++++++++++++++++++++--
 libmwereader/mwereader.h   |  4 ++++
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/libmwereader/mwereader.cpp b/libmwereader/mwereader.cpp
index 0b91296..6f495c2 100644
--- a/libmwereader/mwereader.cpp
+++ b/libmwereader/mwereader.cpp
@@ -1,6 +1,7 @@
 #include "mwereader.h"
 #include "mweparser.h"
 #include <boost/algorithm/string.hpp>
+#include <boost/filesystem.hpp>
 
 
 namespace Corpus2{
@@ -11,7 +12,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
 	MWEReader::MWEReader(const Tagset &tagset, const std::string &filename)
 		: TokenReader(tagset), inner_filename_(filename)
 	{
-		// TODO implementataion?
+		mwes_counter=0; // no MWE file parsed yet; validate() rejects a reader still at zero
 	}
 
 	MWEReader::~MWEReader()
@@ -156,8 +157,30 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
 			currentSentence= boost::make_shared<Sentence>();
 		}
 		if(boost::algorithm::starts_with(option, "mwefile:")) {
+			// Single-file form: everything after the 8-character "mwefile:" prefix is the path.
 			std::string mwefile = option.substr(8);
-			load_mwes(mwefile);
+			if(boost::filesystem::exists(mwefile)) // check the path before trying to load it
+				load_mwes(mwefile);
+			else
+				throw std::runtime_error("File "+ mwefile + " does not exists");
+		}
+		if(boost::algorithm::starts_with(option, "mwefile-list:")) { // space-separated list of MWE files
+			std::string mwefile = option.substr(13); // drop the 13-character "mwefile-list:" prefix
+			size_t found=mwefile.find(" ",0);
+			while(found!=std::string::npos) // peel off one path per separator
+			{
+				std::string file = mwefile.substr(0,found);
+				if(boost::filesystem::exists(file))
+					load_mwes(file);
+				else // report the missing entry itself, not the whole remaining list
+					throw std::runtime_error("File "+ file +" does not exists");
+				mwefile=mwefile.substr(found+1);
+				found=mwefile.find(" ",0);
+			}
+			if(boost::filesystem::exists(mwefile)) // whatever is left is the last path
+				load_mwes(mwefile);
+			else
+				throw std::runtime_error("File "+ mwefile +" does not exists");
 		}
 
 
@@ -166,8 +189,11 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
 
 	void MWEReader::validate()
 	{
+		// Throws Corpus2Error unless an inner reader is set and at least one MWE file was parsed.
 		if(inner_reader_ == NULL)
 			throw Corpus2Error("Inner reader not initialised.");
+		if(mwes_counter==0) // counter is bumped after every parsed MWE file
+			throw Corpus2Error("MWE files were not loaded");
 		// TODO MWE stuff
 	}
 
@@ -184,6 +210,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
 	{
 		MWEParser parser(mwe_index_);
 		parser.parse_file(filename);
+		mwes_counter++;
 
 	}
 
diff --git a/libmwereader/mwereader.h b/libmwereader/mwereader.h
index 44429b8..909fb9a 100644
--- a/libmwereader/mwereader.h
+++ b/libmwereader/mwereader.h
@@ -63,8 +63,12 @@ private:
 	std::string inner_filename_;
 	/// inner reader option
 	size_t token_index;
+	/// contains last processed sentence
 	Sentence::Ptr currentSentence;
+	/// contains last processed chunk
 	boost::shared_ptr<Chunk> currentChunk;
+	/// number of MWE files loaded so far
+	size_t mwes_counter;
 };
 
 } // ns Corpus2
-- 
GitLab