From e43767e121505804109de07dc1651a149a059cc1 Mon Sep 17 00:00:00 2001
From: Lukasz Bilenkij <lukasz.bilenkij@gmail.com>
Date: Mon, 22 Aug 2011 16:40:36 +0200
Subject: [PATCH] mwefile-list handling

Add a "mwefile-list:" reader option that takes a space-separated list of
MWE files and loads each one in order. Both "mwefile:" and "mwefile-list:"
now check that a file exists before calling load_mwes() and throw
std::runtime_error otherwise; in the list case the message names the
individual file that failed the check. load_mwes() counts the files it has
parsed, and validate() throws Corpus2Error when that count is still zero.
---
 libmwereader/mwereader.cpp |   31 +++++++++++++++++++++++++++++--
 libmwereader/mwereader.h   |    4 ++++
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/libmwereader/mwereader.cpp b/libmwereader/mwereader.cpp
index 0b91296..6f495c2 100644
--- a/libmwereader/mwereader.cpp
+++ b/libmwereader/mwereader.cpp
@@ -1,6 +1,7 @@
 #include "mwereader.h"
 #include "mweparser.h"
 #include <boost/algorithm/string.hpp>
+#include <boost/filesystem.hpp>
 
 namespace Corpus2{
 
@@ -11,7 +12,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
 	MWEReader::MWEReader(const Tagset &tagset, const std::string &filename)
 		: TokenReader(tagset), inner_filename_(filename)
 	{
-		// TODO implementataion?
+		mwes_counter=0;
 	}
 
 	MWEReader::~MWEReader()
@@ -156,8 +157,30 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
 			currentSentence= boost::make_shared<Sentence>();
 		}
 		if(boost::algorithm::starts_with(option, "mwefile:")) {
+
 			std::string mwefile = option.substr(8);
-			load_mwes(mwefile);
+			if(boost::filesystem::exists(mwefile))
+				load_mwes(mwefile);
+			else
+				throw std::runtime_error("File "+ mwefile + " does not exists");
+		}
+		if(boost::algorithm::starts_with(option, "mwefile-list:")) {
+			std::string mwefile = option.substr(13);
+			size_t found=mwefile.find(" ",0);
+			while(found!=std::string::npos)
+			{
+				std::string file = mwefile.substr(0,found);
+				if(boost::filesystem::exists(file))
+					load_mwes(file);
+				else
+					throw std::runtime_error("File "+ file +" does not exists");
+				mwefile=mwefile.substr(found+1);
+				found=mwefile.find(" ",0);
+			}
+			if(boost::filesystem::exists(mwefile))
+				load_mwes(mwefile);
+			else
+				throw std::runtime_error("File "+ mwefile +" does not exists");
 		}
 
 
@@ -166,8 +189,11 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
 
 	void MWEReader::validate()
 	{
+
 		if(inner_reader_ == NULL)
 			throw Corpus2Error("Inner reader not initialised.");
+		if(mwes_counter==0)
+			throw Corpus2Error("MWE files were not loaded");
 		// TODO MWE stuff
 	}
 
@@ -184,6 +210,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
 	{
 		MWEParser parser(mwe_index_);
 		parser.parse_file(filename);
+		mwes_counter++;
 	}
 
 
diff --git a/libmwereader/mwereader.h b/libmwereader/mwereader.h
index 44429b8..909fb9a 100644
--- a/libmwereader/mwereader.h
+++ b/libmwereader/mwereader.h
@@ -63,8 +63,12 @@ private:
 	std::string inner_filename_;
 	/// inner reader option
 	size_t token_index;
+	/// contains last processed sentence
 	Sentence::Ptr currentSentence;
+	/// contains last processed chunk
 	boost::shared_ptr<Chunk> currentChunk;
+	/// quantity of loaded mwes files
+	size_t mwes_counter;
 };
 
 } // ns Corpus2
-- 
GitLab