diff --git a/libmwereader/mwereader.cpp b/libmwereader/mwereader.cpp index 0b91296c05d721de32162c35381d32566d572f99..6f495c2b8bea3e4dd10f44e0eebd21657fe38a8f 100644 --- a/libmwereader/mwereader.cpp +++ b/libmwereader/mwereader.cpp @@ -1,6 +1,7 @@ #include "mwereader.h" #include "mweparser.h" #include <boost/algorithm/string.hpp> +#include <boost/filesystem.hpp> namespace Corpus2{ @@ -11,7 +12,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( MWEReader::MWEReader(const Tagset &tagset, const std::string &filename) : TokenReader(tagset), inner_filename_(filename) { - // TODO implementataion? + mwes_counter=0; } MWEReader::~MWEReader() @@ -156,8 +157,30 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( currentSentence= boost::make_shared<Sentence>(); } if(boost::algorithm::starts_with(option, "mwefile:")) { + std::string mwefile = option.substr(8); - load_mwes(mwefile); + if(boost::filesystem::exists(mwefile)) + load_mwes(mwefile); + else + throw std::runtime_error("File "+ mwefile + " does not exists"); + } + if(boost::algorithm::starts_with(option, "mwefile-list:")) { + std::string mwefile = option.substr(13); + size_t found=mwefile.find(" ",0); + while(found!=std::string::npos) + { + std::string file = mwefile.substr(0,found); + if(boost::filesystem::exists(file)) + load_mwes(file); + else + throw std::runtime_error("File "+ file +" does not exists"); /* name the entry that was checked, not the remaining list */ + mwefile=mwefile.substr(found+1); + found=mwefile.find(" ",0); + } + if(boost::filesystem::exists(mwefile)) + load_mwes(mwefile); + else + throw std::runtime_error("File "+ mwefile +" does not exists"); } @@ -166,8 +189,11 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( void MWEReader::validate() { + if(inner_reader_ == NULL) throw Corpus2Error("Inner reader not initialised."); + if(mwes_counter==0) + throw Corpus2Error("MWE files were not loaded"); // TODO MWE stuff } @@ -184,6 +210,7 @@ bool MWEReader::registered = 
TokenReader::register_path_reader<MWEReader>( { MWEParser parser(mwe_index_); parser.parse_file(filename); + mwes_counter++; } diff --git a/libmwereader/mwereader.h b/libmwereader/mwereader.h index 44429b8a76a5008b0a50564f2d5f1d007ab65e13..909fb9acd88403fad6288abb38bd47f1379c10d9 100644 --- a/libmwereader/mwereader.h +++ b/libmwereader/mwereader.h @@ -63,8 +63,12 @@ private: std::string inner_filename_; /// inner reader option size_t token_index; + /// contains last processed sentence Sentence::Ptr currentSentence; + /// contains last processed chunk boost::shared_ptr<Chunk> currentChunk; + /// quantity of loaded mwes files + size_t mwes_counter; }; } // ns Corpus2