diff --git a/libmwereader/mwereader.cpp b/libmwereader/mwereader.cpp index 6fe0d1e5c0df1635eeea7f20b6d513d9b6dfe2af..b8e09098971e5737e2fde6d5dd05f900ea309bf1 100644 --- a/libmwereader/mwereader.cpp +++ b/libmwereader/mwereader.cpp @@ -1,10 +1,11 @@ #include "mwereader.h" +#include "mweparser.h" #include <boost/algorithm/string.hpp> namespace Corpus2{ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( - "mwereader","inner,mwepath"); // TODO more help? + "mwereader","inner,mwefile"); // TODO more help? MWEReader::MWEReader(const Tagset &tagset, const std::string &filename) : TokenReader(tagset), inner_filename_(filename) @@ -43,11 +44,15 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( if(boost::algorithm::starts_with(option, "inner:")) { std::string inner = option.substr(6); inner_reader_ = create_path_reader(inner, this->tagset(), - inner_filename_); + inner_filename_); + } + if(boost::algorithm::starts_with(option, "mwefile:")) { + std::string mwefile = option.substr(8); + load_mwes(mwefile); } - // TODO MWE stuff + // TODO more MWE stuff } void MWEReader::validate() @@ -66,5 +71,11 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( return inner_reader_->get_option(option); } + void MWEReader::load_mwes(const std::string &filename) + { + MWEParser parser; + parser.parse_file(filename); + } + }// ns Corpus2 diff --git a/libmwereader/mwereader.h b/libmwereader/mwereader.h index e1e2c2b468293251cc0a2f8b72abb0b67d2ca38d..7b8b4d9e39d4820b505f04b3fa1c5102c308db18 100644 --- a/libmwereader/mwereader.h +++ b/libmwereader/mwereader.h @@ -3,6 +3,8 @@ #include <libcorpus2/io/reader.h> +#include "mwe.h" + namespace Corpus2 { @@ -43,7 +45,11 @@ public: virtual void validate(); static bool registered; + private: + void load_mwes(const std::string& filename); + + //MWEIndex mwe_index_; /// ptr to inner reader doing the real work of reading a corpus TokenReaderPtr inner_reader_; /// path for inner reader