diff --git a/libmwereader/mwereader.cpp b/libmwereader/mwereader.cpp index 101ea1d7dc6d3c4662f08045d42bef9272158f6a..f6bb63b4d826c138e8903ad76a815596c9f38e6c 100644 --- a/libmwereader/mwereader.cpp +++ b/libmwereader/mwereader.cpp @@ -4,7 +4,7 @@ namespace Corpus2{ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( - "mwereader","token,chunk,sentence"); // TODO more help? + "mwereader","inner,mwepath"); // TODO more help? MWEReader::MWEReader(const Tagset &tagset, const std::string &filename) : TokenReader(tagset), inner_filename_(filename) @@ -20,6 +20,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( Token* MWEReader::get_next_token() { // TODO MWE stuff + // get whole sentence -> process it -> return token by token return inner_reader_->get_next_token(); } @@ -32,19 +33,20 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( boost::shared_ptr<Chunk> MWEReader::get_next_chunk() { // TODO MWE stuff + // get whole chunk -> process sentences -> return processed chunk return inner_reader_->get_next_chunk(); } void MWEReader::set_option(const std::string& option) { - if(boost::algorithm::starts_with(option, "inner:")) - { + if(boost::algorithm::starts_with(option, "inner:")) { std::string inner = option.substr(6); inner_reader_ = create_path_reader(inner, this->tagset(), inner_filename_); } + // TODO MWE stuff } diff --git a/libmwereader/mwereader.h b/libmwereader/mwereader.h index 7df21703467f05a6fd1d830e65c9775847aab391..e1e2c2b468293251cc0a2f8b72abb0b67d2ca38d 100644 --- a/libmwereader/mwereader.h +++ b/libmwereader/mwereader.h @@ -16,10 +16,16 @@ public: ~MWEReader(); + /// retrieves whole sentence, finds MWEs, and returns tokens Token* get_next_token(); + /// the preferred mode for this reader Sentence::Ptr get_next_sentence(); + /** + * retrieves chunk with inner reader and then searches for MWEs within + * sentences. 
+ */ boost::shared_ptr<Chunk> get_next_chunk(); void set_option(const std::string& option);