#include "mwereader.h"
#include "mweparser.h"

#include <iostream>
#include <set>
#include <string>
#include <vector>

#include <boost/algorithm/string.hpp>

namespace Corpus2{

/// Registers MWEReader as a path reader under the name "mwereader";
/// the second argument is the help string listing the recognised options.
bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
	"mwereader", "inner,mwefile"); // TODO more help?

/**
 * Creates a reader for the given tagset. The filename is only stored here;
 * it is handed to the inner reader once the "inner:" option is set
 * (see set_option).
 */
MWEReader::MWEReader(const Tagset &tagset, const std::string &filename)
	: TokenReader(tagset), inner_filename_(filename)
{
	// TODO implementation?
}

MWEReader::~MWEReader()
{
	// TODO implementation
}

/**
 * Forwards to the inner reader without any MWE processing.
 * Requires an inner reader to have been set (see validate()).
 */
Token* MWEReader::get_next_token()
{
	// TODO MWE stuff
	// get whole sentence -> process it -> return token by token
	return inner_reader_->get_next_token();
}

/**
 * Fetches the next sentence from the inner reader and, for every disamb
 * lexeme of every token, looks up candidate lexical units in the MWE index
 * and checks whether they match at the token's position. The findings are
 * currently only written to std::cout; the sentence is returned as
 * received from the inner reader.
 *
 * Requires an inner reader to have been set (see validate()).
 *
 * @return the sentence obtained from the inner reader; a null pointer is
 *         passed through untouched.
 */
Sentence::Ptr MWEReader::get_next_sentence()
{
	// TODO MWE stuff
	Sentence::Ptr pSentence = inner_reader_->get_next_sentence();
	if(!pSentence) {
		// Nothing to process (presumably end of input): building a
		// SentenceContext around a null sentence would dereference it.
		return pSentence;
	}

	Wccl::SentenceContext sc(pSentence);
	for(int i = 0; i < sc.size(); ++i){
		sc.set_position(i);
		Corpus2::Token *pToken = (*pSentence)[i];
		std::cout << pToken->orth_utf8() << " ";

		std::vector<Lexeme>& lexemes = pToken->lexemes();
		foreach(const Lexeme& lex, lexemes){
			if(!lex.is_disamb()){
				continue;
			}
			std::string base = lex.lemma_utf8();
			const MWEIndex::luvec& potential =
				mwe_index_.get_potential_lu(base);
			if(potential.size())
				std::cout << "# ";
			foreach(LexicalUnit::Ptr pLU, potential){
				std::set<size_t> positions;
				// Initialised so that no indeterminate value is ever
				// passed on; presumably filled in by IsHere -- TODO confirm.
				int head = -1;
				bool is_here = pLU->IsHere(sc, positions, head);
				if(is_here)
					std::cout << "** " << pLU->get_base() << "** ";
			}
		}
	}
	std::cout << "ENDL\n";

	return pSentence;
}

/**
 * Forwards to the inner reader without any MWE processing.
 * Requires an inner reader to have been set (see validate()).
 */
boost::shared_ptr<Chunk> MWEReader::get_next_chunk()
{
	// TODO MWE stuff
	// get whole chunk -> process sentences -> return processed chunk
	return inner_reader_->get_next_chunk();
}

/**
 * Handles reader options:
 *  - "inner:NAME"   creates the inner reader NAME for the filename given
 *                   at construction, using this reader's tagset,
 *  - "mwefile:PATH" loads MWE definitions from PATH into the index.
 * Any other option is silently ignored.
 */
void MWEReader::set_option(const std::string& option)
{
	if(boost::algorithm::starts_with(option, "inner:")) {
		// 6 == length of the "inner:" prefix
		std::string inner = option.substr(6);
		inner_reader_ = create_path_reader(inner, this->tagset(),
			inner_filename_);
	}
	if(boost::algorithm::starts_with(option, "mwefile:")) {
		// 8 == length of the "mwefile:" prefix
		std::string mwefile = option.substr(8);
		load_mwes(mwefile);
	}
	// TODO more MWE stuff
}

/**
 * Checks that the reader is usable.
 * @throws Corpus2Error if no inner reader has been set.
 */
void MWEReader::validate()
{
	if(inner_reader_ == NULL)
		throw Corpus2Error("Inner reader not initialised.");
	// TODO MWE stuff
}

/**
 * Queries an option. An "inner:..." query is echoed back when an inner
 * reader exists; everything else is delegated to the inner reader.
 *
 * @return the option value, or an empty string when there is no inner
 *         reader to ask (previously this case dereferenced a null pointer).
 */
std::string MWEReader::get_option(const std::string& option) const
{
	if(inner_reader_ == NULL) {
		// No inner reader yet: nothing is set and nobody to delegate to.
		return std::string();
	}
	if(boost::algorithm::starts_with(option, "inner:"))
		return option;
	// TODO options for MWE
	return inner_reader_->get_option(option);
}

/**
 * Parses the given MWE definition file; the parser is constructed around
 * mwe_index_, the index that get_next_sentence() later queries.
 */
void MWEReader::load_mwes(const std::string &filename)
{
	MWEParser parser(mwe_index_);
	parser.parse_file(filename);
}

}// ns Corpus2