Newer
Older
#include <boost/algorithm/string.hpp>
namespace Corpus2{
// Static flag initialised with the result of registering MWEReader through
// TokenReader::register_path_reader.
// NOTE(review): the argument list and the end of this statement are not
// visible in this view — confirm against the full file.
bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
/**
 * Constructs the reader.
 *
 * Forwards the tagset to the TokenReader base and remembers the file name in
 * inner_filename_. The inner reader is not created here; set_option() assigns
 * it later.
 *
 * @param tagset   tagset passed on to the TokenReader base class
 * @param filename file name stored for the inner reader
 */
MWEReader::MWEReader(const Tagset &tagset, const std::string &filename)
    : TokenReader(tagset), inner_filename_(filename)
{
    // TODO: implementation?
}
/// Destructor; performs no explicit work at the moment.
MWEReader::~MWEReader() {
    // TODO: implementation
}
/**
 * Returns the next token exactly as delivered by the inner reader.
 *
 * @return whatever inner_reader_->get_next_token() yields
 */
Token* MWEReader::get_next_token()
{
    // TODO: read a whole sentence, process it, then hand its tokens out
    // one at a time.
    Token* next_token = inner_reader_->get_next_token();
    return next_token;
}
/**
 * Reads the next sentence from the inner reader and scans it for multi-word
 * expressions, writing a trace to std::cout.
 *
 * Trace format, per token: the orth followed by a space; for every
 * disambiguated lexeme whose lemma has candidate lexical units in the MWE
 * index a "# " marker; for every candidate that matches at the current
 * position "** <base>** ". The trace of a sentence ends with "ENDL".
 *
 * This function itself writes nothing back into the sentence; matches are
 * only reported in the trace.
 *
 * @return the sentence obtained from the inner reader; an empty pointer is
 *         returned as-is without any processing or output.
 */
Sentence::Ptr MWEReader::get_next_sentence()
{
    Sentence::Ptr pSentence = inner_reader_->get_next_sentence();

    // The inner reader may return an empty pointer (presumably at end of
    // input); everything below dereferences the sentence, so stop here.
    if(!pSentence)
        return pSentence;

    Wccl::SentenceContext sc(pSentence);

    for(int i = 0; i < sc.size(); ++i){
        // Lexical units are matched relative to the context's current position.
        sc.set_position(i);
        Corpus2::Token *pToken = (*pSentence)[i];
        std::cout << pToken->orth_utf8() << " ";

        std::vector<Lexeme>& lexemes = pToken->lexemes();
        foreach(const Lexeme& lex, lexemes){
            // Only disambiguated readings are used as lookup keys.
            if(!lex.is_disamb())
                continue;

            std::string base = lex.lemma_utf8();
            const MWEIndex::luvec& potential = mwe_index_.get_potential_lu(base);
            if(potential.size())
                std::cout << "# ";

            foreach(LexicalUnit::Ptr pLU, potential){
                // Passed to IsHere() alongside the context; their contents
                // are not used after the call in this function.
                std::set<size_t> positions;
                int head;
                bool is_here = pLU->IsHere(sc, positions, head);
                if(is_here)
                    std::cout << "** " << pLU->get_base() << "** ";
            }
        }
    }
    std::cout << "ENDL\n";
    return pSentence;
}
/**
 * Returns the next chunk exactly as delivered by the inner reader.
 *
 * @return whatever inner_reader_->get_next_chunk() yields
 */
boost::shared_ptr<Chunk> MWEReader::get_next_chunk()
{
    // TODO: read the whole chunk, process its sentences, then return the
    // processed chunk.
    boost::shared_ptr<Chunk> next_chunk = inner_reader_->get_next_chunk();
    return next_chunk;
}
void MWEReader::set_option(const std::string& option)
{
std::string inner = option.substr(6);
inner_reader_ = create_path_reader(inner, this->tagset(),
inner_filename_);
}
// NOTE(review): in this view the statement below sits outside any function
// body — the opening lines of its enclosing definition appear to be missing;
// confirm against the full file.
// Handles an option of the form "mwefile:<filename>": the text after the
// 8-character prefix is handed to load_mwes().
if(boost::algorithm::starts_with(option, "mwefile:")) {
std::string mwefile = option.substr(8);
load_mwes(mwefile);
// Reject the configuration when no inner reader has been set.
if(inner_reader_ == NULL)
throw Corpus2Error("Inner reader not initialised.");
// TODO MWE stuff
}
/**
 * Reports the state of an option.
 *
 * With an inner reader present, an "inner:..." option is echoed back
 * verbatim and any other option is delegated to the inner reader.
 *
 * Without an inner reader an empty string is returned, i.e. the option is
 * reported as not set. Previously this path dereferenced the null reader.
 *
 * @param option option string to inspect
 * @return the echoed option, the inner reader's answer, or an empty string
 *         when there is no inner reader
 */
std::string MWEReader::get_option(const std::string& option) const
{
    // Nothing can be echoed or delegated without an inner reader.
    if(inner_reader_ == NULL)
        return std::string();

    if(boost::algorithm::starts_with(option, "inner:"))
        return option;

    // TODO options for MWE
    return inner_reader_->get_option(option);
}
void MWEReader::load_mwes(const std::string &filename)
{