Skip to content
Snippets Groups Projects
Commit e43767e1 authored by Lukasz Bilenkij's avatar Lukasz Bilenkij
Browse files

mwefile-list handling

parent 917e75bc
No related branches found
No related tags found
No related merge requests found
#include "mwereader.h"
#include "mweparser.h"
#include <boost/algorithm/string.hpp>
#include <boost/filesystem.hpp>
namespace Corpus2{
......@@ -11,7 +12,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
/// Builds a reader for the given tagset, remembering the inner file name and
/// starting with zero loaded MWE files.
MWEReader::MWEReader(const Tagset &tagset, const std::string &filename)
	: TokenReader(tagset), inner_filename_(filename), mwes_counter(0)
{
	// TODO implementation?
}
MWEReader::~MWEReader()
......@@ -156,8 +157,30 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
currentSentence= boost::make_shared<Sentence>();
}
if(boost::algorithm::starts_with(option, "mwefile:")) {
	// The path is whatever follows the 8-character "mwefile:" prefix.
	const std::string mwe_path = option.substr(8);
	if(!boost::filesystem::exists(mwe_path)) {
		throw std::runtime_error("File "+ mwe_path + " does not exists");
	}
	load_mwes(mwe_path);
}
if(boost::algorithm::starts_with(option, "mwefile-list:")) {
	// The text after the 13-character "mwefile-list:" prefix is a
	// space-separated list of MWE file paths, loaded in the order given.
	// Throws std::runtime_error as soon as one listed file does not exist;
	// files listed before it have already been loaded at that point.
	const std::string file_list = option.substr(13);
	bool any_name = false;
	std::string::size_type start = 0;
	while(start <= file_list.size()) {
		std::string::size_type stop = file_list.find(' ', start);
		if(stop == std::string::npos) {
			stop = file_list.size();
		}
		const std::string file = file_list.substr(start, stop - start);
		start = stop + 1;
		// Repeated or trailing spaces produce empty names; skip them
		// rather than reporting a bogus missing file.
		if(file.empty()) {
			continue;
		}
		any_name = true;
		if(!boost::filesystem::exists(file)) {
			// Report the path that is actually missing, not the
			// remainder of the list.
			throw std::runtime_error("File "+ file +" does not exists");
		}
		load_mwes(file);
	}
	// A list with no file names at all is still an error.
	if(!any_name) {
		throw std::runtime_error("File "+ file_list +" does not exists");
	}
}
......@@ -166,8 +189,11 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
/// Verifies that the reader is ready for use.
/// Throws Corpus2Error when the inner reader is unset or when no MWE file
/// has been loaded.
void MWEReader::validate()
{
	if(!inner_reader_) {
		throw Corpus2Error("Inner reader not initialised.");
	}
	if(mwes_counter == 0) {
		throw Corpus2Error("MWE files were not loaded");
	}
	// TODO MWE stuff
}
......@@ -184,6 +210,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
{
MWEParser parser(mwe_index_);
parser.parse_file(filename);
mwes_counter++;
}
......
......@@ -63,8 +63,12 @@ private:
std::string inner_filename_;
/// inner reader option
size_t token_index;
/// contains last processed sentence
Sentence::Ptr currentSentence;
/// contains last processed chunk
boost::shared_ptr<Chunk> currentChunk;
/// number of MWE files loaded
size_t mwes_counter;
};
} // ns Corpus2
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment