Skip to content
Snippets Groups Projects
Commit 0c1e9467 authored by ilor
Browse files

add --input-format/-i to wcclrun

parent 9b859ab8
No related branches found
No related tags found
No related merge requests found
......@@ -65,7 +65,7 @@ public:
}
void do_sentence(const boost::shared_ptr<Corpus2::Sentence>& sentence);
void do_stream(std::istream& is, bool first);
void run(boost::shared_ptr<Corpus2::TokenReader> , bool first);
void output_tabular(const std::vector< std::vector< UnicodeString > > outputs);
......@@ -225,12 +225,11 @@ void Runner::output_tabular(const std::vector<std::vector<UnicodeString> > outpu
}
}
void Runner::do_stream(std::istream& is, bool first)
void Runner::run(boost::shared_ptr<Corpus2::TokenReader> reader, bool first)
{
Corpus2::XcesReader xr(tagset_, is);
Corpus2::Sentence::Ptr s;
Corpus2::TokenTimer& timer = Corpus2::global_timer();
while ((s = xr.get_next_sentence())) {
while ((s = reader->get_next_sentence())) {
do_sentence(s);
timer.count_sentence(*s);
if (progress_) {
......@@ -245,9 +244,12 @@ int main(int argc, char** argv)
{
std::string tagset_load = "kipi";
bool first = false, progress = false;
std::string input_format;
std::vector<std::string> corpora_files, files, operator_strings;
bool corpus_stdin = false;
using boost::program_options::value;
std::string readers = boost::algorithm::join(Corpus2::TokenReader::available_reader_types_help(), " ");
std::string readers_help = "Input format, any of: " + readers + "\n";
boost::program_options::options_description desc("Allowed options");
desc.add_options()
......@@ -263,6 +265,8 @@ int main(int argc, char** argv)
"Read corpus from stdin")
("quiet,q", value(&quiet)->zero_tokens(),
"Suppress messages")
("input-format,i", value(&input_format)->default_value("xces"),
readers_help.c_str())
("first-sentence-only,1", value(&first)->zero_tokens(),
"Only process first sentence")
("tabs", value(&tabs)->zero_tokens(),
......@@ -336,15 +340,12 @@ int main(int argc, char** argv)
}
if (!runner.operators().empty()) {
foreach (const std::string& f, corpora_files) {
std::ifstream ifs(f.c_str());
if (ifs.good()) {
runner.do_stream(ifs, first);
} else {
std::cerr << "Error reading corpus from " << f << "\n";
}
runner.run(Corpus2::TokenReader::create_path_reader(
input_format, tagset, f), first);
}
if (corpus_stdin) {
runner.do_stream(std::cin, first);
runner.run(Corpus2::TokenReader::create_stream_reader(
input_format, tagset, std::cin), first);
}
if (progress) {
Corpus2::TokenTimer& timer = Corpus2::global_timer();
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment