Skip to content
Snippets Groups Projects
Commit 19b27f93 authored by ilor
Browse files

make wcclrules process a chunk at a time, add progress info

parent c25d539a
Branches
No related merge requests found
...@@ -7,6 +7,8 @@ ...@@ -7,6 +7,8 @@
#include <libwccl/parser/Parser.h> #include <libwccl/parser/Parser.h>
#include <libwccl/ops/rulesequence.h> #include <libwccl/ops/rulesequence.h>
#include <libcorpus2/tagsetmanager.h> #include <libcorpus2/tagsetmanager.h>
#include <libcorpus2/util/tokentimer.h>
#include <boost/bind.hpp> #include <boost/bind.hpp>
#include <boost/algorithm/string.hpp> #include <boost/algorithm/string.hpp>
...@@ -20,6 +22,7 @@ ...@@ -20,6 +22,7 @@
namespace { namespace {
bool quiet = false; bool quiet = false;
bool progress = false;
struct options { struct options {
bool first; bool first;
...@@ -39,7 +42,10 @@ bool load_more_rules(Wccl::Parser& parser, const std::string& filename, Wccl::Ru ...@@ -39,7 +42,10 @@ bool load_more_rules(Wccl::Parser& parser, const std::string& filename, Wccl::Ru
ret = parser.parseRuleSequence(is); ret = parser.parseRuleSequence(is);
if (ret) { if (ret) {
std::cerr << ret->size() << "\n"; if (!quiet) {
std::cerr << "Loaded " << ret->size() << " rule(s) from "
<< filename << "\n";
}
std::copy(ret->begin(), ret->end(), std::back_inserter(rules)); std::copy(ret->begin(), ret->end(), std::back_inserter(rules));
return true; return true;
} else { } else {
...@@ -69,17 +75,27 @@ bool load_more_rules(Wccl::Parser& parser, const std::string& filename, Wccl::Ru ...@@ -69,17 +75,27 @@ bool load_more_rules(Wccl::Parser& parser, const std::string& filename, Wccl::Ru
void do_stream(boost::shared_ptr<Corpus2::TokenWriter> writer, const Corpus2::Tagset& tagset, Wccl::RuleSequence& rules, void do_stream(boost::shared_ptr<Corpus2::TokenWriter> writer, const Corpus2::Tagset& tagset, Wccl::RuleSequence& rules,
std::istream& is, const options& opts) std::istream& is, const options& opts)
{ {
Corpus2::XcesReader xr(tagset, is); Corpus2::XcesReader reader(tagset, is);
Corpus2::Sentence::Ptr s; Corpus2::TokenTimer& timer = Corpus2::global_timer();
while ((s = xr.get_next_sentence())) { while (boost::shared_ptr<Corpus2::Chunk> c = reader.get_next_chunk()) {
if (opts.until_done) { foreach (boost::shared_ptr<Corpus2::Sentence>& s, c->sentences()) {
rules.execute_until_done(s, opts.until_done_iterations); if (opts.until_done) {
} else { rules.execute_until_done(s, opts.until_done_iterations);
rules.execute_once(s); } else {
rules.execute_once(s);
}
timer.count_sentence(*s);
if (progress) {
timer.check_slice();
}
if (opts.first) break;
} }
writer->write_sentence(*s); writer->write_chunk(*c);
if (opts.first) break; if (opts.first) break;
} }
if (progress) {
timer.stats();
}
} }
...@@ -110,6 +126,8 @@ int main(int argc, char** argv) ...@@ -110,6 +126,8 @@ int main(int argc, char** argv)
"Files to load, looking at the extension to determine type\n") "Files to load, looking at the extension to determine type\n")
("output-format,o", value(&output_format)->default_value("xces"), ("output-format,o", value(&output_format)->default_value("xces"),
writers_help.c_str()) writers_help.c_str())
("progress,p", value(&progress)->zero_tokens(),
"Show progress info")
("quiet,q", value(&quiet)->zero_tokens(), ("quiet,q", value(&quiet)->zero_tokens(),
"Suppress messages\n") "Suppress messages\n")
("until-done,u", value(&opts.until_done)->zero_tokens(), ("until-done,u", value(&opts.until_done)->zero_tokens(),
...@@ -165,8 +183,11 @@ int main(int argc, char** argv) ...@@ -165,8 +183,11 @@ int main(int argc, char** argv)
} }
} }
if (!rules.empty()) { if (!rules.empty()) {
Corpus2::TokenTimer& timer = Corpus2::global_timer();
timer.register_signal_handler();
boost::shared_ptr<Corpus2::TokenWriter> writer; boost::shared_ptr<Corpus2::TokenWriter> writer;
writer.reset(Corpus2::TokenWriter::create(output_format, std::cout, tagset)); foreach (const std::string& f, corpora_files) { writer.reset(Corpus2::TokenWriter::create(output_format, std::cout, tagset));
foreach (const std::string& f, corpora_files) {
std::ifstream ifs(f.c_str()); std::ifstream ifs(f.c_str());
if (ifs.good()) { if (ifs.good()) {
do_stream(writer, tagset, rules, ifs, opts); do_stream(writer, tagset, rules, ifs, opts);
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment