Skip to content
Snippets Groups Projects
Commit 9b859ab8 authored by ilor's avatar ilor
Browse files

add --progress/-p in wccl-run, clean up progress output in wccl-match and wccl-rules

parent 5c54e731
Branches
No related tags found
No related merge requests found
......@@ -17,7 +17,6 @@
namespace {
bool quiet = false;
bool progress = false;
struct options {
bool first;
......@@ -30,10 +29,18 @@ class MatchRunner
{
public:
MatchRunner(const Corpus2::Tagset& tagset)
: tagset_(tagset), parser_(tagset_)
: tagset_(tagset), parser_(tagset_), progress_(false)
{
}
/// Turn progress reporting on or off for this runner.
///
/// The flag is stored in progress_. When reporting is switched on, the
/// signal handler of the global Corpus2 token timer is registered as well.
///
/// @param use true to enable progress reporting, false to disable it
void use_progress(bool use) {
    progress_ = use;
    if (!use) {
        // Nothing else to set up when progress reporting is disabled.
        return;
    }
    Corpus2::global_timer().register_signal_handler();
}
bool load_more_rules(const std::string &filename);
bool load_operator_string(const std::string &op_string);
......@@ -50,6 +57,7 @@ private:
Wccl::Parser parser_;
std::vector<std::string> rule_names_;
std::vector<boost::shared_ptr<Wccl::MatchRule> > rules_;
bool progress_;
};
bool MatchRunner::load_more_rules(const std::string& filename)
......@@ -95,16 +103,13 @@ void MatchRunner::apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader,
}
timer.count_sentence(*as);
if (progress) {
if (progress_) {
timer.check_slice();
}
//writer->write_sentence(*as);
}
writer->write_chunk(*c);
}
if (progress) {
timer.stats();
}
}
void usage(char* name)
......@@ -121,6 +126,7 @@ int main(int argc, char** argv)
std::string tagset_load = "kipi";
std::string input_format;
std::string output_format;
bool progress = false;
options opts;
opts.first = false;
opts.until_done = false;
......@@ -203,6 +209,7 @@ int main(int argc, char** argv)
try {
const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_load);
MatchRunner runner(tagset);
runner.use_progress(progress);
foreach (const std::string& file, ccl_files) {
runner.load_more_rules(file);
}
......@@ -222,6 +229,10 @@ int main(int argc, char** argv)
reader->set_option("ann");
runner.apply_rules(reader, writer);
}
if (progress) {
Corpus2::TokenTimer& timer = Corpus2::global_timer();
timer.stats();
}
}
} catch (PwrNlp::PwrNlpError& e) {
std::cerr << e.info() << std::endl;
......
......@@ -229,6 +229,10 @@ int main(int argc, char** argv)
reader->set_option("ann");
apply_rules(reader, writer, rules, opts);
}
if (progress) {
Corpus2::TokenTimer& timer = Corpus2::global_timer();
timer.stats();
}
}
} catch (PwrNlp::PwrNlpError& e) {
std::cerr << e.info() << std::endl;
......
......@@ -7,6 +7,7 @@
#include <libwccl/values/strset.h>
#include <libwccl/parser/Parser.h>
#include <libcorpus2/tagsetmanager.h>
#include <libcorpus2/util/tokentimer.h>
#include <boost/bind.hpp>
#include <boost/algorithm/string.hpp>
......@@ -43,10 +44,18 @@ class Runner
{
public:
Runner(const Corpus2::Tagset& tagset)
: tagset_(tagset), parser_(tagset_), token_idx(0)
: tagset_(tagset), parser_(tagset_), token_idx(0), progress_(false)
{
}
/// Turn progress reporting on or off for this runner.
///
/// The flag is stored in progress_. When reporting is switched on, the
/// signal handler of the global Corpus2 token timer is registered as well.
///
/// @param use true to enable progress reporting, false to disable it
void use_progress(bool use) {
    progress_ = use;
    if (!use) {
        // Nothing else to set up when progress reporting is disabled.
        return;
    }
    Corpus2::global_timer().register_signal_handler();
}
bool load_more_operators(const std::string &filename);
bool load_operator_string(const std::string &op_string);
......@@ -66,6 +75,7 @@ private:
std::vector< boost::shared_ptr<Wccl::FunctionalOperator> > ops_;
std::vector< std::string > op_names_;
int token_idx;
bool progress_;
};
bool Runner::load_more_operators(const std::string& filename)
......@@ -219,8 +229,13 @@ void Runner::do_stream(std::istream& is, bool first)
{
Corpus2::XcesReader xr(tagset_, is);
Corpus2::Sentence::Ptr s;
Corpus2::TokenTimer& timer = Corpus2::global_timer();
while ((s = xr.get_next_sentence())) {
do_sentence(s);
timer.count_sentence(*s);
if (progress_) {
timer.check_slice();
}
std::cout << "\n";
if (first) break;
}
......@@ -229,7 +244,7 @@ void Runner::do_stream(std::istream& is, bool first)
int main(int argc, char** argv)
{
std::string tagset_load = "kipi";
bool first = false;
bool first = false, progress = false;
std::vector<std::string> corpora_files, files, operator_strings;
bool corpus_stdin = false;
using boost::program_options::value;
......@@ -262,6 +277,8 @@ int main(int argc, char** argv)
"Output operator variables")
("output-header,H", value(&output_header),
"Output table header")
("progress,p", value(&progress)->zero_tokens(),
"Show progress info")
("help,h", "Show help")
;
boost::program_options::variables_map vm;
......@@ -303,6 +320,7 @@ int main(int argc, char** argv)
try {
const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_load);
Runner runner(tagset);
runner.use_progress(progress);
foreach (const std::string& f, operator_strings) {
if (boost::algorithm::ends_with(f, ".ccl")) {
size_t sz = runner.operators().size();
......@@ -328,6 +346,10 @@ int main(int argc, char** argv)
if (corpus_stdin) {
runner.do_stream(std::cin, first);
}
if (progress) {
Corpus2::TokenTimer& timer = Corpus2::global_timer();
timer.stats();
}
}
} catch (PwrNlp::PwrNlpError& e) {
std::cerr << e.info() << std::endl;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment