From 9b859ab883b9f0fb59caa3f9dcf0c1dfa091f8ce Mon Sep 17 00:00:00 2001 From: ilor <kailoran@gmail.com> Date: Thu, 28 Apr 2011 11:14:27 +0200 Subject: [PATCH] add --progress/-p in wccl-run, clean up progress output in wccl-match and wccl-rules --- wccl-apps/wccl-match.cpp | 23 +++++++++++++++++------ wccl-apps/wccl-rules.cpp | 4 ++++ wccl-apps/wccl-run.cpp | 26 ++++++++++++++++++++++++-- 3 files changed, 45 insertions(+), 8 deletions(-) diff --git a/wccl-apps/wccl-match.cpp b/wccl-apps/wccl-match.cpp index 2daa3b9..68f7efb 100644 --- a/wccl-apps/wccl-match.cpp +++ b/wccl-apps/wccl-match.cpp @@ -17,7 +17,6 @@ namespace { bool quiet = false; - bool progress = false; struct options { bool first; @@ -30,10 +29,18 @@ class MatchRunner { public: MatchRunner(const Corpus2::Tagset& tagset) - : tagset_(tagset), parser_(tagset_) + : tagset_(tagset), parser_(tagset_), progress_(false) { } + void use_progress(bool use) { + progress_ = use; + if (use) { + Corpus2::TokenTimer& timer = Corpus2::global_timer(); + timer.register_signal_handler(); + } + } + bool load_more_rules(const std::string &filename); bool load_operator_string(const std::string &op_string); @@ -50,6 +57,7 @@ private: Wccl::Parser parser_; std::vector<std::string> rule_names_; std::vector<boost::shared_ptr<Wccl::MatchRule> > rules_; + bool progress_; }; bool MatchRunner::load_more_rules(const std::string& filename) @@ -95,16 +103,13 @@ void MatchRunner::apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader, } timer.count_sentence(*as); - if (progress) { + if (progress_) { timer.check_slice(); } //writer->write_sentence(*as); } writer->write_chunk(*c); } - if (progress) { - timer.stats(); - } } void usage(char* name) @@ -121,6 +126,7 @@ int main(int argc, char** argv) std::string tagset_load = "kipi"; std::string input_format; std::string output_format; + bool progress = false; options opts; opts.first = false; opts.until_done = false; @@ -203,6 +209,7 @@ int main(int argc, char** argv) try { const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_load); MatchRunner runner(tagset); + runner.use_progress(progress); foreach (const std::string& file, ccl_files) { runner.load_more_rules(file); } @@ -222,6 +229,10 @@ int main(int argc, char** argv) reader->set_option("ann"); runner.apply_rules(reader, writer); } + if (progress) { + Corpus2::TokenTimer& timer = Corpus2::global_timer(); + timer.stats(); + } } } catch (PwrNlp::PwrNlpError& e) { std::cerr << e.info() << std::endl; diff --git a/wccl-apps/wccl-rules.cpp b/wccl-apps/wccl-rules.cpp index 00fa7e2..e357f72 100644 --- a/wccl-apps/wccl-rules.cpp +++ b/wccl-apps/wccl-rules.cpp @@ -229,6 +229,10 @@ int main(int argc, char** argv) reader->set_option("ann"); apply_rules(reader, writer, rules, opts); } + if (progress) { + Corpus2::TokenTimer& timer = Corpus2::global_timer(); + timer.stats(); + } } } catch (PwrNlp::PwrNlpError& e) { std::cerr << e.info() << std::endl; diff --git a/wccl-apps/wccl-run.cpp b/wccl-apps/wccl-run.cpp index 7df353c..9dfd594 100644 --- a/wccl-apps/wccl-run.cpp +++ b/wccl-apps/wccl-run.cpp @@ -7,6 +7,7 @@ #include <libwccl/values/strset.h> #include <libwccl/parser/Parser.h> #include <libcorpus2/tagsetmanager.h> +#include <libcorpus2/util/tokentimer.h> #include <boost/bind.hpp> #include <boost/algorithm/string.hpp> @@ -43,10 +44,18 @@ class Runner { public: Runner(const Corpus2::Tagset& tagset) - : tagset_(tagset), parser_(tagset_), token_idx(0) + : tagset_(tagset), parser_(tagset_), token_idx(0), progress_(false) { } + void use_progress(bool use) { + progress_ = use; + if (use) { + Corpus2::TokenTimer& timer = Corpus2::global_timer(); + timer.register_signal_handler(); + } + } + bool load_more_operators(const std::string &filename); bool load_operator_string(const std::string &op_string); @@ -66,6 +75,7 @@ private: std::vector< boost::shared_ptr<Wccl::FunctionalOperator> > ops_; std::vector< std::string > op_names_; int token_idx; + bool progress_; }; bool Runner::load_more_operators(const std::string& filename) @@ -219,8 +229,13 @@ void Runner::do_stream(std::istream& is, bool first) { Corpus2::XcesReader xr(tagset_, is); Corpus2::Sentence::Ptr s; + Corpus2::TokenTimer& timer = Corpus2::global_timer(); while ((s = xr.get_next_sentence())) { do_sentence(s); + timer.count_sentence(*s); + if (progress_) { + timer.check_slice(); + } std::cout << "\n"; if (first) break; } @@ -229,7 +244,7 @@ void Runner::do_stream(std::istream& is, bool first) int main(int argc, char** argv) { std::string tagset_load = "kipi"; - bool first = false; + bool first = false, progress = false; std::vector<std::string> corpora_files, files, operator_strings; bool corpus_stdin = false; using boost::program_options::value; @@ -262,6 +277,8 @@ int main(int argc, char** argv) "Output operator variables") ("output-header,H", value(&output_header), "Output table header") + ("progress,p", value(&progress)->zero_tokens(), + "Show progress info") ("help,h", "Show help") ; boost::program_options::variables_map vm; @@ -303,6 +320,7 @@ int main(int argc, char** argv) try { const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_load); Runner runner(tagset); + runner.use_progress(progress); foreach (const std::string& f, operator_strings) { if (boost::algorithm::ends_with(f, ".ccl")) { size_t sz = runner.operators().size(); @@ -328,6 +346,10 @@ int main(int argc, char** argv) if (corpus_stdin) { runner.do_stream(std::cin, first); } + if (progress) { + Corpus2::TokenTimer& timer = Corpus2::global_timer(); + timer.stats(); + } } } catch (PwrNlp::PwrNlpError& e) { std::cerr << e.info() << std::endl; -- GitLab