From 4dbd3b95c15a3881d1bdbf9c2c4c1ef82c72976e Mon Sep 17 00:00:00 2001 From: ilor <kailoran@gmail.com> Date: Wed, 27 Apr 2011 12:44:38 +0200 Subject: [PATCH] fix wccl-match and wccl-rules to use the ann reader option instead of manually wrapping sentences with annotatedsentences --- wccl-apps/wccl-match.cpp | 16 ++++++++++++---- wccl-apps/wccl-rules.cpp | 16 ++++++++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/wccl-apps/wccl-match.cpp b/wccl-apps/wccl-match.cpp index 3ef315c..2daa3b9 100644 --- a/wccl-apps/wccl-match.cpp +++ b/wccl-apps/wccl-match.cpp @@ -83,7 +83,12 @@ void MatchRunner::apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader, while (boost::shared_ptr<Corpus2::Chunk> c = reader->get_next_chunk()) { foreach (boost::shared_ptr<Corpus2::Sentence>& s, c->sentences()) { boost::shared_ptr<Corpus2::AnnotatedSentence> as; - as = Corpus2::AnnotatedSentence::wrap_sentence(s); + as = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(s); + if (!as) { + std::cerr << "Did not get an AnnotatedSentence from reader," + "'ann'' option broken?\n"; + return; + } foreach (const boost::shared_ptr<Wccl::MatchRule>& r, rules_) { r->apply(as); @@ -93,9 +98,9 @@ void MatchRunner::apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader, if (progress) { timer.check_slice(); } - writer->write_sentence(*as); + //writer->write_sentence(*as); } - //writer->write_chunk(*c); + writer->write_chunk(*c); } if (progress) { timer.stats(); @@ -107,7 +112,8 @@ void usage(char* name) std::cerr << "This program runs WCCL match rules.\n"; std::cerr << "Usage " << name << " [OPTIONS] FILES\n" << "Files ending with .xml are treated as corpora, otherwise \n" - << "as CCL files. Use - to read corpus from stdin (as with -I)\n"; + << "as CCL files. Use - to read corpus from stdin (as with -I)\n" + << "Note: the ann option is implied on all input formats\n"; } int main(int argc, char** argv) @@ -208,10 +214,12 @@ int main(int argc, char** argv) boost::shared_ptr<Corpus2::TokenReader> reader; foreach (std::string cf, corpora_files) { reader = Corpus2::TokenReader::create_path_reader(input_format, tagset, cf); + reader->set_option("ann"); runner.apply_rules(reader, writer); } if (corpus_stdin) { reader = Corpus2::TokenReader::create_stream_reader(input_format, tagset, std::cin); + reader->set_option("ann"); runner.apply_rules(reader, writer); } } diff --git a/wccl-apps/wccl-rules.cpp b/wccl-apps/wccl-rules.cpp index c8119c9..00fa7e2 100644 --- a/wccl-apps/wccl-rules.cpp +++ b/wccl-apps/wccl-rules.cpp @@ -80,7 +80,12 @@ void apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader, while (boost::shared_ptr<Corpus2::Chunk> c = reader->get_next_chunk()) { foreach (boost::shared_ptr<Corpus2::Sentence>& s, c->sentences()) { boost::shared_ptr<Corpus2::AnnotatedSentence> as; - as = Corpus2::AnnotatedSentence::wrap_sentence(s); + as = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(s); + if (!as) { + std::cerr << "Did not get an AnnotatedSentence from reader," + "'ann'' option broken?\n"; + return; + } if (opts.until_done) { rules.execute_until_done(as, opts.until_done_iterations); } else { @@ -91,9 +96,9 @@ void apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader, timer.check_slice(); } if (opts.first) break; - writer->write_sentence(*as); + //writer->write_sentence(*as); } - //writer->write_chunk(*c); + writer->write_chunk(*c); if (opts.first) break; } if (progress) { @@ -106,7 +111,8 @@ void usage(char* name) std::cerr << "This program runs WCCL disambiguation rules.\n"; std::cerr << "Usage " << name << " [OPTIONS] FILES\n" << "Files ending with .xml are treated as corpora, otherwise \n" - << "as CCL files. Use - to read corpus from stdin (as with -I)\n"; + << "as CCL files. Use - to read corpus from stdin (as with -I)\n" + << "Note: the ann option is implied on all input formats\n"; } @@ -215,10 +221,12 @@ int main(int argc, char** argv) boost::shared_ptr<Corpus2::TokenReader> reader; foreach (const std::string& f, corpora_files) { reader = Corpus2::TokenReader::create_path_reader(input_format, tagset, f); + reader->set_option("ann"); apply_rules(reader, writer, rules, opts); } if (corpus_stdin) { reader = Corpus2::TokenReader::create_stream_reader(input_format, tagset, std::cin); + reader->set_option("ann"); apply_rules(reader, writer, rules, opts); } } -- GitLab