diff --git a/wccl-apps/wccl-match.cpp b/wccl-apps/wccl-match.cpp index 3ef315cf14d60c5a7d97955405564e0e35fe001e..2daa3b9027ba8420f11f40d86c7287220235591d 100644 --- a/wccl-apps/wccl-match.cpp +++ b/wccl-apps/wccl-match.cpp @@ -83,7 +83,12 @@ void MatchRunner::apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader, while (boost::shared_ptr<Corpus2::Chunk> c = reader->get_next_chunk()) { foreach (boost::shared_ptr<Corpus2::Sentence>& s, c->sentences()) { boost::shared_ptr<Corpus2::AnnotatedSentence> as; - as = Corpus2::AnnotatedSentence::wrap_sentence(s); + as = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(s); + if (!as) { + std::cerr << "Did not get an AnnotatedSentence from reader," + "'ann'' option broken?\n"; + return; + } foreach (const boost::shared_ptr<Wccl::MatchRule>& r, rules_) { r->apply(as); @@ -93,9 +98,9 @@ void MatchRunner::apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader, if (progress) { timer.check_slice(); } - writer->write_sentence(*as); + //writer->write_sentence(*as); } - //writer->write_chunk(*c); + writer->write_chunk(*c); } if (progress) { timer.stats(); @@ -107,7 +112,8 @@ void usage(char* name) std::cerr << "This program runs WCCL match rules.\n"; std::cerr << "Usage " << name << " [OPTIONS] FILES\n" << "Files ending with .xml are treated as corpora, otherwise \n" - << "as CCL files. Use - to read corpus from stdin (as with -I)\n"; + << "as CCL files. Use - to read corpus from stdin (as with -I)\n" + << "Note: the ann option is implied on all input formats\n"; } int main(int argc, char** argv) @@ -208,10 +214,12 @@ int main(int argc, char** argv) boost::shared_ptr<Corpus2::TokenReader> reader; foreach (std::string cf, corpora_files) { reader = Corpus2::TokenReader::create_path_reader(input_format, tagset, cf); + reader->set_option("ann"); runner.apply_rules(reader, writer); } if (corpus_stdin) { reader = Corpus2::TokenReader::create_stream_reader(input_format, tagset, std::cin); + reader->set_option("ann"); runner.apply_rules(reader, writer); } } diff --git a/wccl-apps/wccl-rules.cpp b/wccl-apps/wccl-rules.cpp index c8119c96b7f8bbec0a2c2fcb08a0d275870cbe72..00fa7e2dc4e8c7c0cb1a270f1c5f3bd23ffd39af 100644 --- a/wccl-apps/wccl-rules.cpp +++ b/wccl-apps/wccl-rules.cpp @@ -80,7 +80,12 @@ void apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader, while (boost::shared_ptr<Corpus2::Chunk> c = reader->get_next_chunk()) { foreach (boost::shared_ptr<Corpus2::Sentence>& s, c->sentences()) { boost::shared_ptr<Corpus2::AnnotatedSentence> as; - as = Corpus2::AnnotatedSentence::wrap_sentence(s); + as = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(s); + if (!as) { + std::cerr << "Did not get an AnnotatedSentence from reader," + "'ann'' option broken?\n"; + return; + } if (opts.until_done) { rules.execute_until_done(as, opts.until_done_iterations); } else { @@ -91,9 +96,9 @@ void apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader, timer.check_slice(); } if (opts.first) break; - writer->write_sentence(*as); + //writer->write_sentence(*as); } - //writer->write_chunk(*c); + writer->write_chunk(*c); if (opts.first) break; } if (progress) { @@ -106,7 +111,8 @@ void usage(char* name) std::cerr << "This program runs WCCL disambiguation rules.\n"; std::cerr << "Usage " << name << " [OPTIONS] FILES\n" << "Files ending with .xml are treated as corpora, otherwise \n" - << "as CCL files. Use - to read corpus from stdin (as with -I)\n"; + << "as CCL files. Use - to read corpus from stdin (as with -I)\n" + << "Note: the ann option is implied on all input formats\n"; } @@ -215,10 +221,12 @@ int main(int argc, char** argv) boost::shared_ptr<Corpus2::TokenReader> reader; foreach (const std::string& f, corpora_files) { reader = Corpus2::TokenReader::create_path_reader(input_format, tagset, f); + reader->set_option("ann"); apply_rules(reader, writer, rules, opts); } if (corpus_stdin) { reader = Corpus2::TokenReader::create_stream_reader(input_format, tagset, std::cin); + reader->set_option("ann"); apply_rules(reader, writer, rules, opts); } }