From 4dbd3b95c15a3881d1bdbf9c2c4c1ef82c72976e Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Wed, 27 Apr 2011 12:44:38 +0200
Subject: [PATCH] fix wccl-match and wccl-rules to use the ann reader option
 instead of manually wrapping sentences with annotatedsentences

---
 wccl-apps/wccl-match.cpp | 16 ++++++++++++----
 wccl-apps/wccl-rules.cpp | 16 ++++++++++++----
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/wccl-apps/wccl-match.cpp b/wccl-apps/wccl-match.cpp
index 3ef315c..2daa3b9 100644
--- a/wccl-apps/wccl-match.cpp
+++ b/wccl-apps/wccl-match.cpp
@@ -83,7 +83,12 @@ void MatchRunner::apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader,
 	while (boost::shared_ptr<Corpus2::Chunk> c = reader->get_next_chunk()) {
 		foreach (boost::shared_ptr<Corpus2::Sentence>& s, c->sentences()) {
 			boost::shared_ptr<Corpus2::AnnotatedSentence> as;
-			as = Corpus2::AnnotatedSentence::wrap_sentence(s);
+			as = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(s);
+			if (!as) {
+				std::cerr << "Did not get an AnnotatedSentence from reader, "
+					"'ann' option broken?\n";
+				return;
+			}
 
 			foreach (const boost::shared_ptr<Wccl::MatchRule>& r, rules_) {
 				r->apply(as);
@@ -93,9 +98,9 @@ void MatchRunner::apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader,
 			if (progress) {
 				timer.check_slice();
 			}
-			writer->write_sentence(*as);
+			//writer->write_sentence(*as);
 		}
-		//writer->write_chunk(*c);
+		writer->write_chunk(*c);
 	}
 	if (progress) {
 		timer.stats();
@@ -107,7 +112,8 @@ void usage(char* name)
 	std::cerr << "This program runs WCCL match rules.\n";
 	std::cerr << "Usage " << name << " [OPTIONS] FILES\n"
 		<< "Files ending with .xml are treated as corpora, otherwise \n"
-		<< "as CCL files. Use - to read corpus from stdin (as with -I)\n";
+		<< "as CCL files. Use - to read corpus from stdin (as with -I)\n"
+		<< "Note: the ann option is implied on all input formats\n";
 }
 
 int main(int argc, char** argv)
@@ -208,10 +214,12 @@ int main(int argc, char** argv)
 			boost::shared_ptr<Corpus2::TokenReader> reader;
 			foreach (std::string cf, corpora_files) {
 				reader = Corpus2::TokenReader::create_path_reader(input_format, tagset, cf);
+				reader->set_option("ann");
 				runner.apply_rules(reader, writer);
 			}
 			if (corpus_stdin) {
 				reader = Corpus2::TokenReader::create_stream_reader(input_format, tagset, std::cin);
+				reader->set_option("ann");
 				runner.apply_rules(reader, writer);
 			}
 		}
diff --git a/wccl-apps/wccl-rules.cpp b/wccl-apps/wccl-rules.cpp
index c8119c9..00fa7e2 100644
--- a/wccl-apps/wccl-rules.cpp
+++ b/wccl-apps/wccl-rules.cpp
@@ -80,7 +80,12 @@ void apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader,
 	while (boost::shared_ptr<Corpus2::Chunk> c = reader->get_next_chunk()) {
 		foreach (boost::shared_ptr<Corpus2::Sentence>& s, c->sentences()) {
 			boost::shared_ptr<Corpus2::AnnotatedSentence> as;
-			as = Corpus2::AnnotatedSentence::wrap_sentence(s);
+			as = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(s);
+			if (!as) {
+				std::cerr << "Did not get an AnnotatedSentence from reader, "
+					"'ann' option broken?\n";
+				return;
+			}
 			if (opts.until_done) {
 				rules.execute_until_done(as, opts.until_done_iterations);
 			} else {
@@ -91,9 +96,9 @@ void apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader,
 				timer.check_slice();
 			}
 			if (opts.first) break;
-			writer->write_sentence(*as);
+			//writer->write_sentence(*as);
 		}
-		//writer->write_chunk(*c);
+		writer->write_chunk(*c);
 		if (opts.first) break;
 	}
 	if (progress) {
@@ -106,7 +111,8 @@ void usage(char* name)
 	std::cerr << "This program runs WCCL disambiguation rules.\n";
 	std::cerr << "Usage " << name << " [OPTIONS] FILES\n"
 		<< "Files ending with .xml are treated as corpora, otherwise \n"
-		<< "as CCL files. Use - to read corpus from stdin (as with -I)\n";
+		<< "as CCL files. Use - to read corpus from stdin (as with -I)\n"
+		<< "Note: the ann option is implied on all input formats\n";
 }
 
 
@@ -215,10 +221,12 @@ int main(int argc, char** argv)
 			boost::shared_ptr<Corpus2::TokenReader> reader;
 			foreach (const std::string& f, corpora_files) {
 				reader = Corpus2::TokenReader::create_path_reader(input_format, tagset, f);
+				reader->set_option("ann");
 				apply_rules(reader, writer, rules, opts);
 			}
 			if (corpus_stdin) {
 				reader = Corpus2::TokenReader::create_stream_reader(input_format, tagset, std::cin);
+				reader->set_option("ann");
 				apply_rules(reader, writer, rules, opts);
 			}
 		}
-- 
GitLab