From 05a68caf4c4f9883863eee441adbb0804c847b68 Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Thu, 21 Apr 2011 18:42:02 +0200
Subject: [PATCH] add ,ann option in readers that makes them use
 AnnotatedSentence

---
 libcorpus2/io/fastxces.cpp  |  2 +-
 libcorpus2/io/reader.cpp    | 17 ++++++++++++++++-
 libcorpus2/io/reader.h      |  5 +++++
 libcorpus2/io/rft.cpp       |  2 +-
 libcorpus2/io/xmlreader.cpp |  2 +-
 5 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/libcorpus2/io/fastxces.cpp b/libcorpus2/io/fastxces.cpp
index 25d4a8c..a4813bb 100644
--- a/libcorpus2/io/fastxces.cpp
+++ b/libcorpus2/io/fastxces.cpp
@@ -104,7 +104,7 @@ private:
 			if (!sent_->empty()) {
 				chunk_->append(sent_);
 			}
-			sent_ = boost::make_shared<Sentence>();
+			sent_ = base_reader_.make_sentence();
 			tok_->set_wa(PwrNlp::Whitespace::Newline);
 		} else {
 			if (!chunk_->empty()) {
diff --git a/libcorpus2/io/reader.cpp b/libcorpus2/io/reader.cpp
index 0c75d9a..51777bf 100644
--- a/libcorpus2/io/reader.cpp
+++ b/libcorpus2/io/reader.cpp
@@ -17,12 +17,14 @@ or FITNESS FOR A PARTICULAR PURPOSE.
 #include <libcorpus2/io/reader.h>
 #include <boost/make_shared.hpp>
 #include <boost/algorithm/string.hpp>
+#include <libcorpus2/ann/annotatedsentence.h>
 #include <sstream>
 
 namespace Corpus2 {
 
 TokenReader::TokenReader(const Tagset& tagset)
-	: tagset_(tagset), tag_parse_mode_(Tagset::ParseDefault)
+	: tagset_(tagset), tag_parse_mode_(Tagset::ParseDefault),
+	use_annotated_sentences_(false) // off by default; set_option("ann") turns it on
 {
 }
 
@@ -41,6 +43,8 @@ void TokenReader::set_option(const std::string &option)
 	} else if (option == "strict") {
 		tag_parse_mode_ = static_cast<Tagset::ParseMode>(
 			Tagset::ParseDefault | (tag_parse_mode_ & Tagset::ParseFailWithIgn));
+	} else if (option == "ann") {
+		use_annotated_sentences_ = true;
 	} else {
 		throw Corpus2Error("Unknown option passed to reader: " + option);
 	}
@@ -56,11 +60,22 @@ std::string TokenReader::get_option(const std::string &option) const
 	} else if (option == "strict") {
 		return (tag_parse_mode_ & ~Tagset::ParseFailWithIgn)
 			== Tagset::ParseDefault ? option : "";
+	} else if (option == "ann") {
+		return use_annotated_sentences_ ? option : "";
 	} else {
 		return "unknown";
 	}
 }
 
+boost::shared_ptr<Sentence> TokenReader::make_sentence() const // sentence factory used by the readers
+{
+	if (use_annotated_sentences_) { // true once set_option("ann") has been called
+		return boost::make_shared<AnnotatedSentence>();
+	} else {
+		return boost::make_shared<Sentence>(); // default: the flag is false after construction
+	}
+}
+
 boost::shared_ptr<TokenReader> TokenReader::create_path_reader(
 	const std::string& class_id_params,
 	const Tagset& tagset,
diff --git a/libcorpus2/io/reader.h b/libcorpus2/io/reader.h
index b0b3a8b..4f52aac 100644
--- a/libcorpus2/io/reader.h
+++ b/libcorpus2/io/reader.h
@@ -152,12 +152,17 @@ public:
 		tag_parse_mode_ = mode;
 	}
 
+	boost::shared_ptr<Sentence> make_sentence() const; ///< New Sentence, or AnnotatedSentence when the "ann" option is set
+
 private:
 	/// Tagset used by the Reader
 	const Tagset& tagset_;
 
 	/// Tag parse mode
 	Tagset::ParseMode tag_parse_mode_;
+
+	/// Flag to force creation of sentences as AnnotatedSentences
+	bool use_annotated_sentences_;
 };
 
 namespace detail {
diff --git a/libcorpus2/io/rft.cpp b/libcorpus2/io/rft.cpp
index b35ed86..73003a6 100644
--- a/libcorpus2/io/rft.cpp
+++ b/libcorpus2/io/rft.cpp
@@ -148,7 +148,7 @@ Sentence::Ptr RftReader::actual_next_sentence()
 				t->set_orth(UnicodeString::fromUTF8(orth));
 				t->set_wa(PwrNlp::Whitespace::Space);
 				if (!s) {
-					s = boost::make_shared<Sentence>();
+					s = make_sentence();
 					t->set_wa(PwrNlp::Whitespace::Newline);
 				}
 				t->add_lexeme(Lexeme(t->orth(), tag));
diff --git a/libcorpus2/io/xmlreader.cpp b/libcorpus2/io/xmlreader.cpp
index 9fd41fd..5b34b5c 100644
--- a/libcorpus2/io/xmlreader.cpp
+++ b/libcorpus2/io/xmlreader.cpp
@@ -144,7 +144,7 @@ void XmlReader::start_sentence(const AttributeList &attributes)
 	if (type != "s") {
 		throw XcesError("Sub level <chunk> not type=\"s\"");
 	}
-	sent_ = boost::make_shared<Corpus2::Sentence>();
+	sent_ = base_reader_.make_sentence();
 	state_ = STATE_SENTENCE;
 }
 
-- 
GitLab