From b000d45bd7c08d268495d3974cb3dd8fbdebac94 Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Mon, 11 Oct 2010 11:41:24 +0200
Subject: [PATCH] preliminary disamb_sh support in xces reader, bumps version
 to 0.0.2

---
 libcorpus2/CMakeLists.txt    |  2 +-
 libcorpus2/io/xcesreader.cpp | 27 +++++++++++++++++++--------
 libcorpus2/io/xcesreader.h   |  2 +-
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/libcorpus2/CMakeLists.txt b/libcorpus2/CMakeLists.txt
index 424099b..3cf1325 100644
--- a/libcorpus2/CMakeLists.txt
+++ b/libcorpus2/CMakeLists.txt
@@ -3,7 +3,7 @@ PROJECT(corpus2)
 
 set(corpus2_ver_major "0")
 set(corpus2_ver_minor "0")
-set(corpus2_ver_patch "1")
+set(corpus2_ver_patch "2")
 
 
 if(NOT LIBCORPUS2_SRC_DATA_DIR)
diff --git a/libcorpus2/io/xcesreader.cpp b/libcorpus2/io/xcesreader.cpp
index 03e8f8c..af32af0 100644
--- a/libcorpus2/io/xcesreader.cpp
+++ b/libcorpus2/io/xcesreader.cpp
@@ -9,7 +9,7 @@ class XcesReaderImpl : public BasicSaxParser
 {
 public:
 	XcesReaderImpl(const Tagset& tagset, std::deque<Chunk*>& obuf,
-			bool disamb_only);
+			bool disamb_only, bool disamb_sh);
 
 	~XcesReaderImpl();
 
@@ -37,12 +37,14 @@ protected:
 	std::deque<Chunk*>& obuf_;
 
 	bool disamb_only_;
+
+	bool disamb_sh_;
 };
 
 XcesReader::XcesReader(const Tagset& tagset, std::istream& is,
-		bool disamb_only)
+		bool disamb_only, bool disamb_sh)
 	: BufferedChunkReader(tagset), is_(is)
-	, impl_(new XcesReaderImpl(tagset, chunk_buf_, disamb_only))
+	, impl_(new XcesReaderImpl(tagset, chunk_buf_, disamb_only, disamb_sh))
 {
 }
 
@@ -64,11 +66,11 @@ void XcesReader::ensure_more()
 }
 
 XcesReaderImpl::XcesReaderImpl(const Tagset& tagset,
-		std::deque<Chunk*>& obuf, bool disamb_only)
+		std::deque<Chunk*>& obuf, bool disamb_only, bool disamb_sh)
 	: BasicSaxParser()
 	, tagset_(tagset), state_(XS_NONE), wa_(PwrNlp::Whitespace::Newline)
 	, sbuf_(), tok_(NULL), sent_(NULL), chunk_(NULL), obuf_(obuf)
-	, disamb_only_(disamb_only)
+	, disamb_only_(disamb_only), disamb_sh_(disamb_sh)
 {
 }
 
@@ -119,9 +121,18 @@ void XcesReaderImpl::on_start_element(const Glib::ustring &name,
 	} else if (state_ == XS_TOK && name == "lex") {
 		assert(tok_ != NULL);
 		bool is_disamb = false;
-		foreach (const Attribute& a, attributes) {
-			if (a.name == "disamb" && a.value == "1") {
-				is_disamb = true;
+		if (!disamb_sh_) {
+			foreach (const Attribute& a, attributes) {
+				if (a.name == "disamb" && a.value == "1") {
+					is_disamb = true;
+				}
+			}
+		} else {
+			is_disamb = true;
+			foreach (const Attribute& a, attributes) {
+				if (a.name == "disamb_sh" && a.value == "0") {
+					is_disamb = false;
+				}
 			}
 		}
 		if (!disamb_only_ || is_disamb) {
diff --git a/libcorpus2/io/xcesreader.h b/libcorpus2/io/xcesreader.h
index 2af10d7..457a8aa 100644
--- a/libcorpus2/io/xcesreader.h
+++ b/libcorpus2/io/xcesreader.h
@@ -15,7 +15,7 @@ class XcesReader : public BufferedChunkReader
 {
 public:
 	XcesReader(const Tagset& tagset, std::istream& is,
-			bool disamb_only = false);
+			bool disamb_only = false, bool disamb_sh = false);
 
 	~XcesReader();
 
-- 
GitLab