From 071f2b1b234eda8ce153913940dae348696f8b91 Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Tue, 19 Apr 2011 15:37:21 +0200
Subject: [PATCH] fix xces/ccl ,disamb_only processing

---
 libcorpus2/io/xmlreader.cpp | 6 +++++-
 libcorpus2/io/xmlreader.h   | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/libcorpus2/io/xmlreader.cpp b/libcorpus2/io/xmlreader.cpp
index 85ba6e9..7d5ed6a 100644
--- a/libcorpus2/io/xmlreader.cpp
+++ b/libcorpus2/io/xmlreader.cpp
@@ -77,6 +77,8 @@ void XmlReader::on_start_element(const Glib::ustring &name,
 		state_ = STATE_TAG;
 		grab_characters_ = true;
 		clear_buf();
+	} else if (state_ == STATE_LEX_SKIP && (name == "lex" || name == "base" || name == "ctag")) {
+		// nop: ignore the contents of a skipped <lex>; parentheses make the state check guard all three names
 	} else if (name == "ns") {
 		wa_ = PwrNlp::Whitespace::None;
 	} else if (state_ == STATE_NONE && name == "tok") {
@@ -175,6 +177,8 @@ void XmlReader::start_lexeme(const AttributeList &attributes)
 		tok_->add_lexeme(Lexeme());
 		tok_->lexemes().back().set_disamb(is_disamb);
 		state_ = STATE_LEX;
+	} else {
+		state_ = STATE_LEX_SKIP;
 	}
 }
 
@@ -225,7 +229,7 @@ void XmlReader::on_end_element(const Glib::ustring &name)
 		tok_->lexemes().back().set_tag(tag);
 		grab_characters_ = false;
 		state_ = STATE_LEX;
-	} else if (state_ == STATE_LEX && name == "lex") {
+	} else if ((state_ == STATE_LEX || state_ == STATE_LEX_SKIP) && name == "lex") {
 		state_ = STATE_TOK;
 	} else if (state_ == STATE_TOK && name == "tok") {
 		finish_token();
diff --git a/libcorpus2/io/xmlreader.h b/libcorpus2/io/xmlreader.h
index 7b87571..d9234d4 100644
--- a/libcorpus2/io/xmlreader.h
+++ b/libcorpus2/io/xmlreader.h
@@ -84,6 +84,8 @@ protected:
 	static const int STATE_LEX = 5;
 	static const int STATE_LEMMA = 6;
 	static const int STATE_TAG = 7;
+	static const int STATE_LEX_SKIP = 8; ///< entered by start_lexeme() when it does not add the lexeme to the token
+
 
 	/// The state of the parser
 	int state_;
-- 
GitLab