From 071f2b1b234eda8ce153913940dae348696f8b91 Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Tue, 19 Apr 2011 15:37:21 +0200
Subject: [PATCH] fix xces/ccl ,disamb_only processing

---
 libcorpus2/io/xmlreader.cpp | 6 +++++-
 libcorpus2/io/xmlreader.h   | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/libcorpus2/io/xmlreader.cpp b/libcorpus2/io/xmlreader.cpp
index 85ba6e9..7d5ed6a 100644
--- a/libcorpus2/io/xmlreader.cpp
+++ b/libcorpus2/io/xmlreader.cpp
@@ -77,6 +77,8 @@ void XmlReader::on_start_element(const Glib::ustring &name,
 		state_ = STATE_TAG;
 		grab_characters_ = true;
 		clear_buf();
+	} else if (state_ == STATE_LEX_SKIP && (name == "lex" || name == "base" || name == "ctag")) {
+		// nop: ignore elements opened while in the lex-skip state set by start_lexeme()
 	} else if (name == "ns") {
 		wa_ = PwrNlp::Whitespace::None;
 	} else if (state_ == STATE_NONE && name == "tok") {
@@ -175,6 +177,8 @@ void XmlReader::start_lexeme(const AttributeList &attributes)
 		tok_->add_lexeme(Lexeme());
 		tok_->lexemes().back().set_disamb(is_disamb);
 		state_ = STATE_LEX;
+	} else {
+		state_ = STATE_LEX_SKIP;
 	}
 }
 
@@ -225,7 +229,7 @@ void XmlReader::on_end_element(const Glib::ustring &name)
 		tok_->lexemes().back().set_tag(tag);
 		grab_characters_ = false;
 		state_ = STATE_LEX;
-	} else if (state_ == STATE_LEX && name == "lex") {
+	} else if ((state_ == STATE_LEX || state_ == STATE_LEX_SKIP) && name == "lex") {
 		state_ = STATE_TOK;
 	} else if (state_ == STATE_TOK && name == "tok") {
 		finish_token();
diff --git a/libcorpus2/io/xmlreader.h b/libcorpus2/io/xmlreader.h
index 7b87571..d9234d4 100644
--- a/libcorpus2/io/xmlreader.h
+++ b/libcorpus2/io/xmlreader.h
@@ -84,6 +84,8 @@ protected:
 	static const int STATE_LEX = 5;
 	static const int STATE_LEMMA = 6;
 	static const int STATE_TAG = 7;
+	static const int STATE_LEX_SKIP = 8;
+
 	/// The state of the parser
 	int state_;
 
-- 
GitLab