From bcfed01572d7a76072d78d90c457274de20656d2 Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Sat, 26 Feb 2011 10:35:17 +0100
Subject: [PATCH] expand out-of-chunk token test and handle a <chunk> start tag following out-of-chunk tokens

---
 libcorpus2/io/xmlreader.cpp |  9 ++++-----
 tests/io.cpp                | 12 ++++++++++++
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/libcorpus2/io/xmlreader.cpp b/libcorpus2/io/xmlreader.cpp
index 0af9d16..5ce99f2 100644
--- a/libcorpus2/io/xmlreader.cpp
+++ b/libcorpus2/io/xmlreader.cpp
@@ -98,6 +98,10 @@ void XmlReader::on_start_element(const Glib::ustring &name,
 		//nop
 	} else if (process_start_element(name, attributes)) {
 		//nop
+	} else if (out_of_chunk_ && state_ == STATE_SENTENCE && name == "chunk") {
+		finish_sentence();
+		out_of_chunk_ = false;
+		start_chunk(attributes);
 	} else if (warn_on_unexpected_) {
 		std::cerr << "Unexpected tag <" << name << "> on line ";
 		std::cerr << this->context_->input->line << " (" << state_ << ")\n";
@@ -118,13 +122,8 @@ bool XmlReader::process_end_element(const Glib::ustring & /*name*/)
 
 void XmlReader::start_chunk(const AttributeList& attributes)
 {
-	if (out_of_chunk_) {
-		finish_sentence();
-		out_of_chunk_ = false;
-	}
 	std::string type = get_type_from_attributes(attributes);
 	chunk_ = boost::make_shared<Chunk>();
-
 	if (type == "s") {
 		// top-level chunk is a sentence
 		start_sentence(attributes);
diff --git a/tests/io.cpp b/tests/io.cpp
index 5ef5c85..fe6c65d 100644
--- a/tests/io.cpp
+++ b/tests/io.cpp
@@ -85,6 +85,14 @@ static char swiatopoglad_broken[] =
 "<orth>Uważam</orth>\n"
 "<lex disamb=\"1\"><base>uważać</base><ctag>fin:sg:pri:imperf</ctag></lex>\n"
 "</tok>\n"
+"<chunk id=\"ch51\" type=\"tok\">\n"
+"<chunk type=\"s\">\n"
+"<tok>\n"
+"<orth>Uważam</orth>\n"
+"<lex disamb=\"1\"><base>uważać</base><ctag>fin:sg:pri:imperf</ctag></lex>\n"
+"</tok>\n"
+"</chunk>\n"
+"</chunk>\n"
 "</chunkList>\n"
 "</cesAna>\n"
 ;
@@ -118,6 +126,10 @@ BOOST_AUTO_TEST_CASE( io_oo )
 	w->write_chunk(*chunk);
 	w->finish();
 	BOOST_CHECK_EQUAL(ss.str(), swiatopoglad);
+	chunk = xr.get_next_chunk();
+	BOOST_CHECK(chunk);
+	chunk = xr.get_next_chunk();
+	BOOST_CHECK(chunk);
 }
 
 
-- 
GitLab