From bcfed01572d7a76072d78d90c457274de20656d2 Mon Sep 17 00:00:00 2001 From: ilor <kailoran@gmail.com> Date: Sat, 26 Feb 2011 10:35:17 +0100 Subject: [PATCH] expand out-of-chunk token test and fix another issue --- libcorpus2/io/xmlreader.cpp | 9 ++++----- tests/io.cpp | 12 ++++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/libcorpus2/io/xmlreader.cpp b/libcorpus2/io/xmlreader.cpp index 0af9d16..5ce99f2 100644 --- a/libcorpus2/io/xmlreader.cpp +++ b/libcorpus2/io/xmlreader.cpp @@ -98,6 +98,10 @@ void XmlReader::on_start_element(const Glib::ustring &name, //nop } else if (process_start_element(name, attributes)) { //nop + } else if (out_of_chunk_ && state_ == STATE_SENTENCE && name == "chunk") { + finish_sentence(); + out_of_chunk_ = false; + start_chunk(attributes); } else if (warn_on_unexpected_) { std::cerr << "Unexpected tag <" << name << "> on line "; std::cerr << this->context_->input->line << " (" << state_ << ")\n"; @@ -118,13 +122,8 @@ bool XmlReader::process_end_element(const Glib::ustring & /*name*/) void XmlReader::start_chunk(const AttributeList& attributes) { - if (out_of_chunk_) { - finish_sentence(); - out_of_chunk_ = false; - } std::string type = get_type_from_attributes(attributes); chunk_ = boost::make_shared<Chunk>(); - if (type == "s") { // top-level chunk is a sentence start_sentence(attributes); diff --git a/tests/io.cpp b/tests/io.cpp index 5ef5c85..fe6c65d 100644 --- a/tests/io.cpp +++ b/tests/io.cpp @@ -85,6 +85,14 @@ static char swiatopoglad_broken[] = "<orth>Uważam</orth>\n" "<lex disamb=\"1\"><base>uważać</base><ctag>fin:sg:pri:imperf</ctag></lex>\n" "</tok>\n" +"<chunk id=\"ch51\" type=\"tok\">\n" +"<chunk type=\"s\">\n" +"<tok>\n" +"<orth>Uważam</orth>\n" +"<lex disamb=\"1\"><base>uważać</base><ctag>fin:sg:pri:imperf</ctag></lex>\n" +"</tok>\n" +"</chunk>\n" +"</chunk>\n" "</chunkList>\n" "</cesAna>\n" ; @@ -118,6 +126,10 @@ BOOST_AUTO_TEST_CASE( io_oo ) w->write_chunk(*chunk); w->finish(); BOOST_CHECK_EQUAL(ss.str(), swiatopoglad); + chunk = xr.get_next_chunk(); + BOOST_CHECK(chunk); + chunk = xr.get_next_chunk(); + BOOST_CHECK(chunk); } -- GitLab