Skip to content
Snippets Groups Projects
Commit bcfed015 authored by ilor
Browse files

expand out-of-chunk token test and fix another issue

parent 5c66f5fa
Branches
No related tags found
No related merge requests found
......@@ -98,6 +98,10 @@ void XmlReader::on_start_element(const Glib::ustring &name,
//nop
} else if (process_start_element(name, attributes)) {
//nop
} else if (out_of_chunk_ && state_ == STATE_SENTENCE && name == "chunk") {
finish_sentence();
out_of_chunk_ = false;
start_chunk(attributes);
} else if (warn_on_unexpected_) {
std::cerr << "Unexpected tag <" << name << "> on line ";
std::cerr << this->context_->input->line << " (" << state_ << ")\n";
......@@ -118,13 +122,8 @@ bool XmlReader::process_end_element(const Glib::ustring & /*name*/)
void XmlReader::start_chunk(const AttributeList& attributes)
{
if (out_of_chunk_) {
finish_sentence();
out_of_chunk_ = false;
}
std::string type = get_type_from_attributes(attributes);
chunk_ = boost::make_shared<Chunk>();
if (type == "s") {
// top-level chunk is a sentence
start_sentence(attributes);
......
......@@ -85,6 +85,14 @@ static char swiatopoglad_broken[] =
"<orth>Uważam</orth>\n"
"<lex disamb=\"1\"><base>uważać</base><ctag>fin:sg:pri:imperf</ctag></lex>\n"
"</tok>\n"
"<chunk id=\"ch51\" type=\"tok\">\n"
"<chunk type=\"s\">\n"
"<tok>\n"
"<orth>Uważam</orth>\n"
"<lex disamb=\"1\"><base>uważać</base><ctag>fin:sg:pri:imperf</ctag></lex>\n"
"</tok>\n"
"</chunk>\n"
"</chunk>\n"
"</chunkList>\n"
"</cesAna>\n"
;
......@@ -118,6 +126,10 @@ BOOST_AUTO_TEST_CASE( io_oo )
w->write_chunk(*chunk);
w->finish();
BOOST_CHECK_EQUAL(ss.str(), swiatopoglad);
chunk = xr.get_next_chunk();
BOOST_CHECK(chunk);
chunk = xr.get_next_chunk();
BOOST_CHECK(chunk);
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment