diff --git a/libcorpus2/io/iob-chan.cpp b/libcorpus2/io/iob-chan.cpp index d231b0f45f76aedb1363a9c1e58cb9a760fc0b82..dd07211fc94d3ef0456ef22d020514c08eb033c2 100644 --- a/libcorpus2/io/iob-chan.cpp +++ b/libcorpus2/io/iob-chan.cpp @@ -141,12 +141,12 @@ Sentence::Ptr IobChanReader::actual_next_sentence() while (is().good()) { std::getline(is(), line); if (line.empty()) { - return s; + break; } std::vector<std::string> spl; boost::algorithm::split(spl, line, boost::is_any_of("\t")); if (spl.size() != 4) { - std::cerr << "Invalid line: " << line << "\n"; + std::cerr << "Invalid line: " << line << "(" << spl.size() << ")\n"; } else { const std::string& orth = spl[0]; const std::string& lemma = spl[1]; @@ -160,6 +160,10 @@ Sentence::Ptr IobChanReader::actual_next_sentence() if (disamb_) { t->lexemes().back().set_disamb(true); } + if (!s) { + s = boost::make_shared<AnnotatedSentence>(); + t->set_wa(PwrNlp::Whitespace::Newline); + } s->append(t); std::vector<std::string> annsplit; boost::algorithm::split(annsplit, anns, boost::is_any_of(",")); @@ -184,10 +188,11 @@ Sentence::Ptr IobChanReader::actual_next_sentence() } } } - foreach (const AnnotatedSentence::chan_map_t::value_type& v, s->all_channels()) { - s->get_channel(v.first).make_segments_from_iob(); + if (s) { + foreach (const AnnotatedSentence::chan_map_t::value_type& v, s->all_channels()) { + s->get_channel(v.first).make_segments_from_iob(); + } } - return s; }