From b6b6351e18173d19bef591a6f893e6a5e3e1a254 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20K=C4=99dzia?= <Pawel.Kedzia@pwr.wroc.pl> Date: Tue, 15 Nov 2011 13:26:29 +0100 Subject: [PATCH] Sentence id handling in cclreader/writer --- libcorpus2/io/cclreader.cpp | 13 +++++++++++-- libcorpus2/io/cclwriter.cpp | 10 +++++++++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/libcorpus2/io/cclreader.cpp b/libcorpus2/io/cclreader.cpp index 49fcc03..f796eb2 100644 --- a/libcorpus2/io/cclreader.cpp +++ b/libcorpus2/io/cclreader.cpp @@ -140,9 +140,18 @@ void CclReaderImpl::start_chunk(const AttributeList& attributes) -void CclReaderImpl::start_sentence(const AttributeList& /*attributes*/) +void CclReaderImpl::start_sentence(const AttributeList &attributes) { - ann_sent_ = boost::make_shared<AnnotatedSentence>(); + // find sentence id + std::string id = ""; + foreach (const Attribute& a, attributes) { + if (a.name == "id") { + id = a.value; + break; + } + } + + ann_sent_ = boost::make_shared<AnnotatedSentence>(id); sent_ = ann_sent_; state_ = STATE_SENTENCE; } diff --git a/libcorpus2/io/cclwriter.cpp b/libcorpus2/io/cclwriter.cpp index 221d6a3..4909291 100644 --- a/libcorpus2/io/cclwriter.cpp +++ b/libcorpus2/io/cclwriter.cpp @@ -33,7 +33,15 @@ void CclWriter::write_sentence(const Sentence& s) void CclWriter::write_sentence_int(const Sentence &s) { const AnnotatedSentence* ann = dynamic_cast<const AnnotatedSentence*>(&s); - osi() << "<sentence>\n"; + + std::string id = s.id(); + if (id == "") { + osi() << "<sentence>\n"; + } + else { + osi() << "<sentence id=\"" << id << "\">\n"; + } + if (use_indent_) indent_more(); for (size_t idx = 0; idx < s.size(); ++idx) { const Token* t = s.tokens()[idx]; -- GitLab