/*
    Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
    Part of the libcorpus2 project

    This program is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your option)
    any later version.

    This program is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    or FITNESS FOR A PARTICULAR PURPOSE.

    See the LICENSE and COPYING files for more details.
*/

#include <libcorpus2/io/premorphwriter.h>
#include <libpwrutils/foreach.h>

#include <ostream>
#include <string>

namespace Corpus2 {

namespace {

/**
 * Write text to os, replacing the characters that are significant in XML
 * markup (&, <, >, ") with the corresponding predefined entity references.
 *
 * Used for both element content and double-quoted attribute values, so that
 * arbitrary token text or attribute values cannot break the well-formedness
 * of the output. Text without any of these characters is written unchanged.
 */
void write_xml_escaped(std::ostream& os, const std::string& text)
{
	// Emit unescaped runs in one go; only break the run on a special char.
	std::string::size_type run_start = 0;
	for (std::string::size_type i = 0; i < text.size(); ++i) {
		const char* entity = 0;
		switch (text[i]) {
		case '&': entity = "&amp;"; break;
		case '<': entity = "&lt;"; break;
		case '>': entity = "&gt;"; break;
		case '"': entity = "&quot;"; break;
		default: break;
		}
		if (entity != 0) {
			os.write(text.data() + run_start,
				static_cast<std::streamsize>(i - run_start));
			os << entity;
			run_start = i + 1;
		}
	}
	os.write(text.data() + run_start,
		static_cast<std::streamsize>(text.size() - run_start));
}

} /* end anonymous ns */

// Registers this writer with TokenWriter under the name "premorph".
// NOTE(review): the second argument ("chunk") presumably advertises the
// parameters accepted by the constructor -- confirm against register_writer.
bool PremorphWriter::registered = TokenWriter::register_writer<PremorphWriter>(
		"premorph", "chunk");

/**
 * Create a writer on the given stream and immediately emit the document
 * header. The only parameter recognised here is "chunk", which makes the
 * writer open one paragraph chunk in the header and close it in the footer.
 */
PremorphWriter::PremorphWriter(std::ostream& os, const Tagset& tagset,
		const string_range_vector& params)
	: TokenWriter(os, tagset, params), cid_(0), force_chunk_(false)
{
	foreach (const string_range& param, params) {
		std::string p = boost::copy_range<std::string>(param);
		if (p == "chunk") {
			force_chunk_ = true;
		}
	}
	do_header();
}

/**
 * Calls the inherited finish().
 * NOTE(review): finish() is expected to emit the footer via do_footer() --
 * confirm in TokenWriter.
 */
PremorphWriter::~PremorphWriter()
{
	finish();
}

/**
 * Write a single token: the whitespace string corresponding to the token's
 * preceding-whitespace amount, followed by the (XML-escaped) orth.
 */
void PremorphWriter::write_token(const Token &t)
{
	os() << PwrNlp::Whitespace::to_whitespace(t.wa());
	write_xml_escaped(os(), t.orth_utf8());
}

/**
 * Write a sentence as a <chunk type="s"> element. The first token is written
 * without its preceding whitespace; all following tokens go through
 * write_token() and so keep theirs.
 */
void PremorphWriter::write_sentence(const Sentence &s)
{
	os() << "<chunk type=\"s\">";
	if (!s.tokens().empty()) {
		write_xml_escaped(os(), s[0]->orth_utf8());
	}
	for (size_t i = 1; i < s.tokens().size(); ++i) {
		write_token(*s[i]);
	}
	os() << "</chunk>\n";
}

/**
 * Write a chunk: an opening tag carrying the chunk's attributes, every
 * sentence of the chunk, and the closing tag.
 */
void PremorphWriter::write_chunk(const Chunk &c)
{
	paragraph_head(c);
	foreach (const Sentence::ConstPtr& s, c.sentences()) {
		write_sentence(*s);
	}
	os() << "</chunk>\n";
}

/**
 * Emit the XML declaration, the doctype, the root <cesAna> element and the
 * opening <chunkList>. When the "chunk" parameter was given, also open the
 * paragraph chunk that do_footer() closes.
 */
void PremorphWriter::do_header()
{
	os() << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
	os() << "<!DOCTYPE cesAna SYSTEM \"xcesAnaIPI.dtd\">\n";
	os() << "<cesAna";
	os() << " xmlns:xlink=\"http://www.w3.org/1999/xlink\"";
	os() << " version=\"1.0\" type=\"premorph\">\n";
	os() << "<chunkList>\n";
	if (force_chunk_) {
		paragraph_head();
	}
}

/**
 * Close whatever do_header() opened: the forced paragraph chunk (if any),
 * <chunkList> and <cesAna>.
 */
void PremorphWriter::do_footer()
{
	if (force_chunk_) {
		os() << "</chunk>\n";
	}
	os() << "</chunkList>\n";
	os() << "</cesAna>\n";
}

/**
 * Open a paragraph chunk with an automatically generated id ("ch" followed
 * by a counter that is incremented on every call).
 */
void PremorphWriter::paragraph_head()
{
	os() << "<chunk id=\"ch" << ++cid_ << "\""
		<< " type=\"p\">\n";
}

/**
 * Open a chunk element whose XML attributes are taken from the given chunk.
 * Attribute values are XML-escaped; attribute names are written verbatim.
 */
void PremorphWriter::paragraph_head(const Chunk& c)
{
	os() << "<chunk";
	foreach (const Chunk::attr_map_t::value_type& v, c.attributes()) {
		os() << " " << v.first << "=\"";
		write_xml_escaped(os(), v.second);
		os() << "\"";
	}
	os() << ">\n";
}

} /* end ns Corpus2 */