Skip to content
Snippets Groups Projects
Commit 9d901818 authored by ilor's avatar ilor
Browse files

add a simple premorph writer io class

parent 07d6f1b8
Branches
No related merge requests found
......@@ -52,6 +52,7 @@ SET(libcorpus2_STAT_SRC
token.cpp
io/orthwriter.cpp
io/plainwriter.cpp
io/premorphwriter.cpp
io/reader.cpp
io/rft.cpp
io/sax.cpp
......
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE and COPYING files for more details.
*/
#include <libcorpus2/io/premorphwriter.h>
#include <libpwrutils/foreach.h>
namespace Corpus2 {
// Static registration of this writer class with TokenWriter under the
// name "premorph"; the returned flag is kept in PremorphWriter::registered.
// NOTE(review): the second argument ("chunk") presumably lists the parameter
// names this writer understands -- confirm against
// TokenWriter::register_writer.
bool PremorphWriter::registered =
    TokenWriter::register_writer<PremorphWriter>("premorph", "chunk");
/**
 * Creates a premorph writer and immediately emits the document header.
 *
 * All arguments are forwarded to the TokenWriter base. The only parameter
 * interpreted here is "chunk": when present, force_chunk_ is set, which
 * makes do_header() open one paragraph chunk right after the chunk list
 * start and do_footer() close it again.
 *
 * @param os      stream the XML is written to
 * @param tagset  tagset handed on to TokenWriter
 * @param params  writer parameters; scanned for the "chunk" flag
 */
PremorphWriter::PremorphWriter(std::ostream& os, const Tagset& tagset,
        const string_range_vector& params)
    : TokenWriter(os, tagset, params), cid_(0), force_chunk_(false)
{
    foreach (const string_range& option, params) {
        if (boost::copy_range<std::string>(option) == "chunk") {
            force_chunk_ = true;
        }
    }
    // The header depends on force_chunk_, so it can only be written once
    // every parameter has been examined.
    do_header();
}
/**
 * Finalizes the output by calling finish() before the writer goes away.
 * NOTE(review): finish() is inherited and not visible here; presumably it
 * triggers do_footer() -- confirm in TokenWriter.
 */
PremorphWriter::~PremorphWriter()
{
    this->finish();
}
namespace {

/**
 * Streams text to out, replacing the characters that are markup-significant
 * in XML character data ('&', '<', '>') with their predefined entities.
 * Without this, a token such as "AT&T" or "<" yields a malformed document.
 */
void write_xml_escaped_text(std::ostream& out, const std::string& text)
{
    for (std::string::size_type i = 0; i < text.size(); ++i) {
        switch (text[i]) {
        case '&':
            out << "&amp;";
            break;
        case '<':
            out << "&lt;";
            break;
        case '>':
            out << "&gt;";
            break;
        default:
            out << text[i];
            break;
        }
    }
}

} /* end anonymous ns */

/**
 * Writes one token as plain text: the whitespace that preceded it
 * (derived from t.wa()) followed by its XML-escaped UTF-8 orthographic form.
 */
void PremorphWriter::write_token(const Token &t)
{
    os() << PwrNlp::Whitespace::to_whitespace(t.wa());
    write_xml_escaped_text(os(), t.orth_utf8());
}

/**
 * Writes a sentence as a <chunk type="s"> element containing the token
 * text. The first token is written without its preceding whitespace; all
 * later tokens go through write_token() and keep theirs. An empty sentence
 * produces an empty chunk element.
 */
void PremorphWriter::write_sentence(const Sentence &s)
{
    os() << "<chunk type=\"s\">";
    if (!s.tokens().empty()) {
        // Deliberately skip the leading whitespace of the first token.
        write_xml_escaped_text(os(), s[0]->orth_utf8());
    }
    for (size_t i = 1; i < s.tokens().size(); ++i) {
        write_token(*s[i]);
    }
    os() << "</chunk>\n";
}
/**
 * Writes a chunk: an opening tag carrying the chunk's attributes, each of
 * its sentences in order, and the closing tag.
 *
 * NOTE(review): when force_chunk_ is set, do_header() has already opened a
 * paragraph chunk, so chunks written here end up nested inside it --
 * confirm that this nesting is intended.
 */
void PremorphWriter::write_chunk(const Chunk &c)
{
    paragraph_head(c);

    foreach (const Sentence* sentence, c.sentences()) {
        write_sentence(*sentence);
    }

    os() << "</chunk>\n";
}
/**
 * Emits the document prologue: XML declaration, DOCTYPE, the opening
 * cesAna element (type "premorph") and the opening chunkList. When
 * force_chunk_ is set, a generated paragraph chunk is opened as well.
 */
void PremorphWriter::do_header()
{
    os() << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
         << "<!DOCTYPE cesAna SYSTEM \"xcesAnaIPI.dtd\">\n"
         << "<cesAna"
         << " xmlns:xlink=\"http://www.w3.org/1999/xlink\""
         << " version=\"1.0\" type=\"premorph\">\n"
         << "<chunkList>\n";

    if (!force_chunk_) {
        return;
    }
    // Forced-chunk mode: wrap everything that follows in one paragraph.
    paragraph_head();
}
/**
 * Emits the document epilogue, mirroring do_header(): closes the forced
 * paragraph chunk (if force_chunk_ is set), then chunkList and cesAna.
 */
void PremorphWriter::do_footer()
{
    if (force_chunk_) {
        // Matches the chunk opened by do_header() in forced-chunk mode.
        os() << "</chunk>\n";
    }
    os() << "</chunkList>\n"
         << "</cesAna>\n";
}
/**
 * Opens a generated paragraph chunk (type "p"). The id is "ch" followed by
 * the incremented chunk counter, so the first generated id is "ch1".
 */
void PremorphWriter::paragraph_head()
{
    ++cid_;
    os() << "<chunk id=\"ch" << cid_ << "\"" << " type=\"p\">\n";
}
/**
 * Opens a chunk element for c, copying every entry of c.attributes() onto
 * the tag as name="value". No id or type is generated here; only the
 * attributes stored on the chunk are written.
 *
 * NOTE(review): attribute values are written verbatim, without XML
 * escaping -- verify that they can never contain '"', '&' or '<'.
 */
void PremorphWriter::paragraph_head(const Chunk& c)
{
    os() << "<chunk";
    foreach (const Chunk::attr_map_t::value_type& attr, c.attributes()) {
        os() << " " << attr.first
             << "=\"" << attr.second << "\"";
    }
    os() << ">\n";
}
} /* end ns Corpus2 */
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE and COPYING files for more details.
*/
#ifndef LIBCORPUS2_IO_PREMORPHWRITER_H
#define LIBCORPUS2_IO_PREMORPHWRITER_H
#include <libcorpus2/io/writer.h>
namespace Corpus2 {
/**
 * TokenWriter that outputs a "premorph" XML document: a cesAna / chunkList
 * skeleton in which sentences appear as chunk elements of type "s" that
 * contain only the token text and inter-token whitespace.
 *
 * Registered with TokenWriter under the name "premorph". The "chunk"
 * parameter makes the writer wrap the whole output in one generated
 * paragraph chunk.
 */
class PremorphWriter : public TokenWriter {
public:
/// Scans params for "chunk" and writes the document header.
PremorphWriter(std::ostream& os, const Tagset& tagset,
const string_range_vector& params);
/// Calls finish() so the output is finalized on destruction.
~PremorphWriter();
/// Writes the token's preceding whitespace followed by its UTF-8 orth.
void write_token(const Token &t);
/// Writes the sentence as a chunk of type "s"; the first token is written
/// without its preceding whitespace.
void write_sentence(const Sentence &s);
/// Writes the chunk's opening tag (with its attributes), its sentences and
/// the closing tag.
void write_chunk(const Chunk &c);
protected:
/// Writes the XML prologue and opens cesAna / chunkList (and, when
/// force_chunk_ is set, a generated paragraph chunk).
void do_header();
/// Closes whatever do_header() opened.
void do_footer();
/// Opens a generated paragraph chunk with the next "ch<N>" id.
void paragraph_head();
/// Opens a chunk tag carrying the attributes of c.
void paragraph_head(const Chunk& c);
/// Counter behind the generated chunk ids; incremented before each use.
int cid_;
/// True when the "chunk" parameter was given.
bool force_chunk_;
/// Result of registering this class via TokenWriter::register_writer.
static bool registered;
};
} /* end ns Corpus2 */
#endif // LIBCORPUS2_IO_PREMORPHWRITER_H
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment