Select Git revision
exception.h
xcescommon.cpp 3.82 KiB
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE.CORPUS2, LICENSE.POLIQARP, COPYING.LESSER and COPYING files for more details.
*/
#include <libcorpus2/io/xcescommon.h>
#include <boost/foreach.hpp>
#include <sstream>
namespace Corpus2 {
namespace {
/**
 * Write `indent` single-space strings to `os`.
 *
 * A zero or negative `indent` writes nothing.
 *
 * @param os      stream to write to
 * @param indent  number of spaces to emit
 * @return `os`, so that further output can be chained onto the call
 */
std::ostream& osi(std::ostream& os, int indent) {
	int remaining = indent;
	while (remaining > 0) {
		os << " ";
		--remaining;
	}
	return os;
}
/**
 * Write one lexeme to `os` as a single-line XCES `<lex>` element,
 * terminated by a newline.
 *
 * The element contains a `<base>` child holding the lexeme's UTF-8 lemma
 * and a `<ctag>` child holding the tag rendered by
 * `tagset.tag_to_string()`; both values are written through
 * `encode_xml_entities_into`.
 *
 * @param os             stream to write to
 * @param tagset         tagset used to turn the lexeme's tag into a string
 * @param l              lexeme to serialize
 * @param output_disamb  when true and the lexeme reports `is_disamb()`,
 *                       the opening tag carries `disamb="1"`
 */
inline void lexeme_as_xces_xml(std::ostream& os, const Tagset& tagset,
		const Lexeme& l, bool output_disamb)
{
	// The disamb marker is only emitted when the caller asked for it
	// and the lexeme itself is flagged.
	const bool mark_disamb = output_disamb && l.is_disamb();
	os << (mark_disamb ? "<lex disamb=\"1\">" : "<lex>");

	os << "<base>";
	encode_xml_entities_into(os, l.lemma_utf8());
	os << "</base>" << "<ctag>";
	encode_xml_entities_into(os, tagset.tag_to_string(l.tag()));
	os << "</ctag>" << "</lex>\n";
}
}
/**
 * Write the opening part of a token's XCES representation to `os`.
 *
 * If the token's preceding-whitespace value is `PwrNlp::Whitespace::None`,
 * an indented `<ns/>` line is written first. Then an indented opening
 * `<tok>` tag is written; with `whitespace_info` set, the tag carries a
 * `ws` attribute holding `PwrNlp::Whitespace::to_string(t.wa())`.
 *
 * @param os               stream to write to
 * @param t                token whose head is written
 * @param indent           number of spaces placed before each emitted line
 * @param whitespace_info  whether to add the `ws` attribute to `<tok>`
 */
void token_as_xces_xml_head(std::ostream& os,
		const Token& t, int indent, bool whitespace_info /* false */)
{
	if (t.wa() == PwrNlp::Whitespace::None) {
		osi(os, indent) << "<ns/>\n";
	}

	// Both variants of the opening tag start with the same indentation.
	std::ostream& line = osi(os, indent);
	if (whitespace_info) {
		line << "<tok ws=\""
			<< PwrNlp::Whitespace::to_string(t.wa()) << "\">\n";
	} else {
		line << "<tok>\n";
	}
}
void token_as_xces_xml_body(std::ostream& os, const Tagset& tagset,
const Token& t, int indent, bool output_disamb /* = false */,
bool sort /* = false */)
{
osi(os, indent) << "<orth>";
encode_xml_entities_into(os, t.orth_utf8());
os << "</orth>\n";
if (!sort) {