Skip to content
Snippets Groups Projects
Commit 37fa00cb authored by ilor's avatar ilor
Browse files

simple io test to make sure slight xces writer refactoring does not break things

parent 3722ddc3
Branches
No related merge requests found
......@@ -46,9 +46,8 @@ namespace {
}
}
void token_as_xces_xml(std::ostream& os, const Tagset& tagset,
const Token& t, int indent, bool output_disamb /* = false */,
bool sort /* = false */, bool whitespace_info /* false */)
void token_as_xces_xml_head(std::ostream& os,
const Token& t, int indent, bool whitespace_info /* false */)
{
if (t.wa() == PwrNlp::Whitespace::None) {
osi(os, indent) << "<ns/>\n";
......@@ -59,7 +58,12 @@ void token_as_xces_xml(std::ostream& os, const Tagset& tagset,
osi(os, indent) << "<tok ws=\""
<< PwrNlp::Whitespace::to_string(t.wa()) << "\">\n";
}
++indent;
}
void token_as_xces_xml_body(std::ostream& os, const Tagset& tagset,
const Token& t, int indent, bool output_disamb /* = false */,
bool sort /* = false */)
{
osi(os, indent) << "<orth>";
encode_xml_entities_into(os, t.orth_utf8());
os << "</orth>\n";
......@@ -80,6 +84,15 @@ void token_as_xces_xml(std::ostream& os, const Tagset& tagset,
os << s;
}
}
}
/**
 * Write a single token as XCES XML to the given stream.
 *
 * The work is delegated in three steps: token_as_xces_xml_head() writes the
 * opening part at the requested indent, token_as_xces_xml_body() writes the
 * token contents one indent level deeper, and the closing "</tok>" line is
 * written here, back at the requested indent.
 */
void token_as_xces_xml(std::ostream& os, const Tagset& tagset,
		const Token& t, int indent, bool output_disamb /* = false */,
		bool sort /* = false */, bool whitespace_info /* false */)
{
	// Opening part of the token element.
	token_as_xces_xml_head(os, t, indent, whitespace_info);
	// Contents are nested one level below the element itself.
	token_as_xces_xml_body(os, tagset, t, indent + 1, output_disamb, sort);
	// Closing tag lines up with the opening part.
	osi(os, indent) << "</tok>\n";
}
......
......@@ -29,6 +29,12 @@ void token_as_xces_xml(std::ostream& os, const Tagset& tagset,
const Token& t, int indent, bool output_disamb = false,
bool sort = false, bool whitespace_info = false);
/**
 * Write the opening part of a token's XCES XML representation to os, at the
 * given indent level. token_as_xces_xml() calls this first, then writes the
 * token body one indent level deeper, then writes the closing "</tok>" itself.
 *
 * The visible parts of the definition emit an "<ns/>" line when t.wa() is
 * PwrNlp::Whitespace::None, and can emit a "<tok ws=...>" opening tag carrying
 * the whitespace name. NOTE(review): presumably whitespace_info selects
 * between a plain "<tok>" and the ws-annotated form -- confirm against the
 * definition.
 *
 * No default arguments are declared here (the defaults are only mentioned in
 * comments), so every argument must be passed explicitly.
 */
void token_as_xces_xml_head(std::ostream& os,
const Token& t, int indent, bool whitespace_info /* false */);
/**
 * Write the contents of a token's XCES XML element to os, at the given indent
 * level. The output starts with an "<orth>" line holding the token's UTF-8
 * orth with XML entities encoded. NOTE(review): the remaining output is
 * presumably the token's lexemes, controlled by output_disamb and sort --
 * confirm against the definition.
 *
 * token_as_xces_xml() calls this between the token head and the closing
 * "</tok>", with indent one level deeper than the head.
 *
 * No default arguments are declared here (the defaults are only mentioned in
 * comments), so every argument must be passed explicitly.
 */
void token_as_xces_xml_body(std::ostream& os, const Tagset& tagset,
const Token& t, int indent, bool output_disamb /* = false */,
bool sort /* = false */);
/**
* Output a xml-encoded version of the given string into the given ostream.
* The default XML entity substitutions are made: less than, greater than,
......
......@@ -8,6 +8,7 @@ add_executable( tests
main.cpp
ann_basic.cpp
basic.cpp
io.cpp
tag_split.cpp
tagset_parse.cpp
)
......
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE and COPYING files for more details.
*/
#include <boost/test/unit_test.hpp>
#include <set>
#include <libpwrutils/foreach.h>
#include <libpwrutils/bitset.h>
#include <libcorpus2/tagsetmanager.h>
#include <libcorpus2/io/xcesreader.h>
#include <libcorpus2/io/writer.h>
namespace {
// Test fixture: a small XCES (cesAna) document, declared as UTF-8, used by the
// iobase test case both as reader input and as the expected writer output.
// It holds one outer chunk (id "ch51") wrapping one chunk of type "s" with
// four tokens: "Uważam", ",", "że" and "światopogląd".
//  - An <ns/> element precedes the comma token.
//  - The last token carries two lexemes, only the second marked disamb="1".
// The text must match the writer output byte for byte, so keep the line
// breaks and the absence of indentation exactly as they are.
static char swiatopoglad[] =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<!DOCTYPE cesAna SYSTEM \"xcesAnaIPI.dtd\">\n"
"<cesAna xmlns:xlink=\"http://www.w3.org/1999/xlink\" version=\"1.0\" type=\"lex disamb\">\n"
"<chunkList>\n"
"<chunk id=\"ch51\" type=\"tok\">\n"
"<chunk type=\"s\">\n"
"<tok>\n"
"<orth>Uważam</orth>\n"
"<lex disamb=\"1\"><base>uważać</base><ctag>fin:sg:pri:imperf</ctag></lex>\n"
"</tok>\n"
"<ns/>\n"
"<tok>\n"
"<orth>,</orth>\n"
"<lex disamb=\"1\"><base>,</base><ctag>interp</ctag></lex>\n"
"</tok>\n"
"<tok>\n"
"<orth>że</orth>\n"
"<lex disamb=\"1\"><base>że</base><ctag>conj</ctag></lex>\n"
"</tok>\n"
"<tok>\n"
"<orth>światopogląd</orth>\n"
"<lex><base>światopogląd</base><ctag>subst:sg:acc:m3</ctag></lex>\n"
"<lex disamb=\"1\"><base>światopogląd</base><ctag>subst:sg:nom:m3</ctag></lex>\n"
"</tok>\n"
"</chunk>\n"
"</chunk>\n"
"</chunkList>\n"
"</cesAna>\n"
;
}
BOOST_AUTO_TEST_SUITE( io )

/**
 * XCES round-trip check: read the fixture document with XcesReader, write the
 * resulting chunk back with the "xces,flat" TokenWriter, and require the
 * output to be identical to the input text.
 */
BOOST_AUTO_TEST_CASE( iobase )
{
	const Corpus2::Tagset& tagset = Corpus2::get_named_tagset("kipi");

	// Feed the fixture text to the reader through an in-memory stream.
	std::stringstream ssin;
	ssin << swiatopoglad;
	Corpus2::XcesReader xr(tagset, ssin);
	boost::shared_ptr<Corpus2::Chunk> chunk = xr.get_next_chunk();
	// Stop this test case cleanly if the reader produced no chunk
	// (presumably signalled by an empty pointer -- confirm against the
	// reader). Dereferencing an empty shared_ptr below would otherwise
	// take down the whole test runner instead of reporting one failure.
	BOOST_REQUIRE(chunk);

	// Write the chunk back out and compare with the original text.
	std::stringstream ss;
	boost::shared_ptr<Corpus2::TokenWriter> w(Corpus2::TokenWriter::create("xces,flat", ss, tagset));
	w->write_chunk(*chunk);
	// finish() is called before comparing so that any trailing output the
	// writer still owes is present in ss.
	w->finish();
	BOOST_CHECK_EQUAL(ss.str(), swiatopoglad);
}

BOOST_AUTO_TEST_SUITE_END();
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment