From 37fa00cb27b5cbf65da68293f104383758b8d69b Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Fri, 18 Feb 2011 14:34:50 +0100
Subject: [PATCH] simple io test to make sure slight xces writer refactoring
 does not break things

---
 libcorpus2/io/xcescommon.cpp | 21 ++++++++--
 libcorpus2/io/xcescommon.h   |  6 +++
 tests/CMakeLists.txt         |  1 +
 tests/io.cpp                 | 74 ++++++++++++++++++++++++++++++++++++
 4 files changed, 98 insertions(+), 4 deletions(-)
 create mode 100644 tests/io.cpp

diff --git a/libcorpus2/io/xcescommon.cpp b/libcorpus2/io/xcescommon.cpp
index 7bcb4c5..52edd59 100644
--- a/libcorpus2/io/xcescommon.cpp
+++ b/libcorpus2/io/xcescommon.cpp
@@ -46,9 +46,8 @@ namespace {
 	}
 }
 
-void token_as_xces_xml(std::ostream& os, const Tagset& tagset,
-		const Token& t, int indent, bool output_disamb /* = false */,
-		bool sort /* = false */, bool whitespace_info /* false */)
+void token_as_xces_xml_head(std::ostream& os,
+		const Token& t, int indent, bool whitespace_info /* false */)
 {
 	if (t.wa() == PwrNlp::Whitespace::None) {
 		osi(os, indent) << "<ns/>\n";
@@ -59,7 +58,12 @@ void token_as_xces_xml(std::ostream& os, const Tagset& tagset,
 		osi(os, indent) << "<tok ws=\""
 			<< PwrNlp::Whitespace::to_string(t.wa()) << "\">\n";
 	}
-	++indent;
+}
+
+void token_as_xces_xml_body(std::ostream& os, const Tagset& tagset,
+		const Token& t, int indent, bool output_disamb /* = false */,
+		bool sort /* = false */)
+{
 	osi(os, indent) << "<orth>";
 	encode_xml_entities_into(os, t.orth_utf8());
 	os << "</orth>\n";
@@ -80,6 +84,15 @@ void token_as_xces_xml(std::ostream& os, const Tagset& tagset,
 			os << s;
 		}
 	}
+}
+
+void token_as_xces_xml(std::ostream& os, const Tagset& tagset,
+		const Token& t, int indent, bool output_disamb /* = false */,
+		bool sort /* = false */, bool whitespace_info /* false */)
+{
+	token_as_xces_xml_head(os, t, indent, whitespace_info);
+	++indent;
+	token_as_xces_xml_body(os, tagset, t, indent, output_disamb, sort);
 	--indent;
 	osi(os, indent) << "</tok>\n";
 }
diff --git a/libcorpus2/io/xcescommon.h b/libcorpus2/io/xcescommon.h
index da1e808..204e272 100644
--- a/libcorpus2/io/xcescommon.h
+++ b/libcorpus2/io/xcescommon.h
@@ -29,6 +29,12 @@ void token_as_xces_xml(std::ostream& os, const Tagset& tagset,
 		const Token& t, int indent, bool output_disamb = false,
 		bool sort = false, bool whitespace_info = false);
 
+void token_as_xces_xml_head(std::ostream& os,
+		const Token& t, int indent, bool whitespace_info /* false */);
+
+void token_as_xces_xml_body(std::ostream& os, const Tagset& tagset,
+		const Token& t, int indent, bool output_disamb /* = false */,
+		bool sort /* = false */);
 /**
  * Output a xml-encoded version of the given string into the given ostream.
  * The default XML entity substitutions are made: less than, greater than,
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 874b60a..2d7d8bb 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -8,6 +8,7 @@ add_executable( tests
 	main.cpp
 	ann_basic.cpp
 	basic.cpp
+	io.cpp
 	tag_split.cpp
 	tagset_parse.cpp
 )
diff --git a/tests/io.cpp b/tests/io.cpp
new file mode 100644
index 0000000..c4c7541
--- /dev/null
+++ b/tests/io.cpp
@@ -0,0 +1,74 @@
+/*
+    Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
+    Part of the libcorpus2 project
+
+    This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+    This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.
+
+    See the LICENSE and COPYING files for more details.
+*/
+
+#include <boost/test/unit_test.hpp>
+#include <set>
+#include <libpwrutils/foreach.h>
+#include <libpwrutils/bitset.h>
+#include <libcorpus2/tagsetmanager.h>
+#include <libcorpus2/io/xcesreader.h>
+#include <libcorpus2/io/writer.h>
+
+namespace {
+static char swiatopoglad[] =
+"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+"<!DOCTYPE cesAna SYSTEM \"xcesAnaIPI.dtd\">\n"
+"<cesAna xmlns:xlink=\"http://www.w3.org/1999/xlink\" version=\"1.0\" type=\"lex disamb\">\n"
+"<chunkList>\n"
+"<chunk id=\"ch51\" type=\"tok\">\n"
+"<chunk type=\"s\">\n"
+"<tok>\n"
+"<orth>Uważam</orth>\n"
+"<lex disamb=\"1\"><base>uważać</base><ctag>fin:sg:pri:imperf</ctag></lex>\n"
+"</tok>\n"
+"<ns/>\n"
+"<tok>\n"
+"<orth>,</orth>\n"
+"<lex disamb=\"1\"><base>,</base><ctag>interp</ctag></lex>\n"
+"</tok>\n"
+"<tok>\n"
+"<orth>że</orth>\n"
+"<lex disamb=\"1\"><base>że</base><ctag>conj</ctag></lex>\n"
+"</tok>\n"
+"<tok>\n"
+"<orth>światopogląd</orth>\n"
+"<lex><base>światopogląd</base><ctag>subst:sg:acc:m3</ctag></lex>\n"
+"<lex disamb=\"1\"><base>światopogląd</base><ctag>subst:sg:nom:m3</ctag></lex>\n"
+"</tok>\n"
+"</chunk>\n"
+"</chunk>\n"
+"</chunkList>\n"
+"</cesAna>\n"
+;
+}
+
+BOOST_AUTO_TEST_SUITE( io )
+
+BOOST_AUTO_TEST_CASE( iobase ) // round trip: read the XCES fixture, write it back, compare text
+{
+	const Corpus2::Tagset& tagset = Corpus2::get_named_tagset("kipi");
+	std::stringstream ssin;
+	ssin << swiatopoglad; // feed the in-memory fixture to the reader's input stream
+	Corpus2::XcesReader xr(tagset, ssin);
+	boost::shared_ptr<Corpus2::Chunk> chunk = xr.get_next_chunk(); // NOTE(review): dereferenced below without a null check
+	std::stringstream ss; // receives everything the writer emits
+	boost::shared_ptr<Corpus2::TokenWriter> w(Corpus2::TokenWriter::create("xces,flat", ss, tagset));
+	w->write_chunk(*chunk);
+	w->finish(); // presumably emits the closing part of the document - confirm against TokenWriter
+	BOOST_CHECK_EQUAL(ss.str(), swiatopoglad); // written output must equal the fixture text exactly
+}
+
+BOOST_AUTO_TEST_SUITE_END();
-- 
GitLab