/*
    Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
    Part of the libcorpus2 project

    This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.

    This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.

    See the LICENSE and COPYING files for more details.
*/

#include <boost/test/unit_test.hpp>
#include <set>
#include <libpwrutils/foreach.h>
#include <libpwrutils/bitset.h>
#include <libcorpus2/tagsetmanager.h>
#include <libcorpus2/io/cclreader.h>
#include <libcorpus2/io/writer.h>
#include <libcorpus2/ann/annotatedsentence.h>

namespace {

/**
 * Plain XCES rendering of a one-sentence chunk ("Uważam, że światopogląd").
 *
 * Contains four tokens with their lexemes and a no-space marker (<ns/>)
 * after the first token; it carries no annotation channels. The test in
 * this file compares the output of the "xces,flat" writer against this text,
 * so every byte (including the trailing newline) matters.
 *
 * Read-only fixture: declared const so it cannot be modified by accident.
 */
const char swiatopoglad[] =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<!DOCTYPE cesAna SYSTEM \"xcesAnaIPI.dtd\">\n"
"<cesAna xmlns:xlink=\"http://www.w3.org/1999/xlink\" version=\"1.0\" type=\"lex disamb\">\n"
"<chunkList>\n"
"<chunk id=\"ch51\" type=\"tok\">\n"
"<chunk type=\"s\">\n"
"<tok>\n"
"<orth>Uważam</orth>\n"
"<lex disamb=\"1\"><base>uważać</base><ctag>fin:sg:pri:imperf</ctag></lex>\n"
"</tok>\n"
"<ns/>\n"
"<tok>\n"
"<orth>,</orth>\n"
"<lex disamb=\"1\"><base>,</base><ctag>interp</ctag></lex>\n"
"</tok>\n"
"<tok>\n"
"<orth>że</orth>\n"
"<lex disamb=\"1\"><base>że</base><ctag>conj</ctag></lex>\n"
"</tok>\n"
"<tok>\n"
"<orth>światopogląd</orth>\n"
"<lex><base>światopogląd</base><ctag>subst:sg:acc:m3</ctag></lex>\n"
"<lex disamb=\"1\"><base>światopogląd</base><ctag>subst:sg:nom:m3</ctag></lex>\n"
"</tok>\n"
"</chunk>\n"
"</chunk>\n"
"</chunkList>\n"
"</cesAna>\n"
;

/**
 * The same four tokens in the annotated (CCL-style) format, used as the
 * reader input by the test in this file.
 *
 * Per-token <ann> values, in token order:
 *   channel "cute": 1, 1, (none), 2
 *   channel "meh":  1, 2, 1 (marked head="1"), (none)
 * Note that the last token lists its <ann> element after the lexemes, while
 * the other tokens list theirs before <orth>.
 *
 * Read-only fixture: declared const so it cannot be modified by accident.
 */
const char swiatopoglad_ann[] =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<chunkList>\n"
"<chunk id=\"ch51\" type=\"tok\">\n"
"<sentence>\n"
"<tok>\n"
"<ann chan=\"cute\">1</ann>\n"
"<ann chan=\"meh\">1</ann>\n"
"<orth>Uważam</orth>\n"
"<lex disamb=\"1\"><base>uważać</base><ctag>fin:sg:pri:imperf</ctag></lex>\n"
"</tok>\n"
"<ns/>\n"
"<tok>\n"
"<ann chan=\"cute\">1</ann>\n"
"<ann chan=\"meh\">2</ann>\n"
"<orth>,</orth>\n"
"<lex disamb=\"1\"><base>,</base><ctag>interp</ctag></lex>\n"
"</tok>\n"
"<tok>\n"
"<ann chan=\"meh\" head=\"1\">1</ann>\n"
"<orth>że</orth>\n"
"<lex disamb=\"1\"><base>że</base><ctag>conj</ctag></lex>\n"
"</tok>\n"
"<tok>\n"
"<orth>światopogląd</orth>\n"
"<lex><base>światopogląd</base><ctag>subst:sg:acc:m3</ctag></lex>\n"
"<lex disamb=\"1\"><base>światopogląd</base><ctag>subst:sg:nom:m3</ctag></lex>\n"
"<ann chan=\"cute\">2</ann>\n"
"</tok>\n"
"</sentence>\n"
"</chunk>\n"
"</chunkList>\n"
;

} // namespace

BOOST_AUTO_TEST_SUITE( ioann )

BOOST_AUTO_TEST_CASE( iobase )
{
	const Corpus2::Tagset& tagset = Corpus2::get_named_tagset("kipi");
	std::stringstream ssin;
	ssin << swiatopoglad_ann;
	Corpus2::CclReader xr(tagset, ssin);
	boost::shared_ptr<Corpus2::Chunk> chunk = xr.get_next_chunk();
	BOOST_REQUIRE(chunk);
	std::stringstream ss;
	boost::shared_ptr<Corpus2::TokenWriter> w(Corpus2::TokenWriter::create_stream_writer("xces,flat", ss, tagset));
	//boost::shared_ptr<Corpus2::TokenWriter> wann(Corpus2::TokenWriter::create("ccl", std::cerr, tagset));
	//wann->write_chunk(*chunk);
	//wann->finish();
	w->write_chunk(*chunk);
	w->finish();
	BOOST_CHECK_EQUAL(ss.str(), swiatopoglad);
	BOOST_REQUIRE(!chunk->sentences().empty());
	boost::shared_ptr<Corpus2::AnnotatedSentence> as;
	as = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(chunk->sentences()[0]);
	BOOST_REQUIRE(as);

	BOOST_REQUIRE(as->has_channel("cute"));
	as->get_channel("cute").make_iob_from_segments();
	BOOST_CHECK_EQUAL(as->get_channel("cute").dump_iob(), "BIOB");
	BOOST_CHECK_EQUAL(as->get_channel("cute").dump_segments(), "1102");
	BOOST_CHECK_EQUAL(as->get_channel("cute").dump_heads(), "    ");
	Corpus2::Sentence::Ptr cute = Corpus2::create_view(as, "cute");
	BOOST_REQUIRE_EQUAL(cute->size(), 3);
	BOOST_CHECK_EQUAL(cute->tokens()[0]->orth_utf8(), "Uważam,");
	BOOST_CHECK_EQUAL(cute->tokens()[1]->orth_utf8(), "że");
	BOOST_CHECK_EQUAL(cute->tokens()[2]->orth_utf8(), "światopogląd");

	BOOST_REQUIRE(as->has_channel("meh"));
	as->get_channel("meh").make_iob_from_segments();
	BOOST_CHECK_EQUAL(as->get_channel("meh").dump_iob(), "BBBO");
	Corpus2::Sentence::Ptr meh = Corpus2::create_view(as, "meh");
	BOOST_REQUIRE_EQUAL(meh->size(), 3);
	BOOST_CHECK_EQUAL(meh->tokens()[0]->orth_utf8(), ",");
	BOOST_CHECK_EQUAL(meh->tokens()[1]->orth_utf8(), "Uważam że");
	BOOST_CHECK_EQUAL(meh->tokens()[2]->orth_utf8(), "światopogląd");

	//std::cerr << as->annotation_info();
}

BOOST_AUTO_TEST_SUITE_END();