/*
    Copyright (C) 2010 Tomasz Ĺšniatowski, Adam Radziszewski
    Part of the libcorpus2 project

    This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.

    This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. 

    See the LICENSE.CORPUS2, LICENSE.POLIQARP, COPYING.LESSER and COPYING files for more details.
*/

// Include guard. The macro tested by #ifndef must be the very macro that is
// defined on the following line; otherwise the test never fails and a second
// inclusion of this header redefines every class declared in it.
#ifndef LIBCORPUS2_IO_IOB_CHAN_H
#define LIBCORPUS2_IO_IOB_CHAN_H

#include <libcorpus2/io/reader.h>
#include <libcorpus2/io/writer.h>
#include <boost/scoped_ptr.hpp>

namespace Corpus2 {

/**
 * TokenWriter subclass that writes tokens, sentences and chunks to an
 * output stream; judging by its name it targets the "IOB-chan" format.
 *
 * NOTE(review): the format description below mentions RFTagger and looks
 * like it was carried over from another writer's header. Confirm it
 * against the implementation before relying on it.
 *
 * As originally documented: one token per line, token line consists of
 * the orth, followed by the tag, followed by newline (one tag per token
 * only). Each sentence is followed by a blank line.
 *
 * The first lexeme is used. No-lexeme tokens trigger a warning unless
 * nowarn is passed.
 */
class IobChanWriter : public TokenWriter
{
public:
	/**
	 * @param os      stream the output is written to
	 * @param tagset  tagset used by the writer
	 * @param params  writer parameters; presumably where "nowarn" (see the
	 *                class description) is passed -- confirm in the
	 *                implementation
	 */
	IobChanWriter(std::ostream& os, const Tagset& tagset,
			const string_range_vector& params);

	/// Write a single token.
	void write_token(const Token& t);

	/// Write a whole sentence.
	void write_sentence(const Sentence& s);

	/// Write a whole chunk.
	void write_chunk(const Chunk &p);

	/// Write a single tag.
	void write_tag(const Tag& tag);

	/// NOTE(review): presumably records that this writer has been
	/// registered with a writer factory; the registration code is not
	/// visible in this header -- confirm.
	static bool registered;

private:
	/// Whether tokens without lexemes trigger a warning (see the class
	/// description); presumably cleared by the "nowarn" parameter -- confirm.
	bool warn_on_no_lexemes_;

	/// NOTE(review): purpose is not visible in this header; presumably
	/// set from the constructor params -- check the implementation.
	bool force_;
};

/**
 * BufferedSentenceReader subclass that produces sentences from an input
 * stream; judging by its name it is the reading counterpart of
 * IobChanWriter.
 *
 * The input is either a caller-supplied std::istream or a file named by
 * the caller, depending on which constructor is used.
 */
class IobChanReader : public BufferedSentenceReader
{
public:
	/**
	 * Construct a reader over an existing stream.
	 * @param tagset  tagset used by the reader
	 * @param is      input stream to read from; presumably it must outlive
	 *                the reader, since only a pointer is kept -- confirm
	 */
	IobChanReader(const Tagset& tagset, std::istream& is);

	/**
	 * Construct a reader from a file name.
	 * @param tagset    tagset used by the reader
	 * @param filename  name of the input file; presumably the opened stream
	 *                  is held in is_owned_ -- confirm in the implementation
	 */
	IobChanReader(const Tagset& tagset, const std::string& filename);

	/// Access the input stream in use. Dereferences is_ without a null
	/// check, so is_ must have been set by the constructor.
	std::istream& is() {
		return *is_;
	}

	/// Set a reader option given as a string. The recognised options are
	/// defined in the implementation, not in this header.
	void set_option(const std::string& option);

	/// Query the value of a reader option by name.
	std::string get_option(const std::string& option) const;

	/// NOTE(review): presumably records that this reader has been
	/// registered with a reader factory; the registration code is not
	/// visible in this header -- confirm.
	static bool registered;

protected:
	/// BufferedSentenceReader override
	Sentence::Ptr actual_next_sentence();

	/// Non-owning pointer to the stream being read; returned by is().
	std::istream* is_;
	/// Owning holder for a stream; presumably used only when the reader
	/// opened the stream itself (filename constructor) -- confirm.
	/// Declared after is_, so it is also initialised after it.
	boost::scoped_ptr<std::istream> is_owned_;

	/// Whether to mark all incoming tags as disambiguated
	bool disamb_;
};

} /* end ns Corpus2 */

#endif // LIBCORPUS2_IO_IOB_CHAN_H