/*
    Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
    Part of the libcorpus2 project

    This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.

    This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. 

    See the LICENSE and COPYING files for more details.
*/

#include <libcorpus2/io/reader.h>
#include <boost/make_shared.hpp>
#include <boost/algorithm/string.hpp>
#include <sstream>

namespace Corpus2 {

/// Construct a reader; tagset_ is initialised from the given tagset.
TokenReader::TokenReader(const Tagset& tagset) : tagset_(tagset) {}

/// Destructor; performs no explicit cleanup at this level.
TokenReader::~TokenReader() {}

/**
 * Create a reader of the requested class through the path factory.
 *
 * @param class_id_params comma-separated string; the first field is the
 *        reader class id, any remaining fields are forwarded to the factory
 *        as reader parameters
 * @param tagset tagset forwarded to the factory
 * @param path path forwarded to the factory
 * @return shared pointer wrapping the object created by the path factory
 * @throws Corpus2Error when the factory raises TokenReaderFactoryException
 */
boost::shared_ptr<TokenReader> TokenReader::create_path_reader(
	const std::string& class_id_params,
	const Tagset& tagset,
	const std::string& path)
{
	string_range_vector fields;
	boost::algorithm::split(fields, class_id_params,
		boost::is_any_of(std::string(",")));
	// The leading field names the reader class; drop it so that only the
	// reader parameters remain in the vector handed to the factory.
	std::string reader_id = boost::copy_range<std::string>(fields.front());
	fields.erase(fields.begin());
	try {
		return boost::shared_ptr<TokenReader>(
			detail::TokenReaderFactorySingleton::Instance().path_factory.CreateObject(
				reader_id, tagset, path, fields));
	} catch (const detail::TokenReaderFactoryException&) {
		throw Corpus2Error("Reader class not found: " + reader_id);
	}
}

/**
 * Create a reader of the requested class through the stream factory.
 *
 * @param class_id_params comma-separated string; the first field is the
 *        reader class id, any remaining fields are forwarded to the factory
 *        as reader parameters
 * @param tagset tagset forwarded to the factory
 * @param stream input stream forwarded to the factory
 * @return shared pointer wrapping the object created by the stream factory
 * @throws Corpus2Error when the stream factory raises
 *         TokenReaderFactoryException; the message depends on whether the
 *         class id is among the ids registered in the path factory
 */
boost::shared_ptr<TokenReader> TokenReader::create_stream_reader(
	const std::string& class_id_params,
	const Tagset& tagset,
	std::istream& stream)
{
	string_range_vector fields;
	boost::algorithm::split(fields, class_id_params,
		boost::is_any_of(std::string(",")));
	// The leading field names the reader class; drop it so that only the
	// reader parameters remain in the vector handed to the factory.
	std::string reader_id = boost::copy_range<std::string>(fields.front());
	fields.erase(fields.begin());
	try {
		return boost::shared_ptr<TokenReader>(
			detail::TokenReaderFactorySingleton::Instance().stream_factory.CreateObject(
				reader_id, tagset, stream, fields));
	} catch (const detail::TokenReaderFactoryException&) {
		// An id that the path factory knows is a real reader class which
		// simply was not registered for stream input.
		const std::vector<std::string> path_ids =
			detail::TokenReaderFactorySingleton::Instance().path_factory.RegisteredIds();
		const bool known_for_paths =
			std::find(path_ids.begin(), path_ids.end(), reader_id) != path_ids.end();
		if (known_for_paths) {
			throw Corpus2Error("This reader does not support stream mode: " + reader_id);
		}
		throw Corpus2Error("Reader class not found: " + reader_id);
	}
}

/**
 * List the reader class ids registered in the path factory.
 *
 * @return the ids reported by the path factory's RegisteredIds()
 */
std::vector<std::string> TokenReader::available_reader_types()
{
	std::vector<std::string> registered =
		detail::TokenReaderFactorySingleton::Instance().path_factory.RegisteredIds();
	return registered;
}

/**
 * Look up the help text registered for a reader class.
 *
 * @param class_id reader class id used as the key in the help map
 * @return the stored help string, or an empty string when the map has no
 *         entry for class_id
 */
std::string TokenReader::reader_help(const std::string& class_id)
{
	typedef std::map<std::string, std::string>::const_iterator help_iterator;
	const help_iterator entry =
		detail::TokenReaderFactorySingleton::Instance().help.find(class_id);
	if (entry == detail::TokenReaderFactorySingleton::Instance().help.end()) {
		return "";
	}
	return entry->second;
}

/**
 * List the available reader class ids, each annotated with its help text.
 *
 * An id that has an entry in the help map is rendered as "id[help]". An id
 * without an entry is returned unchanged, so the result never contains a
 * blank item in place of a registered reader.
 *
 * @return one string per id returned by available_reader_types(), in the
 *         same order
 */
std::vector<std::string> TokenReader::available_reader_types_help()
{
	std::vector<std::string> v = available_reader_types();
	foreach (std::string& id, v) {
		std::map<std::string, std::string>::const_iterator c;
		c = detail::TokenReaderFactorySingleton::Instance().help.find(id);
		if (c != detail::TokenReaderFactorySingleton::Instance().help.end()) {
			// Append the help in place; ids with no help entry keep their
			// plain name instead of being overwritten with an empty string.
			id += "[" + c->second + "]";
		}
	}
	return v;
}

/// Construct the reader, forwarding the tagset to the TokenReader base.
BufferedChunkReader::BufferedChunkReader(const Tagset& tagset) : TokenReader(tagset) {}

/// Destructor; deletes every token still waiting in the token buffer.
BufferedChunkReader::~BufferedChunkReader()
{
	// Drain front to back, deleting each buffered token exactly once.
	while (!token_buf_.empty()) {
		delete token_buf_.front();
		token_buf_.pop_front();
	}
}

/**
 * Return the next buffered token, refilling the token buffer sentence by
 * sentence while it is empty.
 *
 * @return the token at the front of the token buffer (removed from the
 *         buffer), or NULL when the buffer is still empty after
 *         get_next_sentence() returned a null pointer
 */
Token* BufferedChunkReader::get_next_token()
{
	while (token_buf_.empty()) {
		ensure_more();
		Sentence::Ptr sent = get_next_sentence();
		if (!sent) {
			break;
		}
		std::copy(sent->tokens().begin(), sent->tokens().end(),
			std::back_inserter(token_buf_));
		// The pointers now live in token_buf_; have the sentence let go of them.
		sent->release_tokens();
	}
	if (!token_buf_.empty()) {
		Token* head = token_buf_.front();
		token_buf_.pop_front();
		return head;
	}
	return NULL;
}

/**
 * Return the next buffered sentence, refilling the sentence buffer chunk by
 * chunk while it is empty.
 *
 * @return the sentence at the front of the sentence buffer (removed from
 *         the buffer), or a null pointer when the buffer is still empty
 *         after get_next_chunk() returned a null pointer
 */
Sentence::Ptr BufferedChunkReader::get_next_sentence()
{
	while (sentence_buf_.empty()) {
		ensure_more();
		boost::shared_ptr<Chunk> chunk = get_next_chunk();
		if (!chunk) {
			break;
		}
		std::copy(chunk->sentences().begin(), chunk->sentences().end(),
			std::back_inserter(sentence_buf_));
	}
	if (!sentence_buf_.empty()) {
		Sentence::Ptr head = sentence_buf_.front();
		sentence_buf_.pop_front();
		return head;
	}
	return Sentence::Ptr();
}

/**
 * Return the next buffered chunk after calling ensure_more().
 *
 * @return the chunk at the front of the chunk buffer (removed from the
 *         buffer), or a null pointer when the buffer is empty after
 *         ensure_more()
 */
boost::shared_ptr<Chunk> BufferedChunkReader::get_next_chunk()
{
	ensure_more();
	if (!chunk_buf_.empty()) {
		boost::shared_ptr<Chunk> head = chunk_buf_.front();
		chunk_buf_.pop_front();
		return head;
	}
	return boost::shared_ptr<Chunk>();
}

/**
 * Construct the reader, forwarding the tagset to the TokenReader base.
 * chunkify_ starts as true; both buffers are value-initialised.
 */
BufferedSentenceReader::BufferedSentenceReader(const Tagset& tagset)
	: TokenReader(tagset),
	  chunkify_(true),
	  sentence_buf_(),
	  token_buf_()
{
}

/**
 * Return the next buffered token, refilling the token buffer sentence by
 * sentence while it is empty.
 *
 * @return the token at the front of the token buffer (removed from the
 *         buffer), or NULL when the buffer is still empty after
 *         get_next_sentence() returned a null pointer
 */
Token* BufferedSentenceReader::get_next_token()
{
	bool more = true;
	while (token_buf_.empty() && more) {
		Sentence::Ptr s = get_next_sentence();
		if (s) {
			std::copy(s->tokens().begin(), s->tokens().end(),
				std::back_inserter(token_buf_));
			// Hand the tokens over to the buffer, exactly as
			// BufferedChunkReader::get_next_token() does. The local pointer
			// s is dropped at the end of this iteration, while the raw
			// pointers copied above are later returned to the caller.
			// NOTE(review): assumes a Sentence deletes the tokens it still
			// holds when destroyed -- confirm against Sentence.
			s->release_tokens();
		} else {
			more = false;
		}
	}
	if (token_buf_.empty()) {
		return NULL;
	} else {
		Token* t = token_buf_.front();
		token_buf_.pop_front();
		return t;
	}
}

/**
 * Return the sentence held in the one-slot sentence buffer if there is one,
 * otherwise fetch a fresh sentence.
 *
 * @return the buffered sentence (the buffer is cleared), or the result of
 *         actual_next_sentence() when nothing is buffered
 */
Sentence::Ptr BufferedSentenceReader::get_next_sentence()
{
	if (!sentence_buf_) {
		return actual_next_sentence();
	}
	Sentence::Ptr pending = sentence_buf_;
	sentence_buf_.reset();
	return pending;
}

/**
 * Assemble the next chunk from consecutive sentences.
 *
 * The first available sentence always opens the chunk. Further sentences
 * are appended until no sentence is left or, when chunkify_ is set, until a
 * sentence whose first token has ManyNewlines as its preceding whitespace
 * is met. That sentence is stored in sentence_buf_ so that the following
 * get_next_sentence() call returns it.
 *
 * @return the assembled chunk, or a null pointer when no sentence is
 *         available
 */
boost::shared_ptr<Chunk> BufferedSentenceReader::get_next_chunk()
{
	Sentence::Ptr sent = get_next_sentence();
	if (!sent) {
		return boost::shared_ptr<Chunk>();
	}
	boost::shared_ptr<Chunk> chunk = boost::make_shared<Chunk>();
	chunk->append(sent);
	for (;;) {
		sent = get_next_sentence();
		if (!sent) {
			break;
		}
		// The first token is inspected only when chunkify_ is set.
		const bool same_chunk = !chunkify_ ||
			sent->first_token()->wa() != PwrNlp::Whitespace::ManyNewlines;
		if (!same_chunk) {
			// Keep the boundary sentence for the next call.
			sentence_buf_ = sent;
			break;
		}
		chunk->append(sent);
	}
	return chunk;
}

} /* end ns Corpus2 */