#include <libwccl/values/strset.h>
#include <libpwrutils/foreach.h>
#include <libpwrutils/util.h>
#include <sstream>
#include <boost/algorithm/string.hpp>

namespace Wccl {

// Definition of the static type_name member: the textual name of this
// value type, "StrSet".
const char* StrSet::type_name = "StrSet";

std::string StrSet::to_raw_string() const
{
	std::stringstream ss;
	ss << "[";
	value_type::const_iterator it = set_.begin();
	while(it != set_.end()) {
		ss << '\"';
		std::string item = PwrNlp::to_utf8(*it);
		boost::algorithm::replace_all(item, "\\", "\\\\");
		boost::algorithm::replace_all(item, "\"", "\\\"");
		ss << item;
		ss << '\"';
		if(++it != set_.end()) {
			ss << ", ";
		}
	}
	ss << "]";
	return ss.str();
}

/**
 * UnicodeString counterpart of to_raw_string(): a bracketed,
 * comma-separated listing of the elements, each wrapped in double
 * quotes with backslashes and double quotes backslash-escaped.
 * Elements appear in the iteration order of the underlying container.
 */
UnicodeString StrSet::to_raw_string_u() const
{
	const UnicodeString quote = UNICODE_STRING("\"", 1);

	UnicodeString result;
	result.append(UNICODE_STRING("[", 1));
	for (value_type::const_iterator cur = set_.begin();
			cur != set_.end(); ++cur) {
		// Separator goes before every element except the first one.
		if (cur != set_.begin()) {
			result.append(UNICODE_STRING(", ", 2));
		}
		// Work on a copy so the stored element stays untouched.
		UnicodeString escaped(*cur);
		// Backslashes first, then quotes, so that the backslashes added
		// for the quotes are not escaped a second time.
		escaped.findAndReplace(
			UNICODE_STRING("\\", 1), UNICODE_STRING("\\\\", 2));
		escaped.findAndReplace(
			UNICODE_STRING("\"", 1), UNICODE_STRING("\\\"", 2));
		result.append(quote);
		result.append(escaped);
		result.append(quote);
	}
	result.append(UNICODE_STRING("]", 1));
	return result;
}

/**
 * Builds a compact listing of the set: the quoted elements joined with
 * '-' and no surrounding brackets. An empty set is rendered as a
 * single "-". Inside each element the characters '-', '.', ' ' and tab
 * are replaced by their \uXXXX escape text. The tagset argument is not
 * used.
 */
std::string StrSet::to_compact_string(const Corpus2::Tagset& /* tagset */)
		const
{
	if (set_.empty()) {
		return "-";
	}

	std::stringstream out;
	for (value_type::const_iterator cur = set_.begin();
			cur != set_.end(); ++cur) {
		// Separator goes before every element except the first one.
		if (cur != set_.begin()) {
			out << "-";
		}
		std::string escaped = PwrNlp::to_utf8(*cur);
		// '-' is also the element separator, hence it is escaped too.
		boost::algorithm::replace_all(escaped, "-", "\\u002d");
		boost::algorithm::replace_all(escaped, ".", "\\u002e");
		boost::algorithm::replace_all(escaped, " ", "\\u0020");
		boost::algorithm::replace_all(escaped, "\t", "\\u0009");
		out << '\"' << escaped << '\"';
	}
	return out.str();
}

/**
 * UnicodeString counterpart of to_compact_string(): the quoted elements
 * joined with '-', or a single "-" for an empty set. Inside each
 * element the characters '-', '.', ' ' and tab are replaced by their
 * \uXXXX escape text. The tagset argument is not used.
 */
UnicodeString StrSet::to_compact_string_u(const Corpus2::Tagset& /* tagset */)
		const
{
	const UnicodeString dash = UNICODE_STRING("-", 1);
	const UnicodeString quote = UNICODE_STRING("\"", 1);

	UnicodeString result;
	if (set_.empty()) {
		result.append(dash);
		return result;
	}

	for (value_type::const_iterator cur = set_.begin();
			cur != set_.end(); ++cur) {
		// Separator goes before every element except the first one.
		if (cur != set_.begin()) {
			result.append(dash);
		}
		// Work on a copy so the stored element stays untouched.
		UnicodeString escaped(*cur);
		escaped.findAndReplace(
			UNICODE_STRING("-", 1), UNICODE_STRING("\\u002d", 6));
		escaped.findAndReplace(
			UNICODE_STRING(".", 1), UNICODE_STRING("\\u002e", 6));
		escaped.findAndReplace(
			UNICODE_STRING(" ", 1), UNICODE_STRING("\\u0020", 6));
		escaped.findAndReplace(
			UNICODE_STRING("\t", 1), UNICODE_STRING("\\u0009", 6));
		result.append(quote);
		result.append(escaped);
		result.append(quote);
	}
	return result;
}

bool StrSet::intersects(const StrSet &other) const {
	if (empty() || other.empty()) {
		return false;
	}
	//We just want to check if there is an intersection, no
	//need to actually compute it to check if it's empty.
	//Doing it like below sounds faster than, say, sorting
	//the sets and using set_intersection.
	//It's faster to iterate through the smaller set and check in
	//the larger than it is to do the opposite, hence the &?: below.
	const value_type& smaller = size() < other.size() ? set_ : other.set_;
	const value_type& bigger = size() < other.size() ? other.set_ : set_;
	foreach (const UnicodeString& u, smaller) {
		if (bigger.find(u) != bigger.end()) {
			return true;
		}
	}
	return false;
}

bool StrSet::is_subset_of(const StrSet &other) const
{
	if (size() > other.size()) {
		return false;
	}
	foreach (const UnicodeString& u, set_) {
		if (other.set_.find(u) == other.set_.end()) {
			return false;
		}
	}
	return true;
}

/**
 * Returns the textual representation of a StrSet variable with the
 * given name: the name prefixed with "$s:".
 */
std::string StrSet::var_repr(const std::string &var_name)
{
	std::string repr("$s:");
	repr += var_name;
	return repr;
}

} /* end ns Wccl */