/*
    Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
    Part of the libcorpus2 project

    This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.

    This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. 

    See the LICENSE and COPYING files for more details.
*/

#ifndef LIBCORPUS2_LEXEME_H
#define LIBCORPUS2_LEXEME_H

#include <unicode/unistr.h>
#include <libcorpus2/tag.h>
#include <libpwrutils/util.h>

//#include <boost/flyweight.hpp>

// Hash adaptor so that boost hashed containers accept ICU UnicodeStrings as
// keys. The overload is placed in ICU's namespace, next to UnicodeString,
// because that is apparently where the lookup expects to find it.
U_NAMESPACE_BEGIN
/// Hash of an ICU string, taken directly from UnicodeString::hashCode().
inline size_t hash_value(const UnicodeString& u)
{
	const size_t code = u.hashCode();
	return code;
}
U_NAMESPACE_END


namespace Corpus2 {

/**
 * A lexeme bundles together a dictionary form (lemma) and a
 * morphosyntactic tag (tag), plus a compatibility "disambiguated" flag.
 *
 * Only operator== and operator< are declared here; the Boost.Operators
 * base classes derive the remaining comparison operators from them.
 */
class Lexeme
	: boost::equality_comparable<Lexeme>
	, boost::less_than_comparable<Lexeme>
{
public:
	/// Null lexeme creation
	Lexeme();

	/**
	 * Lemma and tag constructor.
	 * @param lemma the dictionary (base) form
	 * @param tag   the morphosyntactic tag
	 */
	Lexeme(const UnicodeString& lemma, const Tag& tag);

	/**
	 * Helper creation function, taking the same arguments as the
	 * two-argument constructor.
	 * @param lemma the dictionary (base) form
	 * @param tag   the morphosyntactic tag
	 * @return the created lexeme, by value
	 */
	static Lexeme create(const UnicodeString& lemma, const Tag& tag);

	/// Lemma accessor; returns a reference to the stored lemma
	const UnicodeString& lemma() const {
		return lemma_;
	}

	/// Lemma setter; the argument is copied into the lexeme
	void set_lemma(const UnicodeString& l) {
		lemma_ = l;
	}

	/**
	 * UTF-8 lemma convenience accessor.
	 *
	 * Converts the stored lemma with PwrNlp::to_utf8 on every call.
	 * The result is returned as a plain (non-const) value so that callers
	 * can move from the temporary instead of being forced to copy it.
	 */
	std::string lemma_utf8() const {
		return PwrNlp::to_utf8(lemma_);
	}

	/// Tag accessor; returns a reference to the stored tag
	const Tag& tag() const {
		return tag_;
	}

	/// Tag setter; the argument is copied into the lexeme
	void set_tag(const Tag& tag) {
		tag_ = tag;
	}

	/// Validity checking -- a lexeme is invalid (null) when it has
	/// a zero-length lemma or a tag with no tagset
	bool is_null() const;

	/// Disambiguated flag setter
	void set_disamb(bool value) {
		disamb_ = value;
	}

	/// Disambiguated flag getter
	bool is_disamb() const {
		return disamb_;
	}

	/**
	 * Lexeme comparison. Lexemes are ordered by lemma and then by tag, see
	 * Tag::operator< . The remaining ordering operators are supplied by
	 * the boost::less_than_comparable base.
	 */
	bool operator<(const Lexeme& other) const;

	/**
	 * Lexeme equality. Lemma and tag must compare equal. operator!= is
	 * supplied by the boost::equality_comparable base.
	 */
	bool operator==(const Lexeme& other) const;

	/**
	 * Disamb-ignoring lexeme comparison, packaged as a function object
	 * so it can be handed to containers and algorithms as a comparator.
	 */
	struct DisamblessComparator
	{
		bool operator()(const Lexeme& l1, const Lexeme& l2) const;
	};

private:
	/// The lemma -- basic form
	//boost::flyweight<UnicodeString> lemma_;
	UnicodeString lemma_;

	/// The tag
	//boost::flyweight<Tag> tag_;
	Tag tag_;

	/// Compatibility "disambiguated" flag
	bool disamb_;
};

} /* end ns Corpus2 */

#endif // LIBCORPUS2_LEXEME_H