// tset.h
#ifndef LIBWCCL_VALUES_TSET_H
#define LIBWCCL_VALUES_TSET_H

#include <libwccl/values/value.h>
#include <libcorpus2/tag.h>

namespace Wccl {

/**
 * A Value subtype representing a set of tagset symbols, essentially
 * a wrapper on a Corpus2::Tag.
 *
 * Default-constructed TSets are empty.
 */
ilor's avatar
ilor committed
class TSet : public Value
{
public:
	WCCL_VALUE_PREAMBLE;

ilor's avatar
ilor committed
	typedef Corpus2::Tag value_type;

ilor's avatar
ilor committed
	TSet()
		: tag_()
	{
	}

	explicit TSet(Corpus2::Tag tag)
		: tag_(tag)
	{
	}

	const Corpus2::Tag& get_value() const {
ilor's avatar
ilor committed
		return tag_;
	}

	void set_value(const Corpus2::Tag& tag) {
ilor's avatar
ilor committed
		tag_ = tag;
	}

ilor's avatar
ilor committed
	const Corpus2::Tag& contents() const {
ilor's avatar
ilor committed
		return tag_;
	}

	/**
	 * Nonconst variant of contents()
	 */
	Corpus2::Tag& contents() {
		return tag_;
	}

	/**
	 * Convenience function to add a symbol from a tagset by name.
	 *
	 * Note: slow. Avoid in code that gets repeatedly executed.
	 */
	void insert_symbol(const Corpus2::Tagset& tagset, const std::string& s);

	bool empty() const {
		return tag_.is_null();
	}

	size_t size() const {
		return tag_.pos_count() + PwrNlp::count_bits_set(tag_.get_values());
	}

	/**
	 * @return true if each tagset symbol from this set exists in the other set
	 *         (note that an empty set is a subset of anything)
	 */
	bool is_subset_of(const TSet& other) const {
		return tag_.get_masked(other.tag_) == tag_;
	/**
	 * @return true if there is at least one common symbol between this set and
	 *         the other set (an empty set intersects with nothing)
	 */
	bool intersects(const TSet& other) const {
		return !tag_.get_masked(other.tag_).is_null();
	bool equals(const TSet& other) const {
		return tag_ == other.tag_;
	/**
	 * @return Number of categories present in this symbol set according
	 *         to supplied tagset.
	 * @note A category is word class or an attribute.
	 */
	int categories_count(const Corpus2::Tagset& tagset) const;

	/**
	 * @return How many categories present in the supplied tag match with
	 *         this symbol set.
	 * @warning The underlying assumption is that the supplied tag has at most
	 *          1 value per category. Otherwise the value will be incorrect.
	 * @note The symbol set may have partially defined categories. Only values
	 *       present in this symbol set count when matching values in the tag.
	 */
	int matching_categories(const Corpus2::Tag& tag) const;

	void combine_with(const Corpus2::Tag& other) {
		tag_.combine_with(other);
	}

	void combine_with(const TSet& other) {
		tag_.combine_with(other.get_value());
	}

	std::string to_string(const Corpus2::Tagset &) const;

	std::string to_raw_string() const;

	/// Value override
	std::string to_compact_string(const Corpus2::Tagset& tagset) const;

ilor's avatar
ilor committed
private:
	Corpus2::Tag tag_;
};


} /* end ns Wccl */

#endif // LIBWCCL_VALUES_TSET_H