/*
    Copyright (C) 2011 Adam Wardyński, Tomasz Śniatowski, Paweł Kędzia,
    Adam Radziszewski, Bartosz Broda
    Part of the WCCL project

    This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.

    This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. 

    See the LICENSE and COPYING files for more details.
*/

#ifndef LIBWCCL_VALUES_STRSET_H
#define LIBWCCL_VALUES_STRSET_H

#include <libwccl/values/value.h>
#include <boost/unordered_set.hpp>
#include <libcorpus2/lexeme.h> // for unicodestring hash

#include <set>    // std::set, returned by StrSet::to_std_set()
#include <string> // std::string, used in StrSet's interface

namespace Wccl {

/**
 * A Value subtype representing a set of strings.
 *
 * No guarantees for the order of elements are given at this time.
 *
 * By default the set is empty.
 */
ilor's avatar
ilor committed
class StrSet : public Value
{
public:
	WCCL_VALUE_PREAMBLE

	typedef boost::unordered_set<UnicodeString> value_type;
ilor's avatar
ilor committed

ilor's avatar
ilor committed
	StrSet()
		: set_()
	{
	}

	explicit StrSet(const value_type& s)
ilor's avatar
ilor committed
		: set_(s)
	{
	}

	const value_type& get_value() const {
ilor's avatar
ilor committed
		return set_;
	}

	void set_value(const value_type& set) {
ilor's avatar
ilor committed
		set_ = set;
	}

	/**
	 * get_value() alias.
	 */
	const value_type& contents() const {
		return set_;
	/**
	 * Nonconst variant of get_value()
	 */
	value_type& contents() {
		return set_;
	/**
	 * Wrapper for boost::unordered set
	 */
	std::set<UnicodeString> to_std_set() const {
		return std::set<UnicodeString>(this->set_.begin(), this->set_.end());
	}

ilor's avatar
ilor committed
	void swap(StrSet& ss) {
		ss.set_.swap(set_);
	}

	/// Convenience function to add a new UnicodeString to the set
ilor's avatar
ilor committed
	void insert(const UnicodeString& u) {
		set_.insert(u);
	}

	/// Convenience function to add a new string to the set, treated as UTF-8
ilor's avatar
ilor committed
	void insert_utf8(const std::string& u) {
		insert(UnicodeString::fromUTF8(u));
	}

	/// Convenience size accesor
ilor's avatar
ilor committed
	int size() const {
		return set_.size();
	}

	/// Convenience empty checker
	/**
	 * @return true if each string from this set exists in the other set
	 *         (note that an empty set is a subset of anything)
	 */
	/**
	 * @return true if there is at least one common string between this set and
	 *         the other set (an empty set intersects with nothing)
	 */
	bool equals(const StrSet& other) const {
		return set_ == other.set_;
ilor's avatar
ilor committed
	/// Value override
	std::string to_raw_string() const;

	/// Value override
	UnicodeString to_raw_string_u() const;

	/// Value override
	std::string to_compact_string(const Corpus2::Tagset& tagset) const;

	/// Value override
	UnicodeString to_compact_string_u(const Corpus2::Tagset& tagset) const;

ilor's avatar
ilor committed
private:
ilor's avatar
ilor committed
};

} /* end ns Wccl */

#endif // LIBWCCL_VALUES_STRSET_H