/*
	Copyright (C) 2011 Adam Wardyński, Tomasz Śniatowski, Paweł Kędzia,
	Adam Radziszewski, Bartosz Broda
	Part of the WCCL project

	This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.

	This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.

	See the LICENSE, COPYING.LESSER and COPYING files for more details.
*/

#ifndef LIBWCCL_VALUES_STRSET_H
#define LIBWCCL_VALUES_STRSET_H

#include <libwccl/values/value.h>

#include <set>
#include <string>

#include <boost/unordered_set.hpp>
#include <libcorpus2/lexeme.h> // for unicodestring hash

namespace Wccl {

/**
 * A Value subtype representing a set of strings.
 *
 * No guarantees for the order of elements are given at this time.
 *
 * By default the set is empty.
 */
ilor's avatar
ilor committed
class StrSet : public Value
{
public:
	WCCL_VALUE_PREAMBLE

	//typedef boost::unordered_set<UnicodeString> value_type;
	typedef std::set<UnicodeString> value_type;
ilor's avatar
ilor committed

ilor's avatar
ilor committed
	StrSet()
		: set_()
	{
	}

	explicit StrSet(const value_type& s)
ilor's avatar
ilor committed
		: set_(s)
	{
	}

	// Good job, boost. https://svn.boost.org/trac/boost/ticket/6167
	// "Assignment from a temporary of a class containing
	// boost::unordered_map members fails with GNU GCC"
	// Work around this by manually defining op=(const&), otherwise
	// on boost 1.48 the following code will fail in non-c++0x mode:
	//     StrSet s;
	//     s = StrSet(); //compile error
	StrSet& operator=(const StrSet& s) {
		set_ = s.set_;
		return *this;
	}


	const value_type& get_value() const {
ilor's avatar
ilor committed
		return set_;
	}

	void set_value(const value_type& set) {
ilor's avatar
ilor committed
		set_ = set;
	}

	/**
	 * get_value() alias.
	 */
	const value_type& contents() const {
		return set_;
	/**
	 * Nonconst variant of get_value()
	 */
	value_type& contents() {
		return set_;
	/**
	 * Wrapper for boost::unordered set
	 */
	std::set<UnicodeString> to_std_set() const {
		return std::set<UnicodeString>(this->set_.begin(), this->set_.end());
	}

ilor's avatar
ilor committed
	void swap(StrSet& ss) {
		ss.set_.swap(set_);
	}

	/// Convenience function to add a new UnicodeString to the set
ilor's avatar
ilor committed
	void insert(const UnicodeString& u) {
		set_.insert(u);
	}

	/// Convenience function to add a new string to the set, treated as UTF-8
ilor's avatar
ilor committed
	void insert_utf8(const std::string& u) {
		insert(UnicodeString::fromUTF8(u));
	}

ilor's avatar
ilor committed
	int size() const {
		return set_.size();
	}

	/// Convenience empty checker
	/**
	 * @return true if each string from this set exists in the other set
	 *         (note that an empty set is a subset of anything)
	 */
	/**
	 * @return true if there is at least one common string between this set and
	 *         the other set (an empty set intersects with nothing)
	 */
	bool equals(const StrSet& other) const {
		return set_ == other.set_;
ilor's avatar
ilor committed
	/// Value override
	std::string to_raw_string() const;

	/// Value override
	UnicodeString to_raw_string_u() const;

	/// Value override
	std::string to_compact_string(const Corpus2::Tagset& tagset) const;

	/// Value override
	UnicodeString to_compact_string_u(const Corpus2::Tagset& tagset) const;

ilor's avatar
ilor committed
private:
ilor's avatar
ilor committed
};

} /* end ns Wccl */

#endif // LIBWCCL_VALUES_STRSET_H