From 2702b4e1dc855ab2cce727caabf599245dd9919c Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Mon, 15 Nov 2010 10:43:31 +0100
Subject: [PATCH] Implement TSet::is_subset_of, equals and intersects; change
 StrSet::is_subset_of to correctly handle empty sets and add some comments;
 add TSet::insert_symbol; add tests for the new functions; bump the required
 corpus2 version to 0.1.2 due to a bug fixed there

---
 libwccl/CMakeLists.txt    |  2 +-
 libwccl/values/strset.cpp | 19 +++++++------
 libwccl/values/strset.h   |  8 ++++++
 libwccl/values/tset.cpp   |  5 ++++
 libwccl/values/tset.h     | 25 ++++++++++------
 tests/values.cpp          | 60 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 101 insertions(+), 18 deletions(-)

diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index b69bbb4..8c68a29 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -4,7 +4,7 @@ PROJECT(wccl)
 
 include_directories( ${CMAKE_CURRENT_BINARY_DIR}/include/ )
 
-find_package(Corpus2 0.1.1 REQUIRED)
+find_package(Corpus2 0.1.2 REQUIRED)
 set(LIBS ${LIBS} ${Corpus2_LIBRARY})
 
 find_package(PwrUtils 0.0.3 REQUIRED)
diff --git a/libwccl/values/strset.cpp b/libwccl/values/strset.cpp
index ed6b4bc..02489f4 100644
--- a/libwccl/values/strset.cpp
+++ b/libwccl/values/strset.cpp
@@ -24,29 +24,32 @@ std::string StrSet::to_raw_string() const
 }
 
 bool StrSet::intersects(const StrSet &other) const {
-	if(empty() || other.empty()) {
+	if (empty() || other.empty()) {
 		return false;
 	}
-	//we just want to check if there is an intersection, no
+	//We just want to check if there is an intersection, no
 	//need to actually compute it to check if it's empty.
-	//doing it like below sounds faster than, say, sorting
-	//the sets and using set_intersection
+	//Doing it like below sounds faster than, say, sorting
+	//the sets and using set_intersection.
+	//It's faster to iterate through the smaller set and look up in the
+	//larger one than to do the opposite, hence the ?: references below.
 	const set_t& smaller = size() < other.size() ? set_ : other.set_;
 	const set_t& bigger = size() < other.size() ? other.set_ : set_;
 	foreach (const UnicodeString& u, smaller) {
-		if(bigger.find(u) != bigger.end()) {
+		if (bigger.find(u) != bigger.end()) {
 			return true;
 		}
 	}
 	return false;
 }
 
-bool StrSet::is_subset_of(const StrSet &other) const {
-	if(empty() || size() > other.size()) {
+bool StrSet::is_subset_of(const StrSet &other) const
+{
+	if (size() > other.size()) {
 		return false;
 	}
 	foreach (const UnicodeString& u, set_) {
-		if(other.set_.find(u) == other.set_.end()) {
+		if (other.set_.find(u) == other.set_.end()) {
 			return false;
 		}
 	}
diff --git a/libwccl/values/strset.h b/libwccl/values/strset.h
index e984bd0..3545e80 100644
--- a/libwccl/values/strset.h
+++ b/libwccl/values/strset.h
@@ -56,8 +56,16 @@ public:
 		return set_.empty();
 	}
 
+	/**
+	 * @return true if each string from this set exists in the other set
+	 *         (note that an empty set is a subset of anything)
+	 */
 	bool is_subset_of(const StrSet& other) const;
 
+	/**
+	 * @return true if there is at least one common string between this set and
+	 *         the other set (an empty set intersects with nothing)
+	 */
 	bool intersects(const StrSet& other) const;
 
 	bool equals(const StrSet& other) const {
diff --git a/libwccl/values/tset.cpp b/libwccl/values/tset.cpp
index 9573bcb..8b1090e 100644
--- a/libwccl/values/tset.cpp
+++ b/libwccl/values/tset.cpp
@@ -15,4 +15,9 @@ std::string TSet::to_string(const Corpus2::Tagset& tagset) const
 	return tagset.tag_to_symbol_string(tag_);
 }
 
+void TSet::insert_symbol(const Corpus2::Tagset& tagset, const std::string& s)
+{
+	tag_.combine_with(tagset.parse_symbol(s));
+}
+
 } /* end ns Wccl */
diff --git a/libwccl/values/tset.h b/libwccl/values/tset.h
index 3c7d125..6d6813b 100644
--- a/libwccl/values/tset.h
+++ b/libwccl/values/tset.h
@@ -33,23 +33,30 @@ public:
 		return tag_;
 	}
 
+	void insert_symbol(const Corpus2::Tagset& tagset, const std::string& s);
+
 	bool empty() const {
 		return tag_.is_null();
 	}
 
-	bool is_subset_of(const TSet& /*tset*/) const {
-		//TODO: implement this
-		return false;
+	/**
+	 * @return true if each tagset symbol from this set exists in the other set
+	 *         (note that an empty set is a subset of anything)
+	 */
+	bool is_subset_of(const TSet& other) const {
+		return tag_.get_masked(other.tag_) == tag_;
 	}
 
-	bool intersects(const TSet& /*tset*/) const {
-		//TODO: implement this
-		return false;
+	/**
+	 * @return true if there is at least one common symbol between this set and
+	 *         the other set (an empty set intersects with nothing)
+	 */
+	bool intersects(const TSet& other) const {
+		return !tag_.get_masked(other.tag_).is_null();
 	}
 
-	bool equals(const TSet& /*tset*/) const {
-		//TODO: implement this
-		return false;
+	bool equals(const TSet& other) const {
+		return tag_ == other.tag_;
 	}
 
 	std::string to_string(const Corpus2::Tagset &) const;
diff --git a/tests/values.cpp b/tests/values.cpp
index 6d2ac2d..78df7ec 100644
--- a/tests/values.cpp
+++ b/tests/values.cpp
@@ -1,5 +1,6 @@
 #include <boost/test/unit_test.hpp>
 #include <boost/bind.hpp>
+#include <libcorpus2/tagsetmanager.h>
 
 #include <libwccl/variables.h>
 
@@ -25,4 +26,63 @@ BOOST_AUTO_TEST_CASE(tsetz)
 	BOOST_CHECK_EQUAL(v.get_type_name(), TSet::type_name);
 }
 
+BOOST_AUTO_TEST_CASE(strset_ops)
+{
+	StrSet s1, s2;
+	BOOST_CHECK(s1.equals(s2));
+	BOOST_CHECK(s1.is_subset_of(s2));
+	BOOST_CHECK(s2.is_subset_of(s1));
+	BOOST_CHECK(!s1.intersects(s2));
+	s1.insert_utf8("aaa");
+	BOOST_CHECK(!s1.equals(s2));
+	BOOST_CHECK(!s1.is_subset_of(s2));
+	BOOST_CHECK(s2.is_subset_of(s1));
+	BOOST_CHECK(!s1.intersects(s2));
+	s2.insert_utf8("bbb");
+	BOOST_CHECK(!s1.equals(s2));
+	BOOST_CHECK(!s1.is_subset_of(s2));
+	BOOST_CHECK(!s2.is_subset_of(s1));
+	BOOST_CHECK(!s1.intersects(s2));
+	s2.insert_utf8("aaa");
+	BOOST_CHECK(!s1.equals(s2));
+	BOOST_CHECK(s1.is_subset_of(s2));
+	BOOST_CHECK(!s2.is_subset_of(s1));
+	BOOST_CHECK(s1.intersects(s2));
+	s1.insert_utf8("bbb");
+	BOOST_CHECK(s1.equals(s2));
+	BOOST_CHECK(s1.is_subset_of(s2));
+	BOOST_CHECK(s2.is_subset_of(s1));
+	BOOST_CHECK(s1.intersects(s2));
+}
+
+BOOST_AUTO_TEST_CASE(tset_ops)
+{
+	TSet s1, s2;
+	const Corpus2::Tagset& tagset = Corpus2::get_named_tagset("kipi");
+	BOOST_CHECK(s1.equals(s2));
+	BOOST_CHECK(s1.is_subset_of(s2));
+	BOOST_CHECK(s2.is_subset_of(s1));
+	BOOST_CHECK(!s1.intersects(s2));
+	s1.insert_symbol(tagset, "subst");
+	BOOST_CHECK(!s1.equals(s2));
+	BOOST_CHECK(!s1.is_subset_of(s2));
+	BOOST_CHECK(s2.is_subset_of(s1));
+	BOOST_CHECK(!s1.intersects(s2));
+	s2.insert_symbol(tagset, "pl");
+	BOOST_CHECK(!s1.equals(s2));
+	BOOST_CHECK(!s1.is_subset_of(s2));
+	BOOST_CHECK(!s2.is_subset_of(s1));
+	BOOST_CHECK(!s1.intersects(s2));
+	s2.insert_symbol(tagset, "subst");
+	BOOST_CHECK(!s1.equals(s2));
+	BOOST_CHECK(s1.is_subset_of(s2));
+	BOOST_CHECK(!s2.is_subset_of(s1));
+	BOOST_CHECK(s1.intersects(s2));
+	s1.insert_symbol(tagset, "pl");
+	BOOST_CHECK(s1.equals(s2));
+	BOOST_CHECK(s1.is_subset_of(s2));
+	BOOST_CHECK(s2.is_subset_of(s1));
+	BOOST_CHECK(s1.intersects(s2));
+}
+
 BOOST_AUTO_TEST_SUITE_END()
-- 
GitLab