From fabf34935d2f9116a7ec38e9030a9c4457c137e9 Mon Sep 17 00:00:00 2001
From: Adam Wardynski <award@.(B-4.4.46a)>
Date: Thu, 9 Dec 2010 13:38:46 +0100
Subject: [PATCH] categories_count(tagset) method for TSet.

---
 libwccl/values/tset.cpp | 14 +++++++++++++-
 libwccl/values/tset.h   | 10 ++++++++--
 tests/values.cpp        |  8 ++++++++
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/libwccl/values/tset.cpp b/libwccl/values/tset.cpp
index 16a62b3..79971c9 100644
--- a/libwccl/values/tset.cpp
+++ b/libwccl/values/tset.cpp
@@ -1,4 +1,5 @@
 #include <libwccl/values/tset.h>
+#include <libpwrutils/foreach.h>
 #include <sstream>
 
 namespace Wccl {
@@ -17,11 +18,22 @@ std::string TSet::to_string(const Corpus2::Tagset& tagset) const
 
 std::string TSet::var_repr(const std::string &var_name)
 {
-	std::stringstream ss;
+	std::ostringstream ss; // only written via << and read back via str(), so an output-only stream suffices
 	ss << "$t:" << var_name;
 	return ss.str();
 }
 
+int TSet::categories_count(const Corpus2::Tagset& tagset) const
+{
+	int cats = (tag_.get_pos().any()) ? 1 : 0; // start at 1 when get_pos() has any bit set (presumably the word class), else 0
+	foreach (const Corpus2::mask_t& mask, tagset.all_attribute_masks()) { // visit every mask the tagset returns (presumably one per attribute)
+		if (tag_.get_values_for(mask).any()) { // tag_ holds at least one bit under this mask
+			++cats; // each such mask adds exactly one to the count, however many bits are set
+		}
+	}
+	return cats; // total: the initial 0/1 plus the number of masks with any bit set in tag_
+}
+
 void TSet::insert_symbol(const Corpus2::Tagset& tagset, const std::string& s)
 {
 	tag_.combine_with(tagset.parse_symbol(s));
diff --git a/libwccl/values/tset.h b/libwccl/values/tset.h
index a89d0eb..e0aa99f 100644
--- a/libwccl/values/tset.h
+++ b/libwccl/values/tset.h
@@ -52,7 +52,7 @@ public:
 	}
 
 	/**
-	 * Convenience function to add a symbol from a tagste by name.
+	 * Convenience function to add a symbol from a tagset by name.
 	 *
 	 * Note: slow. Avoid in code that gets repeatedly executed.
 	 */
@@ -82,6 +82,13 @@ public:
 		return tag_ == other.tag_;
 	}
 
+	/**
+	 * @return Number of categories present in this symbol set according
+	 *         to supplied tagset.
+	 * @note A category is a word class or an attribute.
+	 */
+	int categories_count(const Corpus2::Tagset& tagset) const;
+
 	void combine_with(const Corpus2::Tag& other) {
 		tag_.combine_with(other);
 	}
@@ -90,7 +97,6 @@ public:
 		tag_.combine_with(other.get_value());
 	}
 
-
 	std::string to_string(const Corpus2::Tagset &) const;
 
 	std::string to_raw_string() const;
diff --git a/tests/values.cpp b/tests/values.cpp
index d5f02c8..daad844 100644
--- a/tests/values.cpp
+++ b/tests/values.cpp
@@ -72,17 +72,21 @@ BOOST_AUTO_TEST_CASE(tset_ops)
 	BOOST_CHECK(s1.is_subset_of(s2));
 	BOOST_CHECK(s2.is_subset_of(s1));
 	BOOST_CHECK(!s1.intersects(s2));
+	BOOST_CHECK_EQUAL(0, s1.categories_count(tagset));
 	s1.insert_symbol(tagset, "subst");
+	BOOST_CHECK_EQUAL(1, s1.categories_count(tagset));
 	BOOST_CHECK(!s1.equals(s2));
 	BOOST_CHECK(!s1.is_subset_of(s2));
 	BOOST_CHECK(s2.is_subset_of(s1));
 	BOOST_CHECK(!s1.intersects(s2));
 	s2.insert_symbol(tagset, "pl");
+	BOOST_CHECK_EQUAL(1, s2.categories_count(tagset));
 	BOOST_CHECK(!s1.equals(s2));
 	BOOST_CHECK(!s1.is_subset_of(s2));
 	BOOST_CHECK(!s2.is_subset_of(s1));
 	BOOST_CHECK(!s1.intersects(s2));
 	s2.insert_symbol(tagset, "subst");
+	BOOST_CHECK_EQUAL(2, s2.categories_count(tagset));
 	BOOST_CHECK(!s1.equals(s2));
 	BOOST_CHECK(s1.is_subset_of(s2));
 	BOOST_CHECK(!s2.is_subset_of(s1));
@@ -92,6 +96,10 @@ BOOST_AUTO_TEST_CASE(tset_ops)
 	BOOST_CHECK(s1.is_subset_of(s2));
 	BOOST_CHECK(s2.is_subset_of(s1));
 	BOOST_CHECK(s1.intersects(s2));
+	s1.insert_symbol(tagset, "sg");
+	BOOST_CHECK_EQUAL(2, s1.categories_count(tagset));
+	s1.insert_symbol(tagset, "f");
+	BOOST_CHECK_EQUAL(3, s1.categories_count(tagset));
 }
 
 BOOST_AUTO_TEST_CASE(position_ops)
-- 
GitLab