Patch summary (free-form notes placed ahead of the first "diff --git" header).

libwccl/values/tset.cpp
  * Includes <libpwrutils/foreach.h>; the new function below uses its foreach macro.
  * TSet::var_repr now builds its "$t:" + var_name string with std::ostringstream
    instead of std::stringstream.
  * Adds TSet::categories_count(tagset): the counter starts at 1 when
    tag_.get_pos().any() is true (0 otherwise) and is incremented once for every
    mask in tagset.all_attribute_masks() for which tag_.get_values_for(mask).any()
    is true. The resulting int is returned.

libwccl/values/tset.h
  * Fixes the "tagste" -> "tagset" typo in a doc comment.
  * Declares categories_count together with its doc comment.

tests/values.cpp (test case tset_ops)
  * Adds BOOST_CHECK_EQUAL expectations for categories_count: 0 on s1 before
    "subst" is inserted, 1 on s1 after "subst", 1 on s2 after "pl", 2 on s2 after
    "pl" and "subst", then 2 on s1 after "sg" and 3 on s1 after "f".

NOTE(review): line breaks inside the hunks below appear to have been collapsed
to spaces in this copy; the patch text is kept exactly as found.
NOTE(review): the added @note reads "A category is word class or an attribute";
consider "a word class" in a follow-up change to tset.h.

diff --git a/libwccl/values/tset.cpp b/libwccl/values/tset.cpp index 16a62b33271da69be7b89827e2f088aa665b7819..79971c955931ba5358ad27fa7caa749d86d69ac1 100644 --- a/libwccl/values/tset.cpp +++ b/libwccl/values/tset.cpp @@ -1,4 +1,5 @@ #include <libwccl/values/tset.h> +#include <libpwrutils/foreach.h> #include <sstream> namespace Wccl { @@ -17,11 +18,22 @@ std::string TSet::to_string(const Corpus2::Tagset& tagset) const std::string TSet::var_repr(const std::string &var_name) { - std::stringstream ss; + std::ostringstream ss; ss << "$t:" << var_name; return ss.str(); } +int TSet::categories_count(const Corpus2::Tagset& tagset) const +{ + int cats = (tag_.get_pos().any()) ? 1 : 0; + foreach (const Corpus2::mask_t& mask, tagset.all_attribute_masks()) { + if (tag_.get_values_for(mask).any()) { + ++cats; + } + } + return cats; +} + void TSet::insert_symbol(const Corpus2::Tagset& tagset, const std::string& s) { tag_.combine_with(tagset.parse_symbol(s)); diff --git a/libwccl/values/tset.h b/libwccl/values/tset.h index a89d0eb27d3449cd1f77f4d450f610800bdb769b..e0aa99ff9ff61fce6751ae2c758a803e3ceb926a 100644 --- a/libwccl/values/tset.h +++ b/libwccl/values/tset.h @@ -52,7 +52,7 @@ public: } /** - * Convenience function to add a symbol from a tagste by name. + * Convenience function to add a symbol from a tagset by name. * * Note: slow. Avoid in code that gets repeatedly executed. */ @@ -82,6 +82,13 @@ public: return tag_ == other.tag_; } + /** + * @return Number of categories present in this symbol set according + * to supplied tagset. + * @note A category is word class or an attribute. 
+ */ + int categories_count(const Corpus2::Tagset& tagset) const; + void combine_with(const Corpus2::Tag& other) { tag_.combine_with(other); } @@ -90,7 +97,6 @@ public: tag_.combine_with(other.get_value()); } - std::string to_string(const Corpus2::Tagset &) const; std::string to_raw_string() const; diff --git a/tests/values.cpp b/tests/values.cpp index d5f02c842f5fa3a531c7a2a39a254bbc01cb269c..daad844f8bd2b80f939e1e8314b6edcf26501f40 100644 --- a/tests/values.cpp +++ b/tests/values.cpp @@ -72,17 +72,21 @@ BOOST_AUTO_TEST_CASE(tset_ops) BOOST_CHECK(s1.is_subset_of(s2)); BOOST_CHECK(s2.is_subset_of(s1)); BOOST_CHECK(!s1.intersects(s2)); + BOOST_CHECK_EQUAL(0, s1.categories_count(tagset)); s1.insert_symbol(tagset, "subst"); + BOOST_CHECK_EQUAL(1, s1.categories_count(tagset)); BOOST_CHECK(!s1.equals(s2)); BOOST_CHECK(!s1.is_subset_of(s2)); BOOST_CHECK(s2.is_subset_of(s1)); BOOST_CHECK(!s1.intersects(s2)); s2.insert_symbol(tagset, "pl"); + BOOST_CHECK_EQUAL(1, s2.categories_count(tagset)); BOOST_CHECK(!s1.equals(s2)); BOOST_CHECK(!s1.is_subset_of(s2)); BOOST_CHECK(!s2.is_subset_of(s1)); BOOST_CHECK(!s1.intersects(s2)); s2.insert_symbol(tagset, "subst"); + BOOST_CHECK_EQUAL(2, s2.categories_count(tagset)); BOOST_CHECK(!s1.equals(s2)); BOOST_CHECK(s1.is_subset_of(s2)); BOOST_CHECK(!s2.is_subset_of(s1)); @@ -92,6 +96,10 @@ BOOST_AUTO_TEST_CASE(tset_ops) BOOST_CHECK(s1.is_subset_of(s2)); BOOST_CHECK(s2.is_subset_of(s1)); BOOST_CHECK(s1.intersects(s2)); + s1.insert_symbol(tagset, "sg"); + BOOST_CHECK_EQUAL(2, s1.categories_count(tagset)); + s1.insert_symbol(tagset, "f"); + BOOST_CHECK_EQUAL(3, s1.categories_count(tagset)); } BOOST_AUTO_TEST_CASE(position_ops)