From fabf34935d2f9116a7ec38e9030a9c4457c137e9 Mon Sep 17 00:00:00 2001
From: Adam Wardynski <award@.(B-4.4.46a)>
Date: Thu, 9 Dec 2010 13:38:46 +0100
Subject: [PATCH] categories_count(tagset) method for TSet.

---
 libwccl/values/tset.cpp | 14 +++++++++++++-
 libwccl/values/tset.h   | 10 ++++++++--
 tests/values.cpp        |  8 ++++++++
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/libwccl/values/tset.cpp b/libwccl/values/tset.cpp
index 16a62b3..79971c9 100644
--- a/libwccl/values/tset.cpp
+++ b/libwccl/values/tset.cpp
@@ -1,4 +1,5 @@
 #include <libwccl/values/tset.h>
+#include <libpwrutils/foreach.h>
 #include <sstream>
 
 namespace Wccl {
@@ -17,11 +18,22 @@ std::string TSet::to_string(const Corpus2::Tagset& tagset) const
 
 std::string TSet::var_repr(const std::string &var_name)
 {
-	std::stringstream ss;
+	std::ostringstream ss;
 	ss << "$t:" << var_name;
 	return ss.str();
 }
 
+int TSet::categories_count(const Corpus2::Tagset& tagset) const
+{
+	int cats = (tag_.get_pos().any()) ? 1 : 0;
+	foreach (const Corpus2::mask_t& mask, tagset.all_attribute_masks()) {
+		if (tag_.get_values_for(mask).any()) {
+			++cats;
+		}
+	}
+	return cats;
+}
+
 void TSet::insert_symbol(const Corpus2::Tagset& tagset, const std::string& s)
 {
 	tag_.combine_with(tagset.parse_symbol(s));
diff --git a/libwccl/values/tset.h b/libwccl/values/tset.h
index a89d0eb..e0aa99f 100644
--- a/libwccl/values/tset.h
+++ b/libwccl/values/tset.h
@@ -52,7 +52,7 @@ public:
 	}
 
 	/**
-	 * Convenience function to add a symbol from a tagste by name.
+	 * Convenience function to add a symbol from a tagset by name.
 	 *
 	 * Note: slow. Avoid in code that gets repeatedly executed.
 	 */
@@ -82,6 +82,13 @@ public:
 		return tag_ == other.tag_;
 	}
 
+	/**
+	 * @return Number of categories present in this symbol set according
+	 * to supplied tagset.
+	 * @note A category is word class or an attribute.
+	 */
+	int categories_count(const Corpus2::Tagset& tagset) const;
+
 	void combine_with(const Corpus2::Tag& other) {
 		tag_.combine_with(other);
 	}
@@ -90,7 +97,6 @@ public:
 		tag_.combine_with(other.get_value());
 	}
 
-
 	std::string to_string(const Corpus2::Tagset &) const;
 
 	std::string to_raw_string() const;
diff --git a/tests/values.cpp b/tests/values.cpp
index d5f02c8..daad844 100644
--- a/tests/values.cpp
+++ b/tests/values.cpp
@@ -72,17 +72,21 @@ BOOST_AUTO_TEST_CASE(tset_ops)
 	BOOST_CHECK(s1.is_subset_of(s2));
 	BOOST_CHECK(s2.is_subset_of(s1));
 	BOOST_CHECK(!s1.intersects(s2));
+	BOOST_CHECK_EQUAL(0, s1.categories_count(tagset));
 	s1.insert_symbol(tagset, "subst");
+	BOOST_CHECK_EQUAL(1, s1.categories_count(tagset));
 	BOOST_CHECK(!s1.equals(s2));
 	BOOST_CHECK(!s1.is_subset_of(s2));
 	BOOST_CHECK(s2.is_subset_of(s1));
 	BOOST_CHECK(!s1.intersects(s2));
 	s2.insert_symbol(tagset, "pl");
+	BOOST_CHECK_EQUAL(1, s2.categories_count(tagset));
 	BOOST_CHECK(!s1.equals(s2));
 	BOOST_CHECK(!s1.is_subset_of(s2));
 	BOOST_CHECK(!s2.is_subset_of(s1));
 	BOOST_CHECK(!s1.intersects(s2));
 	s2.insert_symbol(tagset, "subst");
+	BOOST_CHECK_EQUAL(2, s2.categories_count(tagset));
 	BOOST_CHECK(!s1.equals(s2));
 	BOOST_CHECK(s1.is_subset_of(s2));
 	BOOST_CHECK(!s2.is_subset_of(s1));
@@ -92,6 +96,10 @@ BOOST_AUTO_TEST_CASE(tset_ops)
 	BOOST_CHECK(s1.is_subset_of(s2));
 	BOOST_CHECK(s2.is_subset_of(s1));
 	BOOST_CHECK(s1.intersects(s2));
+	s1.insert_symbol(tagset, "sg");
+	BOOST_CHECK_EQUAL(2, s1.categories_count(tagset));
+	s1.insert_symbol(tagset, "f");
+	BOOST_CHECK_EQUAL(3, s1.categories_count(tagset));
 }
 
 BOOST_AUTO_TEST_CASE(position_ops)
-- 
GitLab