Skip to content
Snippets Groups Projects
Commit fabf3493 authored by Adam Wardynski
Browse files

categories_count(tagset) method for TSet.

parent 47a70582
Branches
No related merge requests found
#include <libwccl/values/tset.h>
#include <libpwrutils/foreach.h>
#include <sstream>
namespace Wccl {
......@@ -17,11 +18,22 @@ std::string TSet::to_string(const Corpus2::Tagset& tagset) const
/**
 * Build the textual representation of a TSet variable with the given name.
 *
 * @param var_name name of the variable
 * @return the variable name prefixed with "$t:"
 */
std::string TSet::var_repr(const std::string &var_name)
{
	// The stream is only written to, so one output-only string stream is
	// declared; a second declaration of `ss` in this scope would not compile.
	std::ostringstream ss;
	ss << "$t:" << var_name;
	return ss.str();
}
/**
 * Count the categories that have at least one value set in this symbol set.
 *
 * The word class contributes one category when any of its bits is set, and
 * every attribute mask of the supplied tagset contributes one more when this
 * set holds any value for that mask.
 *
 * @param tagset tagset whose attribute masks are checked
 * @return number of categories present
 */
int TSet::categories_count(const Corpus2::Tagset& tagset) const
{
	int count = 0;
	// Word class: counted once if anything is set in the POS part.
	if (tag_.get_pos().any()) {
		count = 1;
	}
	// Attributes: one per attribute mask with any value present.
	foreach (const Corpus2::mask_t& attr_mask, tagset.all_attribute_masks()) {
		count += tag_.get_values_for(attr_mask).any() ? 1 : 0;
	}
	return count;
}
void TSet::insert_symbol(const Corpus2::Tagset& tagset, const std::string& s)
{
tag_.combine_with(tagset.parse_symbol(s));
......
......@@ -52,7 +52,7 @@ public:
}
/**
* Convenience function to add a symbol from a tagset by name.
*
* Note: slow. Avoid in code that gets repeatedly executed.
*/
......@@ -82,6 +82,13 @@ public:
return tag_ == other.tag_;
}
/**
* Count the categories present in this symbol set.
*
* @param tagset tagset whose attribute masks are checked against this set
* @return Number of categories present in this symbol set according
* to the supplied tagset.
* @note A category is either the word class or an attribute.
*/
int categories_count(const Corpus2::Tagset& tagset) const;
/**
 * Combine the given tag into this symbol set by delegating to
 * combine_with() on the underlying tag.
 *
 * @param other tag to combine into this set
 */
void combine_with(const Corpus2::Tag& other)
{
	this->tag_.combine_with(other);
}
......@@ -90,7 +97,6 @@ public:
tag_.combine_with(other.get_value());
}
/**
 * @return String representation of this value. The tagset argument is
 * presumably used to resolve symbol names -- TODO confirm against the
 * definition.
 */
std::string to_string(const Corpus2::Tagset &) const;
/**
 * @return Raw string representation; unlike to_string() this takes no
 * tagset. Exact format is not visible here -- TODO confirm.
 */
std::string to_raw_string() const;
......
......@@ -72,17 +72,21 @@ BOOST_AUTO_TEST_CASE(tset_ops)
BOOST_CHECK(s1.is_subset_of(s2));
BOOST_CHECK(s2.is_subset_of(s1));
BOOST_CHECK(!s1.intersects(s2));
BOOST_CHECK_EQUAL(0, s1.categories_count(tagset));
s1.insert_symbol(tagset, "subst");
BOOST_CHECK_EQUAL(1, s1.categories_count(tagset));
BOOST_CHECK(!s1.equals(s2));
BOOST_CHECK(!s1.is_subset_of(s2));
BOOST_CHECK(s2.is_subset_of(s1));
BOOST_CHECK(!s1.intersects(s2));
s2.insert_symbol(tagset, "pl");
BOOST_CHECK_EQUAL(1, s2.categories_count(tagset));
BOOST_CHECK(!s1.equals(s2));
BOOST_CHECK(!s1.is_subset_of(s2));
BOOST_CHECK(!s2.is_subset_of(s1));
BOOST_CHECK(!s1.intersects(s2));
s2.insert_symbol(tagset, "subst");
BOOST_CHECK_EQUAL(2, s2.categories_count(tagset));
BOOST_CHECK(!s1.equals(s2));
BOOST_CHECK(s1.is_subset_of(s2));
BOOST_CHECK(!s2.is_subset_of(s1));
......@@ -92,6 +96,10 @@ BOOST_AUTO_TEST_CASE(tset_ops)
BOOST_CHECK(s1.is_subset_of(s2));
BOOST_CHECK(s2.is_subset_of(s1));
BOOST_CHECK(s1.intersects(s2));
s1.insert_symbol(tagset, "sg");
BOOST_CHECK_EQUAL(2, s1.categories_count(tagset));
s1.insert_symbol(tagset, "f");
BOOST_CHECK_EQUAL(3, s1.categories_count(tagset));
}
BOOST_AUTO_TEST_CASE(position_ops)
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment