diff --git a/libcorpus2/CMakeLists.txt b/libcorpus2/CMakeLists.txt
index bd66f8d2db9aceeaa24a1118be805d3cb459ef28..1b19902bdc232c91fa020df55819975dc1ed1b24 100644
--- a/libcorpus2/CMakeLists.txt
+++ b/libcorpus2/CMakeLists.txt
@@ -3,7 +3,7 @@ PROJECT(corpus2)
 
 set(corpus2_ver_major "1")
 set(corpus2_ver_minor "0")
-set(corpus2_ver_patch "1")
+set(corpus2_ver_patch "2")
 
 
 if(NOT LIBCORPUS2_SRC_DATA_DIR)
diff --git a/libcorpus2/tagset.cpp b/libcorpus2/tagset.cpp
index 8f3a4f62d088fbcbffc3c62614a24ea14a5b8efa..475bf1034500b06808cc400aed9c57d341a4555a 100644
--- a/libcorpus2/tagset.cpp
+++ b/libcorpus2/tagset.cpp
@@ -535,11 +535,34 @@ idx_t Tagset::get_attribute_index(const string_range& a) const
 	return attribute_dict_.get_id(a);
 }
 
+idx_t Tagset::get_attribute_index(mask_t a) const
+{
+	std::map<mask_t, idx_t>::const_iterator ci;
+	ci = attribute_mask_to_index_.find(a);
+	if (ci == attribute_mask_to_index_.end()) {
+		return -1;
+	} else {
+		return ci->second;
+	}
+}
+
 const std::string& Tagset::get_attribute_name(idx_t a) const
 {
 	return attribute_dict_.get_string(a);
 }
 
+const std::string& Tagset::get_attribute_name(mask_t a) const
+{
+	static std::string nullstr;
+	idx_t index = get_attribute_index(a);
+	// assuming zero-based indices, attribute_count() itself is out of range
+	if (index < 0 || index >= attribute_count()) {
+		return nullstr;
+	} else {
+		return attribute_dict_.get_string(index);
+	}
+}
+
 const std::vector<mask_t>& Tagset::get_attribute_values(idx_t a) const
 {
 	static std::vector<mask_t> null_vec;
diff --git a/libcorpus2/tagset.h b/libcorpus2/tagset.h
index f413d56837568ada2153ff44bf66c54898d2f7ee..4b199b6251f9b56f4a7c4670dfc89729082d1133 100644
--- a/libcorpus2/tagset.h
+++ b/libcorpus2/tagset.h
@@ -375,9 +375,17 @@ public:
 	/// @returns -1 on invalid name
 	idx_t get_attribute_index(const string_range& a) const;
 
+	/// Attribute mask -> index mapping
+	/// @returns -1 on invalid mask
+	idx_t get_attribute_index(mask_t a) const;
+
 	/// Attribute index -> name
 	/// @returns empty string on invalid index
-	const std::string& get_attribute_name(idx_t pos) const;
+	const std::string& get_attribute_name(idx_t a) const;
+
+	/// Attribute mask -> name
+	/// @returns empty string on invalid mask
+	const std::string& get_attribute_name(mask_t a) const;
 
 	/// Value mask -> attribute index mapping.
 	/// if the value mask contains values from more than one attribute,
@@ -577,6 +585,9 @@ private:
 	/// Attribute index to combined value mask
 	std::vector<mask_t> attribute_masks_;
 
+	/// Attribute combined value mask to attribute index
+	std::map<mask_t, idx_t> attribute_mask_to_index_;
+
 	/// reverse mapping, from a value mask to the respective attribute
 	/// index (values are assumed to be unique and not shared between
 	/// attributes)
diff --git a/libcorpus2/tagsetparser.cpp b/libcorpus2/tagsetparser.cpp
index 4a4307b173d9c01f5caf5e4cba6452f9423a9620..93da97c7ca32e940ac41bebf6a53423aeb2eecdf 100644
--- a/libcorpus2/tagsetparser.cpp
+++ b/libcorpus2/tagsetparser.cpp
@@ -117,6 +117,8 @@ Tagset TagsetParser::load_ini(std::istream &is)
 			current_value <<= 1;
 		}
 		tagset.attribute_masks_.push_back(attribute_mask);
+		tagset.attribute_mask_to_index_.insert(std::make_pair(
+			attribute_mask, current_attribute_index));
 		++current_attribute_index;
 	}
 	tagset.attribute_dict_.load_sorted_data(vec);
diff --git a/tests/tag_split.cpp b/tests/tag_split.cpp
index f4749311f3e197ad67598b34e0df7c098aa5733e..4258b7ba032961c4e38405d89a9e8194d58a70c7 100644
--- a/tests/tag_split.cpp
+++ b/tests/tag_split.cpp
@@ -245,6 +245,16 @@ BOOST_FIXTURE_TEST_CASE( symbols, F )
 	BOOST_CHECK_THROW(tagset->parse_symbol("asdf"), Corpus2::TagParseError);
 }
 
+BOOST_FIXTURE_TEST_CASE(attribute_mask_to_name, F)
+{
+	foreach (Corpus2::mask_t a, tagset->all_attribute_masks()) {
+		std::string aname = tagset->get_attribute_name(a);
+		BOOST_CHECK(!aname.empty());
+		Corpus2::mask_t aa = tagset->get_attribute_mask(aname);
+		BOOST_CHECK_EQUAL(a, aa);
+	}
+}
+
 BOOST_AUTO_TEST_SUITE_END()
 
 BOOST_AUTO_TEST_CASE(bs_split)
@@ -257,3 +267,7 @@ BOOST_AUTO_TEST_CASE(bs_split)
 	}
 	BOOST_CHECK_EQUAL(x, y);
 }
+
+
+
+