From a2a05ba0a9c3c5852f9f1af7c28f1f55ee4cb8ff Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Tue, 30 Nov 2010 14:29:28 +0100
Subject: [PATCH] add Tagset attribute mask to index/name mapping and related
 functions. Bump version to 1.0.2

Tagset gains two lookups keyed by an attribute's combined value mask:
get_attribute_index(mask_t), which returns -1 for an unknown mask, and
get_attribute_name(mask_t), which returns an empty string for an unknown
mask. Both are backed by the new attribute_mask_to_index_ map, which
TagsetParser::load_ini fills in as each attribute mask is stored.
---
 libcorpus2/CMakeLists.txt   |  2 +-
 libcorpus2/tagset.cpp       | 22 ++++++++++++++++++++++
 libcorpus2/tagset.h         | 13 ++++++++++++-
 libcorpus2/tagsetparser.cpp |  2 ++
 tests/tag_split.cpp         | 14 ++++++++++++++
 5 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/libcorpus2/CMakeLists.txt b/libcorpus2/CMakeLists.txt
index bd66f8d..1b19902 100644
--- a/libcorpus2/CMakeLists.txt
+++ b/libcorpus2/CMakeLists.txt
@@ -3,7 +3,7 @@ PROJECT(corpus2)
 
 set(corpus2_ver_major "1")
 set(corpus2_ver_minor "0")
-set(corpus2_ver_patch "1")
+set(corpus2_ver_patch "2")
 
 
 if(NOT LIBCORPUS2_SRC_DATA_DIR)
diff --git a/libcorpus2/tagset.cpp b/libcorpus2/tagset.cpp
index 8f3a4f6..475bf10 100644
--- a/libcorpus2/tagset.cpp
+++ b/libcorpus2/tagset.cpp
@@ -535,11 +535,33 @@ idx_t Tagset::get_attribute_index(const string_range& a) const
 	return attribute_dict_.get_id(a);
 }
 
+idx_t Tagset::get_attribute_index(mask_t a) const
+{
+	std::map<mask_t, idx_t>::const_iterator ci;
+	ci = attribute_mask_to_index_.find(a);
+	if (ci == attribute_mask_to_index_.end()) {
+		return -1;
+	} else {
+		return ci->second;
+	}
+}
+
 const std::string& Tagset::get_attribute_name(idx_t a) const
 {
 	return attribute_dict_.get_string(a);
 }
 
+const std::string& Tagset::get_attribute_name(mask_t a) const
+{
+	static std::string nullstr;
+	idx_t index = get_attribute_index(a);
+	if (index < 0 || index >= attribute_count()) {
+		return nullstr;
+	} else {
+		return attribute_dict_.get_string(index);
+	}
+}
+
 const std::vector<mask_t>& Tagset::get_attribute_values(idx_t a) const
 {
 	static std::vector<mask_t> null_vec;
diff --git a/libcorpus2/tagset.h b/libcorpus2/tagset.h
index f413d56..4b199b6 100644
--- a/libcorpus2/tagset.h
+++ b/libcorpus2/tagset.h
@@ -375,9 +375,17 @@ public:
 	/// @returns -1 on invalid name
 	idx_t get_attribute_index(const string_range& a) const;
 
+	/// Attribute mask -> index mapping
+	/// @returns -1 on invalid mask
+	idx_t get_attribute_index(mask_t a) const;
+
 	/// Attribute index -> name
 	/// @returns empty string on invalid index
-	const std::string& get_attribute_name(idx_t pos) const;
+	const std::string& get_attribute_name(idx_t a) const;
+
+	/// Attribute mask -> name
+	/// @returns empty string on invalid mask
+	const std::string& get_attribute_name(mask_t a) const;
 
 	/// Value mask -> attribute index mapping.
 	/// if the value mask contains values from more than one attribute,
@@ -577,6 +585,9 @@ private:
 	/// Attribute index to combined value mask
 	std::vector<mask_t> attribute_masks_;
 
+	/// Attribute combined mask to attribute index
+	std::map<mask_t, idx_t> attribute_mask_to_index_;
+
 	/// reverse mapping, from a value mask to the respective attribute
 	/// index (values are assumed to be unique and not shared between
 	/// attributes)
diff --git a/libcorpus2/tagsetparser.cpp b/libcorpus2/tagsetparser.cpp
index 4a4307b..93da97c 100644
--- a/libcorpus2/tagsetparser.cpp
+++ b/libcorpus2/tagsetparser.cpp
@@ -117,6 +117,8 @@ Tagset TagsetParser::load_ini(std::istream &is)
 			current_value <<= 1;
 		}
 		tagset.attribute_masks_.push_back(attribute_mask);
+		tagset.attribute_mask_to_index_.insert(std::make_pair(
+			attribute_mask, current_attribute_index));
 		++current_attribute_index;
 	}
 	tagset.attribute_dict_.load_sorted_data(vec);
diff --git a/tests/tag_split.cpp b/tests/tag_split.cpp
index f474931..4258b7b 100644
--- a/tests/tag_split.cpp
+++ b/tests/tag_split.cpp
@@ -245,6 +245,16 @@ BOOST_FIXTURE_TEST_CASE( symbols, F )
 	BOOST_CHECK_THROW(tagset->parse_symbol("asdf"), Corpus2::TagParseError);
 }
 
+BOOST_FIXTURE_TEST_CASE(attribute_mask_to_name, F)
+{
+	foreach (Corpus2::mask_t a, tagset->all_attribute_masks()) {
+		std::string aname = tagset->get_attribute_name(a);
+		BOOST_CHECK(!aname.empty());
+		Corpus2::mask_t aa = tagset->get_attribute_mask(aname);
+		BOOST_CHECK_EQUAL(a, aa);
+	}
+}
+
 BOOST_AUTO_TEST_SUITE_END()
 
 BOOST_AUTO_TEST_CASE(bs_split)
@@ -257,3 +267,7 @@ BOOST_AUTO_TEST_CASE(bs_split)
 	}
 	BOOST_CHECK_EQUAL(x, y);
 }
+
+
+
+
-- 
GitLab