diff --git a/CMakeLists.txt b/CMakeLists.txt
index bddb922bde6f8e8ca92cc32b7a01764f1cf1d971..bfb5d06fa0b2ec7679943c7725e51394627f23ce 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,7 +24,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
 
 
 	# Strict compilation for C files is disabled until somebody wants to clean them.
-	set(CMAKE_C_FLAGS"-W -Wall -ansi $ENV{CFLAGS}"
+	set(CMAKE_C_FLAGS "-W -Wall -ansi $ENV{CFLAGS}"
 		CACHE STRING "Flags used by the C compiler during normal builds." FORCE)
 	set(CMAKE_C_FLAGS_DEBUG "-O0 -DDEBUG -ggdb3 -W -Wall -ansi $ENV{CFLAGS}"
 		CACHE STRING "Flags used by the C compiler during debug builds." FORCE)
diff --git a/libcorpus2/CMakeLists.txt b/libcorpus2/CMakeLists.txt
index 551cf96e85873e41de170b1312964451c49063fa..1b19902bdc232c91fa020df55819975dc1ed1b24 100644
--- a/libcorpus2/CMakeLists.txt
+++ b/libcorpus2/CMakeLists.txt
@@ -3,7 +3,7 @@ PROJECT(corpus2)
 
 set(corpus2_ver_major "1")
 set(corpus2_ver_minor "0")
-set(corpus2_ver_patch "0")
+set(corpus2_ver_patch "2")
 
 
 if(NOT LIBCORPUS2_SRC_DATA_DIR)
diff --git a/libcorpus2/tagset.cpp b/libcorpus2/tagset.cpp
index 4cc688181442f235518949877119fb41f914ee31..475bf1034500b06808cc400aed9c57d341a4555a 100644
--- a/libcorpus2/tagset.cpp
+++ b/libcorpus2/tagset.cpp
@@ -122,7 +122,10 @@ Tag Tagset::parse_symbol(const std::string& s) const
 		return Tag(0, m);
 	}
 	m = get_value_mask(s);
-	return Tag(0, m);
+	if (m.any()) {
+		return Tag(0, m);
+	}
+	throw TagParseError("Not a tagset symbol", s, "", id_string());
 }
 
 void Tagset::parse_tag(const string_range &s, bool allow_extra,
@@ -532,11 +535,35 @@ idx_t Tagset::get_attribute_index(const string_range& a) const
 	return attribute_dict_.get_id(a);
 }
 
+idx_t Tagset::get_attribute_index(mask_t a) const
+{
+	std::map<mask_t, idx_t>::const_iterator ci;
+	ci = attribute_mask_to_index_.find(a);
+	if (ci == attribute_mask_to_index_.end()) {
+		return -1;
+	} else {
+		return ci->second;
+	}
+}
+
 const std::string& Tagset::get_attribute_name(idx_t a) const
 {
 	return attribute_dict_.get_string(a);
 }
 
+const std::string& Tagset::get_attribute_name(mask_t a) const
+{
+	static std::string nullstr;
+	idx_t index = get_attribute_index(a);
+	// Indices are zero-based, so index == attribute_count() is already out
+	// of range (assuming attribute_count() is the number of attributes).
+	if (index < 0 || index >= attribute_count()) {
+		return nullstr;
+	} else {
+		return attribute_dict_.get_string(index);
+	}
+}
+
 const std::vector<mask_t>& Tagset::get_attribute_values(idx_t a) const
 {
 	static std::vector<mask_t> null_vec;
diff --git a/libcorpus2/tagset.h b/libcorpus2/tagset.h
index b6e1c1afd1ec6fd1782f2684a6900135128e5570..4b199b6251f9b56f4a7c4670dfc89729082d1133 100644
--- a/libcorpus2/tagset.h
+++ b/libcorpus2/tagset.h
@@ -129,7 +129,7 @@ public:
 	 * The resulting tags will usually be invalid as standalone tags, so
 	 * there is no validation performed.
 	 *
-	 * An invalid string will result in a null tag being returned.
+	 * An invalid string will result in a TagParseError exception.
 	 */
 	Tag parse_symbol(const std::string& s) const;
 
@@ -375,9 +375,17 @@ public:
 	/// @returns -1 on invalid name
 	idx_t get_attribute_index(const string_range& a) const;
 
+	/// Attribute mask -> index mapping
+	/// @returns -1 on invalid mask
+	idx_t get_attribute_index(mask_t a) const;
+
 	/// Attribute index -> name
 	/// @returns empty string on invalid index
-	const std::string& get_attribute_name(idx_t pos) const;
+	const std::string& get_attribute_name(idx_t a) const;
+
+	/// Attribute mask -> name
+	/// @returns empty string on invalid mask
+	const std::string& get_attribute_name(mask_t a) const;
 
 	/// Value mask -> attribute index mapping.
 	/// if the value mask contains values from more than one attribute,
@@ -577,6 +585,9 @@ private:
 	/// Attribute index to combined value mask
 	std::vector<mask_t> attribute_masks_;
 
+	/// Attribute combined mask to attribute index
+	std::map<mask_t, idx_t> attribute_mask_to_index_;
+
 	/// reverse mapping, from a value mask to the respective attribute
 	/// index (values are assumed to be unique and not shared between
 	/// attributes)
diff --git a/libcorpus2/tagsetparser.cpp b/libcorpus2/tagsetparser.cpp
index 4a4307b173d9c01f5caf5e4cba6452f9423a9620..93da97c7ca32e940ac41bebf6a53423aeb2eecdf 100644
--- a/libcorpus2/tagsetparser.cpp
+++ b/libcorpus2/tagsetparser.cpp
@@ -117,6 +117,8 @@ Tagset TagsetParser::load_ini(std::istream &is)
 				current_value <<= 1;
 			}
 			tagset.attribute_masks_.push_back(attribute_mask);
+			tagset.attribute_mask_to_index_.insert(std::make_pair(
+				attribute_mask, current_attribute_index));
 			++current_attribute_index;
 		}
 		tagset.attribute_dict_.load_sorted_data(vec);
diff --git a/tests/tag_split.cpp b/tests/tag_split.cpp
index 098ef7a5e0c96bcd8ca79d613057d0c73558287c..4258b7ba032961c4e38405d89a9e8194d58a70c7 100644
--- a/tests/tag_split.cpp
+++ b/tests/tag_split.cpp
@@ -242,6 +242,17 @@ BOOST_FIXTURE_TEST_CASE( symbols, F )
 	t = tagset->parse_symbol("C");
 	BOOST_CHECK_EQUAL(tagset->tag_to_symbol_string(t), "C");
 	BOOST_CHECK(tagset->tag_to_symbol_string(t, false) != "C");
+	BOOST_CHECK_THROW(tagset->parse_symbol("asdf"), Corpus2::TagParseError);
+}
+
+BOOST_FIXTURE_TEST_CASE(attribute_mask_to_name, F)
+{
+	foreach (Corpus2::mask_t a, tagset->all_attribute_masks()) {
+		std::string aname = tagset->get_attribute_name(a);
+		BOOST_CHECK(!aname.empty());
+		Corpus2::mask_t aa = tagset->get_attribute_mask(aname);
+		BOOST_CHECK_EQUAL(a, aa);
+	}
 }
 
 BOOST_AUTO_TEST_SUITE_END()