# Patch summary (leading text before the first "diff --git" header; patch
# tools skip it).
#
# CMakeLists.txt
#   Changes corpus2_ver_patch from "0" to "1" (major "1" and minor "3" are
#   unchanged context).
#
# libcorpus2/tagging.cpp
#   Adds the free function
#     Tag with_values_masked(Tag input, Tag attr_value, Tag attr_mask)
#   between mask_card() and select_preferred_disamb(). It copy-constructs
#   `output` from `input`, calls
#     output.add_values_masked(attr_value.get_values(), attr_mask.get_values())
#   and returns `output`; `input` itself is not modified.
#
# libcorpus2/tagging.h
#   Declares with_values_masked() together with its doc comment, placed after
#   the mask_card() declaration. A second hunk adds a single blank line before
#   the closing "} /* end ns Corpus2 */" (whitespace-only change).
#
# swig/tagging.i
#   Adds the same with_values_masked() declaration to the SWIG interface file,
#   after mask_card().
#
# NOTE(review): with_values_masked() takes all three Tag arguments by value,
#   whereas the neighbouring mask_token()/mask_card() declarations take
#   `const Tag&`. `input` is then copied a second time into `output`. Whether
#   this matters depends on how costly Tag is to copy -- confirm against the
#   Tag class before changing the signature.
# NOTE(review): the header doc comment reads "only attribute part of the masks
#   are considered"; presumably "only the attribute parts of the masks are
#   considered" was intended -- fix in the source tree, not in this patch.
# NOTE(review): in this copy the hunk bodies appear to have lost their line
#   breaks (each hunk is on one physical line although the @@ headers declare
#   multi-line ranges); regenerate from the original commit before applying.
diff --git a/CMakeLists.txt b/CMakeLists.txt index c44aa9a972a3a068053146af5967feb2523ea2ba..75783dd3c7e5d6dce8316c2750ff6892e2230929 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ PROJECT(Corpus2Library) set(corpus2_ver_major "1") set(corpus2_ver_minor "3") -set(corpus2_ver_patch "0") +set(corpus2_ver_patch "1") cmake_minimum_required(VERSION 2.8.0) diff --git a/libcorpus2/tagging.cpp b/libcorpus2/tagging.cpp index 785edf9ffe411dd9fd138df371a5171fcdb3eebb..a814970d37e45590d0154384dcb7938d43e30c0b 100644 --- a/libcorpus2/tagging.cpp +++ b/libcorpus2/tagging.cpp @@ -59,6 +59,12 @@ int mask_card(const Tag& mask) + PwrNlp::count_bits_set(mask.get_values()); } +Tag with_values_masked(Tag input, Tag attr_value, Tag attr_mask) { + Tag output(input); + output.add_values_masked(attr_value.get_values(), attr_mask.get_values()); + return output; +} + bool select_preferred_disamb(const Tagset& tagset, Token* token) { size_t lex_idx = token->get_preferred_lexeme_index(tagset); diff --git a/libcorpus2/tagging.h b/libcorpus2/tagging.h index 4beaba5096ac616291e822824971838aa33e2142..9039bd8c36c1f713584d2cc996670be04f298497 100644 --- a/libcorpus2/tagging.h +++ b/libcorpus2/tagging.h @@ -45,6 +45,13 @@ Tag mask_token(const Token& token, const Tag& mask, bool disamb_only); /** Returns the number of set elements belonging to the mask given. */ int mask_card(const Tag& mask); +/** + * Returns a copy of the given input tag with the attribute referred + * to by attr_mask value set to attr_value (possibly empty). + * NOTE: only attribute part of the masks are considered. + */ +Tag with_values_masked(Tag input, Tag attr_value, Tag attr_mask); + /** Forces one disamb lexeme per token. The selection is based on tagset * definition order. Returns if any disamb found. */ @@ -99,6 +106,7 @@ bool disambiguate_subset(Token* token, const Tag& mask_where, /** Sets lexemes' disamb markers iff lexeme.tag is wanted_tag. 
*/ void set_disambs(Token *token, const Tag& wanted_tag); + } /* end ns Corpus2 */ #endif // LIBCORPUS2_TAGGING_H diff --git a/swig/tagging.i b/swig/tagging.i index ec90902b9fc3121809429e8807a85a44e2a95ae1..b4695cef1dd7f248d582b201f62dfe68f45fc36b 100644 --- a/swig/tagging.i +++ b/swig/tagging.i @@ -21,6 +21,8 @@ Tag mask_token(const Token& token, const Tag& mask, bool disamb_only); int mask_card(const Tag& mask); +Tag with_values_masked(Tag input, Tag attr_value, Tag attr_mask); + bool select_preferred_disamb(const Tagset& tagset, Token* token); void select_preferred_lexeme(const Tagset& tagset, Token* token);