diff --git a/libcorpus2/CMakeLists.txt b/libcorpus2/CMakeLists.txt index e705ffff817643829c3f583d7f371c1dbfe1cd7c..b02207e9f34658d1d410e1411ce54179ad70efbd 100644 --- a/libcorpus2/CMakeLists.txt +++ b/libcorpus2/CMakeLists.txt @@ -3,7 +3,7 @@ PROJECT(corpus2) set(corpus2_ver_major "1") set(corpus2_ver_minor "0") -set(corpus2_ver_patch "11") +set(corpus2_ver_patch "12") if(NOT LIBCORPUS2_SRC_DATA_DIR) diff --git a/libcorpus2/tag.cpp b/libcorpus2/tag.cpp index bb08d81f01e9269ae7714ccdb2885977d3ccf791..fc263d374707d3b7c60595c973475a7d883c7f2a 100644 --- a/libcorpus2/tag.cpp +++ b/libcorpus2/tag.cpp @@ -73,4 +73,6 @@ size_t hash_value(const Tag& tag) return seed; } +const Tag Tag::all_pos_mask(filled_mask); + } /* end ns Corpus2 */ diff --git a/libcorpus2/tag.h b/libcorpus2/tag.h index 9041ca445f03c4506123b8f38af41df289fbc794..d2e61557cc32b93db75263d6f6ee90fd9dee0735 100644 --- a/libcorpus2/tag.h +++ b/libcorpus2/tag.h @@ -34,6 +34,7 @@ typedef boost::int8_t idx_t; typedef PwrNlp::bitset<64> mask_t; BOOST_STRONG_TYPEDEF(boost::uint32_t, tagset_idx_t); +/// Full mask (1's only), may be used for all-POS tags. const mask_t filled_mask(PwrNlp::filled_bitset<64>()); /** @@ -45,6 +46,12 @@ const mask_t filled_mask(PwrNlp::filled_bitset<64>()); * you an appropriate Tagset object. These operations include tag creation, * getting tag string representation and retrieving values of particular * attributes. + * + * NOTE: tags are essentially binary masks, hence they may be either valid + * "singular" tags or they may be used as POS or attribute masks used to + * retrieve values of some attributes/POS from other tags. Note that given + * a Tag object alone it is not possible to determine if a tag is singular or + * valid. See comments of the Tagset class for those details. 
*/ class Tag // : boost::equality_comparable<Tag>, boost::less_than_comparable<Tag> @@ -166,6 +173,12 @@ public: ar & values_; } + /** + * A mask with the whole POS part filled with 1's, useful to extract POS + * values from other tags. + */ + const static Tag all_pos_mask; + private: /// the POS id mask_t pos_;