diff --git a/libcorpus2/tagging.cpp b/libcorpus2/tagging.cpp index d009c95166e41b93a695a2d36845fcce23acc220..41b073353b8a6a446c874a0bd4ff77a1002320b8 100644 --- a/libcorpus2/tagging.cpp +++ b/libcorpus2/tagging.cpp @@ -77,10 +77,10 @@ bool select_preferred_disamb(const Tagset& tagset, return true; } -void expand_unspec_attrs(const Tagset& tagset, Token* token) +void expand_optional_attrs(const Tagset& tagset, Token* token) { foreach (Lexeme& lex, token->lexemes()) { - lex.set_tag(tagset.expand_unspec_attrs(lex.tag())); + lex.set_tag(tagset.expand_optional_attrs(lex.tag())); } } diff --git a/libcorpus2/tagging.h b/libcorpus2/tagging.h index 0cbb4438a3b854d787d590f09dae29083344e56f..7ae0a39dd49b1e429b00f0625256afb7c0b38ab0 100644 --- a/libcorpus2/tagging.h +++ b/libcorpus2/tagging.h @@ -50,11 +50,11 @@ int mask_card(const Tag& mask); */ bool select_preferred_disamb(const Tagset& tagset, Token* token); -/** Encodes attributes with unspecified values as each value set. +/** Encodes optional attributes with unspecified values as each value set. * This is to facilitate safe masking when the value in question is not to be * skipped. */ -void expand_unspec_attrs(const Tagset& tagset, Token* token); +void expand_optional_attrs(const Tagset& tagset, Token* token); /** Repairs multivalue tags. Optional attributes will be cleared if * multi-value. Regular attributes will be set to lowest value given. 
diff --git a/libcorpus2/tagset.cpp b/libcorpus2/tagset.cpp index 8ae89fe8ce636626ab3651e2670429508580a815..5869159ff0de3113969f1473b0d73e2b20ccaf2d 100644 --- a/libcorpus2/tagset.cpp +++ b/libcorpus2/tagset.cpp @@ -604,12 +604,11 @@ Tag Tagset::select_singular(const Tag& tag) const return new_tag; } -Tag Tagset::expand_unspec_attrs(const Tag& tag) const +Tag Tagset::expand_optional_attrs(const Tag& tag) const { Tag new_tag(tag); idx_t pos_idx = tag.get_pos_index(); - const std::vector<idx_t>& attrs = get_pos_attributes(pos_idx); - for (idx_t a = 0; a < attribute_count(); ++a) { + foreach (idx_t a, get_pos_attributes(pos_idx)) { mask_t attr_mask = get_attribute_mask(a); mask_t value = tag.get_values_for(attr_mask); if (!value.any()) { // no value given diff --git a/libcorpus2/tagset.h b/libcorpus2/tagset.h index 504dc22343da59260d43996edce3a9874041cd14..821ec9a3ac5bca71dccf434441ace7c8edc1774f 100644 --- a/libcorpus2/tagset.h +++ b/libcorpus2/tagset.h @@ -374,14 +374,14 @@ public: Tag select_singular(const Tag& tag) const; /** - * Creates a copy of the given tag where any attribute with no value given - * is encoded as each possible value set. NOTE: this produce an invalid - * tag (multiple values set for one attribute), yet it is convenient for - * some tagging scenarios to be able to retrieve unspecified attr value as - * a non-zero mask. This can always be decoded into a valid tag by using - * select_singular. + * Creates a copy of the given tag where any optional attribute with no + * value given is encoded as each possible value set. NOTE: this may + * produce an invalid tag (multiple values set for one attribute), yet it is + * convenient for some tagging scenarios to be able to retrieve + * unspecified attr value as a non-zero mask. This can always be decoded + * into a valid tag by using select_singular. 
*/ - Tag expand_unspec_attrs(const Tag& tag) const; + Tag expand_optional_attrs(const Tag& tag) const; /// POS name <-> index dictionary getter const SymbolDictionary<idx_t>& pos_dictionary() const { diff --git a/swig/tagging.i b/swig/tagging.i index 96d1bb84c34a6ac0da2ff4560fcfb9754fa18902..c9fdd9cdf922473f2f2ac88e487570dabfa03e00 100644 --- a/swig/tagging.i +++ b/swig/tagging.i @@ -23,7 +23,7 @@ int mask_card(const Tag& mask); bool select_preferred_disamb(const Tagset& tagset, Token* token); -void expand_unspec_attrs(const Tagset& tagset, Token* token); +void expand_optional_attrs(const Tagset& tagset, Token* token); void select_singular_tags(const Tagset& tagset, Token* token); diff --git a/swig/tagset.i b/swig/tagset.i index 1d620388f7a2fd5aced5cd7e5736e5b26c218882..3df9d956c221a983ca83a29d2aef632ba26643d6 100644 --- a/swig/tagset.i +++ b/swig/tagset.i @@ -92,7 +92,7 @@ namespace Corpus2 { /* --------------------------------------------------------------------- */ std::vector<Tag> split_tag(const Tag& tag) const; Tag select_singular(const Tag& tag) const; - Tag expand_unspec_attrs(const Tag& tag) const; + Tag expand_optional_attrs(const Tag& tag) const; /* --------------------------------------------------------------------- */ int pos_count() const; int attribute_count() const;