Skip to content
Snippets Groups Projects
Commit b7bb1239 authored by ilor's avatar ilor
Browse files

fill more structures in tagset during parsing

implement get_pos_value_mask and get_pos_required_mask
use int for *_count to avoid signed/unsigned errors
handle missing required attributes partially, add a todo
parent c108b40a
Branches
No related tags found
No related merge requests found
...@@ -144,6 +144,7 @@ void Tagset::parse_tag(const string_range_vector &fields, bool allow_extra, ...@@ -144,6 +144,7 @@ void Tagset::parse_tag(const string_range_vector &fields, bool allow_extra,
std::vector<mask_t> values; std::vector<mask_t> values;
foreach (string_range& dot, dots) { foreach (string_range& dot, dots) {
mask_t v = get_value_mask(boost::copy_range<std::string>(dot)); mask_t v = get_value_mask(boost::copy_range<std::string>(dot));
//TODO ensure all in one attribute, pass mask to append_
if (v.none()) { if (v.none()) {
throw TagParseError("Unknown attribute value", throw TagParseError("Unknown attribute value",
boost::copy_range<std::string>(r), "", boost::copy_range<std::string>(r), "",
...@@ -159,6 +160,7 @@ void Tagset::parse_tag(const string_range_vector &fields, bool allow_extra, ...@@ -159,6 +160,7 @@ void Tagset::parse_tag(const string_range_vector &fields, bool allow_extra,
"", "", id_string()); "", "", id_string());
} }
idx_t attr = pos_attributes_[pos_idx][fi - 1]; idx_t attr = pos_attributes_[pos_idx][fi - 1];
//TODO use attr mask
append_to_multi_tag(all_variants, attribute_values_[attr]); append_to_multi_tag(all_variants, attribute_values_[attr]);
} // else empty, do nothing } // else empty, do nothing
} }
...@@ -218,15 +220,29 @@ Tag Tagset::parse_simple_tag(const string_range_vector &ts, ...@@ -218,15 +220,29 @@ Tag Tagset::parse_simple_tag(const string_range_vector &ts,
id_string()); id_string());
} }
} else { } else {
values |= val; mask_t a = get_attribute_mask(get_value_attribute(val));
values = (values & ~a) | val;
} }
} }
} }
return Tag(get_pos_mask(pos_idx), values);
return make_tag(pos_idx, values, allow_extra);
} }
Tag Tagset::make_tag(idx_t pos_idx, mask_t values, bool allow_extra) const Tag Tagset::make_tag(idx_t pos_idx, mask_t values, bool allow_extra) const
{ {
mask_t required_values = get_pos_value_mask(pos_idx);
//std::cerr << values << "\n";
//std::cerr << required_values << "\n";
//std::cerr << (required_values & values) << "\n";
//std::cerr << PwrNlp::count_bits_set(required_values & values)
// << pos_required_attributes_idx_[pos_idx].size() << "\n";
size_t has_req = PwrNlp::count_bits_set(required_values & values);
if (has_req != pos_required_attributes_idx_[pos_idx].size()) {
throw TagParseError("Required attribute missing",
"",
get_pos_name(pos_idx), id_string());
}
mask_t valid_values = get_pos_value_mask(pos_idx); mask_t valid_values = get_pos_value_mask(pos_idx);
mask_t invalid = values & ~valid_values; mask_t invalid = values & ~valid_values;
if (invalid.any() && !allow_extra) { if (invalid.any() && !allow_extra) {
...@@ -267,7 +283,7 @@ bool Tagset::validate_tag(const Tag &t, bool allow_extra, ...@@ -267,7 +283,7 @@ bool Tagset::validate_tag(const Tag &t, bool allow_extra,
std::vector<bool> valid = get_pos_attributes_flag(pos_idx); std::vector<bool> valid = get_pos_attributes_flag(pos_idx);
std::vector<bool> required = get_pos_required_attributes(pos_idx); std::vector<bool> required = get_pos_required_attributes(pos_idx);
for (idx_t i = 0; i < attribute_dict_.size(); ++i) { for (idx_t i = 0; i < attribute_count(); ++i) {
mask_t value = t.get_values_for(get_attribute_mask(i)); mask_t value = t.get_values_for(get_attribute_mask(i));
if (value == 0) { if (value == 0) {
if (required[i]) { if (required[i]) {
...@@ -309,7 +325,7 @@ std::string Tagset::tag_to_string(const Tag &tag) const ...@@ -309,7 +325,7 @@ std::string Tagset::tag_to_string(const Tag &tag) const
} }
} }
// print extra attributes // print extra attributes
for (idx_t a = 0; a < attribute_dict_.size(); ++a) { for (idx_t a = 0; a < attribute_count(); ++a) {
if (!pos_has_attribute(pos_idx, a)) { if (!pos_has_attribute(pos_idx, a)) {
mask_t value = tag.get_values_for(get_attribute_mask(a)); mask_t value = tag.get_values_for(get_attribute_mask(a));
if (value.any()) { if (value.any()) {
...@@ -431,7 +447,7 @@ const std::string& Tagset::get_attribute_name(idx_t a) const ...@@ -431,7 +447,7 @@ const std::string& Tagset::get_attribute_name(idx_t a) const
const std::vector<mask_t>& Tagset::get_attribute_values(idx_t a) const const std::vector<mask_t>& Tagset::get_attribute_values(idx_t a) const
{ {
static std::vector<mask_t> null_vec; static std::vector<mask_t> null_vec;
if (a < 0 || a >= attribute_dict_.size()) { if (a < 0 || a >= attribute_count()) {
return null_vec; return null_vec;
} else { } else {
return attribute_values_[a]; return attribute_values_[a];
...@@ -440,7 +456,7 @@ const std::vector<mask_t>& Tagset::get_attribute_values(idx_t a) const ...@@ -440,7 +456,7 @@ const std::vector<mask_t>& Tagset::get_attribute_values(idx_t a) const
mask_t Tagset::get_attribute_mask(idx_t a) const mask_t Tagset::get_attribute_mask(idx_t a) const
{ {
if (a < 0 || a >= attribute_dict_.size()) { if (a < 0 || a >= attribute_count()) {
return 0; return 0;
} else { } else {
return attribute_masks_[a]; return attribute_masks_[a];
...@@ -531,25 +547,25 @@ bool Tagset::pos_has_attribute(idx_t pos, idx_t attribute) const ...@@ -531,25 +547,25 @@ bool Tagset::pos_has_attribute(idx_t pos, idx_t attribute) const
mask_t Tagset::get_pos_value_mask(idx_t pos) const mask_t Tagset::get_pos_value_mask(idx_t pos) const
{ {
return 0; //TODO return pos_valid_value_masks_[pos];
} }
mask_t Tagset::get_pos_required_mask(idx_t pos) const mask_t Tagset::get_pos_required_mask(idx_t pos) const
{ {
return 0; //TODO return pos_required_value_masks_[pos];
} }
size_t Tagset::pos_count() const int Tagset::pos_count() const
{ {
return pos_dict_.size(); return pos_dict_.size();
} }
size_t Tagset::attribute_count() const int Tagset::attribute_count() const
{ {
return attribute_dict_.size(); return attribute_dict_.size();
} }
size_t Tagset::value_count() const int Tagset::value_count() const
{ {
return value_mask_to_string_.size(); return value_mask_to_string_.size();
} }
......
...@@ -343,11 +343,11 @@ public: ...@@ -343,11 +343,11 @@ public:
mask_t get_pos_required_mask(idx_t pos) const; mask_t get_pos_required_mask(idx_t pos) const;
size_t pos_count() const; int pos_count() const;
size_t attribute_count() const; int attribute_count() const;
size_t value_count() const; int value_count() const;
/** /**
* Tagset cardinality counter -- the number of different valid tags * Tagset cardinality counter -- the number of different valid tags
...@@ -489,6 +489,8 @@ private: ...@@ -489,6 +489,8 @@ private:
/// output and the behavior of the _ special character in parsing /// output and the behavior of the _ special character in parsing
std::vector< std::vector<idx_t> > pos_attributes_; std::vector< std::vector<idx_t> > pos_attributes_;
std::vector< std::vector<idx_t> > pos_required_attributes_idx_;
std::vector<mask_t> pos_valid_value_masks_; std::vector<mask_t> pos_valid_value_masks_;
std::vector<mask_t> pos_required_value_masks_; std::vector<mask_t> pos_required_value_masks_;
......
...@@ -143,13 +143,24 @@ Tagset TagsetParser::load_ini(std::istream &is) ...@@ -143,13 +143,24 @@ Tagset TagsetParser::load_ini(std::istream &is)
vec.clear(); vec.clear();
foreach (const pmap_t::value_type v, pmap) { foreach (const pmap_t::value_type v, pmap) {
vec.push_back(v.first); vec.push_back(v.first);
mask_t valid(0);
mask_t required(0);
tagset.pos_attributes_.push_back(v.second); tagset.pos_attributes_.push_back(v.second);
tagset.pos_required_attributes_idx_.resize(
tagset.pos_required_attributes_idx_.size() + 1);
tagset.pos_valid_attributes_.push_back( tagset.pos_valid_attributes_.push_back(
std::vector<bool>(tagset.attribute_values_.size(), false)); std::vector<bool>(tagset.attribute_values_.size(), false));
foreach (idx_t a, v.second) { foreach (idx_t a, v.second) {
valid |= tagset.get_attribute_mask(a);
if (reqmap[v.first][a]) {
required |= tagset.get_attribute_mask(a);
tagset.pos_required_attributes_idx_.back().push_back(a);
}
tagset.pos_valid_attributes_.back()[a] = true; tagset.pos_valid_attributes_.back()[a] = true;
} }
tagset.pos_required_attributes_.push_back(reqmap[v.first]); tagset.pos_required_attributes_.push_back(reqmap[v.first]);
tagset.pos_valid_value_masks_.push_back(valid);
tagset.pos_required_value_masks_.push_back(required);
} }
tagset.pos_dict_.load_sorted_data(vec); tagset.pos_dict_.load_sorted_data(vec);
if (tagset.pos_dict_.size() == 0) { if (tagset.pos_dict_.size() == 0) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment