diff --git a/libcorpus2/tagset.cpp b/libcorpus2/tagset.cpp index 53f0009226ead59ca531d4cd4443061ad340a6e4..f389fa032390e2ead6a9395e9a35efe66e70d642 100644 --- a/libcorpus2/tagset.cpp +++ b/libcorpus2/tagset.cpp @@ -176,10 +176,18 @@ void Tagset::parse_tag(const string_range_vector &fields, ParseMode mode /* = ParseDefault*/) const { if (fields.empty()) { + if (mode & ParseFailWithIgn) { + sink(make_ign_tag()); + return; + } throw TagParseError("No POS", "", "", id_string()); } idx_t pos_idx = get_pos_index(fields[0]); if (pos_idx < 0) { + if (mode & ParseFailWithIgn) { + sink(make_ign_tag()); + return; + } throw TagParseError("Invalid POS", boost::copy_range<std::string>(fields[0]), "", id_string()); @@ -202,11 +210,19 @@ void Tagset::parse_tag(const string_range_vector &fields, if (amask.none()) { amask = curr; } else if (amask != curr) { + if (mode & ParseFailWithIgn) { + sink(make_ign_tag()); + return; + } throw TagParseError("Values from two attributes split by dot", boost::copy_range<std::string>(r), "", id_string()); } if (v.none()) { + if (mode & ParseFailWithIgn) { + sink(make_ign_tag()); + return; + } throw TagParseError("Unknown attribute value", boost::copy_range<std::string>(r), "", id_string()); @@ -216,6 +232,10 @@ void Tagset::parse_tag(const string_range_vector &fields, append_to_multi_tag(all_variants, values, amask); } else if (!r.empty()) { // underscore handling if (fi - 1 >= pos_attributes_[pos_idx].size()) { + if (mode & ParseFailWithIgn) { + sink(make_ign_tag()); + return; + } throw TagParseError( "Underscore beyond last attribute for this POS", "", "", id_string()); @@ -260,11 +280,17 @@ Tag Tagset::parse_simple_tag(const string_range_vector &ts, ParseMode mode /* = ParseDefault*/) const { if (ts.empty()) { + if (mode & ParseFailWithIgn) { + return make_ign_tag(); + } throw TagParseError("Empty POS+attribute list", "", "", id_string()); } idx_t pos_idx = get_pos_index(ts[0]); if (pos_idx < 0) { + if (mode & ParseFailWithIgn) { + return make_ign_tag(); + } throw TagParseError("Invalid POS", boost::copy_range<std::string>(ts[0]), "", id_string()); } @@ -277,6 +303,9 @@ Tag Tagset::parse_simple_tag(const string_range_vector &ts, if (a.any()) { values &= (~a); } else { + if (mode & ParseFailWithIgn) { + return make_ign_tag(); + } throw TagParseError("Unknown attribute value", boost::copy_range<std::string>(ts[i]), "", id_string()); @@ -306,12 +335,18 @@ Tag Tagset::make_tag(idx_t pos_idx, mask_t values, if (pos_requires_attribute(pos_idx, a)) { mask_t amask = get_attribute_mask(a); if ((values & amask).none()) { + if (mode & ParseFailWithIgn) { + return make_ign_tag(); + } throw TagParseError("Required attribute missing", tag_to_string(Tag(get_pos_mask(pos_idx), values)), get_attribute_name(a), id_string()); } } } + if (mode & ParseFailWithIgn) { + return make_ign_tag(); + } throw TagParseError("Required attribute missing", tag_to_string(Tag(get_pos_mask(pos_idx), values)), get_pos_name(pos_idx), id_string()); @@ -321,6 +356,9 @@ Tag Tagset::make_tag(idx_t pos_idx, mask_t values, mask_t valid_values = get_pos_value_mask(pos_idx); mask_t invalid = values & ~valid_values; if (invalid.any()) { + if (mode & ParseFailWithIgn) { + return make_ign_tag(); + } mask_t first_invalid = PwrNlp::lowest_bit(invalid); throw TagParseError("Attribute not valid for this POS", get_value_name(first_invalid), @@ -330,6 +368,9 @@ Tag Tagset::make_tag(idx_t pos_idx, mask_t values, Tag tag(get_pos_mask(pos_idx), values); if (mode & ParseCheckSingular) { if (!tag_is_singular(tag)) { + if (mode & ParseFailWithIgn) { + return make_ign_tag(); + } throw TagParseError("Parsed tag not singular", tag_to_symbol_string(tag, false), get_pos_name(pos_idx), id_string()); diff --git a/libcorpus2/tagset.h b/libcorpus2/tagset.h index 75c98dddff53d972042aee0e312a1093d1f0a666..95d0aa536fc7e176abfe7d5554bba1c33f64af97 100644 --- a/libcorpus2/tagset.h +++ b/libcorpus2/tagset.h @@ -127,6 +127,7 @@ public: ParseCheckRequired = 1, /// Check for presence of required attributes ParseAllowExtra = 2, /// Allow extra attributes ParseCheckSingular = 4, /// Check tag singularity + ParseFailWithIgn = 8, /// Parsing error results in an ign tag, not an exception ParseDefault = ParseCheckRequired, /// Default mode ParseRequiredWithExtra = ParseCheckRequired | ParseAllowExtra,