diff --git a/libcorpus2/tagset.cpp b/libcorpus2/tagset.cpp index f389fa032390e2ead6a9395e9a35efe66e70d642..91d17d25d761444b55e52e905154755cab64e159 100644 --- a/libcorpus2/tagset.cpp +++ b/libcorpus2/tagset.cpp @@ -338,6 +338,7 @@ Tag Tagset::make_tag(idx_t pos_idx, mask_t values, if (mode & ParseFailWithIgn) { return make_ign_tag(); } + std::cerr << mode << "\n"; throw TagParseError("Required attribute missing", tag_to_string(Tag(get_pos_mask(pos_idx), values)), get_attribute_name(a), id_string()); @@ -381,11 +382,7 @@ Tag Tagset::make_tag(idx_t pos_idx, mask_t values, Tag Tagset::make_ign_tag() const { - - static const std::string ign("ign"); - mask_t ign_pos_mask = get_pos_mask(ign); - assert(ign_pos_mask.any()); - return Tag(ign_pos_mask); + return ign_tag_; } bool Tagset::validate_tag(const Tag &t, ParseMode mode /* = ParseDefault*/, diff --git a/libcorpus2/tagset.h b/libcorpus2/tagset.h index 95d0aa536fc7e176abfe7d5554bba1c33f64af97..ae407a45be504a1670ee86d2e803e329110cf567 100644 --- a/libcorpus2/tagset.h +++ b/libcorpus2/tagset.h @@ -644,6 +644,9 @@ private: /// Valid POS mask mask_t valid_pos_mask_; + + /// The ign tag + Tag ign_tag_; }; /* implementation */ diff --git a/libcorpus2/tagsetparser.cpp b/libcorpus2/tagsetparser.cpp index 945cc00db9b0be1150a39cb363f7bb7358cabc49..0e37c3cb1fc5dada804cc6ba08c0139f6f21388e 100644 --- a/libcorpus2/tagsetparser.cpp +++ b/libcorpus2/tagsetparser.cpp @@ -128,6 +128,7 @@ Tagset TagsetParser::load_ini(std::istream &is) boost::algorithm::trim(line); ++line_no; if (!line.empty() && line[0] != '#') { + if (line[0] == '[') break; std::deque<std::string> v; boost::algorithm::split(v, line, boost::is_any_of(sep), boost::algorithm::token_compress_on); @@ -155,7 +156,18 @@ Tagset TagsetParser::load_ini(std::istream &is) req_mask[a] = required; } } + } + std::string ign_tag_string = "ign"; + if (line != "[IGN]") { + while (std::getline(is, line)) { + if (line == "[IGN]") break; + } + } + if (line == "[IGN]") { + if (std::getline(is, line)) { + ign_tag_string = line; + } } vec.clear(); @@ -189,7 +201,7 @@ Tagset TagsetParser::load_ini(std::istream &is) tagset.original_pos_indices_.insert(std::make_pair(p,i)); tagset.valid_pos_mask_ |= (mask_t(1) << i); } - + tagset.ign_tag_ = tagset.parse_simple_tag(ign_tag_string); return tagset; } diff --git a/tests/basic.cpp b/tests/basic.cpp index 153b80147623a81df6a161c0d91eeabb7e43c45c..2c449ed4917eb825b6b0dc7ac8c74863e9528e2a 100644 --- a/tests/basic.cpp +++ b/tests/basic.cpp @@ -22,6 +22,7 @@ const char tagsetstr1[] = "[ATTR]\n" "A tag tog other a3 \n" "B data thing tag-thing thang\n" "C a b c \n" + "ign\n" "[POS]\n some A B [C]\n"; BOOST_AUTO_TEST_CASE( token ) diff --git a/tests/tag_split.cpp b/tests/tag_split.cpp index ddac048238cd0e55f1351973079541508b5929c7..78c50fa8a1408179a19ef13ad68aa1996a67ac21 100644 --- a/tests/tag_split.cpp +++ b/tests/tag_split.cpp @@ -29,7 +29,8 @@ struct F { "A tag tog other a3 \n" "B data thing tag-thing thang\n" "C a b c \n" - "[POS]\n some A B [C]\n same A B \n P3 [A] [B]\n"; + "[POS]\n some A B [C]\n same A B \n P3 [A] [B]\n" + "[IGN]\nP3\n"; tagset.reset(new Corpus2::Tagset()); *tagset = Corpus2::Tagset::from_data(tagset_string); } diff --git a/tests/tagset_parse.cpp b/tests/tagset_parse.cpp index 69100e52fa7e7a40ad3084c6acd749d2add78425..d7099d1f486a57369b3f3f50c3181425df0da5b0 100644 --- a/tests/tagset_parse.cpp +++ b/tests/tagset_parse.cpp @@ -19,6 +19,7 @@ or FITNESS FOR A PARTICULAR PURPOSE. #include <libpwrutils/foreach.h> #include <libcorpus2/tagsetparser.h> #include <libcorpus2/tagsetmanager.h> +#include <libcorpus2/util/settings.h> #include <iostream> BOOST_AUTO_TEST_SUITE( tagset_parse ); @@ -31,7 +32,7 @@ Corpus2::Tagset parse(const char* s) } #define PRE "[ATTR]\n" -#define POSA "[POS]\n POS1\n" +#define POSA "[POS]\n ign\n" BOOST_AUTO_TEST_CASE( empty ) { @@ -54,7 +55,7 @@ BOOST_AUTO_TEST_CASE( minimal_nonewline ) { Corpus2::Tagset t; try { - t = parse(PRE "[POS]\n POS1"); + t = parse(PRE "[POS]\n ign"); } catch (Corpus2::TagsetParseError& e) { BOOST_FAIL(e.info()); } @@ -147,6 +148,8 @@ BOOST_AUTO_TEST_CASE( size6 ) BOOST_AUTO_TEST_CASE( load_named ) { + PwrNlp::ConfigPathSetter ps(Corpus2::Path::Instance(), + LIBCORPUS2_TEST_DATA_DIR); BOOST_CHECK_NO_THROW( try { Corpus2::get_named_tagset("test"); diff --git a/corpus2data/test.tagset b/tests/test.tagset similarity index 93% rename from corpus2data/test.tagset rename to tests/test.tagset index 98c24272102e5596e1ff0f0f642ea6d6e6164d16..beaeafa28dabaf63dbd50f4e3637665e4933fa75 100644 --- a/corpus2data/test.tagset +++ b/tests/test.tagset @@ -9,3 +9,6 @@ P1 = a b c P2 = P3 = a b [c] P4 = [a] [b] [c] + +[IGN] +P4