From 614176513464915b02210f89798d1f3ba810069e Mon Sep 17 00:00:00 2001 From: ilor <kailoran@gmail.com> Date: Thu, 14 Apr 2011 15:47:23 +0200 Subject: [PATCH] Allow custom "ign" tag equivalent in tagsets, fix tests accordingly as now a valid ign tag is required --- libcorpus2/tagset.cpp | 7 ++----- libcorpus2/tagset.h | 3 +++ libcorpus2/tagsetparser.cpp | 14 +++++++++++++- tests/basic.cpp | 1 + tests/tag_split.cpp | 3 ++- tests/tagset_parse.cpp | 7 +++++-- {corpus2data => tests}/test.tagset | 3 +++ 7 files changed, 29 insertions(+), 9 deletions(-) rename {corpus2data => tests}/test.tagset (93%) diff --git a/libcorpus2/tagset.cpp b/libcorpus2/tagset.cpp index f389fa0..91d17d2 100644 --- a/libcorpus2/tagset.cpp +++ b/libcorpus2/tagset.cpp @@ -338,6 +338,7 @@ Tag Tagset::make_tag(idx_t pos_idx, mask_t values, if (mode & ParseFailWithIgn) { return make_ign_tag(); } + std::cerr << mode << "\n"; throw TagParseError("Required attribute missing", tag_to_string(Tag(get_pos_mask(pos_idx), values)), get_attribute_name(a), id_string()); @@ -381,11 +382,7 @@ Tag Tagset::make_tag(idx_t pos_idx, mask_t values, Tag Tagset::make_ign_tag() const { - - static const std::string ign("ign"); - mask_t ign_pos_mask = get_pos_mask(ign); - assert(ign_pos_mask.any()); - return Tag(ign_pos_mask); + return ign_tag_; } bool Tagset::validate_tag(const Tag &t, ParseMode mode /* = ParseDefault*/, diff --git a/libcorpus2/tagset.h b/libcorpus2/tagset.h index 95d0aa5..ae407a4 100644 --- a/libcorpus2/tagset.h +++ b/libcorpus2/tagset.h @@ -644,6 +644,9 @@ private: /// Valid POS mask mask_t valid_pos_mask_; + + /// The ign tag + Tag ign_tag_; }; /* implementation */ diff --git a/libcorpus2/tagsetparser.cpp b/libcorpus2/tagsetparser.cpp index 945cc00..0e37c3c 100644 --- a/libcorpus2/tagsetparser.cpp +++ b/libcorpus2/tagsetparser.cpp @@ -128,6 +128,7 @@ Tagset TagsetParser::load_ini(std::istream &is) boost::algorithm::trim(line); ++line_no; if (!line.empty() && line[0] != '#') { + if (line[0] == '[') break; std::deque<std::string> v; boost::algorithm::split(v, line, boost::is_any_of(sep), boost::algorithm::token_compress_on); @@ -155,7 +156,18 @@ Tagset TagsetParser::load_ini(std::istream &is) req_mask[a] = required; } } + } + std::string ign_tag_string = "ign"; + if (line != "[IGN]") { + while (std::getline(is, line)) { + if (line == "[IGN]") break; + } + } + if (line == "[IGN]") { + if (std::getline(is, line)) { + ign_tag_string = line; + } } vec.clear(); @@ -189,7 +201,7 @@ Tagset TagsetParser::load_ini(std::istream &is) tagset.original_pos_indices_.insert(std::make_pair(p,i)); tagset.valid_pos_mask_ |= (mask_t(1) << i); } - + tagset.ign_tag_ = tagset.parse_simple_tag(ign_tag_string); return tagset; } diff --git a/tests/basic.cpp b/tests/basic.cpp index 153b801..2c449ed 100644 --- a/tests/basic.cpp +++ b/tests/basic.cpp @@ -22,6 +22,7 @@ const char tagsetstr1[] = "[ATTR]\n" "A tag tog other a3 \n" "B data thing tag-thing thang\n" "C a b c \n" + "ign\n" "[POS]\n some A B [C]\n"; BOOST_AUTO_TEST_CASE( token ) diff --git a/tests/tag_split.cpp b/tests/tag_split.cpp index ddac048..78c50fa 100644 --- a/tests/tag_split.cpp +++ b/tests/tag_split.cpp @@ -29,7 +29,8 @@ struct F { "A tag tog other a3 \n" "B data thing tag-thing thang\n" "C a b c \n" - "[POS]\n some A B [C]\n same A B \n P3 [A] [B]\n"; + "[POS]\n some A B [C]\n same A B \n P3 [A] [B]\n" + "[IGN]\nP3\n"; tagset.reset(new Corpus2::Tagset()); *tagset = Corpus2::Tagset::from_data(tagset_string); } diff --git a/tests/tagset_parse.cpp b/tests/tagset_parse.cpp index 69100e5..d7099d1 100644 --- a/tests/tagset_parse.cpp +++ b/tests/tagset_parse.cpp @@ -19,6 +19,7 @@ or FITNESS FOR A PARTICULAR PURPOSE. #include <libpwrutils/foreach.h> #include <libcorpus2/tagsetparser.h> #include <libcorpus2/tagsetmanager.h> +#include <libcorpus2/util/settings.h> #include <iostream> BOOST_AUTO_TEST_SUITE( tagset_parse ); @@ -31,7 +32,7 @@ Corpus2::Tagset parse(const char* s) } #define PRE "[ATTR]\n" -#define POSA "[POS]\n POS1\n" +#define POSA "[POS]\n ign\n" BOOST_AUTO_TEST_CASE( empty ) { @@ -54,7 +55,7 @@ BOOST_AUTO_TEST_CASE( minimal_nonewline ) { Corpus2::Tagset t; try { - t = parse(PRE "[POS]\n POS1"); + t = parse(PRE "[POS]\n ign"); } catch (Corpus2::TagsetParseError& e) { BOOST_FAIL(e.info()); } @@ -147,6 +148,8 @@ BOOST_AUTO_TEST_CASE( size6 ) BOOST_AUTO_TEST_CASE( load_named ) { + PwrNlp::ConfigPathSetter ps(Corpus2::Path::Instance(), + LIBCORPUS2_TEST_DATA_DIR); BOOST_CHECK_NO_THROW( try { Corpus2::get_named_tagset("test"); diff --git a/corpus2data/test.tagset b/tests/test.tagset similarity index 93% rename from corpus2data/test.tagset rename to tests/test.tagset index 98c2427..beaeafa 100644 --- a/corpus2data/test.tagset +++ b/tests/test.tagset @@ -9,3 +9,6 @@ P1 = a b c P2 = P3 = a b [c] P4 = [a] [b] [c] + +[IGN] +P4 -- GitLab