Skip to content
Snippets Groups Projects
Commit 61417651 authored by ilor
Browse files

Allow custom "ign" tag equivalent in tagsets, fix tests accordingly as now a...

Allow a custom "ign" tag equivalent in tagsets; fix the tests accordingly, as a valid ign tag is now required
parent d57f5f11
Branches
No related tags found
No related merge requests found
...@@ -338,6 +338,7 @@ Tag Tagset::make_tag(idx_t pos_idx, mask_t values, ...@@ -338,6 +338,7 @@ Tag Tagset::make_tag(idx_t pos_idx, mask_t values,
if (mode & ParseFailWithIgn) { if (mode & ParseFailWithIgn) {
return make_ign_tag(); return make_ign_tag();
} }
std::cerr << mode << "\n";
throw TagParseError("Required attribute missing", throw TagParseError("Required attribute missing",
tag_to_string(Tag(get_pos_mask(pos_idx), values)), tag_to_string(Tag(get_pos_mask(pos_idx), values)),
get_attribute_name(a), id_string()); get_attribute_name(a), id_string());
...@@ -381,11 +382,7 @@ Tag Tagset::make_tag(idx_t pos_idx, mask_t values, ...@@ -381,11 +382,7 @@ Tag Tagset::make_tag(idx_t pos_idx, mask_t values,
Tag Tagset::make_ign_tag() const Tag Tagset::make_ign_tag() const
{ {
return ign_tag_;
static const std::string ign("ign");
mask_t ign_pos_mask = get_pos_mask(ign);
assert(ign_pos_mask.any());
return Tag(ign_pos_mask);
} }
bool Tagset::validate_tag(const Tag &t, ParseMode mode /* = ParseDefault*/, bool Tagset::validate_tag(const Tag &t, ParseMode mode /* = ParseDefault*/,
......
...@@ -644,6 +644,9 @@ private: ...@@ -644,6 +644,9 @@ private:
/// Valid POS mask /// Valid POS mask
mask_t valid_pos_mask_; mask_t valid_pos_mask_;
/// The ign tag
Tag ign_tag_;
}; };
/* implementation */ /* implementation */
......
...@@ -128,6 +128,7 @@ Tagset TagsetParser::load_ini(std::istream &is) ...@@ -128,6 +128,7 @@ Tagset TagsetParser::load_ini(std::istream &is)
boost::algorithm::trim(line); boost::algorithm::trim(line);
++line_no; ++line_no;
if (!line.empty() && line[0] != '#') { if (!line.empty() && line[0] != '#') {
if (line[0] == '[') break;
std::deque<std::string> v; std::deque<std::string> v;
boost::algorithm::split(v, line, boost::is_any_of(sep), boost::algorithm::split(v, line, boost::is_any_of(sep),
boost::algorithm::token_compress_on); boost::algorithm::token_compress_on);
...@@ -155,7 +156,18 @@ Tagset TagsetParser::load_ini(std::istream &is) ...@@ -155,7 +156,18 @@ Tagset TagsetParser::load_ini(std::istream &is)
req_mask[a] = required; req_mask[a] = required;
} }
} }
}
std::string ign_tag_string = "ign";
if (line != "[IGN]") {
while (std::getline(is, line)) {
if (line == "[IGN]") break;
}
}
if (line == "[IGN]") {
if (std::getline(is, line)) {
ign_tag_string = line;
}
} }
vec.clear(); vec.clear();
...@@ -189,7 +201,7 @@ Tagset TagsetParser::load_ini(std::istream &is) ...@@ -189,7 +201,7 @@ Tagset TagsetParser::load_ini(std::istream &is)
tagset.original_pos_indices_.insert(std::make_pair(p,i)); tagset.original_pos_indices_.insert(std::make_pair(p,i));
tagset.valid_pos_mask_ |= (mask_t(1) << i); tagset.valid_pos_mask_ |= (mask_t(1) << i);
} }
tagset.ign_tag_ = tagset.parse_simple_tag(ign_tag_string);
return tagset; return tagset;
} }
......
...@@ -22,6 +22,7 @@ const char tagsetstr1[] = "[ATTR]\n" ...@@ -22,6 +22,7 @@ const char tagsetstr1[] = "[ATTR]\n"
"A tag tog other a3 \n" "A tag tog other a3 \n"
"B data thing tag-thing thang\n" "B data thing tag-thing thang\n"
"C a b c \n" "C a b c \n"
"ign\n"
"[POS]\n some A B [C]\n"; "[POS]\n some A B [C]\n";
BOOST_AUTO_TEST_CASE( token ) BOOST_AUTO_TEST_CASE( token )
......
...@@ -29,7 +29,8 @@ struct F { ...@@ -29,7 +29,8 @@ struct F {
"A tag tog other a3 \n" "A tag tog other a3 \n"
"B data thing tag-thing thang\n" "B data thing tag-thing thang\n"
"C a b c \n" "C a b c \n"
"[POS]\n some A B [C]\n same A B \n P3 [A] [B]\n"; "[POS]\n some A B [C]\n same A B \n P3 [A] [B]\n"
"[IGN]\nP3\n";
tagset.reset(new Corpus2::Tagset()); tagset.reset(new Corpus2::Tagset());
*tagset = Corpus2::Tagset::from_data(tagset_string); *tagset = Corpus2::Tagset::from_data(tagset_string);
} }
......
...@@ -19,6 +19,7 @@ or FITNESS FOR A PARTICULAR PURPOSE. ...@@ -19,6 +19,7 @@ or FITNESS FOR A PARTICULAR PURPOSE.
#include <libpwrutils/foreach.h> #include <libpwrutils/foreach.h>
#include <libcorpus2/tagsetparser.h> #include <libcorpus2/tagsetparser.h>
#include <libcorpus2/tagsetmanager.h> #include <libcorpus2/tagsetmanager.h>
#include <libcorpus2/util/settings.h>
#include <iostream> #include <iostream>
BOOST_AUTO_TEST_SUITE( tagset_parse ); BOOST_AUTO_TEST_SUITE( tagset_parse );
...@@ -31,7 +32,7 @@ Corpus2::Tagset parse(const char* s) ...@@ -31,7 +32,7 @@ Corpus2::Tagset parse(const char* s)
} }
#define PRE "[ATTR]\n" #define PRE "[ATTR]\n"
#define POSA "[POS]\n POS1\n" #define POSA "[POS]\n ign\n"
BOOST_AUTO_TEST_CASE( empty ) BOOST_AUTO_TEST_CASE( empty )
{ {
...@@ -54,7 +55,7 @@ BOOST_AUTO_TEST_CASE( minimal_nonewline ) ...@@ -54,7 +55,7 @@ BOOST_AUTO_TEST_CASE( minimal_nonewline )
{ {
Corpus2::Tagset t; Corpus2::Tagset t;
try { try {
t = parse(PRE "[POS]\n POS1"); t = parse(PRE "[POS]\n ign");
} catch (Corpus2::TagsetParseError& e) { } catch (Corpus2::TagsetParseError& e) {
BOOST_FAIL(e.info()); BOOST_FAIL(e.info());
} }
...@@ -147,6 +148,8 @@ BOOST_AUTO_TEST_CASE( size6 ) ...@@ -147,6 +148,8 @@ BOOST_AUTO_TEST_CASE( size6 )
BOOST_AUTO_TEST_CASE( load_named ) BOOST_AUTO_TEST_CASE( load_named )
{ {
PwrNlp::ConfigPathSetter ps(Corpus2::Path::Instance(),
LIBCORPUS2_TEST_DATA_DIR);
BOOST_CHECK_NO_THROW( BOOST_CHECK_NO_THROW(
try { try {
Corpus2::get_named_tagset("test"); Corpus2::get_named_tagset("test");
......
...@@ -9,3 +9,6 @@ P1 = a b c ...@@ -9,3 +9,6 @@ P1 = a b c
P2 = P2 =
P3 = a b [c] P3 = a b [c]
P4 = [a] [b] [c] P4 = [a] [b] [c]
[IGN]
P4
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment