From ddf87ee1aa7b91920ba87244abf7d4be9f277808 Mon Sep 17 00:00:00 2001 From: ilor <kailoran@gmail.com> Date: Mon, 14 Feb 2011 15:42:55 +0100 Subject: [PATCH] CCL Rules data-driven tests. How-to on writing these tests is included in a short README. Note: one of the tests fails on purpose so everyone can see what it looks like. --- tests/CMakeLists.txt | 1 + tests/datadriven.cpp | 5 + tests/datarule.cpp | 227 +++++++++++++++++++++++++ tests/datarule.h | 8 + tests/main.cpp | 5 +- tests/rules-data/README | 7 + tests/rules-data/kipi.is-the-tagset | 0 tests/rules-data/zero/rule-bad.ccl | 8 + tests/rules-data/zero/rule-bad.out.xml | 168 ++++++++++++++++++ tests/rules-data/zero/rule.ccl | 8 + tests/rules-data/zero/rule.out.xml | 167 ++++++++++++++++++ tests/rules-data/zero/t01.xml | 168 ++++++++++++++++++ 12 files changed, 771 insertions(+), 1 deletion(-) create mode 100644 tests/datarule.cpp create mode 100644 tests/datarule.h create mode 100644 tests/rules-data/README create mode 100644 tests/rules-data/kipi.is-the-tagset create mode 100644 tests/rules-data/zero/rule-bad.ccl create mode 100644 tests/rules-data/zero/rule-bad.out.xml create mode 100644 tests/rules-data/zero/rule.ccl create mode 100644 tests/rules-data/zero/rule.out.xml create mode 100644 tests/rules-data/zero/t01.xml diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 6299d51..55e7aa8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -20,6 +20,7 @@ add_executable(tests constant.cpp context.cpp datadriven.cpp + datarule.cpp getlemmas.cpp getsymbols.cpp getsymbolsinrange.cpp diff --git a/tests/datadriven.cpp b/tests/datadriven.cpp index 3b2f276..bbfce4f 100644 --- a/tests/datadriven.cpp +++ b/tests/datadriven.cpp @@ -33,6 +33,9 @@ using boost::filesystem::is_directory; using boost::filesystem::path; using boost::filesystem::ifstream; + +namespace { + struct compare_test { path in_file; @@ -189,6 +192,8 @@ void test_one_item(const compare_test& c) } } +} + void 
init_data_suite(boost::unit_test::test_suite *ts, const std::string& path)
 {
 	std::string subdir_name = LIBWCCL_TEST_DATA_DIR "data";
diff --git a/tests/datarule.cpp b/tests/datarule.cpp
new file mode 100644
index 0000000..83d2b48
--- /dev/null
+++ b/tests/datarule.cpp
@@ -0,0 +1,279 @@
+#include "datadriven.h"
+#include "datarule.h"
+
+#include <libpwrutils/util.h>
+#include <libpwrutils/foreach.h>
+#include <libpwrutils/pathsearch.h>
+#include <libcorpus2/util/settings.h>
+#include <libcorpus2/tagsetmanager.h>
+#include <libcorpus2/io/xcesreader.h>
+
+#include <libwccl/sentencecontext.h>
+#include <libwccl/parser/Parser.h>
+#include <libwccl/ops/funexeccontext.h>
+
+#include <fstream>
+#include <boost/filesystem/fstream.hpp>
+#include <iostream>
+#include <iomanip>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <boost/algorithm/string.hpp>
+#include <boost/bind.hpp>
+#include <boost/filesystem.hpp>
+#include <boost/foreach.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/test/unit_test.hpp>
+#include <boost/test/parameterized_test.hpp>
+
+using boost::filesystem::directory_iterator;
+using boost::filesystem::exists;
+using boost::filesystem::is_directory;
+using boost::filesystem::path;
+using boost::filesystem::ifstream;
+
+
+namespace {
+
+/// One rule test case: the rules to run, the corpus to run them on and the
+/// corpus expected as the result.
+struct rule_compare_test
+{
+	std::string tagset; ///< name handed to Corpus2::get_named_tagset
+	path corpus_file; ///< input corpus, read with XcesReader
+	path rule_file; ///< .ccl file with the rules under test
+	path output_corpus; ///< expected output corpus, read with XcesReader
+};
+
+typedef std::map<std::string, boost::shared_ptr<Corpus2::Chunk> > corpus_cache_t;
+/// First chunk of every input corpus requested so far, keyed by file name.
+corpus_cache_t corpus_cache;
+
+/**
+ * Return the first chunk of the corpus in the given file, reading the file
+ * only the first time it is asked for.
+ *
+ * The cache is keyed by file name alone: the tagset is only used when the
+ * file is actually read. Whatever get_next_chunk() returned is cached, and
+ * the same chunk object is handed to every caller, so callers that want to
+ * modify sentences have to work on copies.
+ */
+boost::shared_ptr<Corpus2::Chunk> get_corpus(const std::string& path, const Corpus2::Tagset& tagset)
+{
+	corpus_cache_t::const_iterator cached = corpus_cache.find(path);
+	if (cached != corpus_cache.end()) {
+		return cached->second;
+	}
+	Corpus2::XcesReader xr(tagset, path);
+	boost::shared_ptr<Corpus2::Chunk> chunk = xr.get_next_chunk();
+	corpus_cache.insert(std::make_pair(path, chunk));
+	return chunk;
+}
+
+/**
+ * Print a lexeme-by-lexeme comparison of two lexeme sets to stderr.
+ *
+ * Both sets are walked in parallel in their sort order. A lexeme present
+ * only in lex is reported as EXTRA, one present only in expected_lex as
+ * MISSING, and one present in both as OK.
+ */
+void dump_lexemes(const std::set<Corpus2::Lexeme>& lex, const std::set<Corpus2::Lexeme>& expected_lex,
+		const Corpus2::Tagset& tagset)
+{
+	std::set<Corpus2::Lexeme>::const_iterator i = lex.begin();
+	std::set<Corpus2::Lexeme>::const_iterator ei = expected_lex.begin();
+	while (i != lex.end() && ei != expected_lex.end()) {
+		if (*i < *ei) {
+			// *i sorts before every remaining expected lexeme: not expected
+			std::cerr << "EXTRA: " << i->lemma_utf8() << "\t" << tagset.tag_to_string(i->tag()) << "\n";
+			++i;
+		} else if (*ei < *i) {
+			// *ei sorts before every remaining actual lexeme: not produced
+			std::cerr << "MISSING: " << ei->lemma_utf8() << "\t" << tagset.tag_to_string(ei->tag()) << "\n";
+			++ei;
+		} else {
+			std::cerr << "OK: " << i->lemma_utf8() << "\t" << tagset.tag_to_string(i->tag()) << "\n";
+			++i;
+			++ei;
+		}
+	}
+	while (i != lex.end()) {
+		std::cerr << "EXTRA: " << i->lemma_utf8() << "\t" << tagset.tag_to_string(i->tag()) << "\n";
+		++i;
+	}
+	while (ei != expected_lex.end()) {
+		std::cerr << "MISSING: " << ei->lemma_utf8() << "\t" << tagset.tag_to_string(ei->tag()) << "\n";
+		++ei;
+	}
+}
+
+/**
+ * Run one rule test case.
+ *
+ * The rules from c.rule_file are executed once on a copy of every sentence
+ * of the input chunk; each resulting token is then compared with the token
+ * at the same position in the first chunk of c.output_corpus. Lexemes are
+ * compared as sets, so their order and duplicates do not matter.
+ */
+void test_one_rule_item_actual(const rule_compare_test& c)
+{
+	const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(c.tagset);
+	boost::shared_ptr<Corpus2::Chunk> chunk = get_corpus(c.corpus_file.string(), tagset);
+	// REQUIRE rather than ERROR: both chunks are dereferenced right below
+	BOOST_REQUIRE_MESSAGE(chunk.get() != 0, "Empty chunk loaded");
+	Corpus2::XcesReader xr(tagset, c.output_corpus.string());
+	boost::shared_ptr<Corpus2::Chunk> expected = xr.get_next_chunk();
+	BOOST_REQUIRE_MESSAGE(expected.get() != 0, "Empty expected output chunk loaded");
+	BOOST_REQUIRE_EQUAL(chunk->sentences().size(), expected->sentences().size());
+	Wccl::Parser parser(tagset);
+	std::string rf = c.rule_file.string();
+	std::ifstream is(rf.c_str());
+	BOOST_REQUIRE(is.good());
+	boost::shared_ptr<Wccl::RuleSequence> rules = parser.parseRuleSequence(is);
+	for (size_t i = 0; i < chunk->sentences().size(); ++i) {
+		// the input chunk is cached and shared between test cases, so run on a copy
+		Corpus2::Sentence::Ptr sentence = chunk->sentences()[i]->clone_shared();
+		Corpus2::Sentence::Ptr expected_sentence = expected->sentences()[i];
+		BOOST_REQUIRE_EQUAL(sentence->size(), expected_sentence->size());
+		rules->execute_once(sentence);
+		BOOST_REQUIRE_EQUAL(sentence->size(), expected_sentence->size());
+		for (size_t ti = 0; ti < sentence->size(); ++ti) {
+			Corpus2::Token& token = *sentence->tokens()[ti];
+			Corpus2::Token& expected_token = *expected_sentence->tokens()[ti];
+			BOOST_CHECK_EQUAL(token.orth_utf8(), expected_token.orth_utf8());
+			std::set<Corpus2::Lexeme> lex(token.lexemes().begin(), token.lexemes().end());
+			std::set<Corpus2::Lexeme> expected_lex(
+				expected_token.lexemes().begin(), expected_token.lexemes().end());
+			if (lex != expected_lex) {
+				BOOST_ERROR("Lexeme mismatch in sentence " << i
+					<< ", token " << ti << " [" << expected_token.orth_utf8() << "]"
+					<< " (" << lex.size() << ", expected " << expected_lex.size() << ")");
+				dump_lexemes(lex, expected_lex, tagset);
+			}
+		}
+	}
+}
+
+/// Settings a directory inherits from its parents while test cases are collected.
+struct init_status
+{
+	path input; ///< last input corpus seen (.xml file that is not .out.xml)
+	std::string tagset; ///< stem of the last .is-the-tagset file seen
+};
+
+/**
+ * Collect the rule test cases found in dir and, recursively, below it.
+ *
+ * Every .ccl file accompanied by a .out.xml file of the same name becomes
+ * one test case; .ccl files without one are reported on stderr and skipped.
+ * The input corpus and the tagset come from dir itself when it has an
+ * .xml / .is-the-tagset file, and from the closest parent directory that
+ * has one otherwise.
+ *
+ * @param dir directory to scan
+ * @param ps text accumulated from the directories visited on the way down; only used in the log message
+ * @param tests vector the test cases found are appended to
+ * @param status input corpus and tagset inherited from the parent directory
+ * @return number of test cases found in dir and its subdirectories
+ */
+int init_subdir(const path& dir, std::string ps, std::vector<rule_compare_test>& tests,
+		init_status status = init_status())
+{
+	int count = 0;
+	ps += dir.string();
+	if (!ps.empty()) {
+		ps += Corpus2::Path::Instance().get_path_separator();
+	}
+
+	directory_iterator end_itr; // default-constructed is past-the-end
+	std::set<path> txt_tests;
+	std::set<path> subdirs;
+
+	for (directory_iterator itr(dir); itr != end_itr; ++itr) {
+		if (is_directory(itr->status())) {
+			subdirs.insert(itr->path());
+		} else {
+			if (itr->path().extension() == ".ccl") {
+				txt_tests.insert(itr->path());
+			} else if (itr->path().extension() == ".xml") {
+				if (!boost::algorithm::ends_with(itr->path().stem(), ".out")) {
+					status.input = itr->path();
+				}
+			} else if (itr->path().extension() == ".is-the-tagset") {
+				status.tagset = itr->path().stem();
+			}
+		}
+	}
+	foreach (const path& s, txt_tests) {
+		path o = s;
+		o.replace_extension(".out.xml");
+		if (boost::filesystem::is_regular_file(o)) {
+			rule_compare_test c;
+			c.corpus_file = status.input;
+			c.rule_file = s;
+			c.output_corpus = o;
+			c.tagset = status.tagset;
+			tests.push_back(c);
+			++count;
+		} else {
+			std::cerr << "No output file: " << o.string() << "\n";
+		}
+	}
+	BOOST_TEST_MESSAGE("Found " << count << " valid data test case"
+		<< (count != 1 ? "s" : "")
+		<< " in " << dir
+		<< " [" << ps << "]"
+		);
+	foreach (const path& s, subdirs) {
+		count += init_subdir(s, ps, tests, status);
+	}
+	return count;
+}
+
+/// Run one rule test case, reporting a PwrNlpError thrown on the way as a test error.
+void test_one_rule_item(const rule_compare_test& c)
+{
+	try {
+		test_one_rule_item_actual(c);
+	} catch (PwrNlp::PwrNlpError& e) {
+		BOOST_ERROR("Caught " << e.scope() << " exception: \n" << e.info());
+	}
+}
+
+}
+
+/**
+ * Add one test case to ts for every rule test found under the rule test
+ * data directory.
+ *
+ * @param ts suite the test cases are added to
+ * @param path directory to scan; when empty, LIBWCCL_TEST_DATA_DIR "rules-data" is used
+ */
+void init_data_rule_suite(boost::unit_test::test_suite *ts, const std::string& path)
+{
+	std::string subdir_name = LIBWCCL_TEST_DATA_DIR "rules-data";
+	if (!path.empty()) {
+		subdir_name = path;
+	}
+	if (!exists(subdir_name)) {
+		BOOST_TEST_MESSAGE("Rules test data subdir does not exist");
+		// nothing to scan; iterating a missing directory would throw
+		return;
+	}
+	std::vector<rule_compare_test> compares;
+	init_subdir(subdir_name, "", compares);
+	foreach (const rule_compare_test& ci, compares) {
+		std::string rel_path = boost::algorithm::replace_first_copy(
+			ci.rule_file.string(), subdir_name, "");
+		std::string name = "rule_data_test:" + rel_path;
+		std::cout << name << "\n";
+		ts->add(boost::unit_test::make_test_case(
+			boost::bind(test_one_rule_item, ci), name));
+	}
+}
+
diff --git a/tests/datarule.h b/tests/datarule.h
new file mode 100644
index 0000000..10da9c2
--- /dev/null
+++ b/tests/datarule.h
@@ -0,0 +1,17 @@
+#ifndef LIBWCCL_TESTS_DATARULE_H
+#define LIBWCCL_TESTS_DATARULE_H
+
+#include <string>
+
+#include <boost/test/unit_test.hpp>
+
+/**
+ * Add one test case to ts for every rule test found under the rule test
+ * data directory.
+ *
+ * @param ts suite the test cases are added to
+ * @param path directory to scan; when empty, LIBWCCL_TEST_DATA_DIR "rules-data" is used
+ */
+void init_data_rule_suite(boost::unit_test::test_suite* ts, const std::string& path);
+
+#endif // LIBWCCL_TESTS_DATARULE_H
diff --git a/tests/main.cpp b/tests/main.cpp
index beb1c01..0f24bf9 100644
--- a/tests/main.cpp
+++
b/tests/main.cpp @@ -2,6 +2,7 @@ #include <boost/test/included/unit_test.hpp> #include <unicode/uclean.h> #include "datadriven.h" +#include "datarule.h" BOOST_AUTO_TEST_CASE(test_test) { @@ -37,6 +38,8 @@ boost::unit_test::test_suite* init_unit_test_suite(int argc, char* argv[]) } init_data_suite(ts1, compare_path); boost::unit_test::framework::master_test_suite().add(ts1); - + boost::unit_test::test_suite* ts2 = BOOST_TEST_SUITE("rule-compare"); + init_data_rule_suite(ts2, compare_path); + boost::unit_test::framework::master_test_suite().add(ts2); return 0; } diff --git a/tests/rules-data/README b/tests/rules-data/README new file mode 100644 index 0000000..fcb85ff --- /dev/null +++ b/tests/rules-data/README @@ -0,0 +1,7 @@ +Test cases are defined by .ccl files; one .ccl file is one testcase. + +A test case loads a corpus from an .xml file that is not an .out.xml file from the test case directory, or directories above if there are none. Behavior is undefined if there is more than one .xml file. Only the first chunk is processed (all sentences from the chunk). + +A foo.ccl file should be accompanied by a foo.out.xml file defining the expected output. Output is compared intelligently: lexeme order and duplicates do not matter. + +A magic file with the extension .is-the-tagset defines the tagset for testcases in this directory and all subdirectories, unless overridden by another .is-the-tagset file. 
diff --git a/tests/rules-data/kipi.is-the-tagset b/tests/rules-data/kipi.is-the-tagset new file mode 100644 index 0000000..e69de29 diff --git a/tests/rules-data/zero/rule-bad.ccl b/tests/rules-data/zero/rule-bad.ccl new file mode 100644 index 0000000..d9689ad --- /dev/null +++ b/tests/rules-data/zero/rule-bad.ccl @@ -0,0 +1,8 @@ +rules( + rule("rule1", + in(class[0], {conj, qub}), + delete( + equal(class[0], {qub}) + ) + ) +) diff --git a/tests/rules-data/zero/rule-bad.out.xml b/tests/rules-data/zero/rule-bad.out.xml new file mode 100644 index 0000000..3c7bc4f --- /dev/null +++ b/tests/rules-data/zero/rule-bad.out.xml @@ -0,0 +1,168 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd"> +<cesAna version="1.0" type="lex disamb"> +<chunkList> +<chunk id="ch51" type="tok"> +<chunk type="s"> +<tok> +<orth>Uważam</orth> +<lex disamb="1"><base>uważać</base><ctag>fin:sg:pri:imperf</ctag></lex> +</tok> +<ns/> +<tok> +<orth>,</orth> +<lex disamb="1"><base>,</base><ctag>interp</ctag></lex> +</tok> +<tok> +<orth>że</orth> +<lex disamb="1"><base>że</base><ctag>conj</ctag></lex> +</tok> +<tok> +<orth>światopogląd</orth> +<lex><base>światopogląd</base><ctag>subst:sg:acc:m3</ctag></lex> +<lex disamb="1"><base>światopogląd</base><ctag>subst:sg:nom:m3</ctag></lex> +</tok> +<tok> +<orth>może</orth> +<lex><base>może</base><ctag>qub</ctag></lex> +<lex disamb="1"><base>móc</base><ctag>fin:sg:ter:imperf</ctag></lex> +</tok> +<tok> +<orth>być</orth> +<lex disamb="1"><base>być</base><ctag>inf:imperf</ctag></lex> +</tok> +<tok> +<orth>materialistyczny</orth> +<lex><base>materialistyczny</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>materialistyczny</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>materialistyczny</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>materialistyczny</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<tok> +<orth>lub</orth> +<lex disamb="1"><base>lub</base><ctag>conj</ctag></lex> 
+<lex><base>lubić</base><ctag>impt:sg:sec:imperf</ctag></lex> +</tok> +<tok> +<orth>idealistyczny</orth> +<lex><base>idealistyczny</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>idealistyczny</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>idealistyczny</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>idealistyczny</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<tok> +<orth>(</orth> +<lex disamb="1"><base>(</base><ctag>interp</ctag></lex> +</tok> +<ns/> +<tok> +<orth>w</orth> +<lex><base>w</base><ctag>prep:acc:nwok</ctag></lex> +<lex disamb="1"><base>w</base><ctag>prep:loc:nwok</ctag></lex> +</tok> +<tok> +<orth>prymitywnych</orth> +<lex><base>prymitywny</base><ctag>adj:pl:acc:m1:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:gen:f:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:gen:m1:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:gen:m2:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:gen:m3:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:gen:n:pos</ctag></lex> +<lex disamb="1"><base>prymitywny</base><ctag>adj:pl:loc:f:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:loc:m1:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:loc:m2:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:loc:m3:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:loc:n:pos</ctag></lex> +</tok> +<tok> +<orth>cywilizacjach</orth> +<lex disamb="1"><base>cywilizacja</base><ctag>subst:pl:loc:f</ctag></lex> +</tok> +<tok> +<orth>nawet</orth> +<lex disamb="1"><base>nawet</base><ctag>qub</ctag></lex> +</tok> +<tok> +<orth>magiczny</orth> +<lex><base>magiczny</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>magiczny</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>magiczny</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>magiczny</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<ns/> +<tok> +<orth>)</orth> +<lex 
disamb="1"><base>)</base><ctag>interp</ctag></lex> +</tok> +<ns/> +<tok> +<orth>,</orth> +<lex disamb="1"><base>,</base><ctag>interp</ctag></lex> +</tok> +<tok> +<orth>ale</orth> +<lex disamb="1"><base>ale</base><ctag>conj</ctag></lex> +</tok> +<tok> +<orth>nie</orth> +<lex disamb="1"><base>nie</base><ctag>qub</ctag></lex> +<lex><base>on</base><ctag>ppron3:pl:acc:f:ter:praep</ctag></lex> +<lex><base>on</base><ctag>ppron3:pl:acc:m2:ter:praep</ctag></lex> +<lex><base>on</base><ctag>ppron3:pl:acc:m3:ter:praep</ctag></lex> +<lex><base>on</base><ctag>ppron3:pl:acc:n:ter:praep</ctag></lex> +<lex><base>on</base><ctag>ppron3:sg:acc:n:ter:praep</ctag></lex> +</tok> +<tok> +<orth>morski</orth> +<lex><base>morski</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>morski</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>morski</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>morski</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<ns/> +<tok> +<orth>,</orth> +<lex disamb="1"><base>,</base><ctag>interp</ctag></lex> +</tok> +<tok> +<orth>lotniczy</orth> +<lex><base>lotniczy</base><ctag>adj:pl:nom:m1:pos</ctag></lex> +<lex><base>lotniczy</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>lotniczy</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>lotniczy</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>lotniczy</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<ns/> +<tok> +<orth>,</orth> +<lex disamb="1"><base>,</base><ctag>interp</ctag></lex> +</tok> +<tok> +<orth>rolny</orth> +<lex><base>rolny</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>rolny</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>rolny</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>rolny</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<tok> +<orth>czy</orth> +<lex disamb="1"><base>czy</base><ctag>conj</ctag></lex> +<lex><base>czy</base><ctag>qub</ctag></lex> +</tok> +<tok> +<orth>leśny</orth> 
+<lex><base>leśny</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>leśny</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>leśny</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>leśny</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<ns/> +<tok> +<orth>.</orth> +<lex disamb="1"><base>.</base><ctag>interp</ctag></lex> +</tok> +</chunk> +</chunk> +</chunkList> +</cesAna> + diff --git a/tests/rules-data/zero/rule.ccl b/tests/rules-data/zero/rule.ccl new file mode 100644 index 0000000..d9689ad --- /dev/null +++ b/tests/rules-data/zero/rule.ccl @@ -0,0 +1,8 @@ +rules( + rule("rule1", + in(class[0], {conj, qub}), + delete( + equal(class[0], {qub}) + ) + ) +) diff --git a/tests/rules-data/zero/rule.out.xml b/tests/rules-data/zero/rule.out.xml new file mode 100644 index 0000000..2c4b1e0 --- /dev/null +++ b/tests/rules-data/zero/rule.out.xml @@ -0,0 +1,167 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd"> +<cesAna version="1.0" type="lex disamb"> +<chunkList> +<chunk id="ch51" type="tok"> +<chunk type="s"> +<tok> +<orth>Uważam</orth> +<lex disamb="1"><base>uważać</base><ctag>fin:sg:pri:imperf</ctag></lex> +</tok> +<ns/> +<tok> +<orth>,</orth> +<lex disamb="1"><base>,</base><ctag>interp</ctag></lex> +</tok> +<tok> +<orth>że</orth> +<lex disamb="1"><base>że</base><ctag>conj</ctag></lex> +</tok> +<tok> +<orth>światopogląd</orth> +<lex><base>światopogląd</base><ctag>subst:sg:acc:m3</ctag></lex> +<lex disamb="1"><base>światopogląd</base><ctag>subst:sg:nom:m3</ctag></lex> +</tok> +<tok> +<orth>może</orth> +<lex><base>może</base><ctag>qub</ctag></lex> +<lex disamb="1"><base>móc</base><ctag>fin:sg:ter:imperf</ctag></lex> +</tok> +<tok> +<orth>być</orth> +<lex disamb="1"><base>być</base><ctag>inf:imperf</ctag></lex> +</tok> +<tok> +<orth>materialistyczny</orth> +<lex><base>materialistyczny</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>materialistyczny</base><ctag>adj:sg:nom:m1:pos</ctag></lex> 
+<lex><base>materialistyczny</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>materialistyczny</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<tok> +<orth>lub</orth> +<lex disamb="1"><base>lub</base><ctag>conj</ctag></lex> +<lex><base>lubić</base><ctag>impt:sg:sec:imperf</ctag></lex> +</tok> +<tok> +<orth>idealistyczny</orth> +<lex><base>idealistyczny</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>idealistyczny</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>idealistyczny</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>idealistyczny</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<tok> +<orth>(</orth> +<lex disamb="1"><base>(</base><ctag>interp</ctag></lex> +</tok> +<ns/> +<tok> +<orth>w</orth> +<lex><base>w</base><ctag>prep:acc:nwok</ctag></lex> +<lex disamb="1"><base>w</base><ctag>prep:loc:nwok</ctag></lex> +</tok> +<tok> +<orth>prymitywnych</orth> +<lex><base>prymitywny</base><ctag>adj:pl:acc:m1:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:gen:f:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:gen:m1:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:gen:m2:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:gen:m3:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:gen:n:pos</ctag></lex> +<lex disamb="1"><base>prymitywny</base><ctag>adj:pl:loc:f:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:loc:m1:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:loc:m2:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:loc:m3:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:loc:n:pos</ctag></lex> +</tok> +<tok> +<orth>cywilizacjach</orth> +<lex disamb="1"><base>cywilizacja</base><ctag>subst:pl:loc:f</ctag></lex> +</tok> +<tok> +<orth>nawet</orth> +<lex disamb="1"><base>nawet</base><ctag>qub</ctag></lex> +</tok> +<tok> +<orth>magiczny</orth> +<lex><base>magiczny</base><ctag>adj:sg:acc:m3:pos</ctag></lex> 
+<lex><base>magiczny</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>magiczny</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>magiczny</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<ns/> +<tok> +<orth>)</orth> +<lex disamb="1"><base>)</base><ctag>interp</ctag></lex> +</tok> +<ns/> +<tok> +<orth>,</orth> +<lex disamb="1"><base>,</base><ctag>interp</ctag></lex> +</tok> +<tok> +<orth>ale</orth> +<lex disamb="1"><base>ale</base><ctag>conj</ctag></lex> +</tok> +<tok> +<orth>nie</orth> +<lex disamb="1"><base>nie</base><ctag>qub</ctag></lex> +<lex><base>on</base><ctag>ppron3:pl:acc:f:ter:praep</ctag></lex> +<lex><base>on</base><ctag>ppron3:pl:acc:m2:ter:praep</ctag></lex> +<lex><base>on</base><ctag>ppron3:pl:acc:m3:ter:praep</ctag></lex> +<lex><base>on</base><ctag>ppron3:pl:acc:n:ter:praep</ctag></lex> +<lex><base>on</base><ctag>ppron3:sg:acc:n:ter:praep</ctag></lex> +</tok> +<tok> +<orth>morski</orth> +<lex><base>morski</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>morski</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>morski</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>morski</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<ns/> +<tok> +<orth>,</orth> +<lex disamb="1"><base>,</base><ctag>interp</ctag></lex> +</tok> +<tok> +<orth>lotniczy</orth> +<lex><base>lotniczy</base><ctag>adj:pl:nom:m1:pos</ctag></lex> +<lex><base>lotniczy</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>lotniczy</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>lotniczy</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>lotniczy</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<ns/> +<tok> +<orth>,</orth> +<lex disamb="1"><base>,</base><ctag>interp</ctag></lex> +</tok> +<tok> +<orth>rolny</orth> +<lex><base>rolny</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>rolny</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>rolny</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex 
disamb="1"><base>rolny</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<tok> +<orth>czy</orth> +<lex disamb="1"><base>czy</base><ctag>conj</ctag></lex> +</tok> +<tok> +<orth>leśny</orth> +<lex><base>leśny</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>leśny</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>leśny</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>leśny</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<ns/> +<tok> +<orth>.</orth> +<lex disamb="1"><base>.</base><ctag>interp</ctag></lex> +</tok> +</chunk> +</chunk> +</chunkList> +</cesAna> + diff --git a/tests/rules-data/zero/t01.xml b/tests/rules-data/zero/t01.xml new file mode 100644 index 0000000..3c7bc4f --- /dev/null +++ b/tests/rules-data/zero/t01.xml @@ -0,0 +1,168 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd"> +<cesAna version="1.0" type="lex disamb"> +<chunkList> +<chunk id="ch51" type="tok"> +<chunk type="s"> +<tok> +<orth>Uważam</orth> +<lex disamb="1"><base>uważać</base><ctag>fin:sg:pri:imperf</ctag></lex> +</tok> +<ns/> +<tok> +<orth>,</orth> +<lex disamb="1"><base>,</base><ctag>interp</ctag></lex> +</tok> +<tok> +<orth>że</orth> +<lex disamb="1"><base>że</base><ctag>conj</ctag></lex> +</tok> +<tok> +<orth>światopogląd</orth> +<lex><base>światopogląd</base><ctag>subst:sg:acc:m3</ctag></lex> +<lex disamb="1"><base>światopogląd</base><ctag>subst:sg:nom:m3</ctag></lex> +</tok> +<tok> +<orth>może</orth> +<lex><base>może</base><ctag>qub</ctag></lex> +<lex disamb="1"><base>móc</base><ctag>fin:sg:ter:imperf</ctag></lex> +</tok> +<tok> +<orth>być</orth> +<lex disamb="1"><base>być</base><ctag>inf:imperf</ctag></lex> +</tok> +<tok> +<orth>materialistyczny</orth> +<lex><base>materialistyczny</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>materialistyczny</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>materialistyczny</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex 
disamb="1"><base>materialistyczny</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<tok> +<orth>lub</orth> +<lex disamb="1"><base>lub</base><ctag>conj</ctag></lex> +<lex><base>lubić</base><ctag>impt:sg:sec:imperf</ctag></lex> +</tok> +<tok> +<orth>idealistyczny</orth> +<lex><base>idealistyczny</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>idealistyczny</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>idealistyczny</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>idealistyczny</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<tok> +<orth>(</orth> +<lex disamb="1"><base>(</base><ctag>interp</ctag></lex> +</tok> +<ns/> +<tok> +<orth>w</orth> +<lex><base>w</base><ctag>prep:acc:nwok</ctag></lex> +<lex disamb="1"><base>w</base><ctag>prep:loc:nwok</ctag></lex> +</tok> +<tok> +<orth>prymitywnych</orth> +<lex><base>prymitywny</base><ctag>adj:pl:acc:m1:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:gen:f:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:gen:m1:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:gen:m2:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:gen:m3:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:gen:n:pos</ctag></lex> +<lex disamb="1"><base>prymitywny</base><ctag>adj:pl:loc:f:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:loc:m1:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:loc:m2:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:loc:m3:pos</ctag></lex> +<lex><base>prymitywny</base><ctag>adj:pl:loc:n:pos</ctag></lex> +</tok> +<tok> +<orth>cywilizacjach</orth> +<lex disamb="1"><base>cywilizacja</base><ctag>subst:pl:loc:f</ctag></lex> +</tok> +<tok> +<orth>nawet</orth> +<lex disamb="1"><base>nawet</base><ctag>qub</ctag></lex> +</tok> +<tok> +<orth>magiczny</orth> +<lex><base>magiczny</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>magiczny</base><ctag>adj:sg:nom:m1:pos</ctag></lex> 
+<lex><base>magiczny</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>magiczny</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<ns/> +<tok> +<orth>)</orth> +<lex disamb="1"><base>)</base><ctag>interp</ctag></lex> +</tok> +<ns/> +<tok> +<orth>,</orth> +<lex disamb="1"><base>,</base><ctag>interp</ctag></lex> +</tok> +<tok> +<orth>ale</orth> +<lex disamb="1"><base>ale</base><ctag>conj</ctag></lex> +</tok> +<tok> +<orth>nie</orth> +<lex disamb="1"><base>nie</base><ctag>qub</ctag></lex> +<lex><base>on</base><ctag>ppron3:pl:acc:f:ter:praep</ctag></lex> +<lex><base>on</base><ctag>ppron3:pl:acc:m2:ter:praep</ctag></lex> +<lex><base>on</base><ctag>ppron3:pl:acc:m3:ter:praep</ctag></lex> +<lex><base>on</base><ctag>ppron3:pl:acc:n:ter:praep</ctag></lex> +<lex><base>on</base><ctag>ppron3:sg:acc:n:ter:praep</ctag></lex> +</tok> +<tok> +<orth>morski</orth> +<lex><base>morski</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>morski</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>morski</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>morski</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<ns/> +<tok> +<orth>,</orth> +<lex disamb="1"><base>,</base><ctag>interp</ctag></lex> +</tok> +<tok> +<orth>lotniczy</orth> +<lex><base>lotniczy</base><ctag>adj:pl:nom:m1:pos</ctag></lex> +<lex><base>lotniczy</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>lotniczy</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>lotniczy</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>lotniczy</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<ns/> +<tok> +<orth>,</orth> +<lex disamb="1"><base>,</base><ctag>interp</ctag></lex> +</tok> +<tok> +<orth>rolny</orth> +<lex><base>rolny</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>rolny</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>rolny</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>rolny</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<tok> 
+<orth>czy</orth> +<lex disamb="1"><base>czy</base><ctag>conj</ctag></lex> +<lex><base>czy</base><ctag>qub</ctag></lex> +</tok> +<tok> +<orth>leśny</orth> +<lex><base>leśny</base><ctag>adj:sg:acc:m3:pos</ctag></lex> +<lex><base>leśny</base><ctag>adj:sg:nom:m1:pos</ctag></lex> +<lex><base>leśny</base><ctag>adj:sg:nom:m2:pos</ctag></lex> +<lex disamb="1"><base>leśny</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<ns/> +<tok> +<orth>.</orth> +<lex disamb="1"><base>.</base><ctag>interp</ctag></lex> +</tok> +</chunk> +</chunk> +</chunkList> +</cesAna> + -- GitLab