From a41674f7735a72210cbde6408e5af05205f1c1e1 Mon Sep 17 00:00:00 2001 From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl> Date: Thu, 12 May 2011 17:02:07 +0200 Subject: [PATCH] a test for lex --- tests/rules-data/match/lexicon/cclmatch-1.ccl | 19 +++ .../match/lexicon/cclmatch-1.out.xml | 138 ++++++++++++++++++ tests/rules-data/match/lexicon/cclmatch.xml | 108 ++++++++++++++ .../match/lexicon/simple.is-the-tagset | 1 + 4 files changed, 266 insertions(+) create mode 100644 tests/rules-data/match/lexicon/cclmatch-1.ccl create mode 100644 tests/rules-data/match/lexicon/cclmatch-1.out.xml create mode 100644 tests/rules-data/match/lexicon/cclmatch.xml create mode 100644 tests/rules-data/match/lexicon/simple.is-the-tagset diff --git a/tests/rules-data/match/lexicon/cclmatch-1.ccl b/tests/rules-data/match/lexicon/cclmatch-1.ccl new file mode 100644 index 0000000..5ef1d67 --- /dev/null +++ b/tests/rules-data/match/lexicon/cclmatch-1.ccl @@ -0,0 +1,19 @@ +import("prep-qub.lex", "qp") + +match_rules( + apply( + match( + repeat( + equal(lex(lower(orth[0]), "qp"), "qub") + ) + ), + actions(mark(M, "QP")) + ); + apply( + match( + equal(lex(lower(orth[0]), "qp"), ["prep"]), + inter(class[0], {noun,other}) + ), + actions(mark(M, "PrepNP")) + ) +) diff --git a/tests/rules-data/match/lexicon/cclmatch-1.out.xml b/tests/rules-data/match/lexicon/cclmatch-1.out.xml new file mode 100644 index 0000000..b173c17 --- /dev/null +++ b/tests/rules-data/match/lexicon/cclmatch-1.out.xml @@ -0,0 +1,138 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd"> +<cesAna xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" type="lex disamb"> +<chunkList> + <chunk> + <sentence> + <tok> + <orth>To</orth> + <lex><base>ten</base><ctag>adj</ctag></lex> + <lex><base>to</base><ctag>noun</ctag></lex> + <lex><base>to</base><ctag>other</ctag></lex> + <lex><base>to</base><ctag>qub</ctag></lex> + <lex><base>to</base><ctag>verb</ctag></lex> + <ann chan="QP">0</ann> + </tok> + <tok> + <orth>już</orth> + <lex><base>już</base><ctag>qub</ctag></lex> + <ann chan="QP" head="1">1</ann> + </tok> + <tok> + <orth>dziś</orth> + <lex><base>dziś</base><ctag>noun</ctag></lex> + <lex><base>dziś</base><ctag>qub</ctag></lex> + <ann chan="QP">1</ann> + </tok> + <tok> + <orth>wcale</orth> + <lex><base>wcale</base><ctag>qub</ctag></lex> + <ann chan="QP">1</ann> + </tok> + <tok> + <orth>nie</orth> + <lex><base>nie</base><ctag>other</ctag></lex> + <lex><base>nie</base><ctag>qub</ctag></lex> + <lex><base>on</base><ctag>other</ctag></lex> + <ann chan="QP">1</ann> + </tok> + <tok> + <orth>jest</orth> + <lex><base>być</base><ctag>verb</ctag></lex> + <ann chan="QP">0</ann> + </tok> + <tok> + <orth>śmieszne</orth> + <lex><base>śmieszny</base><ctag>adj</ctag></lex> + <ann chan="QP">0</ann> + </tok> + <ns/> + <tok> + <orth>!</orth> + <lex><base>!</base><ctag>interp</ctag></lex> + <ann chan="QP">0</ann> + </tok> + </sentence> + <sentence> + <tok> + <orth>To</orth> + <lex><base>ten</base><ctag>adj</ctag></lex> + <lex><base>to</base><ctag>noun</ctag></lex> + <lex><base>to</base><ctag>other</ctag></lex> + <lex><base>to</base><ctag>qub</ctag></lex> + <lex><base>to</base><ctag>verb</ctag></lex> + <ann chan="PrepNP">0</ann> + <ann chan="QP">0</ann> + </tok> + <tok> + <orth>dla</orth> + <lex><base>dla</base><ctag>other</ctag></lex> + <ann chan="PrepNP" head="1">1</ann> + <ann chan="QP">0</ann> + </tok> + <tok> + <orth>niego</orth> + <lex><base>on</base><ctag>other</ctag></lex> + <ann chan="PrepNP">1</ann> + <ann chan="QP">0</ann> + </tok> + <tok> + <orth>i</orth> + <lex><base>i</base><ctag>other</ctag></lex> + <lex><base>i</base><ctag>qub</ctag></lex> + <ann chan="PrepNP">0</ann> + <ann chan="QP">0</ann> + </tok> + <tok> + <orth>dla</orth> + <lex><base>dla</base><ctag>other</ctag></lex> + <ann chan="PrepNP" head="1">2</ann> + <ann chan="QP">0</ann> + </tok> + <tok> + <orth>nas</orth> + <lex><base>NASA</base><ctag>noun</ctag></lex> + <lex><base>my</base><ctag>other</ctag></lex> + <ann chan="PrepNP">2</ann> + <ann chan="QP">0</ann> + </tok> + <tok> + <orth>wręcz</orth> + <lex><base>wręcz</base><ctag>qub</ctag></lex> + <lex><base>wręczyć</base><ctag>verb</ctag></lex> + <ann chan="PrepNP">0</ann> + <ann chan="QP" head="1">1</ann> + </tok> + <tok> + <orth>nie</orth> + <lex><base>nie</base><ctag>other</ctag></lex> + <lex><base>nie</base><ctag>qub</ctag></lex> + <lex><base>on</base><ctag>other</ctag></lex> + <ann chan="PrepNP">0</ann> + <ann chan="QP">1</ann> + </tok> + <tok> + <orth>do</orth> + <lex><base>do</base><ctag>noun</ctag></lex> + <lex><base>do</base><ctag>other</ctag></lex> + <ann chan="PrepNP" head="1">3</ann> + <ann chan="QP">0</ann> + </tok> + <tok> + <orth>pomyślenia</orth> + <lex><base>pomyślenie</base><ctag>noun</ctag></lex> + <lex><base>pomyśleć</base><ctag>noun</ctag></lex> + <ann chan="PrepNP">3</ann> + <ann chan="QP">0</ann> + </tok> + <ns/> + <tok> + <orth>.</orth> + <lex><base>.</base><ctag>interp</ctag></lex> + <ann chan="PrepNP">0</ann> + <ann chan="QP">0</ann> + </tok> + </sentence> + </chunk> +</chunkList> +</cesAna> diff --git a/tests/rules-data/match/lexicon/cclmatch.xml b/tests/rules-data/match/lexicon/cclmatch.xml new file mode 100644 index 0000000..300ddc2 --- /dev/null +++ b/tests/rules-data/match/lexicon/cclmatch.xml @@ -0,0 +1,108 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd"> +<cesAna xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" type="lex disamb"> +<chunkList> + <chunk> + <sentence> + <tok> + <orth>To</orth> + <lex><base>ten</base><ctag>adj</ctag></lex> + <lex><base>to</base><ctag>noun</ctag></lex> + <lex><base>to</base><ctag>other</ctag></lex> + <lex><base>to</base><ctag>qub</ctag></lex> + <lex><base>to</base><ctag>verb</ctag></lex> + </tok> + <tok> + <orth>już</orth> + <lex><base>już</base><ctag>qub</ctag></lex> + </tok> + <tok> + <orth>dziś</orth> + <lex><base>dziś</base><ctag>noun</ctag></lex> + <lex><base>dziś</base><ctag>qub</ctag></lex> + </tok> + <tok> + <orth>wcale</orth> + <lex><base>wcale</base><ctag>qub</ctag></lex> + </tok> + <tok> + <orth>nie</orth> + <lex><base>nie</base><ctag>other</ctag></lex> + <lex><base>nie</base><ctag>qub</ctag></lex> + <lex><base>on</base><ctag>other</ctag></lex> + </tok> + <tok> + <orth>jest</orth> + <lex><base>być</base><ctag>verb</ctag></lex> + </tok> + <tok> + <orth>śmieszne</orth> + <lex><base>śmieszny</base><ctag>adj</ctag></lex> + </tok> + <ns/> + <tok> + <orth>!</orth> + <lex><base>!</base><ctag>interp</ctag></lex> + </tok> + </sentence> + <sentence> + <tok> + <orth>To</orth> + <lex><base>ten</base><ctag>adj</ctag></lex> + <lex><base>to</base><ctag>noun</ctag></lex> + <lex><base>to</base><ctag>other</ctag></lex> + <lex><base>to</base><ctag>qub</ctag></lex> + <lex><base>to</base><ctag>verb</ctag></lex> + </tok> + <tok> + <orth>dla</orth> + <lex><base>dla</base><ctag>other</ctag></lex> + </tok> + <tok> + <orth>niego</orth> + <lex><base>on</base><ctag>other</ctag></lex> + </tok> + <tok> + <orth>i</orth> + <lex><base>i</base><ctag>other</ctag></lex> + <lex><base>i</base><ctag>qub</ctag></lex> + </tok> + <tok> + <orth>dla</orth> + <lex><base>dla</base><ctag>other</ctag></lex> + </tok> + <tok> + <orth>nas</orth> + <lex><base>NASA</base><ctag>noun</ctag></lex> + <lex><base>my</base><ctag>other</ctag></lex> + </tok> + <tok> + <orth>wręcz</orth> + <lex><base>wręcz</base><ctag>qub</ctag></lex> + <lex><base>wręczyć</base><ctag>verb</ctag></lex> + </tok> + <tok> + <orth>nie</orth> + <lex><base>nie</base><ctag>other</ctag></lex> + <lex><base>nie</base><ctag>qub</ctag></lex> + <lex><base>on</base><ctag>other</ctag></lex> + </tok> + <tok> + <orth>do</orth> + <lex><base>do</base><ctag>noun</ctag></lex> + <lex><base>do</base><ctag>other</ctag></lex> + </tok> + <tok> + <orth>pomyślenia</orth> + <lex><base>pomyślenie</base><ctag>noun</ctag></lex> + <lex><base>pomyśleć</base><ctag>noun</ctag></lex> + </tok> + <ns/> + <tok> + <orth>.</orth> + <lex><base>.</base><ctag>interp</ctag></lex> + </tok> + </sentence> + </chunk> +</chunkList> +</cesAna> diff --git a/tests/rules-data/match/lexicon/simple.is-the-tagset b/tests/rules-data/match/lexicon/simple.is-the-tagset new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tests/rules-data/match/lexicon/simple.is-the-tagset @@ -0,0 +1 @@ + -- GitLab