From 730c1d35ef7f4f4ff8e00b4bd9eee02173869121 Mon Sep 17 00:00:00 2001 From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl> Date: Fri, 10 Dec 2010 14:32:57 +0100 Subject: [PATCH] tests for underspecified tokens in wagr --- tests/data/agreement/kobieta.xml | 72 +++++++++++++++++++++++ tests/data/agreement/nadzieje.xml | 96 +++++++++++++++++++++++++++++++ tests/data/agreement/wagr2.ccl | 59 +++++++++++++++++++ tests/data/agreement/wagr3.ccl | 44 ++++++++++++++ 4 files changed, 271 insertions(+) create mode 100644 tests/data/agreement/kobieta.xml create mode 100644 tests/data/agreement/nadzieje.xml create mode 100644 tests/data/agreement/wagr2.ccl create mode 100644 tests/data/agreement/wagr3.ccl diff --git a/tests/data/agreement/kobieta.xml b/tests/data/agreement/kobieta.xml new file mode 100644 index 0000000..2081f5a --- /dev/null +++ b/tests/data/agreement/kobieta.xml @@ -0,0 +1,72 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd"> +<cesAna xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" type="lex disamb"> +<chunkList> + <chunk> + <chunk type="s"> + <tok> + <orth>Kobieta</orth> + <lex><base>kobieta</base><ctag>subst:sg:nom:f</ctag></lex> + </tok> + <tok> + <orth>nie</orth> + <lex><base>nie</base><ctag>qub</ctag></lex> + <lex><base>on</base><ctag>ppron3:sg:acc:n:ter:praep</ctag></lex> + <lex><base>on</base><ctag>ppron3:pl:acc:m2:ter:praep</ctag></lex> + <lex><base>on</base><ctag>ppron3:pl:acc:m3:ter:praep</ctag></lex> + <lex><base>on</base><ctag>ppron3:pl:acc:f:ter:praep</ctag></lex> + <lex><base>on</base><ctag>ppron3:pl:acc:n:ter:praep</ctag></lex> + </tok> + <tok> + <orth>bita</orth> + <lex><base>bity</base><ctag>adj:sg:nom:f:pos</ctag></lex> + <lex><base>bity</base><ctag>adj:sg:voc:f:pos</ctag></lex> + <lex><base>bić</base><ctag>ppas:sg:nom:f:imperf:aff</ctag></lex> + <lex><base>bić</base><ctag>ppas:sg:voc:f:imperf:aff</ctag></lex> + </tok> + <tok> + <orth>to</orth> + <lex><base>ten</base><ctag>adj:sg:nom:n:pos</ctag></lex> + <lex><base>ten</base><ctag>adj:sg:acc:n:pos</ctag></lex> + <lex><base>ten</base><ctag>adj:sg:voc:n:pos</ctag></lex> + <lex><base>to</base><ctag>conj</ctag></lex> + </tok> + <tok> + <orth>jak</orth> + <lex><base>jak</base><ctag>conj</ctag></lex> + <lex><base>jak</base><ctag>subst:sg:nom:m1</ctag></lex> + <lex><base>jak</base><ctag>subst:sg:nom:m2</ctag></lex> + <lex><base>jak</base><ctag>subst:sg:nom:m3</ctag></lex> + <lex><base>jaka</base><ctag>subst:pl:gen:f</ctag></lex> + </tok> + <tok> + <orth>kosa</orth> + <lex><base>kos</base><ctag>subst:sg:gen:m2</ctag></lex> + <lex><base>kos</base><ctag>subst:sg:acc:m2</ctag></lex> + <lex><base>kosa</base><ctag>subst:sg:nom:f</ctag></lex> + <lex><base>kosy</base><ctag>adj:sg:nom:f:pos</ctag></lex> + <lex><base>kosy</base><ctag>adj:sg:voc:f:pos</ctag></lex> + </tok> + <tok> + <orth>nie</orth> + <lex><base>nie</base><ctag>qub</ctag></lex> + <lex><base>on</base><ctag>ppron3:sg:acc:n:ter:praep</ctag></lex> + <lex><base>on</base><ctag>ppron3:pl:acc:m2:ter:praep</ctag></lex> + <lex><base>on</base><ctag>ppron3:pl:acc:m3:ter:praep</ctag></lex> + <lex><base>on</base><ctag>ppron3:pl:acc:f:ter:praep</ctag></lex> + <lex><base>on</base><ctag>ppron3:pl:acc:n:ter:praep</ctag></lex> + </tok> + <tok> + <orth>klepana</orth> + <lex><base>klepać</base><ctag>ppas:sg:nom:f:imperf:aff</ctag></lex> + <lex><base>klepać</base><ctag>ppas:sg:voc:f:imperf:aff</ctag></lex> + </tok> + <ns/> + <tok> + <orth>!</orth> + <lex><base>!</base><ctag>interp</ctag></lex> + </tok> + </chunk> + </chunk> +</chunkList> +</cesAna> diff --git a/tests/data/agreement/nadzieje.xml b/tests/data/agreement/nadzieje.xml new file mode 100644 index 0000000..c169402 --- /dev/null +++ b/tests/data/agreement/nadzieje.xml @@ -0,0 +1,96 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd"> +<cesAna xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" type="lex disamb"> +<chunkList> + <chunk> + <chunk type="s"> + <tok> + <orth>Nasze</orth> + <lex><base>nasz</base><ctag>adj:sg:nom:n:pos</ctag></lex> + <lex><base>nasz</base><ctag>adj:sg:acc:n:pos</ctag></lex> + <lex><base>nasz</base><ctag>adj:sg:voc:n:pos</ctag></lex> + <lex><base>nasz</base><ctag>adj:pl:nom:m2:pos</ctag></lex> + <lex><base>nasz</base><ctag>adj:pl:acc:m2:pos</ctag></lex> + <lex><base>nasz</base><ctag>adj:pl:voc:m2:pos</ctag></lex> + <lex><base>nasz</base><ctag>adj:pl:nom:m3:pos</ctag></lex> + <lex><base>nasz</base><ctag>adj:pl:acc:m3:pos</ctag></lex> + <lex><base>nasz</base><ctag>adj:pl:voc:m3:pos</ctag></lex> + <lex><base>nasz</base><ctag>adj:pl:nom:f:pos</ctag></lex> + <lex><base>nasz</base><ctag>adj:pl:acc:f:pos</ctag></lex> + <lex><base>nasz</base><ctag>adj:pl:voc:f:pos</ctag></lex> + <lex><base>nasz</base><ctag>adj:pl:nom:n:pos</ctag></lex> + <lex><base>nasz</base><ctag>adj:pl:acc:n:pos</ctag></lex> + <lex><base>nasz</base><ctag>adj:pl:voc:n:pos</ctag></lex> + </tok> + <tok> + <orth>nadzieje</orth> + <lex><base>nadziać</base><ctag>fin:sg:ter:perf</ctag></lex> + <lex><base>nadzieja</base><ctag>subst:pl:nom:f</ctag></lex> + <lex><base>nadzieja</base><ctag>subst:pl:acc:f</ctag></lex> + <lex><base>nadzieja</base><ctag>subst:pl:voc:f</ctag></lex> + </tok> + <tok> + <orth>wspaniałe</orth> + <lex><base>wspaniały</base><ctag>adj:sg:nom:n:pos</ctag></lex> + <lex><base>wspaniały</base><ctag>adj:sg:acc:n:pos</ctag></lex> + <lex><base>wspaniały</base><ctag>adj:sg:voc:n:pos</ctag></lex> + <lex><base>wspaniały</base><ctag>adj:pl:nom:m2:pos</ctag></lex> + <lex><base>wspaniały</base><ctag>adj:pl:acc:m2:pos</ctag></lex> + <lex><base>wspaniały</base><ctag>adj:pl:voc:m2:pos</ctag></lex> + <lex><base>wspaniały</base><ctag>adj:pl:nom:m3:pos</ctag></lex> + <lex><base>wspaniały</base><ctag>adj:pl:acc:m3:pos</ctag></lex> + <lex><base>wspaniały</base><ctag>adj:pl:voc:m3:pos</ctag></lex> + <lex><base>wspaniały</base><ctag>adj:pl:nom:f:pos</ctag></lex> + <lex><base>wspaniały</base><ctag>adj:pl:acc:f:pos</ctag></lex> + <lex><base>wspaniały</base><ctag>adj:pl:voc:f:pos</ctag></lex> + <lex><base>wspaniały</base><ctag>adj:pl:nom:n:pos</ctag></lex> + <lex><base>wspaniały</base><ctag>adj:pl:acc:n:pos</ctag></lex> + <lex><base>wspaniały</base><ctag>adj:pl:voc:n:pos</ctag></lex> + </tok> + <ns/> + <tok> + <orth>;</orth> + <lex><base>;</base><ctag>interp</ctag></lex> + </tok> + <tok> + <orth>ona</orth> + <lex><base>on</base><ctag>ppron3:sg:nom:f:ter</ctag></lex> + </tok> + <tok> + <orth>nadzieje</orth> + <lex><base>nadziać</base><ctag>fin:sg:ter:perf</ctag></lex> + <lex><base>nadzieja</base><ctag>subst:pl:nom:f</ctag></lex> + <lex><base>nadzieja</base><ctag>subst:pl:acc:f</ctag></lex> + <lex><base>nadzieja</base><ctag>subst:pl:voc:f</ctag></lex> + </tok> + <tok> + <orth>pierwsza</orth> + <lex><base>pierwsza</base><ctag>subst:sg:nom:f</ctag></lex> + <lex><base>pierwszy</base><ctag>adj:sg:nom:f:pos</ctag></lex> + <lex><base>pierwszy</base><ctag>adj:sg:voc:f:pos</ctag></lex> + </tok> + <ns/> + <tok> + <orth>;</orth> + <lex><base>;</base><ctag>interp</ctag></lex> + </tok> + <tok> + <orth>oni</orth> + <lex><base>on</base><ctag>ppron3:pl:nom:m1:ter</ctag></lex> + </tok> + <tok> + <orth>nadzieje</orth> + <lex><base>nadziać</base><ctag>fin:sg:ter:perf</ctag></lex> + <lex><base>nadzieja</base><ctag>subst:pl:nom:f</ctag></lex> + <lex><base>nadzieja</base><ctag>subst:pl:acc:f</ctag></lex> + <lex><base>nadzieja</base><ctag>subst:pl:voc:f</ctag></lex> + </tok> + <tok> + <orth>pierwsi</orth> + <lex><base>pierwszy</base><ctag>adj:pl:nom:m1:pos</ctag></lex> + <lex><base>pierwszy</base><ctag>adj:pl:voc:m1:pos</ctag></lex> + </tok> + </chunk> + </chunk> +</chunkList> +</cesAna> diff --git a/tests/data/agreement/wagr2.ccl b/tests/data/agreement/wagr2.ccl new file mode 100644 index 0000000..d5720f2 --- /dev/null +++ b/tests/data/agreement/wagr2.ccl @@ -0,0 +1,59 @@ +tagset=kipi +sentence=nadzieje.xml +0.Nasze:True 1.nadzieje:True 2.wspaniałe:False 3.;:False 4.ona:False 5.nadzieje:True 6.pierwsza:False 7.;:False 8.oni:False 9.nadzieje:False 10.pierwsi:False +--- +position=0 +wagr(-1,1,{nmb,gnd,cas}) + +True +--- +position=1 +wagr(-1,1,{nmb,gnd,cas}) + +True +--- +position=2 +wagr(-1,1,{nmb,gnd,cas}) + +False +--- +position=3 +wagr(-1,1,{nmb,gnd,cas}) + +False +--- +position=4 +wagr(-1,1,{nmb,gnd,cas}) + +False +--- +position=5 +wagr(-1,1,{nmb,gnd,cas}) + +True +--- +position=6 +wagr(-1,1,{nmb,gnd,cas}) + +False +--- +position=7 +wagr(-1,1,{nmb,gnd,cas}) + +False +--- +position=8 +wagr(-1,1,{nmb,gnd,cas}) + +False +--- +position=9 +wagr(-1,1,{nmb,gnd,cas}) + +False +--- +position=10 +wagr(-1,1,{nmb,gnd,cas}) + +False +--- diff --git a/tests/data/agreement/wagr3.ccl b/tests/data/agreement/wagr3.ccl new file mode 100644 index 0000000..d05c663 --- /dev/null +++ b/tests/data/agreement/wagr3.ccl @@ -0,0 +1,44 @@ +tagset=kipi +sentence=kobieta.xml +0.Kobieta:False 1.nie:True 2.bita:False 3.to:False 4.jak:False 5.kosa:False 6.nie:True 7.klepana:False +--- +position=0 +wagr(-1,1,{nmb,gnd,cas}) + +False +--- +position=1 +wagr(-1,1,{nmb,gnd,cas}) + +True +--- +position=2 +wagr(-1,1,{nmb,gnd,cas}) + +False +--- +position=3 +wagr(-1,1,{nmb,gnd,cas}) + +False +--- +position=4 +wagr(-1,1,{nmb,gnd,cas}) + +False +--- +position=5 +wagr(-1,1,{nmb,gnd,cas}) + +False +--- +position=6 +wagr(-1,1,{nmb,gnd,cas}) + +True +--- +position=7 +wagr(-1,1,{nmb,gnd,cas}) + +False +--- -- GitLab