diff --git a/swig/libcclexpression.i b/swig/libcclexpression.i index 128f7a9f0492a0a05defd21bab6d3d501b54e14b..be265e1634862447bf5c9bade01d516e400d90a9 100644 --- a/swig/libcclexpression.i +++ b/swig/libcclexpression.i @@ -7,7 +7,7 @@ #include <boost/noncopyable.hpp> %} -%include "libcorpustagset.i" +%include "libcorpus/libcorpustagset.i" %include "std_string.i" namespace Wccl { diff --git a/swig/libcclmatch.i b/swig/libcclmatch.i index 41ba586bd2fc24102b69d20a9bc29dc212d2081d..1e3d64bcfd0cbe5317cceb72e1768eed4c1bd8e1 100644 --- a/swig/libcclmatch.i +++ b/swig/libcclmatch.i @@ -7,12 +7,18 @@ #include <libwccl/values/match.h> %} -%include "libcclvalue.i" %include "std_string.i" +%include "libcclvalue.i" +%include "libcclposition.i" +// %include "libcclmatchdata.i" +// %include "libccltokenmatch.i" +%include "libcorpus/libcorpusannotatedsentence.i" %feature("notabstract") Wccl::Match; namespace Wccl { + // class MatchData; + class Match : public Value { public: const char* get_type_name() const { return type_name; } @@ -22,12 +28,27 @@ namespace Wccl { } Match(); + // Match(const boost::shared_ptr<MatchData>& data); + // Match(const boost::shared_ptr<TokenMatch>& data); + // Match(const boost::shared_ptr<AnnotationMatch>& data); + // Match(const boost::shared_ptr<MatchVector>& data); + + // Match(const MatchData& data); + // Match(const Match& match); + + // %rename(MatchEq) operator=(const Match& match); + // Match& operator=(const Match& match); + + // const MatchData& get_value() const; + // MatchData& get_value(); + + // void set_value(const MatchData& m); + // bool empty() const; - // TODO // virtual Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const; // virtual Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const; - std::string to_raw_string() const; + // std::string to_raw_string() const; }; } diff --git a/swig/libcclmatchdata.i b/swig/libcclmatchdata.i index 3d75e4fda3ab4246d001da2c057661bb2ca45652..a39b2be634b61aa59e10e6c02a581c82dd048071 100644 --- a/swig/libcclmatchdata.i +++ b/swig/libcclmatchdata.i @@ -11,7 +11,7 @@ %include "libcclposition.i" %include "libcorpus/libcorpusannotatedsentence.i" -// %template(MatchDataPtr) boost::shared_ptr<Wccl::MatchData>; +%template(MatchDataPtr) boost::shared_ptr<Wccl::MatchData>; namespace Wccl { class MatchData { diff --git a/swig/libcclparser.i b/swig/libcclparser.i index fc2284bbf6efe82dcca453060980e12062fce2c2..84deead5ea8c3c134e364665a020e78aba6dbb59 100644 --- a/swig/libcclparser.i +++ b/swig/libcclparser.i @@ -6,10 +6,13 @@ #include <libwccl/parser/Parser.h> %} -%include "libcorpustagsetmanager.i" +%include "libcclbool.i" +%include "libccloperator.i" +%include "libcorpus/libcorpustagsetmanager.i" %include "std_string.i" %include "std_vector.i" +%include "boost_shared_ptr.i" namespace Wccl { class Parser { @@ -17,10 +20,13 @@ namespace Wccl { Parser(const Corpus2::Tagset&); ~TagsetManager(); - /* --------------------------------------------------------------------- */ + + // %rename (SharedPtrBoolOperator) boost::shared_ptr<Operator<Bool> >; + shared_ptr<Operator<Bool> > parseBoolOperator(const std::string&) const; }; } +using namespace boost; using namespace std; using namespace Corpus2; diff --git a/swig/libcclsentencecontext.i b/swig/libcclsentencecontext.i index b1bb4459e9bbe96460fbf1037e28be6dabd136e0..e3202d8ac16261c70f06ed60126b3411dbfbe0d2 100644 --- a/swig/libcclsentencecontext.i +++ b/swig/libcclsentencecontext.i @@ -6,9 +6,9 @@ #include <libwccl/sentencecontext.h> %} -%include "libcorpustoken.i" -%include "libcorpuslexeme.i" -%include "libcorpussentence.i" +%include "libcorpus/libcorpustoken.i" +%include "libcorpus/libcorpuslexeme.i" +%include "libcorpus/libcorpussentence.i" %include "libcclposition.i" diff --git a/tests/rules-data/match/postcond2/cclmatch2.out.ccl b/tests/rules-data/match/postcond2/cclmatch2.out.xml similarity index 100% rename from tests/rules-data/match/postcond2/cclmatch2.out.ccl rename to tests/rules-data/match/postcond2/cclmatch2.out.xml diff --git a/tests/rules-data/match/postcond2/cclmatch5.ccl b/tests/rules-data/match/postcond2/cclmatch5.ccl new file mode 100644 index 0000000000000000000000000000000000000000..51ff9f06e57b7557a4c43747d7ee2cf593d56156 --- /dev/null +++ b/tests/rules-data/match/postcond2/cclmatch5.ccl @@ -0,0 +1,14 @@ +apply( + match( + repeat( + optional(equal(orth[0], "not:here")), + equal(class[0], adj), + optional(equal(orth[0], "not:there")) + ) + ), + // first(M) -> position + // regex… -> starting with w + cond(regex(orth[first(M)], "w.*")), + //cond(debug(orth[first(M)])), + actions(mark(M,"C")) +) diff --git a/tests/rules-data/match/postcond2/cclmatch5.out.xml b/tests/rules-data/match/postcond2/cclmatch5.out.xml new file mode 100644 index 0000000000000000000000000000000000000000..c45a8fc1dff12873863dbcd5c9b34c3404950b5d --- /dev/null +++ b/tests/rules-data/match/postcond2/cclmatch5.out.xml @@ -0,0 +1,80 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd"> +<cesAna xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" type="lex disamb"> +<chunkList> + <chunk> + <sentence> + <tok> + <orth>Dwa</orth> + <lex><base>dwa</base><ctag>other</ctag></lex> + <ann chan="A">1</ann> + <ann chan="B" head="1">1</ann> + <ann chan="C">0</ann> + </tok> + <tok> + <orth>wielkie</orth> + <lex><base>wielki</base><ctag>adj</ctag></lex> + <ann chan="A">1</ann> + <ann chan="B" head="1">2</ann> + <ann chan="C" head="1">1</ann> + </tok> + <tok> + <orth>włochate</orth> + <lex><base>włochaty</base><ctag>adj</ctag></lex> + <ann chan="A">1</ann> + <ann chan="B">2</ann> + <ann chan="C">1</ann> + </tok> + <tok> + <orth>zapchlone</orth> + <lex><base>zapchlić</base><ctag>adj</ctag></lex> + <ann chan="A">1</ann> + <ann chan="B">2</ann> + <ann chan="C">1</ann> + </tok> + <tok> + <orth>koty</orth> + <lex><base>kot</base><ctag>noun</ctag></lex> + <ann chan="A" head="1">1</ann> + <ann chan="B" head="1">3</ann> + <ann chan="C">0</ann> + </tok> + <tok> + <orth>zjadły</orth> + <lex><base>zjeść</base><ctag>verb</ctag></lex> + <ann chan="A">0</ann> + <ann chan="B" head="1">4</ann> + <ann chan="C">0</ann> + </tok> + <tok> + <orth>pięć</orth> + <lex><base>pięć</base><ctag>other</ctag></lex> + <ann chan="A" head="1">2</ann> + <ann chan="B">4</ann> + <ann chan="C">0</ann> + </tok> + <tok> + <orth>tłustych</orth> + <lex><base>tłusty</base><ctag>adj</ctag></lex> + <ann chan="A">2</ann> + <ann chan="B">4</ann> + <ann chan="C">0</ann> + </tok> + <tok> + <orth>soczystych</orth> + <lex><base>soczysty</base><ctag>adj</ctag></lex> + <ann chan="A">2</ann> + <ann chan="B">4</ann> + <ann chan="C">0</ann> + </tok> + <tok> + <orth>much</orth> + <lex><base>mucha</base><ctag>noun</ctag></lex> + <ann chan="A">2</ann> + <ann chan="B">0</ann> + <ann chan="C">0</ann> + </tok> + </sentence> + </chunk> +</chunkList> +</cesAna> diff --git a/tests/rules-data/match/postcond3/cclmatch.xml b/tests/rules-data/match/postcond3/cclmatch.xml new file mode 100644 index 0000000000000000000000000000000000000000..557855b7be41f2c0a94e87de4f5863267a13bb9c --- /dev/null +++ b/tests/rules-data/match/postcond3/cclmatch.xml @@ -0,0 +1,73 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd"> +<cesAna xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" type="lex disamb"> +<chunkList> + <chunk> + <sentence> + <tok> + <orth>Początek</orth> + <lex><base>początek</base><ctag>subst:sg:nom:m3</ctag></lex> + <lex><base>początek</base><ctag>subst:sg:acc:m3</ctag></lex> + </tok> + <tok> + <orth>nowego</orth> + <lex><base>nowy</base><ctag>adj:sg:gen:m1:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:acc:m1:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:gen:m2:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:acc:m2:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:gen:m3:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:gen:n:pos</ctag></lex> + </tok> + <tok> + <orth>zdania</orth> + <lex><base>zdanie</base><ctag>subst:sg:gen:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:nom:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:acc:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:voc:n</ctag></lex> + <lex><base>zdać</base><ctag>ger:sg:gen:n:perf:aff</ctag></lex> + </tok> + <ns/> + <tok> + <orth>.</orth> + <lex><base>.</base><ctag>interp</ctag></lex> + </tok> + </sentence> + <sentence> + <tok> + <orth>Nowy</orth> + <lex><base>nowy</base><ctag>adj:sg:nom:m1:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:nom:m2:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:nom:m3:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:acc:m3:pos</ctag></lex> + </tok> + <tok> + <orth>początek</orth> + <lex><base>początek</base><ctag>subst:sg:nom:m3</ctag></lex> + <lex><base>początek</base><ctag>subst:sg:acc:m3</ctag></lex> + </tok> + <tok> + <orth>starego</orth> + <lex><base>stary</base><ctag>adj:sg:gen:m1:pos</ctag></lex> + <lex><base>stary</base><ctag>adj:sg:acc:m1:pos</ctag></lex> + <lex><base>stary</base><ctag>adj:sg:gen:m2:pos</ctag></lex> + <lex><base>stary</base><ctag>adj:sg:acc:m2:pos</ctag></lex> + <lex><base>stary</base><ctag>adj:sg:gen:m3:pos</ctag></lex> + <lex><base>stary</base><ctag>adj:sg:gen:n:pos</ctag></lex> + </tok> + <tok> + <orth>zdania</orth> + <lex><base>zdanie</base><ctag>subst:sg:gen:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:nom:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:acc:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:voc:n</ctag></lex> + <lex><base>zdać</base><ctag>ger:sg:gen:n:perf:aff</ctag></lex> + </tok> + <ns/> + <tok> + <orth>.</orth> + <lex><base>.</base><ctag>interp</ctag></lex> + </tok> + </sentence> + </chunk> +</chunkList> +</cesAna> diff --git a/tests/rules-data/match/postcond3/cclmatch1.ccl b/tests/rules-data/match/postcond3/cclmatch1.ccl new file mode 100644 index 0000000000000000000000000000000000000000..e69473a7ba46067543fd0dc88e763e8850c536a6 --- /dev/null +++ b/tests/rules-data/match/postcond3/cclmatch1.ccl @@ -0,0 +1,12 @@ +apply( + match( + optional(repeat(inter(class[0], {adj}))), + repeat(inter(class[0], {subst})) + ), + cond( + not(empty(:1)) + ), + actions( + mark(M, "NP") + ) +) diff --git a/tests/rules-data/match/postcond3/cclmatch1.out.xml b/tests/rules-data/match/postcond3/cclmatch1.out.xml new file mode 100644 index 0000000000000000000000000000000000000000..13ba3c81fdf9512598894a1c43fae1e75593af1a --- /dev/null +++ b/tests/rules-data/match/postcond3/cclmatch1.out.xml @@ -0,0 +1,82 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd"> +<cesAna xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" type="lex disamb"> +<chunkList> + <chunk> + <sentence> + <tok> + <orth>Początek</orth> + <lex><base>początek</base><ctag>subst:sg:nom:m3</ctag></lex> + <lex><base>początek</base><ctag>subst:sg:acc:m3</ctag></lex> + <ann chan="NP">0</ann> + </tok> + <tok> + <orth>nowego</orth> + <lex><base>nowy</base><ctag>adj:sg:gen:m1:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:acc:m1:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:gen:m2:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:acc:m2:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:gen:m3:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:gen:n:pos</ctag></lex> + <ann chan="NP" head="1">1</ann> + </tok> + <tok> + <orth>zdania</orth> + <lex><base>zdanie</base><ctag>subst:sg:gen:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:nom:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:acc:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:voc:n</ctag></lex> + <lex><base>zdać</base><ctag>ger:sg:gen:n:perf:aff</ctag></lex> + <ann chan="NP">1</ann> + </tok> + <ns/> + <tok> + <orth>.</orth> + <lex><base>.</base><ctag>interp</ctag></lex> + <ann chan="NP">0</ann> + </tok> + </sentence> + <sentence> + <tok> + <orth>Nowy</orth> + <lex><base>nowy</base><ctag>adj:sg:nom:m1:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:nom:m2:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:nom:m3:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:acc:m3:pos</ctag></lex> + <ann chan="NP" head="1">1</ann> + </tok> + <tok> + <orth>początek</orth> + <lex><base>początek</base><ctag>subst:sg:nom:m3</ctag></lex> + <lex><base>początek</base><ctag>subst:sg:acc:m3</ctag></lex> + <ann chan="NP">1</ann> + </tok> + <tok> + <orth>starego</orth> + <lex><base>stary</base><ctag>adj:sg:gen:m1:pos</ctag></lex> + <lex><base>stary</base><ctag>adj:sg:acc:m1:pos</ctag></lex> + <lex><base>stary</base><ctag>adj:sg:gen:m2:pos</ctag></lex> + <lex><base>stary</base><ctag>adj:sg:acc:m2:pos</ctag></lex> + <lex><base>stary</base><ctag>adj:sg:gen:m3:pos</ctag></lex> + <lex><base>stary</base><ctag>adj:sg:gen:n:pos</ctag></lex> + <ann chan="NP" head="1">2</ann> + </tok> + <tok> + <orth>zdania</orth> + <lex><base>zdanie</base><ctag>subst:sg:gen:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:nom:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:acc:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:voc:n</ctag></lex> + <lex><base>zdać</base><ctag>ger:sg:gen:n:perf:aff</ctag></lex> + <ann chan="NP">2</ann> + </tok> + <ns/> + <tok> + <orth>.</orth> + <lex><base>.</base><ctag>interp</ctag></lex> + <ann chan="NP">0</ann> + </tok> + </sentence> + </chunk> +</chunkList> +</cesAna> diff --git a/tests/rules-data/match/postcond3/cclmatch2.ccl b/tests/rules-data/match/postcond3/cclmatch2.ccl new file mode 100644 index 0000000000000000000000000000000000000000..d8d9560b59da2f5dbb8a374db0f47d0296423f4d --- /dev/null +++ b/tests/rules-data/match/postcond3/cclmatch2.ccl @@ -0,0 +1,12 @@ +apply( + match( + optional(repeat(inter(class[0], {adj}))), + repeat(inter(class[0], {subst})) + ), + cond( + equal(orth[last(:2)], "zdania") + ), + actions( + mark(M, "NP") + ) +) diff --git a/tests/rules-data/match/postcond3/cclmatch2.out.xml b/tests/rules-data/match/postcond3/cclmatch2.out.xml new file mode 100644 index 0000000000000000000000000000000000000000..d23be4f7ca56f90645f68f25676326a3f02eb5cf --- /dev/null +++ b/tests/rules-data/match/postcond3/cclmatch2.out.xml @@ -0,0 +1,82 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd"> +<cesAna xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" type="lex disamb"> +<chunkList> + <chunk> + <sentence> + <tok> + <orth>Początek</orth> + <lex><base>początek</base><ctag>subst:sg:nom:m3</ctag></lex> + <lex><base>początek</base><ctag>subst:sg:acc:m3</ctag></lex> + <ann chan="NP">0</ann> + </tok> + <tok> + <orth>nowego</orth> + <lex><base>nowy</base><ctag>adj:sg:gen:m1:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:acc:m1:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:gen:m2:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:acc:m2:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:gen:m3:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:gen:n:pos</ctag></lex> + <ann chan="NP" head="1">1</ann> + </tok> + <tok> + <orth>zdania</orth> + <lex><base>zdanie</base><ctag>subst:sg:gen:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:nom:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:acc:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:voc:n</ctag></lex> + <lex><base>zdać</base><ctag>ger:sg:gen:n:perf:aff</ctag></lex> + <ann chan="NP">1</ann> + </tok> + <ns/> + <tok> + <orth>.</orth> + <lex><base>.</base><ctag>interp</ctag></lex> + <ann chan="NP">0</ann> + </tok> + </sentence> + <sentence> + <tok> + <orth>Nowy</orth> + <lex><base>nowy</base><ctag>adj:sg:nom:m1:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:nom:m2:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:nom:m3:pos</ctag></lex> + <lex><base>nowy</base><ctag>adj:sg:acc:m3:pos</ctag></lex> + <ann chan="NP">0</ann> + </tok> + <tok> + <orth>początek</orth> + <lex><base>początek</base><ctag>subst:sg:nom:m3</ctag></lex> + <lex><base>początek</base><ctag>subst:sg:acc:m3</ctag></lex> + <ann chan="NP">0</ann> + </tok> + <tok> + <orth>starego</orth> + <lex><base>stary</base><ctag>adj:sg:gen:m1:pos</ctag></lex> + <lex><base>stary</base><ctag>adj:sg:acc:m1:pos</ctag></lex> + <lex><base>stary</base><ctag>adj:sg:gen:m2:pos</ctag></lex> + <lex><base>stary</base><ctag>adj:sg:acc:m2:pos</ctag></lex> + <lex><base>stary</base><ctag>adj:sg:gen:m3:pos</ctag></lex> + <lex><base>stary</base><ctag>adj:sg:gen:n:pos</ctag></lex> + <ann chan="NP" head="1">1</ann> + </tok> + <tok> + <orth>zdania</orth> + <lex><base>zdanie</base><ctag>subst:sg:gen:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:nom:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:acc:n</ctag></lex> + <lex><base>zdanie</base><ctag>subst:pl:voc:n</ctag></lex> + <lex><base>zdać</base><ctag>ger:sg:gen:n:perf:aff</ctag></lex> + <ann chan="NP">1</ann> + </tok> + <ns/> + <tok> + <orth>.</orth> + <lex><base>.</base><ctag>interp</ctag></lex> + <ann chan="NP">0</ann> + </tok> + </sentence> + </chunk> +</chunkList> +</cesAna>