diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 16f2979bedd2b9c0e204bdcb5df4881b0bf1c502..5627a1d717e78385d9270eb481bfedfd0c514e42 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,7 +1,17 @@ stages: + - test - check_style - build +test_python: + stage: test + image: docker:18.09.7 + services: + - docker:18.09.7-dind + script: + - docker build . -t wccl_test -f src/tests/docker/Dockerfile + - docker run --rm -v /tmp/test-out:/test/testdata/out wccl_test + build_image: stage: build image: docker:18.09.7 diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100644 index 0000000000000000000000000000000000000000..41ae417eb5a4236320cfb1942e821669479b2537 --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1,6 @@ +corpus-ccl +cclutils +pudb +pytest +pytest-pudb +wccl-annotator \ No newline at end of file diff --git a/src/libmwereader/mwereader.cpp b/src/libmwereader/mwereader.cpp index a0acbcb66a930d3bfe7393b7fc6331126538123d..013841d1e78ca8dd9f70702f65a30c9dc3df5fd4 100644 --- a/src/libmwereader/mwereader.cpp +++ b/src/libmwereader/mwereader.cpp @@ -155,7 +155,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( // move context position to next token after MWE elements int curr_position = sentence_ctx->get_position(); if (curr_position + all.size() < sentence->size()) { - sentence_ctx->set_position(sentence_ctx->get_position() + all.size()); + sentence_ctx->set_position(sentence_ctx->get_position() + all.size() - 1); } } @@ -171,6 +171,8 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( // TODO: pass annotated sentence to methods // AnnotatedSentencePtr ann_sentence = AnnotatedSentence::wrap_sentence(sentence); int annotation_number = 0; + // keeps position of last annotated token + int last_ann_pos = -1; SentenceContextPtr sc = boost::make_shared<Wccl::SentenceContext>(sentence); for (sc->goto_start(); sc->is_current_inside(); sc->advance()) @@ -184,6 +186,10 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( if(lex.is_disamb()){ std::string base = lex.lemma_utf8(); const MWEIndex::luvec& potential = mwe_index_.get_potential_lu(base); + // variables holding values for selected lu + LexicalUnit::Ptr sel_pLU; + std::set<int> sel_positions; + int sel_head; BOOST_FOREACH (LexicalUnit::Ptr pLU, potential) { bool ok = true; @@ -202,20 +208,45 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( int head; bool is_here = pLU->IsHere(sc, positions, head); if(is_here) { - if (annotate) { - add_mwe_channel( - sc, head, positions, - ++annotation_number, - pLU->get_base()); - } - else { - sc = clone_sentence_add_mwe( - sc, head, positions, - pLU->get_base()); + if (positions.size() > sel_positions.size()) { + sel_pLU = pLU; + sel_positions = positions; + sel_head = head; } } } } + if (sel_pLU) { + int first_curr_pos; + int last_curr_pos; + if(!sel_positions.empty()) + first_curr_pos = *sel_positions.begin(); + // annotate only if tokens have not been already + // anotated; Below check relies on first token and + // prevents from re-anotating the last token in the + // sentence (grom previous annotating) + if (first_curr_pos > last_ann_pos) { + if (annotate) { + add_mwe_channel( + sc, + sel_head, + sel_positions, + ++annotation_number, + sel_pLU->get_base() + ); + } + else { + sc = clone_sentence_add_mwe( + sc, + sel_head, + sel_positions, + sel_pLU->get_base() + ); + } + last_curr_pos = *sel_positions.rbegin(); + last_ann_pos = last_curr_pos; + } + } } } } diff --git a/src/tests/docker/Dockerfile b/src/tests/docker/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..efb5c582ccc096194f4ff155132d16880948af45 --- /dev/null +++ b/src/tests/docker/Dockerfile @@ -0,0 +1,50 @@ +FROM clarinpl/python:3.6 + +RUN apt-get update && apt-get install -y \ + libxml++2.6-dev \ + libloki-dev \ + libboost-all-dev \ + libicu-dev \ + libffi-dev \ + libssl-dev \ + libxml2-utils \ + cmake \ + swig \ + pwrutils \ + gdebi-core \ + antlr \ + libantlr-dev \ + default-jdk \ + git + +RUN mkdir -p /home/install +WORKDIR /home/install + +RUN bash -c "wget -q -O - http://apt.clarin-pl.eu/KEY.gpg | apt-key add -" +RUN bash -c "echo 'deb https://apt.clarin-pl.eu/ /' > /etc/apt/sources.list.d/clarin.list" + +# Install corpus2 and corpus2mwe +RUN apt-get update && apt-get install -y \ + corpus2-python3.6 \ + corpus2mwe-python3.6 + +# copy src +COPY ./ ./wccl + +# Install pip test dependencies +RUN pip install -r wccl/requirements-test.txt + +# copy tests +COPY ./src/tests/python-tests /test + +# Install wccl from this local repo +WORKDIR /home/install +RUN mkdir wccl/src/build && \ + cd wccl/src/build && \ + cmake .. && \ + make -j 8 && \ + make install && \ + ldconfig + +WORKDIR /test +CMD ["python", "-m", "pytest"] diff --git a/src/tests/python-tests/__init__.py b/src/tests/python-tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/tests/python-tests/ccl_ann_test.py b/src/tests/python-tests/ccl_ann_test.py new file mode 100644 index 0000000000000000000000000000000000000000..60fb862702e92d4b70b71fe49841f8727eae668f --- /dev/null +++ b/src/tests/python-tests/ccl_ann_test.py @@ -0,0 +1,177 @@ +from typing import Any, List, Tuple + +from corpus_ccl import token_utils as tokutils + + +class CclIndex(object): + """ + Class builds indices useful for convenient referencing tokens from CCL + document. + """ + + def __init__(self, ccl_doc: "corpus2.DocumentPtr"): + self._doc = ccl_doc + self._tok_idx, self._sent_idx = self._make_indices() + + def get_token(self, par_idx: int, sent_idx: int, tok_idx: int) -> "corpus2.Token": + """ + Returns specified token. Indices are counts of document entities. + Counting starts from 0 (e.g. get_token(0, 1, 2) will return third token from + second sentence, located in first paragraph). + """ + return self._tok_idx[par_idx][sent_idx][tok_idx] + + def get_sent_by_token_idx( + self, token_idx: Tuple[int, int, int] + ) -> "corpus2.Sentence": + return self._sent_idx[token_idx] + + def _make_indices(self) -> Tuple[dict, dict]: + """ + Generates nested dict of tokens from CCL document, with structure + corresponding to such document. Tokens can be retrieved from generated + dict using indices of paragraph, sentence and token. + Note: indexing starts from 0 (e.g. d[0][1][2] refers to third token from + second sentence, located in first paragraph). + + Args: + doc: CCL document + + Returns: + (dict, dict), where first is a nested dict containing tokens + and the second is an index of sentences referred by the tokens. + """ + d = {} + sents = {} + for pidx, p in enumerate(self._doc.paragraphs()): + pd = {} + for sidx, s in enumerate(p.sentences()): + sd = {} + for tidx, t in enumerate(s.tokens()): + sd[tidx] = t + sents[(pidx, sidx, tidx)] = s + pd[sidx] = sd + d[pidx] = pd + return d, sents + + +def assert_has_ann( + tokens_idx_list: List[Tuple[int, int, int]], + doc_idx: CclIndex, + ann_names: Tuple[str, str], + base_ann_value, +) -> bool: + """ + Main assertion method to check correctness of annotation to one or more + adjecent tokens. + + Checks whether one or more tokens have been annotated with certain + annotation. It verifies: + 1) whether base annotation value is specified for exactly one token (head) + and there is no other bases for given annotation channel, + 2) in case of more than one tokens: if they belongs to the same annotation + (if have same channel value, index) + """ + ann_chan_name, ann_chan_base_name = ann_names + has_base = False + found_bases = [] + for token_idx in tokens_idx_list: + found_base = get_prop_val( + doc_idx.get_token(*token_idx), + ann_chan_base_name, + ) + if found_base is not None: + found_bases.append(found_base) + if not found_bases: + toks_str = [doc_idx.get_token(*ti).orth_utf8() for ti in tokens_idx_list] + raise AssertionError( + f"None of specified tokens with ids={tokens_idx_list} ({toks_str}) " + f"contains requested base name '{base_ann_value}' " + f"for annotation channel '{ann_chan_base_name}'" + ) + if len(found_bases) > 1: + toks_str = [doc_idx.get_token(*ti).orth_utf8() for ti in tokens_idx_list] + raise AssertionError( + f"Found more than one base value in specified tokens with ids=" + f"{tokens_idx_list} ({toks_str}) for annotation channel '{ann_chan_base_name}':" + f"{found_bases}" + ) + if found_bases[0] != base_ann_value: + toks_str = [doc_idx.get_token(*ti).orth_utf8() for ti in tokens_idx_list] + raise AssertionError( + f"Found incorrect base value in specified tokens with ids=" + f"{tokens_idx_list} ({toks_str}) for annotation channel '{ann_chan_base_name}':" + f"'{found_bases[0]}' (expected: {base_ann_value})" + ) + + ann_chan_idx = 0 + for token_idx in tokens_idx_list: + tok = doc_idx.get_token(*token_idx) + curr_ann_chan_idx = get_chan_ann_id( + tok, + doc_idx.get_sent_by_token_idx(token_idx), + ann_chan_name, + ) + + if not curr_ann_chan_idx: + raise AssertionError( + f"token with id={token_idx} ('{tok.orth_utf8()}') does not have " + f"set annotation for annotation channel '{ann_chan_name}'" + ) + if not ann_chan_idx: + ann_chan_idx = curr_ann_chan_idx + else: + if ann_chan_idx != curr_ann_chan_idx: + raise AssertionError( + "Given tokens belongs to different annotation channels for " + f"annotation '{ann_chan_name}' (found channels: " + f"{ann_chan_idx} , {curr_ann_chan_idx})" + ) + + +def get_prop_val( + tok: "corpus2.Token", + key_name: str, +) -> Any: + """ + Returns value of property from given token. + """ + if tok.has_metadata(): + md = tok.get_metadata() + else: + return None + keys = md.attributes().keys() + val = md.get_attribute(key_name) + return val + + +def get_chan_ann_id(token: "corpus2.Token", sent: "corpus2.Sentence", ann) -> int: + """ + Returns value of channel annotation in sentence for given token. + If 0 is returned, then it mean that there is no annotation for given token. + """ + return tokutils.get_annotation(sent, token, ann, 0) + + +def expand_tok_idx(tokens_idx_def): + """ + Small function to generate list of identifiers based on shortened + description of tokens range. E.g. converts (0, 0, (1, 3)) into + [ + (0, 0, 1), + (0, 0, 2), + (0, 0, 3) + ]. + Notes: + - it includes both ends of a range! + - if third element represents a range of tokens, it must be a tuple. + """ + if isinstance(tokens_idx_def, tuple) and isinstance(tokens_idx_def[2], tuple): + par_idx, sent_idx, tok_def = tokens_idx_def + exp = [] + tok_first, tok_last = tok_def + for i in range(tok_first, tok_last + 1): + exp.append((par_idx, sent_idx, i)) + return exp + else: + return tokens_idx_def diff --git a/src/tests/python-tests/test_wccl.py b/src/tests/python-tests/test_wccl.py new file mode 100644 index 0000000000000000000000000000000000000000..4e88cfd8cfe91ceee027124177faa60014fe29b0 --- /dev/null +++ b/src/tests/python-tests/test_wccl.py @@ -0,0 +1,197 @@ +from collections import namedtuple +import sys +import os +from typing import List + +import pytest + +import cclutils as ccl +from wccl_annotator.wccl_annotator import WcclAnnotator + +import ccl_ann_test + + +TEST_ROOT_DIR = "/test/testdata" +TEST_OUT_DIR = "/test/testdata/out" +TEST_ANN_CHAN = "test_ann" +TEST_ANN_CHAN_BASE = "test_ann_base" +TEST_ANNS = (TEST_ANN_CHAN, TEST_ANN_CHAN_BASE) + +if not os.path.isdir(TEST_OUT_DIR): + os.mkdir(TEST_OUT_DIR) + +# description of annotation to detect; contains information about location +# in document, channel and expected base value +AnnCfg = namedtuple("AnnCfg", "tokens ann_names base_ann_value") + + +@pytest.mark.parametrize( + "ccl_file, wccl_dict, out_file, tagset, expected_anns", + [ + pytest.param( + f"{TEST_ROOT_DIR}/doc_01.xml", + f"{TEST_ROOT_DIR}/wccl_dict_01.xml", + f"{TEST_OUT_DIR}/doc_01_annotated.xml", + "nkjp", + [ + AnnCfg((0, 0, (0, 1)), TEST_ANNS, "plac zabaw"), + AnnCfg([(0, 0, 3)], TEST_ANNS, "tenis"), + AnnCfg((0, 0, (6, 7)), TEST_ANNS, "all inclusive"), + ], + id="basic-test-word-and-phrases", + ), + pytest.param( + f"{TEST_ROOT_DIR}/doc_02.xml", + f"{TEST_ROOT_DIR}/wccl_dict_02.xml", + f"{TEST_OUT_DIR}/doc_02_annotated.xml", + "nkjp", + [ + AnnCfg([(0, 0, 0)], TEST_ANNS, "ZO"), + AnnCfg((0, 0, (1, 2)), TEST_ANNS, "all inclusive"), + AnnCfg((0, 0, (4, 5)), TEST_ANNS, "plac zabaw"), + ], + id="adjacent-anns", + ), + pytest.param( + f"{TEST_ROOT_DIR}/doc_03.xml", + f"{TEST_ROOT_DIR}/wccl_dict_03a.xml", + f"{TEST_OUT_DIR}/doc_03a_annotated.xml", + "nkjp", + [ + AnnCfg((0, 0, (1, 2)), TEST_ANNS, "plac zabaw"), + ], + id="dict-terms-inclusion-longer-first", + ), + pytest.param( + f"{TEST_ROOT_DIR}/doc_03.xml", + f"{TEST_ROOT_DIR}/wccl_dict_03b.xml", + f"{TEST_OUT_DIR}/doc_03b_annotated.xml", + "nkjp", + [ + AnnCfg((0, 0, (1, 2)), TEST_ANNS, "plac zabaw"), + ], + id="dict-terms-inclusion-shorter-first", + ), + pytest.param( + f"{TEST_ROOT_DIR}/doc_03b.xml", + f"{TEST_ROOT_DIR}/wccl_dict_03a.xml", + f"{TEST_OUT_DIR}/doc_03a_annotated_wb.xml", + "nkjp", + [ + AnnCfg((0, 0, (1, 2)), TEST_ANNS, "plac zabaw"), + ], + id="dict-terms-inclusion-longer-first-and-adjecent-word-before", + ), + pytest.param( + f"{TEST_ROOT_DIR}/doc_03b.xml", + f"{TEST_ROOT_DIR}/wccl_dict_03b.xml", + f"{TEST_OUT_DIR}/doc_03b_annotated_wb.xml", + "nkjp", + [ + AnnCfg([(0, 0, 0)], TEST_ANNS, "plac"), + AnnCfg((0, 0, (1, 2)), TEST_ANNS, "plac zabaw"), + ], + id="dict-terms-inclusion-shorter-first-and-adjecent-word-before", + ), + pytest.param( + f"{TEST_ROOT_DIR}/doc_03c.xml", + f"{TEST_ROOT_DIR}/wccl_dict_03a.xml", + f"{TEST_OUT_DIR}/doc_03c_annotated_wa.xml", + "nkjp", + [ + AnnCfg((0, 0, (1, 2)), TEST_ANNS, "plac zabaw"), + ], + id="dict-terms-inclusion-longer-first-and-adjecent-word-after", + ), + pytest.param( + f"{TEST_ROOT_DIR}/doc_03c.xml", + f"{TEST_ROOT_DIR}/wccl_dict_03b.xml", + f"{TEST_OUT_DIR}/doc_03c_annotated_wa.xml", + "nkjp", + [ + AnnCfg((0, 0, (1, 2)), TEST_ANNS, "plac zabaw"), + ], + id="dict-terms-inclusion-shorter-first-and-adjecent-word-after", + ), + pytest.param( + f"{TEST_ROOT_DIR}/doc_03b.xml", + f"{TEST_ROOT_DIR}/wccl_dict_04a.xml", + f"{TEST_OUT_DIR}/doc_03b-04a_annotated.xml", + "nkjp", + [ + AnnCfg([(0, 0, 0)], TEST_ANNS, "plac"), + AnnCfg((0, 0, (1, 2)), TEST_ANNS, "plac zabaw"), + ], + id="dict-terms-double-inclusion-longer-first", + ), + pytest.param( + f"{TEST_ROOT_DIR}/doc_03b.xml", + f"{TEST_ROOT_DIR}/wccl_dict_04b.xml", + f"{TEST_OUT_DIR}/doc_03b-04b_annotated.xml", + "nkjp", + [ + AnnCfg([(0, 0, 0)], TEST_ANNS, "plac"), + AnnCfg((0, 0, (1, 2)), TEST_ANNS, "plac zabaw"), + ], + id="dict-terms-double-inclusion-shorter-first", + ), + pytest.param( + f"{TEST_ROOT_DIR}/doc_03e.xml", + f"{TEST_ROOT_DIR}/wccl_dict_04b.xml", + f"{TEST_OUT_DIR}/doc_03e-04b_annotated.xml", + "nkjp", + [ + AnnCfg([(0, 0, 0)], TEST_ANNS, "plac"), + AnnCfg((0, 0, (1, 2)), TEST_ANNS, "plac zabaw"), + ], + id="dict-terms-double-inclusion-shorter-first-both-words", + ), + pytest.param( + f"{TEST_ROOT_DIR}/doc_03d.xml", + f"{TEST_ROOT_DIR}/wccl_dict_04b.xml", + f"{TEST_OUT_DIR}/doc_03b-04b_annotated-alt.xml", + "nkjp", + [ + AnnCfg([(0, 0, 0)], TEST_ANNS, "plac"), + AnnCfg((0, 0, (1, 2)), TEST_ANNS, "plac zabaw"), + ], + id="dict-terms-double-inclusion-shorter-first-additional-last-token", + ), + pytest.param( + f"{TEST_ROOT_DIR}/doc_03b.xml", + f"{TEST_ROOT_DIR}/wccl_dict_04c.xml", + f"{TEST_OUT_DIR}/doc_03b-04c_annotated.xml", + "nkjp", + [ + AnnCfg([(0, 0, 0)], TEST_ANNS, "plac"), + AnnCfg((0, 0, (1, 2)), TEST_ANNS, "plac zabaw"), + ], + id="dict-terms-double-inclusion-shifted-head", + ), + ], +) +def test_wccl_annotations( + ccl_file: str, + wccl_dict: str, + out_file: str, + tagset: str, + expected_anns: List[AnnCfg], +): + WcclAnnotator().process( + ccl_file, + out_file, + annotation=TEST_ANN_CHAN, + wccl_dict=wccl_dict, + tagset=tagset, + ) + # WcclAnnotator stores annotated document, so we read it from file + doc = ccl.read(out_file) + doc_idx = ccl_ann_test.CclIndex(doc) + for exp in expected_anns: + tokens_indices = ccl_ann_test.expand_tok_idx(exp.tokens) + ccl_ann_test.assert_has_ann( + tokens_indices, + doc_idx, + exp.ann_names, + exp.base_ann_value, + ) diff --git a/src/tests/python-tests/testdata/doc_01.xml b/src/tests/python-tests/testdata/doc_01.xml new file mode 100644 index 0000000000000000000000000000000000000000..26917ba00a2d626434dde82bbce5f1e30add5252 --- /dev/null +++ b/src/tests/python-tests/testdata/doc_01.xml @@ -0,0 +1,40 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE chunkList SYSTEM "ccl.dtd"> +<chunkList> +<chunk id="1" type="p"> +<sentence id="1"> +<tok> +<orth>plac</orth> +<lex disamb="1"><base>plac</base><ctag>subst:sg:nom:m3</ctag></lex> +</tok> +<tok> +<orth>zabaw</orth> +<lex disamb="1"><base>zabawa</base><ctag>subst:pl:gen:f</ctag></lex> +</tok> +<tok> +<orth>i</orth> +<lex disamb="1"><base>i</base><ctag>conj</ctag></lex> +</tok> +<tok> +<orth>tenis</orth> +<lex disamb="1"><base>tenis</base><ctag>subst:sg:nom:m3</ctag></lex> +</tok> +<tok> +<orth>stołowy</orth> +<lex disamb="1"><base>stołowy</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<tok> +<orth>z</orth> +<lex disamb="1"><base>z</base><ctag>prep:gen:nwok</ctag></lex> +</tok> +<tok> +<orth>all</orth> +<lex disamb="1"><base>all</base><ctag>subst:sg:nom:m3</ctag></lex> +</tok> +<tok> +<orth>inclusive</orth> +<lex disamb="1"><base>inclusive</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +</sentence> +</chunk> +</chunkList> diff --git a/src/tests/python-tests/testdata/doc_01_annotated.xml b/src/tests/python-tests/testdata/doc_01_annotated.xml new file mode 100644 index 0000000000000000000000000000000000000000..35827bb6a4c027f03da5e80406528460bae7b7c8 --- /dev/null +++ b/src/tests/python-tests/testdata/doc_01_annotated.xml @@ -0,0 +1,51 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE chunkList SYSTEM "ccl.dtd"> +<chunkList> + <chunk id="1"> + <sentence id="1"> + <tok> + <orth>plac</orth> + <lex disamb="1"><base>plac</base><ctag>subst:sg:nom:m3</ctag></lex> + <ann chan="test_ann">1</ann> + <prop key="test_ann_base">plac zbaw</prop> + </tok> + <tok> + <orth>zabaw</orth> + <lex disamb="1"><base>zabawa</base><ctag>subst:pl:gen:f</ctag></lex> + <ann chan="test_ann">1</ann> + </tok> + <tok> + <orth>i</orth> + <lex disamb="1"><base>i</base><ctag>conj</ctag></lex> + <ann chan="test_ann">0</ann> + </tok> + <tok> + <orth>tenis</orth> + <lex disamb="1"><base>tenis</base><ctag>subst:sg:nom:m3</ctag></lex> + <ann chan="test_ann">2</ann> + <prop key="test_ann_base">tenis stołowy</prop> + </tok> + <tok> + <orth>stołowy</orth> + <lex disamb="1"><base>stołowy</base><ctag>adj:sg:nom:m3:pos</ctag></lex> + <ann chan="test_ann">2</ann> + </tok> + <tok> + <orth>z</orth> + <lex disamb="1"><base>z</base><ctag>prep:gen:nwok</ctag></lex> + <ann chan="test_ann">0</ann> + </tok> + <tok> + <orth>all</orth> + <lex disamb="1"><base>all</base><ctag>subst:sg:nom:m3</ctag></lex> + <ann chan="test_ann">3</ann> + <prop key="test_ann_base">all inclusive</prop> + </tok> + <tok> + <orth>inclusive</orth> + <lex disamb="1"><base>inclusive</base><ctag>adj:sg:nom:m3:pos</ctag></lex> + <ann chan="test_ann">3</ann> + </tok> + </sentence> + </chunk> +</chunkList> diff --git a/src/tests/python-tests/testdata/doc_02.xml b/src/tests/python-tests/testdata/doc_02.xml new file mode 100644 index 0000000000000000000000000000000000000000..ee01d6675a6763aa7de4d6b9df72d25ecf1a0158 --- /dev/null +++ b/src/tests/python-tests/testdata/doc_02.xml @@ -0,0 +1,32 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE chunkList SYSTEM "ccl.dtd"> +<chunkList> +<chunk id="1" type="p"> +<sentence id="1"> +<tok> +<orth>z</orth> +<lex disamb="1"><base>z</base><ctag>prep:gen:nwok</ctag></lex> +</tok> +<tok> +<orth>all</orth> +<lex disamb="1"><base>all</base><ctag>subst:sg:nom:m3</ctag></lex> +</tok> +<tok> +<orth>inclusive</orth> +<lex disamb="1"><base>inclusive</base><ctag>adj:sg:nom:m3:pos</ctag></lex> +</tok> +<tok> +<orth>i</orth> +<lex disamb="1"><base>i</base><ctag>conj</ctag></lex> +</tok> +<tok> +<orth>plac</orth> +<lex disamb="1"><base>plac</base><ctag>subst:sg:nom:m3</ctag></lex> +</tok> +<tok> +<orth>zabaw</orth> +<lex disamb="1"><base>zabawa</base><ctag>subst:pl:gen:f</ctag></lex> +</tok> +</sentence> +</chunk> +</chunkList> diff --git a/src/tests/python-tests/testdata/doc_03.xml b/src/tests/python-tests/testdata/doc_03.xml new file mode 100644 index 0000000000000000000000000000000000000000..230b0f8b7cbcff0f6e060f413200a4a0c07346e5 --- /dev/null +++ b/src/tests/python-tests/testdata/doc_03.xml @@ -0,0 +1,20 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE chunkList SYSTEM "ccl.dtd"> +<chunkList> +<chunk id="1" type="p"> +<sentence id="1"> +<tok> +<orth>z</orth> +<lex disamb="1"><base>z</base><ctag>prep:gen:nwok</ctag></lex> +</tok> +<tok> +<orth>placem</orth> +<lex disamb="1"><base>plac</base><ctag>subst:sg:nom:m3</ctag></lex> +</tok> +<tok> +<orth>zabaw</orth> +<lex disamb="1"><base>zabawa</base><ctag>subst:pl:gen:f</ctag></lex> +</tok> +</sentence> +</chunk> +</chunkList> diff --git a/src/tests/python-tests/testdata/doc_03b.xml b/src/tests/python-tests/testdata/doc_03b.xml new file mode 100644 index 0000000000000000000000000000000000000000..0cc5fdab2c8c15ead2b484c4b6295e679067d7d6 --- /dev/null +++ b/src/tests/python-tests/testdata/doc_03b.xml @@ -0,0 +1,20 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE chunkList SYSTEM "ccl.dtd"> +<chunkList> +<chunk id="1" type="p"> +<sentence id="1"> +<tok> +<orth>plac</orth> +<lex disamb="1"><base>plac</base><ctag>subst:sg:nom:m3</ctag></lex> +</tok> +<tok> +<orth>placem</orth> +<lex disamb="1"><base>plac</base><ctag>subst:sg:nom:m3</ctag></lex> +</tok> +<tok> +<orth>zabaw</orth> +<lex disamb="1"><base>zabawa</base><ctag>subst:pl:gen:f</ctag></lex> +</tok> +</sentence> +</chunk> +</chunkList> diff --git a/src/tests/python-tests/testdata/doc_03c.xml b/src/tests/python-tests/testdata/doc_03c.xml new file mode 100644 index 0000000000000000000000000000000000000000..54e7e3c084708c809de9855a64e30771349f3c18 --- /dev/null +++ b/src/tests/python-tests/testdata/doc_03c.xml @@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE chunkList SYSTEM "ccl.dtd"> +<chunkList> +<chunk id="1" type="p"> +<sentence id="1"> +<tok> +<orth>z</orth> +<lex disamb="1"><base>z</base><ctag>prep:gen:nwok</ctag></lex> +</tok> +<tok> +<orth>placem</orth> +<lex disamb="1"><base>plac</base><ctag>subst:sg:nom:m3</ctag></lex> +</tok> +<tok> +<orth>zabaw</orth> +<lex disamb="1"><base>zabawa</base><ctag>subst:pl:gen:f</ctag></lex> +</tok> +<tok> +<orth>plac</orth> +<lex disamb="1"><base>plac</base><ctag>subst:sg:nom:m3</ctag></lex> +</tok> +</sentence> +</chunk> +</chunkList> diff --git a/src/tests/python-tests/testdata/doc_03d.xml b/src/tests/python-tests/testdata/doc_03d.xml new file mode 100644 index 0000000000000000000000000000000000000000..ac667e69428875cc3d2532700433d22455ddc6a4 --- /dev/null +++ b/src/tests/python-tests/testdata/doc_03d.xml @@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE chunkList SYSTEM "ccl.dtd"> +<chunkList> +<chunk id="1" type="p"> +<sentence id="1"> +<tok> +<orth>plac</orth> +<lex disamb="1"><base>plac</base><ctag>subst:sg:nom:m3</ctag></lex> +</tok> +<tok> +<orth>placem</orth> +<lex disamb="1"><base>plac</base><ctag>subst:sg:nom:m3</ctag></lex> +</tok> +<tok> +<orth>zabaw</orth> +<lex disamb="1"><base>zabawa</base><ctag>subst:pl:gen:f</ctag></lex> +</tok> +<tok> +<orth>z</orth> +<lex disamb="1"><base>z</base><ctag>prep:gen:nwok</ctag></lex> +</tok> +</sentence> +</chunk> +</chunkList> diff --git a/src/tests/python-tests/testdata/doc_03e.xml b/src/tests/python-tests/testdata/doc_03e.xml new file mode 100644 index 0000000000000000000000000000000000000000..02d3953b8211a6c29fa1c9fb18b111e5397f9c88 --- /dev/null +++ b/src/tests/python-tests/testdata/doc_03e.xml @@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE chunkList SYSTEM "ccl.dtd"> +<chunkList> +<chunk id="1" type="p"> +<sentence id="1"> +<tok> +<orth>plac</orth> +<lex disamb="1"><base>plac</base><ctag>subst:sg:nom:m3</ctag></lex> +</tok> +<tok> +<orth>placem</orth> +<lex disamb="1"><base>plac</base><ctag>subst:sg:nom:m3</ctag></lex> +</tok> +<tok> +<orth>zabaw</orth> +<lex disamb="1"><base>zabawa</base><ctag>subst:pl:gen:f</ctag></lex> +</tok> +<tok> +<orth>zabawa</orth> +<lex disamb="1"><base>zabawa</base><ctag>subst:pl:gen:f</ctag></lex> +</tok> +</sentence> +</chunk> +</chunkList> diff --git a/src/tests/python-tests/testdata/wccl_dict_01.xml b/src/tests/python-tests/testdata/wccl_dict_01.xml new file mode 100644 index 0000000000000000000000000000000000000000..da59cb7bd8e5956fe36fa19153cb8ddc9e5fda95 --- /dev/null +++ b/src/tests/python-tests/testdata/wccl_dict_01.xml @@ -0,0 +1,103 @@ +<units_description tagset="nkjp"> + + <mwegroup class="neo" id="SubstSubst" name="SubstSubst" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + equal(base[1],$s:B), + inter(base[0],$s:A), inter(class[0],{subst}), + inter(base[1],$s:B), inter(class[1],{subst}), + setvar($Pos0, 0), + setvar($Pos1, 1), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="plac zabaw"> + <var name="A">plac</var> +<var name="B">zabawa</var> + </MWE> + + </instances> + + </mwegroup> + + <mwegroup class="neo" id="Subst" name="Subst" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + inter(base[0],$s:A), inter(class[0],{subst}), + setvar($Pos0, 0), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="tenis"> + <var name="A">tenis</var> + </MWE> + + </instances> + + </mwegroup> + + <mwegroup class="neo" id="SubstIgn" name="SubstIgn" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + equal(base[1],$s:B), + inter(base[0],$s:A), inter(class[0],{subst}), + inter(base[1],$s:B), inter(class[1],{adj}), + setvar($Pos0, 0), + setvar($Pos1, 1), + setvar($Head, 0) + ) + </condition> + + + <instances> + <MWE base="all inclusive"> + <var name="A">all</var> +<var name="B">inclusive</var> + </MWE> + + </instances> + + </mwegroup> + + <mwegroup class="neo" id="SubstInterpIgn" name="SubstInterpIgn" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + equal(base[1],$s:B), + equal(base[2],$s:C), + inter(base[0],$s:A), inter(class[0],{subst}), + inter(base[1],$s:B), inter(class[1],{interp}), + inter(base[2],$s:C), inter(class[2],{ign}), + setvar($Pos0, 0), + setvar($Pos1, 1), + setvar($Pos2, 2), + setvar($Head, 0) + ) + </condition> + + + <instances> + <MWE base="all - inclusive"> + <var name="A">all</var> +<var name="B">-</var> +<var name="C">inclusive</var> + </MWE> + </instances> +</mwegroup> + + </units_description> diff --git a/src/tests/python-tests/testdata/wccl_dict_02.xml b/src/tests/python-tests/testdata/wccl_dict_02.xml new file mode 100644 index 0000000000000000000000000000000000000000..e1623d7cc263fb605a991d2c325e077a1704aa13 --- /dev/null +++ b/src/tests/python-tests/testdata/wccl_dict_02.xml @@ -0,0 +1,127 @@ +<units_description tagset="nkjp"> + + <mwegroup class="neo" id="SubstAdj" name="SubstAdj" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + equal(base[1],$s:B), + inter(base[0],$s:A), inter(class[0],{subst}), + inter(base[1],$s:B), inter(class[1],{adj}), + setvar($Pos0, 0), + setvar($Pos1, 1), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="tenis stołowy"> + <var name="A">tenis</var> +<var name="B">stołowy</var> + </MWE> + + </instances> + + </mwegroup> + + <mwegroup class="neo" id="SubstSubst" name="SubstSubst" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + equal(base[1],$s:B), + inter(base[0],$s:A), inter(class[0],{subst}), + inter(base[1],$s:B), inter(class[1],{subst}), + setvar($Pos0, 0), + setvar($Pos1, 1), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="plac zabaw"> + <var name="A">plac</var> +<var name="B">zabawa</var> + </MWE> + + </instances> + + </mwegroup> + + <mwegroup class="neo" id="SubstIgn" name="SubstIgn" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + equal(base[1],$s:B), + inter(base[0],$s:A), inter(class[0],{subst}), + inter(base[1],$s:B), inter(class[1],{adj}), + setvar($Pos0, 0), + setvar($Pos1, 1), + setvar($Head, 0) + ) + </condition> + + + <instances> + <MWE base="all inclusive"> + <var name="A">all</var> +<var name="B">inclusive</var> + </MWE> + + </instances> + + </mwegroup> + + <mwegroup class="neo" id="SubstInterpIgn" name="SubstInterpIgn" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + equal(base[1],$s:B), + equal(base[2],$s:C), + inter(base[0],$s:A), inter(class[0],{subst}), + inter(base[1],$s:B), inter(class[1],{interp}), + inter(base[2],$s:C), inter(class[2],{ign}), + setvar($Pos0, 0), + setvar($Pos1, 1), + setvar($Pos2, 2), + setvar($Head, 0) + ) + </condition> + + + <instances> + <MWE base="all - inclusive"> + <var name="A">all</var> +<var name="B">-</var> +<var name="C">inclusive</var> + </MWE> + </instances> +</mwegroup> + <mwegroup class="neo" id="Prep" name="Prep" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + inter(base[0],$s:A), inter(class[0],{prep}), + setvar($Pos0, 0), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="ZO"> + <var name="A">z</var> + </MWE> + + </instances> + + </mwegroup> + </units_description> diff --git a/src/tests/python-tests/testdata/wccl_dict_03.xml b/src/tests/python-tests/testdata/wccl_dict_03.xml new file mode 100644 index 0000000000000000000000000000000000000000..32ca85416a393998822300b3987993c238fa1737 --- /dev/null +++ b/src/tests/python-tests/testdata/wccl_dict_03.xml @@ -0,0 +1,217 @@ +<units_description tagset="nkjp"> + + <mwegroup class="neo" id="SubstAdj" name="SubstAdj" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + equal(base[1],$s:B), + inter(base[0],$s:A), inter(class[0],{subst}), + inter(base[1],$s:B), inter(class[1],{adj}), + setvar($Pos0, 0), + setvar($Pos1, 1), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="tenis stołowy"> + <var name="A">tenis</var> +<var name="B">stołowy</var> + </MWE> + + </instances> + + </mwegroup> + + <mwegroup class="neo" id="SubstSubst" name="SubstSubst" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + equal(base[1],$s:B), + inter(base[0],$s:A), inter(class[0],{subst}), + inter(base[1],$s:B), inter(class[1],{subst}), + setvar($Pos0, 0), + setvar($Pos1, 1), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="plac zabaw"> + <var name="A">plac</var> +<var name="B">zabawa</var> + </MWE> + + </instances> + + </mwegroup> + + <mwegroup class="neo" id="Subst" name="Subst" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + inter(base[0],$s:A), inter(class[0],{subst}), + setvar($Pos0, 0), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="tenis"> + <var name="A">tenis</var> + </MWE> + + + <MWE base="plac"> + <var name="A">plac</var> + </MWE> + + </instances> + + </mwegroup> + + <mwegroup class="neo" id="SubstIgn" name="SubstIgn" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + equal(base[1],$s:B), + inter(base[0],$s:A), inter(class[0],{subst}), + inter(base[1],$s:B), inter(class[1],{adj}), + setvar($Pos0, 0), + setvar($Pos1, 1), + setvar($Head, 0) + ) + </condition> + + + <instances> + <MWE base="all inclusive"> + <var name="A">all</var> +<var name="B">inclusive</var> + </MWE> + + </instances> + + </mwegroup> + + + <mwegroup class="neo" id="Subst" name="Subst" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + inter(base[0],$s:A), inter(class[0],{subst}), + setvar($Pos0, 0), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="tenis"> + <var name="A">tenis</var> + </MWE> + + + <MWE base="plac"> + <var name="A">plac</var> + </MWE> + + </instances> + + </mwegroup> + + + <mwegroup class="neo" id="QubSubstInterpIgn" name="QubSubstInterpIgn" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + equal(base[1],$s:B), + equal(base[2],$s:C), + equal(base[3],$s:D), + inter(base[0],$s:A), inter(class[0],{qub}), + inter(base[1],$s:B), inter(class[1],{subst}), + inter(base[2],$s:C), inter(class[2],{interp}), + inter(base[3],$s:D), inter(class[3],{ign}), + setvar($Pos0, 0), + setvar($Pos1, 1), + setvar($Pos2, 2), + setvar($Pos3, 3), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="ultra all - inclusive"> + <var name="A">ultra</var> +<var name="B">all</var> +<var name="C">-</var> +<var name="D">inclusive</var> + </MWE> + + </instances> + + </mwegroup> + + <mwegroup class="neo" id="SubstInterpIgn" name="SubstInterpIgn" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + equal(base[1],$s:B), + equal(base[2],$s:C), + inter(base[0],$s:A), inter(class[0],{subst}), + inter(base[1],$s:B), inter(class[1],{interp}), + inter(base[2],$s:C), inter(class[2],{ign}), + setvar($Pos0, 0), + setvar($Pos1, 1), + setvar($Pos2, 2), + setvar($Head, 0) + ) + </condition> + + + <instances> + <MWE base="all - inclusive"> + <var name="A">all</var> +<var name="B">-</var> +<var name="C">inclusive</var> + </MWE> + </instances> +</mwegroup> + <mwegroup class="neo" id="Prep" name="Prep" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + inter(base[0],$s:A), inter(class[0],{prep}), + setvar($Pos0, 0), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="ZO"> + <var name="A">z</var> + </MWE> + + </instances> + + </mwegroup> + </units_description> diff --git a/src/tests/python-tests/testdata/wccl_dict_03a.xml b/src/tests/python-tests/testdata/wccl_dict_03a.xml new file mode 100644 index 0000000000000000000000000000000000000000..9cecfb3aacdd44db5d350d7637cb17170165fd4d --- /dev/null +++ b/src/tests/python-tests/testdata/wccl_dict_03a.xml @@ -0,0 +1,56 @@ +<units_description tagset="nkjp"> + + <mwegroup class="neo" id="SubstSubst" name="SubstSubst" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + equal(base[1],$s:B), + inter(base[0],$s:A), inter(class[0],{subst}), + inter(base[1],$s:B), inter(class[1],{subst}), + setvar($Pos0, 0), + setvar($Pos1, 1), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="plac zabaw"> + <var name="A">plac</var> +<var name="B">zabawa</var> + </MWE> + + </instances> + + </mwegroup> + + <mwegroup class="neo" id="Subst" name="Subst" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + inter(base[0],$s:A), inter(class[0],{subst}), + setvar($Pos0, 0), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="tenis"> + <var name="A">tenis</var> + </MWE> + + + <MWE base="plac"> + <var name="A">plac</var> + </MWE> + + </instances> + + </mwegroup> + + </units_description> diff --git a/src/tests/python-tests/testdata/wccl_dict_03b.xml b/src/tests/python-tests/testdata/wccl_dict_03b.xml new file mode 100644 index 0000000000000000000000000000000000000000..4a671bd890c736346427a248f6e56b99d8fda1ec --- /dev/null +++ b/src/tests/python-tests/testdata/wccl_dict_03b.xml @@ -0,0 +1,56 @@ +<units_description tagset="nkjp"> + + <mwegroup class="neo" id="Subst" name="Subst" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + inter(base[0],$s:A), inter(class[0],{subst}), + setvar($Pos0, 0), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="tenis"> + <var name="A">tenis</var> + </MWE> + + + <MWE base="plac"> + <var name="A">plac</var> + </MWE> + + </instances> + + </mwegroup> + + <mwegroup class="neo" id="SubstSubst" name="SubstSubst" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + equal(base[1],$s:B), + inter(base[0],$s:A), inter(class[0],{subst}), + inter(base[1],$s:B), inter(class[1],{subst}), + setvar($Pos0, 0), + setvar($Pos1, 1), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="plac zabaw"> + <var name="A">plac</var> +<var name="B">zabawa</var> + </MWE> + + </instances> + + </mwegroup> + + </units_description> diff --git a/src/tests/python-tests/testdata/wccl_dict_04a.xml b/src/tests/python-tests/testdata/wccl_dict_04a.xml new file mode 100644 index 0000000000000000000000000000000000000000..b1685d740b9d819a2c0e195a5d36f99cab8c3d47 --- /dev/null +++ b/src/tests/python-tests/testdata/wccl_dict_04a.xml @@ -0,0 +1,54 @@ +<units_description tagset="nkjp"> + + <mwegroup class="neo" id="SubstSubst" name="SubstSubst" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + equal(base[1],$s:B), + inter(base[0],$s:A), inter(class[0],{subst}), + inter(base[1],$s:B), inter(class[1],{subst}), + setvar($Pos0, 0), + setvar($Pos1, 1), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="plac zabaw"> + <var name="A">plac</var> +<var name="B">zabawa</var> + </MWE> + + </instances> + + </mwegroup> + + <mwegroup class="neo" id="Subst" name="Subst" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + inter(base[0],$s:A), inter(class[0],{subst}), + setvar($Pos0, 0), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="plac"> + <var name="A">plac</var> + </MWE> + + <MWE base="zabawa"> + <var name="A">zabawa</var> + </MWE> + </instances> + + </mwegroup> + + </units_description> diff --git a/src/tests/python-tests/testdata/wccl_dict_04b.xml b/src/tests/python-tests/testdata/wccl_dict_04b.xml new file mode 100644 index 0000000000000000000000000000000000000000..8015ddbde0ecd3fa0f990242fe17d059376ea752 --- /dev/null +++ b/src/tests/python-tests/testdata/wccl_dict_04b.xml @@ -0,0 +1,53 @@ +<units_description tagset="nkjp"> + <mwegroup class="neo" id="Subst" name="Subst" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + inter(base[0],$s:A), inter(class[0],{subst}), + setvar($Pos0, 0), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="plac"> + <var name="A">plac</var> + </MWE> + + <MWE base="zabawa"> + <var name="A">zabawa</var> + </MWE> + </instances> + + </mwegroup> + + <mwegroup class="neo" id="SubstSubst" name="SubstSubst" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + equal(base[1],$s:B), + inter(base[0],$s:A), inter(class[0],{subst}), + inter(base[1],$s:B), inter(class[1],{subst}), + setvar($Pos0, 0), + setvar($Pos1, 1), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="plac zabaw"> + <var name="A">plac</var> +<var name="B">zabawa</var> + </MWE> + + </instances> + + </mwegroup> + + </units_description> diff --git a/src/tests/python-tests/testdata/wccl_dict_04c.xml b/src/tests/python-tests/testdata/wccl_dict_04c.xml new file mode 100644 index 0000000000000000000000000000000000000000..e3cd50d96375a4f4cb329815dd0e982314ad1528 --- /dev/null +++ b/src/tests/python-tests/testdata/wccl_dict_04c.xml @@ -0,0 +1,53 @@ +<units_description tagset="nkjp"> + <mwegroup class="neo" id="Subst" name="Subst" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + inter(base[0],$s:A), inter(class[0],{subst}), + setvar($Pos0, 0), + setvar($Head, 0) + ) + </condition> + + + <instances> + + <MWE base="plac"> + <var name="A">plac</var> + </MWE> + + <MWE base="zabawa"> + <var name="A">zabawa</var> + </MWE> + </instances> + + </mwegroup> + + <mwegroup class="neo" id="SubstSubst" name="SubstSubst" type="fix"> + + <condition> + and( + equal(base[0],$s:A), + equal(base[1],$s:B), + inter(base[0],$s:A), inter(class[0],{subst}), + inter(base[1],$s:B), inter(class[1],{subst}), + setvar($Pos0, 0), + setvar($Pos1, 1), + setvar($Head, 1) + ) + </condition> + + + <instances> + + <MWE base="plac zabaw"> + <var name="A">plac</var> +<var name="B">zabawa</var> + </MWE> + + </instances> + + </mwegroup> + + </units_description>