From 405a05dccd5d19f1e780869bebea0056125ae73d Mon Sep 17 00:00:00 2001
From: bmatysiak <bartosz.matysiak@pwr.edu.pl>
Date: Thu, 30 Mar 2023 14:23:23 +0200
Subject: [PATCH 1/6] Add support and tests for de, es, pt, fr, ru

---
 .gitignore                           |   1 +
 pos_tagger.yaml                      |  50 ++++++
 tests/test.py                        | 236 ++++++++++++++++++++++++++-
 tests/testdata/input/pos_tagger.yaml |  50 ++++++
 4 files changed, 333 insertions(+), 4 deletions(-)

diff --git a/.gitignore b/.gitignore
index 91d3885..f46ef3e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@
 *__pycache__
 htmlcov
 config-test.ini
+/tests/tmp-test.py
diff --git a/pos_tagger.yaml b/pos_tagger.yaml
index 4418310..cc7bf6c 100644
--- a/pos_tagger.yaml
+++ b/pos_tagger.yaml
@@ -9,6 +9,31 @@ taggers:
         lpmn: [{"spacy":{"lang":"en"}}]
         output: json
         tagset: ud
+    de:
+      default:
+        lpmn: [{"spacy":{"lang":"de"}}]
+        output: json
+        tagset: ud
+    es:
+      default:
+        lpmn: [{"spacy":{"lang":"es"}}]
+        output: json
+        tagset: ud
+    pt:
+      default:
+        lpmn: [{"spacy":{"lang":"pt"}}]
+        output: json
+        tagset: ud
+    fr:
+      default:
+        lpmn: [{"spacy":{"lang":"fr"}}]
+        output: json
+        tagset: ud
+    ru:
+      default:
+        lpmn: [{"spacy":{"lang":"ru"}}]
+        output: json
+        tagset: ud
 ners:
     pl:
       default:
@@ -20,3 +45,28 @@ ners:
         lpmn: [{"spacy":{"lang":"en", 'method': 'ner'}}]
         output: json
         tagset: ud
+    de:
+      default:
+        lpmn: [{"spacy":{"lang":"de", 'method': 'ner'}}]
+        output: json
+        tagset: ud
+    es:
+      default:
+        lpmn: [{"spacy":{"lang":"es", 'method': 'ner'}}]
+        output: json
+        tagset: ud
+    pt:
+      default:
+        lpmn: [{"spacy":{"lang":"pt", 'method': 'ner'}}]
+        output: json
+        tagset: ud
+    fr:
+      default:
+        lpmn: [{"spacy":{"lang":"fr", 'method': 'ner'}}]
+        output: json
+        tagset: ud
+    ru:
+      default:
+        lpmn: [{"spacy":{"lang":"ru", 'method': 'ner'}}]
+        output: json
+        tagset: ud
\ No newline at end of file
diff --git a/tests/test.py b/tests/test.py
index d79f410..174a0d0 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -22,7 +22,7 @@ def test_init():
     assert type(worker).__name__ == 'TaggerWorker'
 
 
-def test_base_process_file(mocker, worker, input_dir, input_file1,
+def test_base_process_file_en(mocker, worker, input_dir, input_file1,
                         output_dir, expected_dir):
     mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
     mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
@@ -34,14 +34,14 @@ def test_base_process_file(mocker, worker, input_dir, input_file1,
     )
     worker.process(
         os.path.join(input_dir, input_file1),
-        {}, os.path.join(output_dir, input_file1)
+        {"lang": "en"}, os.path.join(output_dir, input_file1)
     )
     assert cmp(os.path.join(output_dir, input_file1),
                os.path.join(expected_dir, input_file1))
     os.remove(os.path.join(output_dir, input_file1))
 
 
-def test_base_process_file_small_limit(mocker, worker_small, input_dir, input_file_small,
+def test_base_process_file_small_limit_en(mocker, worker_small, input_dir, input_file_small,
                         output_dir, expected_dir):
     mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
     mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
@@ -53,7 +53,235 @@ def test_base_process_file_small_limit(mocker, worker_small, input_dir, input_fi
     )
     worker_small.process(
         os.path.join(input_dir, input_file_small),
-        {}, os.path.join(output_dir, input_file_small)
+        {"lang": "en"}, os.path.join(output_dir, input_file_small)
+    )
+    assert cmp(os.path.join(output_dir, input_file_small),
+               os.path.join(expected_dir, input_file_small))
+    os.remove(os.path.join(output_dir, input_file_small))
+
+
+def test_base_process_file_pl(mocker, worker, input_dir, input_file1,
+                        output_dir, expected_dir):
+    mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
+    mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
+    mocker.patch('nlp_ws._subtask.SubTask.prepare_subtask', prepare_subtask)
+    mocker.patch('nlp_ws._worker.NLPWorker.update_progress')
+    SubTask.prepare_subtask(
+        {"q_in": ap.AioQueue(), "q_out": ap.AioQueue()},
+        os.getpid()
+    )
+    worker.process(
+        os.path.join(input_dir, input_file1),
+        {"lang": "pl"}, os.path.join(output_dir, input_file1)
+    )
+    assert cmp(os.path.join(output_dir, input_file1),
+               os.path.join(expected_dir, input_file1))
+    os.remove(os.path.join(output_dir, input_file1))
+
+
+def test_base_process_file_small_limit_pl(mocker, worker_small, input_dir, input_file_small,
+                        output_dir, expected_dir):
+    mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
+    mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
+    mocker.patch('nlp_ws._subtask.SubTask.prepare_subtask', prepare_subtask)
+    mocker.patch('nlp_ws._worker.NLPWorker.update_progress')
+    SubTask.prepare_subtask(
+        {"q_in": ap.AioQueue(), "q_out": ap.AioQueue()},
+        os.getpid()
+    )
+    worker_small.process(
+        os.path.join(input_dir, input_file_small),
+        {"lang": "pl"}, os.path.join(output_dir, input_file_small)
+    )
+    assert cmp(os.path.join(output_dir, input_file_small),
+               os.path.join(expected_dir, input_file_small))
+    os.remove(os.path.join(output_dir, input_file_small))
+
+
+def test_base_process_file_de(mocker, worker, input_dir, input_file1,
+                        output_dir, expected_dir):
+    mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
+    mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
+    mocker.patch('nlp_ws._subtask.SubTask.prepare_subtask', prepare_subtask)
+    mocker.patch('nlp_ws._worker.NLPWorker.update_progress')
+    SubTask.prepare_subtask(
+        {"q_in": ap.AioQueue(), "q_out": ap.AioQueue()},
+        os.getpid()
+    )
+    worker.process(
+        os.path.join(input_dir, input_file1),
+        {"lang": "de"}, os.path.join(output_dir, input_file1)
+    )
+    assert cmp(os.path.join(output_dir, input_file1),
+               os.path.join(expected_dir, input_file1))
+    os.remove(os.path.join(output_dir, input_file1))
+
+
+def test_base_process_file_small_limit_de(mocker, worker_small, input_dir, input_file_small,
+                        output_dir, expected_dir):
+    mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
+    mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
+    mocker.patch('nlp_ws._subtask.SubTask.prepare_subtask', prepare_subtask)
+    mocker.patch('nlp_ws._worker.NLPWorker.update_progress')
+    SubTask.prepare_subtask(
+        {"q_in": ap.AioQueue(), "q_out": ap.AioQueue()},
+        os.getpid()
+    )
+    worker_small.process(
+        os.path.join(input_dir, input_file_small),
+        {"lang": "de"}, os.path.join(output_dir, input_file_small)
+    )
+    assert cmp(os.path.join(output_dir, input_file_small),
+               os.path.join(expected_dir, input_file_small))
+    os.remove(os.path.join(output_dir, input_file_small))
+
+
+def test_base_process_file_es(mocker, worker, input_dir, input_file1,
+                        output_dir, expected_dir):
+    mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
+    mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
+    mocker.patch('nlp_ws._subtask.SubTask.prepare_subtask', prepare_subtask)
+    mocker.patch('nlp_ws._worker.NLPWorker.update_progress')
+    SubTask.prepare_subtask(
+        {"q_in": ap.AioQueue(), "q_out": ap.AioQueue()},
+        os.getpid()
+    )
+    worker.process(
+        os.path.join(input_dir, input_file1),
+        {"lang": "es"}, os.path.join(output_dir, input_file1)
+    )
+    assert cmp(os.path.join(output_dir, input_file1),
+               os.path.join(expected_dir, input_file1))
+    os.remove(os.path.join(output_dir, input_file1))
+
+
+def test_base_process_file_small_limit_es(mocker, worker_small, input_dir, input_file_small,
+                        output_dir, expected_dir):
+    mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
+    mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
+    mocker.patch('nlp_ws._subtask.SubTask.prepare_subtask', prepare_subtask)
+    mocker.patch('nlp_ws._worker.NLPWorker.update_progress')
+    SubTask.prepare_subtask(
+        {"q_in": ap.AioQueue(), "q_out": ap.AioQueue()},
+        os.getpid()
+    )
+    worker_small.process(
+        os.path.join(input_dir, input_file_small),
+        {"lang": "es"}, os.path.join(output_dir, input_file_small)
+    )
+    assert cmp(os.path.join(output_dir, input_file_small),
+               os.path.join(expected_dir, input_file_small))
+    os.remove(os.path.join(output_dir, input_file_small))
+
+
+def test_base_process_file_pt(mocker, worker, input_dir, input_file1,
+                        output_dir, expected_dir):
+    mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
+    mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
+    mocker.patch('nlp_ws._subtask.SubTask.prepare_subtask', prepare_subtask)
+    mocker.patch('nlp_ws._worker.NLPWorker.update_progress')
+    SubTask.prepare_subtask(
+        {"q_in": ap.AioQueue(), "q_out": ap.AioQueue()},
+        os.getpid()
+    )
+    worker.process(
+        os.path.join(input_dir, input_file1),
+        {"lang": "pt"}, os.path.join(output_dir, input_file1)
+    )
+    assert cmp(os.path.join(output_dir, input_file1),
+               os.path.join(expected_dir, input_file1))
+    os.remove(os.path.join(output_dir, input_file1))
+
+
+def test_base_process_file_small_limit_pt(mocker, worker_small, input_dir, input_file_small,
+                        output_dir, expected_dir):
+    mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
+    mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
+    mocker.patch('nlp_ws._subtask.SubTask.prepare_subtask', prepare_subtask)
+    mocker.patch('nlp_ws._worker.NLPWorker.update_progress')
+    SubTask.prepare_subtask(
+        {"q_in": ap.AioQueue(), "q_out": ap.AioQueue()},
+        os.getpid()
+    )
+    worker_small.process(
+        os.path.join(input_dir, input_file_small),
+        {"lang": "pt"}, os.path.join(output_dir, input_file_small)
+    )
+    assert cmp(os.path.join(output_dir, input_file_small),
+               os.path.join(expected_dir, input_file_small))
+    os.remove(os.path.join(output_dir, input_file_small))
+
+
+def test_base_process_file_fr(mocker, worker, input_dir, input_file1,
+                        output_dir, expected_dir):
+    mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
+    mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
+    mocker.patch('nlp_ws._subtask.SubTask.prepare_subtask', prepare_subtask)
+    mocker.patch('nlp_ws._worker.NLPWorker.update_progress')
+    SubTask.prepare_subtask(
+        {"q_in": ap.AioQueue(), "q_out": ap.AioQueue()},
+        os.getpid()
+    )
+    worker.process(
+        os.path.join(input_dir, input_file1),
+        {"lang": "fr"}, os.path.join(output_dir, input_file1)
+    )
+    assert cmp(os.path.join(output_dir, input_file1),
+               os.path.join(expected_dir, input_file1))
+    os.remove(os.path.join(output_dir, input_file1))
+
+
+def test_base_process_file_small_limit_fr(mocker, worker_small, input_dir, input_file_small,
+                        output_dir, expected_dir):
+    mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
+    mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
+    mocker.patch('nlp_ws._subtask.SubTask.prepare_subtask', prepare_subtask)
+    mocker.patch('nlp_ws._worker.NLPWorker.update_progress')
+    SubTask.prepare_subtask(
+        {"q_in": ap.AioQueue(), "q_out": ap.AioQueue()},
+        os.getpid()
+    )
+    worker_small.process(
+        os.path.join(input_dir, input_file_small),
+        {"lang": "fr"}, os.path.join(output_dir, input_file_small)
+    )
+    assert cmp(os.path.join(output_dir, input_file_small),
+               os.path.join(expected_dir, input_file_small))
+    os.remove(os.path.join(output_dir, input_file_small))
+
+
+def test_base_process_file_ru(mocker, worker, input_dir, input_file1,
+                        output_dir, expected_dir):
+    mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
+    mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
+    mocker.patch('nlp_ws._subtask.SubTask.prepare_subtask', prepare_subtask)
+    mocker.patch('nlp_ws._worker.NLPWorker.update_progress')
+    SubTask.prepare_subtask(
+        {"q_in": ap.AioQueue(), "q_out": ap.AioQueue()},
+        os.getpid()
+    )
+    worker.process(
+        os.path.join(input_dir, input_file1),
+        {"lang": "ru"}, os.path.join(output_dir, input_file1)
+    )
+    assert cmp(os.path.join(output_dir, input_file1),
+               os.path.join(expected_dir, input_file1))
+    os.remove(os.path.join(output_dir, input_file1))
+
+
+def test_base_process_file_small_limit_ru(mocker, worker_small, input_dir, input_file_small,
+                        output_dir, expected_dir):
+    mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
+    mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
+    mocker.patch('nlp_ws._subtask.SubTask.prepare_subtask', prepare_subtask)
+    mocker.patch('nlp_ws._worker.NLPWorker.update_progress')
+    SubTask.prepare_subtask(
+        {"q_in": ap.AioQueue(), "q_out": ap.AioQueue()},
+        os.getpid()
+    )
+    worker_small.process(
+        os.path.join(input_dir, input_file_small),
+        {"lang": "ru"}, os.path.join(output_dir, input_file_small)
     )
     assert cmp(os.path.join(output_dir, input_file_small),
                os.path.join(expected_dir, input_file_small))
diff --git a/tests/testdata/input/pos_tagger.yaml b/tests/testdata/input/pos_tagger.yaml
index 4418310..cc7bf6c 100644
--- a/tests/testdata/input/pos_tagger.yaml
+++ b/tests/testdata/input/pos_tagger.yaml
@@ -9,6 +9,31 @@ taggers:
         lpmn: [{"spacy":{"lang":"en"}}]
         output: json
         tagset: ud
+    de:
+      default:
+        lpmn: [{"spacy":{"lang":"de"}}]
+        output: json
+        tagset: ud
+    es:
+      default:
+        lpmn: [{"spacy":{"lang":"es"}}]
+        output: json
+        tagset: ud
+    pt:
+      default:
+        lpmn: [{"spacy":{"lang":"pt"}}]
+        output: json
+        tagset: ud
+    fr:
+      default:
+        lpmn: [{"spacy":{"lang":"fr"}}]
+        output: json
+        tagset: ud
+    ru:
+      default:
+        lpmn: [{"spacy":{"lang":"ru"}}]
+        output: json
+        tagset: ud
 ners:
     pl:
       default:
@@ -20,3 +45,28 @@ ners:
         lpmn: [{"spacy":{"lang":"en", 'method': 'ner'}}]
         output: json
         tagset: ud
+    de:
+      default:
+        lpmn: [{"spacy":{"lang":"de", 'method': 'ner'}}]
+        output: json
+        tagset: ud
+    es:
+      default:
+        lpmn: [{"spacy":{"lang":"es", 'method': 'ner'}}]
+        output: json
+        tagset: ud
+    pt:
+      default:
+        lpmn: [{"spacy":{"lang":"pt", 'method': 'ner'}}]
+        output: json
+        tagset: ud
+    fr:
+      default:
+        lpmn: [{"spacy":{"lang":"fr", 'method': 'ner'}}]
+        output: json
+        tagset: ud
+    ru:
+      default:
+        lpmn: [{"spacy":{"lang":"ru", 'method': 'ner'}}]
+        output: json
+        tagset: ud
\ No newline at end of file
-- 
GitLab


From 2ffb0e428c5ee119bbb9292bf893aa3e117749dc Mon Sep 17 00:00:00 2001
From: bmatysiak <bartosz.matysiak@pwr.edu.pl>
Date: Tue, 4 Apr 2023 10:41:04 +0200
Subject: [PATCH 2/6] Add debug print

---
 tests/test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test.py b/tests/test.py
index 174a0d0..3f0c5e9 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -14,6 +14,7 @@ def prepare_subtask(parameters: dict, process_id: int):
 
 
 def get_output_path(self, timeout=0):
+    print(self.task)
     return "tests/testdata/output/tmp-subtask-result"
 
 
-- 
GitLab


From ed23a6a5455e6df6235a483a02d57a417687838c Mon Sep 17 00:00:00 2001
From: bmatysiak <bartosz.matysiak@pwr.edu.pl>
Date: Tue, 4 Apr 2023 10:45:26 +0200
Subject: [PATCH 3/6] Add [popsute] keyword to tmp-subtask-result file

---
 tests/testdata/output/tmp-subtask-result | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testdata/output/tmp-subtask-result b/tests/testdata/output/tmp-subtask-result
index 1a1c303..433fd91 100644
--- a/tests/testdata/output/tmp-subtask-result
+++ b/tests/testdata/output/tmp-subtask-result
@@ -1 +1 @@
-foobar, baz
\ No newline at end of file
+foobar, baz[popsute]
\ No newline at end of file
-- 
GitLab


From 232f360b3fc279cef9f894ce73ad2b9bebb4366f Mon Sep 17 00:00:00 2001
From: bmatysiak <bartosz.matysiak@pwr.edu.pl>
Date: Tue, 4 Apr 2023 12:54:51 +0200
Subject: [PATCH 4/6] Add separate per-language input and expected files for tests

---
 tests/conftest.py                             |  60 +++++++
 tests/test.py                                 | 157 +++++++++---------
 tests/testdata/expected/post_spacy_input_de   |   1 +
 tests/testdata/expected/post_spacy_input_es   |   1 +
 tests/testdata/expected/post_spacy_input_fr   |   1 +
 tests/testdata/expected/post_spacy_input_pl   |   1 +
 tests/testdata/expected/post_spacy_input_pt   |   1 +
 tests/testdata/expected/post_spacy_input_ru   |   1 +
 .../expected/post_spacy_small_limit_input_de  |  15 ++
 .../expected/post_spacy_small_limit_input_es  |  15 ++
 .../expected/post_spacy_small_limit_input_fr  |  15 ++
 .../expected/post_spacy_small_limit_input_pl  |  15 ++
 .../expected/post_spacy_small_limit_input_pt  |  15 ++
 .../expected/post_spacy_small_limit_input_ru  |  15 ++
 tests/testdata/input/post_spacy_input_de      |   1 +
 tests/testdata/input/post_spacy_input_es      |   1 +
 tests/testdata/input/post_spacy_input_fr      |   1 +
 tests/testdata/input/post_spacy_input_pl      |   1 +
 tests/testdata/input/post_spacy_input_pt      |   1 +
 tests/testdata/input/post_spacy_input_ru      |   1 +
 .../input/post_spacy_small_limit_input_de     |   1 +
 .../input/post_spacy_small_limit_input_es     |   1 +
 .../input/post_spacy_small_limit_input_fr     |   1 +
 .../input/post_spacy_small_limit_input_pl     |   1 +
 .../input/post_spacy_small_limit_input_pt     |   1 +
 .../input/post_spacy_small_limit_input_ru     |   1 +
 tests/testdata/output/tmp-subtask-result      |   2 +-
 tests/testdata/output/tmp-subtask-result-de   |   1 +
 tests/testdata/output/tmp-subtask-result-es   |   1 +
 tests/testdata/output/tmp-subtask-result-fr   |   1 +
 tests/testdata/output/tmp-subtask-result-pl   |   1 +
 tests/testdata/output/tmp-subtask-result-pt   |   1 +
 tests/testdata/output/tmp-subtask-result-ru   |   1 +
 33 files changed, 258 insertions(+), 75 deletions(-)
 create mode 100644 tests/testdata/expected/post_spacy_input_de
 create mode 100644 tests/testdata/expected/post_spacy_input_es
 create mode 100644 tests/testdata/expected/post_spacy_input_fr
 create mode 100644 tests/testdata/expected/post_spacy_input_pl
 create mode 100644 tests/testdata/expected/post_spacy_input_pt
 create mode 100644 tests/testdata/expected/post_spacy_input_ru
 create mode 100644 tests/testdata/expected/post_spacy_small_limit_input_de
 create mode 100644 tests/testdata/expected/post_spacy_small_limit_input_es
 create mode 100644 tests/testdata/expected/post_spacy_small_limit_input_fr
 create mode 100644 tests/testdata/expected/post_spacy_small_limit_input_pl
 create mode 100644 tests/testdata/expected/post_spacy_small_limit_input_pt
 create mode 100644 tests/testdata/expected/post_spacy_small_limit_input_ru
 create mode 100644 tests/testdata/input/post_spacy_input_de
 create mode 100644 tests/testdata/input/post_spacy_input_es
 create mode 100644 tests/testdata/input/post_spacy_input_fr
 create mode 100644 tests/testdata/input/post_spacy_input_pl
 create mode 100644 tests/testdata/input/post_spacy_input_pt
 create mode 100644 tests/testdata/input/post_spacy_input_ru
 create mode 100644 tests/testdata/input/post_spacy_small_limit_input_de
 create mode 100644 tests/testdata/input/post_spacy_small_limit_input_es
 create mode 100644 tests/testdata/input/post_spacy_small_limit_input_fr
 create mode 100644 tests/testdata/input/post_spacy_small_limit_input_pl
 create mode 100644 tests/testdata/input/post_spacy_small_limit_input_pt
 create mode 100644 tests/testdata/input/post_spacy_small_limit_input_ru
 create mode 100644 tests/testdata/output/tmp-subtask-result-de
 create mode 100644 tests/testdata/output/tmp-subtask-result-es
 create mode 100644 tests/testdata/output/tmp-subtask-result-fr
 create mode 100644 tests/testdata/output/tmp-subtask-result-pl
 create mode 100644 tests/testdata/output/tmp-subtask-result-pt
 create mode 100644 tests/testdata/output/tmp-subtask-result-ru

diff --git a/tests/conftest.py b/tests/conftest.py
index e19c946..22ea48e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -34,6 +34,66 @@ def input_file_small():
     return 'post_spacy_small_limit_input'
 
 
+@pytest.fixture
+def input_file1_pl():
+    return 'post_spacy_input_pl'
+
+
+@pytest.fixture
+def input_file_small_pl():
+    return 'post_spacy_small_limit_input_pl'
+
+
+@pytest.fixture
+def input_file1_de():
+    return 'post_spacy_input_de'
+
+
+@pytest.fixture
+def input_file_small_de():
+    return 'post_spacy_small_limit_input_de'
+
+
+@pytest.fixture
+def input_file1_es():
+    return 'post_spacy_input_es'
+
+
+@pytest.fixture
+def input_file_small_es():
+    return 'post_spacy_small_limit_input_es'
+
+
+@pytest.fixture
+def input_file1_pt():
+    return 'post_spacy_input_pt'
+
+
+@pytest.fixture
+def input_file_small_pt():
+    return 'post_spacy_small_limit_input_pt'
+
+
+@pytest.fixture
+def input_file1_fr():
+    return 'post_spacy_input_fr'
+
+
+@pytest.fixture
+def input_file_small_fr():
+    return 'post_spacy_small_limit_input_fr'
+
+
+@pytest.fixture
+def input_file1_ru():
+    return 'post_spacy_input_ru'
+
+
+@pytest.fixture
+def input_file_small_ru():
+    return 'post_spacy_small_limit_input_ru'
+
+
 @pytest.fixture
 def input_dir2():
     return 'input_dir2'
diff --git a/tests/test.py b/tests/test.py
index 3f0c5e9..53c9582 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -14,8 +14,17 @@ def prepare_subtask(parameters: dict, process_id: int):
 
 
 def get_output_path(self, timeout=0):
-    print(self.task)
-    return "tests/testdata/output/tmp-subtask-result"
+    tmp_subtask_result_file = {
+        "en": "tests/testdata/output/tmp-subtask-result",
+        "pl": "tests/testdata/output/tmp-subtask-result-pl",
+        "de": "tests/testdata/output/tmp-subtask-result-de",
+        "es": "tests/testdata/output/tmp-subtask-result-es",
+        "pt": "tests/testdata/output/tmp-subtask-result-pt",
+        "fr": "tests/testdata/output/tmp-subtask-result-fr",
+        "ru": "tests/testdata/output/tmp-subtask-result-ru",
+    }
+    dict_key = self.task[0].get("spacy", {"lang": "pl"})["lang"]
+    return tmp_subtask_result_file[dict_key]
 
 
 def test_init():
@@ -61,7 +70,7 @@ def test_base_process_file_small_limit_en(mocker, worker_small, input_dir, input
     os.remove(os.path.join(output_dir, input_file_small))
 
 
-def test_base_process_file_pl(mocker, worker, input_dir, input_file1,
+def test_base_process_file_pl(mocker, worker, input_dir, input_file1_pl,
                         output_dir, expected_dir):
     mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
     mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
@@ -72,15 +81,15 @@ def test_base_process_file_pl(mocker, worker, input_dir, input_file1,
         os.getpid()
     )
     worker.process(
-        os.path.join(input_dir, input_file1),
-        {"lang": "pl"}, os.path.join(output_dir, input_file1)
+        os.path.join(input_dir, input_file1_pl),
+        {"lang": "pl"}, os.path.join(output_dir, input_file1_pl)
     )
-    assert cmp(os.path.join(output_dir, input_file1),
-               os.path.join(expected_dir, input_file1))
-    os.remove(os.path.join(output_dir, input_file1))
+    assert cmp(os.path.join(output_dir, input_file1_pl),
+               os.path.join(expected_dir, input_file1_pl))
+    os.remove(os.path.join(output_dir, input_file1_pl))
 
 
-def test_base_process_file_small_limit_pl(mocker, worker_small, input_dir, input_file_small,
+def test_base_process_file_small_limit_pl(mocker, worker_small, input_dir, input_file_small_pl,
                         output_dir, expected_dir):
     mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
     mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
@@ -91,15 +100,15 @@ def test_base_process_file_small_limit_pl(mocker, worker_small, input_dir, input
         os.getpid()
     )
     worker_small.process(
-        os.path.join(input_dir, input_file_small),
-        {"lang": "pl"}, os.path.join(output_dir, input_file_small)
+        os.path.join(input_dir, input_file_small_pl),
+        {"lang": "pl"}, os.path.join(output_dir, input_file_small_pl)
     )
-    assert cmp(os.path.join(output_dir, input_file_small),
-               os.path.join(expected_dir, input_file_small))
-    os.remove(os.path.join(output_dir, input_file_small))
+    assert cmp(os.path.join(output_dir, input_file_small_pl),
+               os.path.join(expected_dir, input_file_small_pl))
+    os.remove(os.path.join(output_dir, input_file_small_pl))
 
 
-def test_base_process_file_de(mocker, worker, input_dir, input_file1,
+def test_base_process_file_de(mocker, worker, input_dir, input_file1_de,
                         output_dir, expected_dir):
     mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
     mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
@@ -110,15 +119,15 @@ def test_base_process_file_de(mocker, worker, input_dir, input_file1,
         os.getpid()
     )
     worker.process(
-        os.path.join(input_dir, input_file1),
-        {"lang": "de"}, os.path.join(output_dir, input_file1)
+        os.path.join(input_dir, input_file1_de),
+        {"lang": "de"}, os.path.join(output_dir, input_file1_de)
     )
-    assert cmp(os.path.join(output_dir, input_file1),
-               os.path.join(expected_dir, input_file1))
-    os.remove(os.path.join(output_dir, input_file1))
+    assert cmp(os.path.join(output_dir, input_file1_de),
+               os.path.join(expected_dir, input_file1_de))
+    os.remove(os.path.join(output_dir, input_file1_de))
 
 
-def test_base_process_file_small_limit_de(mocker, worker_small, input_dir, input_file_small,
+def test_base_process_file_small_limit_de(mocker, worker_small, input_dir, input_file_small_de,
                         output_dir, expected_dir):
     mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
     mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
@@ -129,15 +138,15 @@ def test_base_process_file_small_limit_de(mocker, worker_small, input_dir, input
         os.getpid()
     )
     worker_small.process(
-        os.path.join(input_dir, input_file_small),
-        {"lang": "de"}, os.path.join(output_dir, input_file_small)
+        os.path.join(input_dir, input_file_small_de),
+        {"lang": "de"}, os.path.join(output_dir, input_file_small_de)
     )
-    assert cmp(os.path.join(output_dir, input_file_small),
-               os.path.join(expected_dir, input_file_small))
-    os.remove(os.path.join(output_dir, input_file_small))
+    assert cmp(os.path.join(output_dir, input_file_small_de),
+               os.path.join(expected_dir, input_file_small_de))
+    os.remove(os.path.join(output_dir, input_file_small_de))
 
 
-def test_base_process_file_es(mocker, worker, input_dir, input_file1,
+def test_base_process_file_es(mocker, worker, input_dir, input_file1_es,
                         output_dir, expected_dir):
     mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
     mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
@@ -148,15 +157,15 @@ def test_base_process_file_es(mocker, worker, input_dir, input_file1,
         os.getpid()
     )
     worker.process(
-        os.path.join(input_dir, input_file1),
-        {"lang": "es"}, os.path.join(output_dir, input_file1)
+        os.path.join(input_dir, input_file1_es),
+        {"lang": "es"}, os.path.join(output_dir, input_file1_es)
     )
-    assert cmp(os.path.join(output_dir, input_file1),
-               os.path.join(expected_dir, input_file1))
-    os.remove(os.path.join(output_dir, input_file1))
+    assert cmp(os.path.join(output_dir, input_file1_es),
+               os.path.join(expected_dir, input_file1_es))
+    os.remove(os.path.join(output_dir, input_file1_es))
 
 
-def test_base_process_file_small_limit_es(mocker, worker_small, input_dir, input_file_small,
+def test_base_process_file_small_limit_es(mocker, worker_small, input_dir, input_file_small_es,
                         output_dir, expected_dir):
     mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
     mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
@@ -167,15 +176,15 @@ def test_base_process_file_small_limit_es(mocker, worker_small, input_dir, input
         os.getpid()
     )
     worker_small.process(
-        os.path.join(input_dir, input_file_small),
-        {"lang": "es"}, os.path.join(output_dir, input_file_small)
+        os.path.join(input_dir, input_file_small_es),
+        {"lang": "es"}, os.path.join(output_dir, input_file_small_es)
     )
-    assert cmp(os.path.join(output_dir, input_file_small),
-               os.path.join(expected_dir, input_file_small))
-    os.remove(os.path.join(output_dir, input_file_small))
+    assert cmp(os.path.join(output_dir, input_file_small_es),
+               os.path.join(expected_dir, input_file_small_es))
+    os.remove(os.path.join(output_dir, input_file_small_es))
 
 
-def test_base_process_file_pt(mocker, worker, input_dir, input_file1,
+def test_base_process_file_pt(mocker, worker, input_dir, input_file1_pt,
                         output_dir, expected_dir):
     mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
     mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
@@ -186,15 +195,15 @@ def test_base_process_file_pt(mocker, worker, input_dir, input_file1,
         os.getpid()
     )
     worker.process(
-        os.path.join(input_dir, input_file1),
-        {"lang": "pt"}, os.path.join(output_dir, input_file1)
+        os.path.join(input_dir, input_file1_pt),
+        {"lang": "pt"}, os.path.join(output_dir, input_file1_pt)
     )
-    assert cmp(os.path.join(output_dir, input_file1),
-               os.path.join(expected_dir, input_file1))
-    os.remove(os.path.join(output_dir, input_file1))
+    assert cmp(os.path.join(output_dir, input_file1_pt),
+               os.path.join(expected_dir, input_file1_pt))
+    os.remove(os.path.join(output_dir, input_file1_pt))
 
 
-def test_base_process_file_small_limit_pt(mocker, worker_small, input_dir, input_file_small,
+def test_base_process_file_small_limit_pt(mocker, worker_small, input_dir, input_file_small_pt,
                         output_dir, expected_dir):
     mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
     mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
@@ -205,15 +214,15 @@ def test_base_process_file_small_limit_pt(mocker, worker_small, input_dir, input
         os.getpid()
     )
     worker_small.process(
-        os.path.join(input_dir, input_file_small),
-        {"lang": "pt"}, os.path.join(output_dir, input_file_small)
+        os.path.join(input_dir, input_file_small_pt),
+        {"lang": "pt"}, os.path.join(output_dir, input_file_small_pt)
     )
-    assert cmp(os.path.join(output_dir, input_file_small),
-               os.path.join(expected_dir, input_file_small))
-    os.remove(os.path.join(output_dir, input_file_small))
+    assert cmp(os.path.join(output_dir, input_file_small_pt),
+               os.path.join(expected_dir, input_file_small_pt))
+    os.remove(os.path.join(output_dir, input_file_small_pt))
 
 
-def test_base_process_file_fr(mocker, worker, input_dir, input_file1,
+def test_base_process_file_fr(mocker, worker, input_dir, input_file1_fr,
                         output_dir, expected_dir):
     mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
     mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
@@ -224,15 +233,15 @@ def test_base_process_file_fr(mocker, worker, input_dir, input_file1,
         os.getpid()
     )
     worker.process(
-        os.path.join(input_dir, input_file1),
-        {"lang": "fr"}, os.path.join(output_dir, input_file1)
+        os.path.join(input_dir, input_file1_fr),
+        {"lang": "fr"}, os.path.join(output_dir, input_file1_fr)
     )
-    assert cmp(os.path.join(output_dir, input_file1),
-               os.path.join(expected_dir, input_file1))
-    os.remove(os.path.join(output_dir, input_file1))
+    assert cmp(os.path.join(output_dir, input_file1_fr),
+               os.path.join(expected_dir, input_file1_fr))
+    os.remove(os.path.join(output_dir, input_file1_fr))
 
 
-def test_base_process_file_small_limit_fr(mocker, worker_small, input_dir, input_file_small,
+def test_base_process_file_small_limit_fr(mocker, worker_small, input_dir, input_file_small_fr,
                         output_dir, expected_dir):
     mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
     mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
@@ -243,15 +252,15 @@ def test_base_process_file_small_limit_fr(mocker, worker_small, input_dir, input
         os.getpid()
     )
     worker_small.process(
-        os.path.join(input_dir, input_file_small),
-        {"lang": "fr"}, os.path.join(output_dir, input_file_small)
+        os.path.join(input_dir, input_file_small_fr),
+        {"lang": "fr"}, os.path.join(output_dir, input_file_small_fr)
     )
-    assert cmp(os.path.join(output_dir, input_file_small),
-               os.path.join(expected_dir, input_file_small))
-    os.remove(os.path.join(output_dir, input_file_small))
+    assert cmp(os.path.join(output_dir, input_file_small_fr),
+               os.path.join(expected_dir, input_file_small_fr))
+    os.remove(os.path.join(output_dir, input_file_small_fr))
 
 
-def test_base_process_file_ru(mocker, worker, input_dir, input_file1,
+def test_base_process_file_ru(mocker, worker, input_dir, input_file1_ru,
                         output_dir, expected_dir):
     mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
     mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
@@ -262,15 +271,15 @@ def test_base_process_file_ru(mocker, worker, input_dir, input_file1,
         os.getpid()
     )
     worker.process(
-        os.path.join(input_dir, input_file1),
-        {"lang": "ru"}, os.path.join(output_dir, input_file1)
+        os.path.join(input_dir, input_file1_ru),
+        {"lang": "ru"}, os.path.join(output_dir, input_file1_ru)
     )
-    assert cmp(os.path.join(output_dir, input_file1),
-               os.path.join(expected_dir, input_file1))
-    os.remove(os.path.join(output_dir, input_file1))
+    assert cmp(os.path.join(output_dir, input_file1_ru),
+               os.path.join(expected_dir, input_file1_ru))
+    os.remove(os.path.join(output_dir, input_file1_ru))
 
 
-def test_base_process_file_small_limit_ru(mocker, worker_small, input_dir, input_file_small,
+def test_base_process_file_small_limit_ru(mocker, worker_small, input_dir, input_file_small_ru,
                         output_dir, expected_dir):
     mocker.patch('nlp_ws._subtask.SubTask.run', return_value=None)
     mocker.patch('nlp_ws._subtask.SubTask.get_output_path', get_output_path)
@@ -281,9 +290,9 @@ def test_base_process_file_small_limit_ru(mocker, worker_small, input_dir, input
         os.getpid()
     )
     worker_small.process(
-        os.path.join(input_dir, input_file_small),
-        {"lang": "ru"}, os.path.join(output_dir, input_file_small)
+        os.path.join(input_dir, input_file_small_ru),
+        {"lang": "ru"}, os.path.join(output_dir, input_file_small_ru)
     )
-    assert cmp(os.path.join(output_dir, input_file_small),
-               os.path.join(expected_dir, input_file_small))
-    os.remove(os.path.join(output_dir, input_file_small))
+    assert cmp(os.path.join(output_dir, input_file_small_ru),
+               os.path.join(expected_dir, input_file_small_ru))
+    os.remove(os.path.join(output_dir, input_file_small_ru))
diff --git a/tests/testdata/expected/post_spacy_input_de b/tests/testdata/expected/post_spacy_input_de
new file mode 100644
index 0000000..941f92a
--- /dev/null
+++ b/tests/testdata/expected/post_spacy_input_de
@@ -0,0 +1 @@
+fööbär, bäzßü
diff --git a/tests/testdata/expected/post_spacy_input_es b/tests/testdata/expected/post_spacy_input_es
new file mode 100644
index 0000000..8d95aa5
--- /dev/null
+++ b/tests/testdata/expected/post_spacy_input_es
@@ -0,0 +1 @@
+fóóbár, bézíñú¿¡
diff --git a/tests/testdata/expected/post_spacy_input_fr b/tests/testdata/expected/post_spacy_input_fr
new file mode 100644
index 0000000..c6624ee
--- /dev/null
+++ b/tests/testdata/expected/post_spacy_input_fr
@@ -0,0 +1 @@
+fæèêbàr, bâzœ
diff --git a/tests/testdata/expected/post_spacy_input_pl b/tests/testdata/expected/post_spacy_input_pl
new file mode 100644
index 0000000..11c7a71
--- /dev/null
+++ b/tests/testdata/expected/post_spacy_input_pl
@@ -0,0 +1 @@
+fóbąr, baźż
diff --git a/tests/testdata/expected/post_spacy_input_pt b/tests/testdata/expected/post_spacy_input_pt
new file mode 100644
index 0000000..4eef2ea
--- /dev/null
+++ b/tests/testdata/expected/post_spacy_input_pt
@@ -0,0 +1 @@
+fõbár, bâz
diff --git a/tests/testdata/expected/post_spacy_input_ru b/tests/testdata/expected/post_spacy_input_ru
new file mode 100644
index 0000000..f3a908b
--- /dev/null
+++ b/tests/testdata/expected/post_spacy_input_ru
@@ -0,0 +1 @@
+фубар, баз
diff --git a/tests/testdata/expected/post_spacy_small_limit_input_de b/tests/testdata/expected/post_spacy_small_limit_input_de
new file mode 100644
index 0000000..bd91f84
--- /dev/null
+++ b/tests/testdata/expected/post_spacy_small_limit_input_de
@@ -0,0 +1,15 @@
+fööbär, bäzßü
+fööbär, bäzßü
+fööbär, bäzßü
+fööbär, bäzßü
+fööbär, bäzßü
+fööbär, bäzßü
+fööbär, bäzßü
+fööbär, bäzßü
+fööbär, bäzßü
+fööbär, bäzßü
+fööbär, bäzßü
+fööbär, bäzßü
+fööbär, bäzßü
+fööbär, bäzßü
+fööbär, bäzßü
diff --git a/tests/testdata/expected/post_spacy_small_limit_input_es b/tests/testdata/expected/post_spacy_small_limit_input_es
new file mode 100644
index 0000000..51179d5
--- /dev/null
+++ b/tests/testdata/expected/post_spacy_small_limit_input_es
@@ -0,0 +1,15 @@
+fóóbár, bézíñú¿¡
+fóóbár, bézíñú¿¡
+fóóbár, bézíñú¿¡
+fóóbár, bézíñú¿¡
+fóóbár, bézíñú¿¡
+fóóbár, bézíñú¿¡
+fóóbár, bézíñú¿¡
+fóóbár, bézíñú¿¡
+fóóbár, bézíñú¿¡
+fóóbár, bézíñú¿¡
+fóóbár, bézíñú¿¡
+fóóbár, bézíñú¿¡
+fóóbár, bézíñú¿¡
+fóóbár, bézíñú¿¡
+fóóbár, bézíñú¿¡
diff --git a/tests/testdata/expected/post_spacy_small_limit_input_fr b/tests/testdata/expected/post_spacy_small_limit_input_fr
new file mode 100644
index 0000000..e3c8c5d
--- /dev/null
+++ b/tests/testdata/expected/post_spacy_small_limit_input_fr
@@ -0,0 +1,15 @@
+fæèêbàr, bâzœ
+fæèêbàr, bâzœ
+fæèêbàr, bâzœ
+fæèêbàr, bâzœ
+fæèêbàr, bâzœ
+fæèêbàr, bâzœ
+fæèêbàr, bâzœ
+fæèêbàr, bâzœ
+fæèêbàr, bâzœ
+fæèêbàr, bâzœ
+fæèêbàr, bâzœ
+fæèêbàr, bâzœ
+fæèêbàr, bâzœ
+fæèêbàr, bâzœ
+fæèêbàr, bâzœ
diff --git a/tests/testdata/expected/post_spacy_small_limit_input_pl b/tests/testdata/expected/post_spacy_small_limit_input_pl
new file mode 100644
index 0000000..6f9787d
--- /dev/null
+++ b/tests/testdata/expected/post_spacy_small_limit_input_pl
@@ -0,0 +1,15 @@
+fóbąr, baźż
+fóbąr, baźż
+fóbąr, baźż
+fóbąr, baźż
+fóbąr, baźż
+fóbąr, baźż
+fóbąr, baźż
+fóbąr, baźż
+fóbąr, baźż
+fóbąr, baźż
+fóbąr, baźż
+fóbąr, baźż
+fóbąr, baźż
+fóbąr, baźż
+fóbąr, baźż
diff --git a/tests/testdata/expected/post_spacy_small_limit_input_pt b/tests/testdata/expected/post_spacy_small_limit_input_pt
new file mode 100644
index 0000000..0af0035
--- /dev/null
+++ b/tests/testdata/expected/post_spacy_small_limit_input_pt
@@ -0,0 +1,15 @@
+fõbár, bâz
+fõbár, bâz
+fõbár, bâz
+fõbár, bâz
+fõbár, bâz
+fõbár, bâz
+fõbár, bâz
+fõbár, bâz
+fõbár, bâz
+fõbár, bâz
+fõbár, bâz
+fõbár, bâz
+fõbár, bâz
+fõbár, bâz
+fõbár, bâz
diff --git a/tests/testdata/expected/post_spacy_small_limit_input_ru b/tests/testdata/expected/post_spacy_small_limit_input_ru
new file mode 100644
index 0000000..410acfc
--- /dev/null
+++ b/tests/testdata/expected/post_spacy_small_limit_input_ru
@@ -0,0 +1,15 @@
+фубар, баз
+фубар, баз
+фубар, баз
+фубар, баз
+фубар, баз
+фубар, баз
+фубар, баз
+фубар, баз
+фубар, баз
+фубар, баз
+фубар, баз
+фубар, баз
+фубар, баз
+фубар, баз
+фубар, баз
diff --git a/tests/testdata/input/post_spacy_input_de b/tests/testdata/input/post_spacy_input_de
new file mode 100644
index 0000000..46d611b
--- /dev/null
+++ b/tests/testdata/input/post_spacy_input_de
@@ -0,0 +1 @@
+When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously. “I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week.
diff --git a/tests/testdata/input/post_spacy_input_es b/tests/testdata/input/post_spacy_input_es
new file mode 100644
index 0000000..46d611b
--- /dev/null
+++ b/tests/testdata/input/post_spacy_input_es
@@ -0,0 +1 @@
+When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously. “I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week.
diff --git a/tests/testdata/input/post_spacy_input_fr b/tests/testdata/input/post_spacy_input_fr
new file mode 100644
index 0000000..46d611b
--- /dev/null
+++ b/tests/testdata/input/post_spacy_input_fr
@@ -0,0 +1 @@
+When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously. “I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week.
diff --git a/tests/testdata/input/post_spacy_input_pl b/tests/testdata/input/post_spacy_input_pl
new file mode 100644
index 0000000..46d611b
--- /dev/null
+++ b/tests/testdata/input/post_spacy_input_pl
@@ -0,0 +1 @@
+When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously. “I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week.
diff --git a/tests/testdata/input/post_spacy_input_pt b/tests/testdata/input/post_spacy_input_pt
new file mode 100644
index 0000000..46d611b
--- /dev/null
+++ b/tests/testdata/input/post_spacy_input_pt
@@ -0,0 +1 @@
+When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously. “I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week.
diff --git a/tests/testdata/input/post_spacy_input_ru b/tests/testdata/input/post_spacy_input_ru
new file mode 100644
index 0000000..46d611b
--- /dev/null
+++ b/tests/testdata/input/post_spacy_input_ru
@@ -0,0 +1 @@
+When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously. “I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week.
diff --git a/tests/testdata/input/post_spacy_small_limit_input_de b/tests/testdata/input/post_spacy_small_limit_input_de
new file mode 100644
index 0000000..46d611b
--- /dev/null
+++ b/tests/testdata/input/post_spacy_small_limit_input_de
@@ -0,0 +1 @@
+When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously. “I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week.
diff --git a/tests/testdata/input/post_spacy_small_limit_input_es b/tests/testdata/input/post_spacy_small_limit_input_es
new file mode 100644
index 0000000..46d611b
--- /dev/null
+++ b/tests/testdata/input/post_spacy_small_limit_input_es
@@ -0,0 +1 @@
+When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously. “I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week.
diff --git a/tests/testdata/input/post_spacy_small_limit_input_fr b/tests/testdata/input/post_spacy_small_limit_input_fr
new file mode 100644
index 0000000..46d611b
--- /dev/null
+++ b/tests/testdata/input/post_spacy_small_limit_input_fr
@@ -0,0 +1 @@
+When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously. “I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week.
diff --git a/tests/testdata/input/post_spacy_small_limit_input_pl b/tests/testdata/input/post_spacy_small_limit_input_pl
new file mode 100644
index 0000000..46d611b
--- /dev/null
+++ b/tests/testdata/input/post_spacy_small_limit_input_pl
@@ -0,0 +1 @@
+When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously. “I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week.
diff --git a/tests/testdata/input/post_spacy_small_limit_input_pt b/tests/testdata/input/post_spacy_small_limit_input_pt
new file mode 100644
index 0000000..46d611b
--- /dev/null
+++ b/tests/testdata/input/post_spacy_small_limit_input_pt
@@ -0,0 +1 @@
+When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously. “I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week.
diff --git a/tests/testdata/input/post_spacy_small_limit_input_ru b/tests/testdata/input/post_spacy_small_limit_input_ru
new file mode 100644
index 0000000..46d611b
--- /dev/null
+++ b/tests/testdata/input/post_spacy_small_limit_input_ru
@@ -0,0 +1 @@
+When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously. “I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week.
diff --git a/tests/testdata/output/tmp-subtask-result b/tests/testdata/output/tmp-subtask-result
index 433fd91..1a1c303 100644
--- a/tests/testdata/output/tmp-subtask-result
+++ b/tests/testdata/output/tmp-subtask-result
@@ -1 +1 @@
-foobar, baz[popsute]
\ No newline at end of file
+foobar, baz
\ No newline at end of file
diff --git a/tests/testdata/output/tmp-subtask-result-de b/tests/testdata/output/tmp-subtask-result-de
new file mode 100644
index 0000000..231232f
--- /dev/null
+++ b/tests/testdata/output/tmp-subtask-result-de
@@ -0,0 +1 @@
+fööbär, bäzßü
\ No newline at end of file
diff --git a/tests/testdata/output/tmp-subtask-result-es b/tests/testdata/output/tmp-subtask-result-es
new file mode 100644
index 0000000..a99a38a
--- /dev/null
+++ b/tests/testdata/output/tmp-subtask-result-es
@@ -0,0 +1 @@
+fóóbár, bézíñú¿¡
\ No newline at end of file
diff --git a/tests/testdata/output/tmp-subtask-result-fr b/tests/testdata/output/tmp-subtask-result-fr
new file mode 100644
index 0000000..8759587
--- /dev/null
+++ b/tests/testdata/output/tmp-subtask-result-fr
@@ -0,0 +1 @@
+fæèêbàr, bâzœ
\ No newline at end of file
diff --git a/tests/testdata/output/tmp-subtask-result-pl b/tests/testdata/output/tmp-subtask-result-pl
new file mode 100644
index 0000000..efa5be7
--- /dev/null
+++ b/tests/testdata/output/tmp-subtask-result-pl
@@ -0,0 +1 @@
+fóbąr, baźż
\ No newline at end of file
diff --git a/tests/testdata/output/tmp-subtask-result-pt b/tests/testdata/output/tmp-subtask-result-pt
new file mode 100644
index 0000000..c2a919c
--- /dev/null
+++ b/tests/testdata/output/tmp-subtask-result-pt
@@ -0,0 +1 @@
+fõbár, bâz
\ No newline at end of file
diff --git a/tests/testdata/output/tmp-subtask-result-ru b/tests/testdata/output/tmp-subtask-result-ru
new file mode 100644
index 0000000..5e9d869
--- /dev/null
+++ b/tests/testdata/output/tmp-subtask-result-ru
@@ -0,0 +1 @@
+фубар, баз
\ No newline at end of file
-- 
GitLab


From d0016f9f3a4b44eada97a55ed5c63cb9dc63bbd8 Mon Sep 17 00:00:00 2001
From: bmatysiak <bartosz.matysiak@pwr.edu.pl>
Date: Tue, 4 Apr 2023 13:02:21 +0200
Subject: [PATCH 5/6] Quickfix for pl

---
 tests/test.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/test.py b/tests/test.py
index 53c9582..6edb025 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -23,7 +23,9 @@ def get_output_path(self, timeout=0):
         "fr": "tests/testdata/output/tmp-subtask-result-fr",
         "ru": "tests/testdata/output/tmp-subtask-result-ru",
     }
-    dict_key = self.task[0].get("spacy", {"lang": "pl"})["lang"]
+    dict_key = "pl"
+    if len(self.task) == 1 and 'spacy' in self.task[0]:
+        dict_key = self.task[0]["spacy"]["lang"]
     return tmp_subtask_result_file[dict_key]
 
 
-- 
GitLab


From 59f29a343c2efc8dba265d5e4f3d780753262ce0 Mon Sep 17 00:00:00 2001
From: bmatysiak <bartosz.matysiak@pwr.edu.pl>
Date: Wed, 5 Apr 2023 11:44:12 +0200
Subject: [PATCH 6/6] Add integration tests

---
 lpmn_queries.json                                    | 12 +++++++++++-
 tests/testdata/expected/elasmucha_grisea_ru.json     |  1 +
 .../testdata/expected/turritopsis_nutricula_de.json  |  1 +
 .../testdata/expected/turritopsis_nutricula_es.json  |  1 +
 .../testdata/expected/turritopsis_nutricula_fr.json  |  1 +
 .../testdata/expected/turritopsis_nutricula_pt.json  |  1 +
 tests/testdata/input/elasmucha_grisea_ru.txt         |  1 +
 tests/testdata/input/turritopsis_nutricula_de.txt    |  1 +
 tests/testdata/input/turritopsis_nutricula_es.txt    |  1 +
 tests/testdata/input/turritopsis_nutricula_fr.txt    |  1 +
 tests/testdata/input/turritopsis_nutricula_pt.txt    |  1 +
 11 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 tests/testdata/expected/elasmucha_grisea_ru.json
 create mode 100644 tests/testdata/expected/turritopsis_nutricula_de.json
 create mode 100644 tests/testdata/expected/turritopsis_nutricula_es.json
 create mode 100644 tests/testdata/expected/turritopsis_nutricula_fr.json
 create mode 100644 tests/testdata/expected/turritopsis_nutricula_pt.json
 create mode 100644 tests/testdata/input/elasmucha_grisea_ru.txt
 create mode 100644 tests/testdata/input/turritopsis_nutricula_de.txt
 create mode 100644 tests/testdata/input/turritopsis_nutricula_es.txt
 create mode 100644 tests/testdata/input/turritopsis_nutricula_fr.txt
 create mode 100644 tests/testdata/input/turritopsis_nutricula_pt.txt

diff --git a/lpmn_queries.json b/lpmn_queries.json
index 67b852d..a764318 100644
--- a/lpmn_queries.json
+++ b/lpmn_queries.json
@@ -11,5 +11,15 @@
 
     "ner_for_pl": {"task": [{"postagger": {"lang": "pl", "output": "json", "method": "ner"}}], "input": "post_postagger_input", "expected": "ner_for_pl.json"},
 
-    "ner_for_en": {"task": [{"postagger": {"lang": "en", "output": "json", "method": "ner"}}], "input": "post_spacy_input", "expected": "ner_for_en.json"}
+    "ner_for_en": {"task": [{"postagger": {"lang": "en", "output": "json", "method": "ner"}}], "input": "post_spacy_input", "expected": "ner_for_en.json"},
+
+    "postagger_lone_json_de": {"task": [{"postagger": {"lang": "de", "output": "json", "method": "tagger"}}], "input": "turritopsis_nutricula_de.txt", "expected": "turritopsis_nutricula_de.json"},
+
+    "postagger_lone_json_es": {"task": [{"postagger": {"lang": "es", "output": "json", "method": "tagger"}}], "input": "turritopsis_nutricula_es.txt", "expected": "turritopsis_nutricula_es.json"},
+
+    "postagger_lone_json_pt": {"task": [{"postagger": {"lang": "pt", "output": "json", "method": "tagger"}}], "input": "turritopsis_nutricula_pt.txt", "expected": "turritopsis_nutricula_pt.json"},
+
+    "postagger_lone_json_fr": {"task": [{"postagger": {"lang": "fr", "output": "json", "method": "tagger"}}], "input": "turritopsis_nutricula_fr.txt", "expected": "turritopsis_nutricula_fr.json"},
+
+    "postagger_lone_json_ru": {"task": [{"postagger": {"lang": "ru", "output": "json", "method": "tagger"}}], "input": "elasmucha_grisea_ru.txt", "expected": "elasmucha_grisea_ru.json"}
 }
diff --git a/tests/testdata/expected/elasmucha_grisea_ru.json b/tests/testdata/expected/elasmucha_grisea_ru.json
new file mode 100644
index 0000000..debf8e2
--- /dev/null
+++ b/tests/testdata/expected/elasmucha_grisea_ru.json
@@ -0,0 +1 @@
+{"filename": "1a863079-6aec-457f-8681-f670d716a19c", "tagset": "ud", "tokens": [{"index": 1, "position": [0, 3], "orth": "Для", "lexemes": [{"lemma": "для", "mstag": "ADP", "disamb": true}]}, {"index": 2, "position": [4, 8], "orth": "вида", "lexemes": [{"lemma": "вид", "mstag": "NOUN", "disamb": true}]}, {"index": 3, "position": [9, 22], "orth": "зафиксирована", "lexemes": [{"lemma": "зафиксировать", "mstag": "VERB", "disamb": true}]}, {"index": 4, "position": [23, 34], "orth": "материнская", "lexemes": [{"lemma": "материнский", "mstag": "ADJ", "disamb": true}]}, {"index": 5, "position": [35, 41], "orth": "забота", "lexemes": [{"lemma": "забота", "mstag": "NOUN", "disamb": true}]}, {"index": 6, "position": [42, 43], "orth": "(", "lexemes": [{"lemma": "(", "mstag": "PUNCT", "disamb": true}]}, {"index": 7, "position": [44, 52], "orth": "поведение", "lexemes": [{"lemma": "поведение", "mstag": "NOUN", "disamb": true}]}, {"index": 8, "position": [53, 55], "orth": "по", "lexemes": [{"lemma": "по", "mstag": "ADP", "disamb": true}]}, {"index": 9, "position": [56, 62], "orth": "охране", "lexemes": [{"lemma": "охрана", "mstag": "NOUN", "disamb": true}]}, {"index": 10, "position": [63, 66], "orth": "яиц", "lexemes": [{"lemma": "яйцо", "mstag": "NOUN", "disamb": true}]}, {"index": 11, "position": [67, 68], "orth": "и", "lexemes": [{"lemma": "и", "mstag": "CCONJ", "disamb": true}]}, {"index": 12, "position": [69, 76], "orth": "личинок", "lexemes": [{"lemma": "личинка", "mstag": "NOUN", "disamb": true}]}, {"index": 13, "position": [77, 77], "orth": "-", "lexemes": [{"lemma": "-", "mstag": "NOUN", "disamb": true}]}, {"index": 14, "position": [78, 81], "orth": "нимф", "lexemes": [{"lemma": "нимф", "mstag": "NOUN", "disamb": true}]}, {"index": 15, "position": [82, 82], "orth": ")", "lexemes": [{"lemma": ")", "mstag": "PUNCT", "disamb": true}]}, {"index": 16, "position": [83, 83], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 17, "position": 
[84, 89], "orth": "После", "lexemes": [{"lemma": "после", "mstag": "ADP", "disamb": true}]}, {"index": 18, "position": [90, 100], "orth": "яйцекладки", "lexemes": [{"lemma": "яйцекладка", "mstag": "NOUN", "disamb": true}]}, {"index": 19, "position": [101, 106], "orth": "самка", "lexemes": [{"lemma": "самка", "mstag": "NOUN", "disamb": true}]}, {"index": 20, "position": [107, 112], "orth": "стоит", "lexemes": [{"lemma": "стоить", "mstag": "VERB", "disamb": true}]}, {"index": 21, "position": [113, 116], "orth": "над", "lexemes": [{"lemma": "над", "mstag": "ADP", "disamb": true}]}, {"index": 22, "position": [117, 124], "orth": "кладкой", "lexemes": [{"lemma": "кладка", "mstag": "NOUN", "disamb": true}]}, {"index": 23, "position": [125, 128], "orth": "яиц", "lexemes": [{"lemma": "яйцо", "mstag": "NOUN", "disamb": true}]}, {"index": 24, "position": [129, 130], "orth": "и", "lexemes": [{"lemma": "и", "mstag": "CCONJ", "disamb": true}]}, {"index": 25, "position": [131, 139], "orth": "защищает", "lexemes": [{"lemma": "защищать", "mstag": "VERB", "disamb": true}]}, {"index": 26, "position": [140, 142], "orth": "её", "lexemes": [{"lemma": "её", "mstag": "PRON", "disamb": true}]}, {"index": 27, "position": [143, 145], "orth": "на", "lexemes": [{"lemma": "на", "mstag": "ADP", "disamb": true}]}, {"index": 28, "position": [146, 156], "orth": "протяжении", "lexemes": [{"lemma": "протяжение", "mstag": "NOUN", "disamb": true}]}, {"index": 29, "position": [157, 162], "orth": "всего", "lexemes": [{"lemma": "весь", "mstag": "DET", "disamb": true}]}, {"index": 30, "position": [163, 171], "orth": "развития", "lexemes": [{"lemma": "развитие", "mstag": "NOUN", "disamb": true}]}, {"index": 31, "position": [172, 175], "orth": "яиц", "lexemes": [{"lemma": "яйцо", "mstag": "NOUN", "disamb": true}]}, {"index": 32, "position": [176, 176], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 33, "position": [177, 186], "orth": "Репертуар", "lexemes": [{"lemma": 
"репертуар", "mstag": "NOUN", "disamb": true}]}, {"index": 34, "position": [187, 196], "orth": "защитного", "lexemes": [{"lemma": "защитный", "mstag": "ADJ", "disamb": true}]}, {"index": 35, "position": [197, 206], "orth": "поведения", "lexemes": [{"lemma": "поведение", "mstag": "NOUN", "disamb": true}]}, {"index": 36, "position": [207, 212], "orth": "самок", "lexemes": [{"lemma": "самка", "mstag": "NOUN", "disamb": true}]}, {"index": 37, "position": [213, 213], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 38, "position": [214, 221], "orth": "стоящих", "lexemes": [{"lemma": "стоить", "mstag": "VERB", "disamb": true}]}, {"index": 39, "position": [222, 225], "orth": "над", "lexemes": [{"lemma": "над", "mstag": "ADP", "disamb": true}]}, {"index": 40, "position": [226, 233], "orth": "пакетом", "lexemes": [{"lemma": "пакет", "mstag": "NOUN", "disamb": true}]}, {"index": 41, "position": [234, 237], "orth": "яиц", "lexemes": [{"lemma": "яйцо", "mstag": "NOUN", "disamb": true}]}, {"index": 42, "position": [238, 238], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 43, "position": [239, 247], "orth": "включает", "lexemes": [{"lemma": "включать", "mstag": "VERB", "disamb": true}]}, {"index": 44, "position": [248, 249], "orth": "в", "lexemes": [{"lemma": "в", "mstag": "ADP", "disamb": true}]}, {"index": 45, "position": [250, 254], "orth": "себя", "lexemes": [{"lemma": "себя", "mstag": "PRON", "disamb": true}]}, {"index": 46, "position": [255, 261], "orth": "взмахи", "lexemes": [{"lemma": "взмахи", "mstag": "NOUN", "disamb": true}]}, {"index": 47, "position": [262, 270], "orth": "крыльями", "lexemes": [{"lemma": "крыло", "mstag": "NOUN", "disamb": true}]}, {"index": 48, "position": [271, 271], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 49, "position": [272, 284], "orth": "подёргивание", "lexemes": [{"lemma": "подёргивание", "mstag": "NOUN", "disamb": 
true}]}, {"index": 50, "position": [285, 289], "orth": "тела", "lexemes": [{"lemma": "тело", "mstag": "NOUN", "disamb": true}]}, {"index": 51, "position": [290, 290], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 52, "position": [291, 297], "orth": "наклон", "lexemes": [{"lemma": "наклон", "mstag": "NOUN", "disamb": true}]}, {"index": 53, "position": [298, 299], "orth": "в", "lexemes": [{"lemma": "в", "mstag": "ADP", "disamb": true}]}, {"index": 54, "position": [300, 307], "orth": "сторону", "lexemes": [{"lemma": "сторона", "mstag": "NOUN", "disamb": true}]}, {"index": 55, "position": [308, 313], "orth": "врага", "lexemes": [{"lemma": "враг", "mstag": "NOUN", "disamb": true}]}, {"index": 56, "position": [314, 315], "orth": "и", "lexemes": [{"lemma": "и", "mstag": "CCONJ", "disamb": true}]}, {"index": 57, "position": [316, 316], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 58, "position": [317, 324], "orth": "наконец", "lexemes": [{"lemma": "наконец", "mstag": "ADV", "disamb": true}]}, {"index": 59, "position": [325, 325], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 60, "position": [326, 335], "orth": "выделение", "lexemes": [{"lemma": "выделение", "mstag": "NOUN", "disamb": true}]}, {"index": 61, "position": [336, 346], "orth": "неприятных", "lexemes": [{"lemma": "неприятный", "mstag": "ADJ", "disamb": true}]}, {"index": 62, "position": [347, 354], "orth": "запахов", "lexemes": [{"lemma": "запах", "mstag": "NOUN", "disamb": true}]}, {"index": 63, "position": [355, 357], "orth": "из", "lexemes": [{"lemma": "из", "mstag": "ADP", "disamb": true}]}, {"index": 64, "position": [358, 371], "orth": "ароматических", "lexemes": [{"lemma": "ароматический", "mstag": "ADJ", "disamb": true}]}, {"index": 65, "position": [372, 377], "orth": "желёз", "lexemes": [{"lemma": "желёз", "mstag": "NOUN", "disamb": true}]}, {"index": 66, "position": [378, 378], 
"orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 67, "position": [379, 385], "orth": "Иногда", "lexemes": [{"lemma": "иногда", "mstag": "ADV", "disamb": true}]}, {"index": 68, "position": [386, 391], "orth": "самки", "lexemes": [{"lemma": "самка", "mstag": "NOUN", "disamb": true}]}, {"index": 69, "position": [392, 401], "orth": "совместно", "lexemes": [{"lemma": "совместно", "mstag": "ADV", "disamb": true}]}, {"index": 70, "position": [402, 410], "orth": "охраняют", "lexemes": [{"lemma": "охранять", "mstag": "VERB", "disamb": true}]}, {"index": 71, "position": [411, 415], "orth": "свои", "lexemes": [{"lemma": "свой", "mstag": "DET", "disamb": true}]}, {"index": 72, "position": [416, 422], "orth": "кладки", "lexemes": [{"lemma": "кладка", "mstag": "NOUN", "disamb": true}]}, {"index": 73, "position": [423, 426], "orth": "бок", "lexemes": [{"lemma": "бок", "mstag": "ADV", "disamb": true}]}, {"index": 74, "position": [427, 428], "orth": "о", "lexemes": [{"lemma": "о", "mstag": "ADP", "disamb": true}]}, {"index": 75, "position": [429, 432], "orth": "бок", "lexemes": [{"lemma": "бок", "mstag": "NOUN", "disamb": true}]}, {"index": 76, "position": [433, 435], "orth": "на", "lexemes": [{"lemma": "на", "mstag": "ADP", "disamb": true}]}, {"index": 77, "position": [436, 441], "orth": "одном", "lexemes": [{"lemma": "один", "mstag": "DET", "disamb": true}]}, {"index": 78, "position": [442, 443], "orth": "и", "lexemes": [{"lemma": "и", "mstag": "CCONJ", "disamb": true}]}, {"index": 79, "position": [444, 447], "orth": "том", "lexemes": [{"lemma": "тот", "mstag": "DET", "disamb": true}]}, {"index": 80, "position": [448, 450], "orth": "же", "lexemes": [{"lemma": "же", "mstag": "PART", "disamb": true}]}, {"index": 81, "position": [451, 456], "orth": "листе", "lexemes": [{"lemma": "лист", "mstag": "NOUN", "disamb": true}]}, {"index": 82, "position": [457, 463], "orth": "берёзы", "lexemes": [{"lemma": "берёзы", "mstag": "NOUN", "disamb": true}]}, 
{"index": 83, "position": [464, 464], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 84, "position": [465, 468], "orth": "что", "lexemes": [{"lemma": "что", "mstag": "PRON", "disamb": true}]}, {"index": 85, "position": [469, 480], "orth": "увеличивает", "lexemes": [{"lemma": "увеличивать", "mstag": "VERB", "disamb": true}]}, {"index": 86, "position": [481, 486], "orth": "шансы", "lexemes": [{"lemma": "шанс", "mstag": "NOUN", "disamb": true}]}, {"index": 87, "position": [487, 496], "orth": "потомства", "lexemes": [{"lemma": "потомство", "mstag": "NOUN", "disamb": true}]}, {"index": 88, "position": [497, 499], "orth": "на", "lexemes": [{"lemma": "на", "mstag": "ADP", "disamb": true}]}, {"index": 89, "position": [500, 509], "orth": "выживание", "lexemes": [{"lemma": "выживание", "mstag": "NOUN", "disamb": true}]}, {"index": 90, "position": [510, 510], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 91, "position": [511, 514], "orth": "Эти", "lexemes": [{"lemma": "этот", "mstag": "DET", "disamb": true}]}, {"index": 92, "position": [515, 524], "orth": "насекомые", "lexemes": [{"lemma": "насекомое", "mstag": "NOUN", "disamb": true}]}, {"index": 93, "position": [525, 531], "orth": "служат", "lexemes": [{"lemma": "служить", "mstag": "VERB", "disamb": true}]}, {"index": 94, "position": [532, 540], "orth": "примером", "lexemes": [{"lemma": "пример", "mstag": "NOUN", "disamb": true}]}, {"index": 95, "position": [541, 546], "orth": "самых", "lexemes": [{"lemma": "самых", "mstag": "ADJ", "disamb": true}]}, {"index": 96, "position": [547, 553], "orth": "ранних", "lexemes": [{"lemma": "ранний", "mstag": "ADJ", "disamb": true}]}, {"index": 97, "position": [554, 560], "orth": "стадий", "lexemes": [{"lemma": "стадия", "mstag": "NOUN", "disamb": true}]}, {"index": 98, "position": [561, 575], "orth": "эусоциальности", "lexemes": [{"lemma": "эусоциальности", "mstag": "NOUN", "disamb": true}]}, {"index": 99, 
"position": [576, 576], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 100, "position": [577, 577], "orth": "\n", "lexemes": [{"lemma": "\n", "mstag": "SPACE", "disamb": true}]}], "text": "Для вида зафиксирована материнская забота (поведение по охране яиц и личинок-нимф). После яйцекладки самка стоит над кладкой яиц и защищает её на протяжении всего развития яиц. Репертуар защитного поведения самок, стоящих над пакетом яиц, включает в себя взмахи крыльями, подёргивание тела, наклон в сторону врага и, наконец, выделение неприятных запахов из ароматических желёз. Иногда самки совместно охраняют свои кладки бок о бок на одном и том же листе берёзы, что увеличивает шансы потомства на выживание. Эти насекомые служат примером самых ранних стадий эусоциальности.\n"}
diff --git a/tests/testdata/expected/turritopsis_nutricula_de.json b/tests/testdata/expected/turritopsis_nutricula_de.json
new file mode 100644
index 0000000..8ee7b67
--- /dev/null
+++ b/tests/testdata/expected/turritopsis_nutricula_de.json
@@ -0,0 +1 @@
+{"filename": "e7498228-90ac-4b09-81b7-a82f2ac02f55", "tagset": "ud", "tokens": [{"index": 1, "position": [0, 11], "orth": "Turritopsis", "lexemes": [{"lemma": "Turritopsis", "mstag": "NOUN", "disamb": true}]}, {"index": 2, "position": [12, 21], "orth": "nutricula", "lexemes": [{"lemma": "nutricula", "mstag": "ADV", "disamb": true}]}, {"index": 3, "position": [22, 28], "orth": "bildet", "lexemes": [{"lemma": "bilden", "mstag": "VERB", "disamb": true}]}, {"index": 4, "position": [29, 38], "orth": "aufrechte", "lexemes": [{"lemma": "aufrechen", "mstag": "VERB", "disamb": true}]}, {"index": 5, "position": [39, 39], "orth": ",", "lexemes": [{"lemma": "--", "mstag": "PUNCT", "disamb": true}]}, {"index": 6, "position": [40, 50], "orth": "verzweigte", "lexemes": [{"lemma": "verzweigt", "mstag": "ADJ", "disamb": true}]}, {"index": 7, "position": [51, 69], "orth": "Hydroiden-Kolonien", "lexemes": [{"lemma": "Hydroiden-Kolonien", "mstag": "NOUN", "disamb": true}]}, {"index": 8, "position": [70, 70], "orth": ".", "lexemes": [{"lemma": "--", "mstag": "PUNCT", "disamb": true}]}, {"index": 9, "position": [71, 74], "orth": "Die", "lexemes": [{"lemma": "der", "mstag": "DET", "disamb": true}]}, {"index": 10, "position": [75, 90], "orth": "Polypenköpfchen", "lexemes": [{"lemma": "Polypenköpfche", "mstag": "NOUN", "disamb": true}]}, {"index": 11, "position": [91, 92], "orth": "(", "lexemes": [{"lemma": "--", "mstag": "PUNCT", "disamb": true}]}, {"index": 12, "position": [93, 102], "orth": "Hydranthen", "lexemes": [{"lemma": "Hydranthe", "mstag": "NOUN", "disamb": true}]}, {"index": 13, "position": [103, 103], "orth": ")", "lexemes": [{"lemma": "--", "mstag": "PUNCT", "disamb": true}]}, {"index": 14, "position": [104, 108], "orth": "sind", "lexemes": [{"lemma": "sein", "mstag": "AUX", "disamb": true}]}, {"index": 15, "position": [109, 117], "orth": "spindel-", "lexemes": [{"lemma": "spindel", "mstag": "X", "disamb": true}]}, {"index": 16, "position": [118, 121], "orth": "bis", 
"lexemes": [{"lemma": "bis", "mstag": "CCONJ", "disamb": true}]}, {"index": 17, "position": [122, 134], "orth": "keulenförmig", "lexemes": [{"lemma": "keulenförmig", "mstag": "ADV", "disamb": true}]}, {"index": 18, "position": [135, 135], "orth": ",", "lexemes": [{"lemma": "--", "mstag": "PUNCT", "disamb": true}]}, {"index": 19, "position": [136, 140], "orth": "ihre", "lexemes": [{"lemma": "ihr", "mstag": "DET", "disamb": true}]}, {"index": 20, "position": [141, 154], "orth": "fadenförmigen", "lexemes": [{"lemma": "fadenförmig", "mstag": "ADJ", "disamb": true}]}, {"index": 21, "position": [155, 163], "orth": "Tentakel", "lexemes": [{"lemma": "Tentakel", "mstag": "NOUN", "disamb": true}]}, {"index": 22, "position": [164, 168], "orth": "sind", "lexemes": [{"lemma": "sein", "mstag": "AUX", "disamb": true}]}, {"index": 23, "position": [169, 181], "orth": "unregelmäßig", "lexemes": [{"lemma": "unregelmäßig", "mstag": "ADV", "disamb": true}]}, {"index": 24, "position": [182, 186], "orth": "über", "lexemes": [{"lemma": "über", "mstag": "ADP", "disamb": true}]}, {"index": 25, "position": [187, 190], "orth": "den", "lexemes": [{"lemma": "der", "mstag": "DET", "disamb": true}]}, {"index": 26, "position": [191, 207], "orth": "Hydranthenkörper", "lexemes": [{"lemma": "Hydranthenkörper", "mstag": "NOUN", "disamb": true}]}, {"index": 27, "position": [208, 216], "orth": "verteilt", "lexemes": [{"lemma": "verteilen", "mstag": "VERB", "disamb": true}]}, {"index": 28, "position": [217, 217], "orth": ".", "lexemes": [{"lemma": "--", "mstag": "PUNCT", "disamb": true}]}, {"index": 29, "position": [218, 221], "orth": "Die", "lexemes": [{"lemma": "der", "mstag": "DET", "disamb": true}]}, {"index": 30, "position": [222, 227], "orth": "Hülle", "lexemes": [{"lemma": "Hülle", "mstag": "NOUN", "disamb": true}]}, {"index": 31, "position": [228, 229], "orth": "(", "lexemes": [{"lemma": "--", "mstag": "PUNCT", "disamb": true}]}, {"index": 32, "position": [230, 237], "orth": "Periderm", 
"lexemes": [{"lemma": "Periderm", "mstag": "NOUN", "disamb": true}]}, {"index": 33, "position": [238, 238], "orth": ")", "lexemes": [{"lemma": "--", "mstag": "PUNCT", "disamb": true}]}, {"index": 34, "position": [239, 241], "orth": "um", "lexemes": [{"lemma": "um", "mstag": "ADP", "disamb": true}]}, {"index": 35, "position": [242, 245], "orth": "den", "lexemes": [{"lemma": "der", "mstag": "DET", "disamb": true}]}, {"index": 36, "position": [246, 251], "orth": "Stiel", "lexemes": [{"lemma": "Stiel", "mstag": "NOUN", "disamb": true}]}, {"index": 37, "position": [252, 253], "orth": "(", "lexemes": [{"lemma": "--", "mstag": "PUNCT", "disamb": true}]}, {"index": 38, "position": [254, 264], "orth": "Hydrocaulus", "lexemes": [{"lemma": "Hydrocaulus", "mstag": "NOUN", "disamb": true}]}, {"index": 39, "position": [265, 265], "orth": ")", "lexemes": [{"lemma": "--", "mstag": "PUNCT", "disamb": true}]}, {"index": 40, "position": [266, 269], "orth": "ist", "lexemes": [{"lemma": "sein", "mstag": "AUX", "disamb": true}]}, {"index": 41, "position": [270, 279], "orth": "zweilagig", "lexemes": [{"lemma": "zweilagig", "mstag": "ADV", "disamb": true}]}, {"index": 42, "position": [280, 280], "orth": ".", "lexemes": [{"lemma": "--", "mstag": "PUNCT", "disamb": true}]}, {"index": 43, "position": [281, 284], "orth": "Die", "lexemes": [{"lemma": "der", "mstag": "DET", "disamb": true}]}, {"index": 44, "position": [285, 299], "orth": "Medusenknospen", "lexemes": [{"lemma": "Medusenknospe", "mstag": "NOUN", "disamb": true}]}, {"index": 45, "position": [300, 301], "orth": "(", "lexemes": [{"lemma": "--", "mstag": "PUNCT", "disamb": true}]}, {"index": 46, "position": [302, 311], "orth": "Gonophoren", "lexemes": [{"lemma": "Gonophor", "mstag": "NOUN", "disamb": true}]}, {"index": 47, "position": [312, 312], "orth": ")", "lexemes": [{"lemma": "--", "mstag": "PUNCT", "disamb": true}]}, {"index": 48, "position": [313, 323], "orth": "entwickeln", "lexemes": [{"lemma": "entwickeln", "mstag": "VERB", 
"disamb": true}]}, {"index": 49, "position": [324, 328], "orth": "sich", "lexemes": [{"lemma": "sich", "mstag": "PRON", "disamb": true}]}, {"index": 50, "position": [329, 332], "orth": "auf", "lexemes": [{"lemma": "auf", "mstag": "ADP", "disamb": true}]}, {"index": 51, "position": [333, 336], "orth": "den", "lexemes": [{"lemma": "der", "mstag": "DET", "disamb": true}]}, {"index": 52, "position": [337, 344], "orth": "Stielen", "lexemes": [{"lemma": "Stiel", "mstag": "NOUN", "disamb": true}]}, {"index": 53, "position": [345, 347], "orth": "in", "lexemes": [{"lemma": "in", "mstag": "ADP", "disamb": true}]}, {"index": 54, "position": [348, 353], "orth": "einer", "lexemes": [{"lemma": "ein", "mstag": "DET", "disamb": true}]}, {"index": 55, "position": [354, 357], "orth": "mit", "lexemes": [{"lemma": "mit", "mstag": "ADP", "disamb": true}]}, {"index": 56, "position": [358, 366], "orth": "Perisarc", "lexemes": [{"lemma": "Perisarc", "mstag": "PROPN", "disamb": true}]}, {"index": 57, "position": [367, 379], "orth": "eingehüllten", "lexemes": [{"lemma": "eingehüllt", "mstag": "ADJ", "disamb": true}]}, {"index": 58, "position": [380, 386], "orth": "Region", "lexemes": [{"lemma": "Region", "mstag": "NOUN", "disamb": true}]}, {"index": 59, "position": [387, 387], "orth": ".", "lexemes": [{"lemma": "--", "mstag": "PUNCT", "disamb": true}]}, {"index": 60, "position": [388, 388], "orth": "\n", "lexemes": [{"lemma": "\n", "mstag": "SPACE", "disamb": true}]}], "text": "Turritopsis nutricula bildet aufrechte, verzweigte Hydroiden-Kolonien. Die Polypenköpfchen (Hydranthen) sind spindel- bis keulenförmig, ihre fadenförmigen Tentakel sind unregelmäßig über den Hydranthenkörper verteilt. Die Hülle (Periderm) um den Stiel (Hydrocaulus) ist zweilagig. Die Medusenknospen (Gonophoren) entwickeln sich auf den Stielen in einer mit Perisarc eingehüllten Region.\n"}
diff --git a/tests/testdata/expected/turritopsis_nutricula_es.json b/tests/testdata/expected/turritopsis_nutricula_es.json
new file mode 100644
index 0000000..45766e3
--- /dev/null
+++ b/tests/testdata/expected/turritopsis_nutricula_es.json
@@ -0,0 +1 @@
+{"filename": "4b30963c-3e8f-477f-aa7f-aa238618023f", "tagset": "ud", "tokens": [{"index": 1, "position": [0, 5], "orth": "Tiene", "lexemes": [{"lemma": "tener", "mstag": "VERB", "disamb": true}]}, {"index": 2, "position": [6, 8], "orth": "un", "lexemes": [{"lemma": "uno", "mstag": "DET", "disamb": true}]}, {"index": 3, "position": [9, 17], "orth": "diámetro", "lexemes": [{"lemma": "diámetro", "mstag": "NOUN", "disamb": true}]}, {"index": 4, "position": [18, 20], "orth": "de", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 5, "position": [21, 24], "orth": "4-5", "lexemes": [{"lemma": "4-5", "mstag": "NUM", "disamb": true}]}, {"index": 6, "position": [25, 27], "orth": "mm", "lexemes": [{"lemma": "mm", "mstag": "NOUN", "disamb": true}]}, {"index": 7, "position": [28, 28], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 8, "position": [29, 31], "orth": "Su", "lexemes": [{"lemma": "su", "mstag": "DET", "disamb": true}]}, {"index": 9, "position": [32, 38], "orth": "figura", "lexemes": [{"lemma": "figura", "mstag": "NOUN", "disamb": true}]}, {"index": 10, "position": [39, 41], "orth": "es", "lexemes": [{"lemma": "ser", "mstag": "AUX", "disamb": true}]}, {"index": 11, "position": [42, 46], "orth": "alta", "lexemes": [{"lemma": "alto", "mstag": "ADJ", "disamb": true}]}, {"index": 12, "position": [47, 48], "orth": "y", "lexemes": [{"lemma": "y", "mstag": "CCONJ", "disamb": true}]}, {"index": 13, "position": [49, 59], "orth": "acampanada", "lexemes": [{"lemma": "acampanado", "mstag": "ADJ", "disamb": true}]}, {"index": 14, "position": [60, 63], "orth": "con", "lexemes": [{"lemma": "con", "mstag": "ADP", "disamb": true}]}, {"index": 15, "position": [64, 71], "orth": "paredes", "lexemes": [{"lemma": "pared", "mstag": "NOUN", "disamb": true}]}, {"index": 16, "position": [72, 77], "orth": "finas", "lexemes": [{"lemma": "fino", "mstag": "ADJ", "disamb": true}]}, {"index": 17, "position": [78, 79], "orth": "y", 
"lexemes": [{"lemma": "y", "mstag": "CCONJ", "disamb": true}]}, {"index": 18, "position": [80, 89], "orth": "uniformes", "lexemes": [{"lemma": "uniforme", "mstag": "NOUN", "disamb": true}]}, {"index": 19, "position": [90, 90], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 20, "position": [91, 93], "orth": "Su", "lexemes": [{"lemma": "su", "mstag": "DET", "disamb": true}]}, {"index": 21, "position": [94, 98], "orth": "gran", "lexemes": [{"lemma": "gran", "mstag": "ADJ", "disamb": true}]}, {"index": 22, "position": [99, 107], "orth": "estómago", "lexemes": [{"lemma": "estómago", "mstag": "NOUN", "disamb": true}]}, {"index": 23, "position": [108, 109], "orth": "(", "lexemes": [{"lemma": "(", "mstag": "PUNCT", "disamb": true}]}, {"index": 24, "position": [110, 116], "orth": "cavidad", "lexemes": [{"lemma": "cavidad", "mstag": "NOUN", "disamb": true}]}, {"index": 25, "position": [117, 131], "orth": "gastrovascular", "lexemes": [{"lemma": "gastrovascular", "mstag": "ADJ", "disamb": true}]}, {"index": 26, "position": [132, 132], "orth": ")", "lexemes": [{"lemma": ")", "mstag": "PUNCT", "disamb": true}]}, {"index": 27, "position": [133, 133], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 28, "position": [134, 138], "orth": "rojo", "lexemes": [{"lemma": "rojo", "mstag": "NOUN", "disamb": true}]}, {"index": 29, "position": [139, 143], "orth": "vivo", "lexemes": [{"lemma": "vivo", "mstag": "ADJ", "disamb": true}]}, {"index": 30, "position": [144, 144], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 31, "position": [145, 150], "orth": "tiene", "lexemes": [{"lemma": "tener", "mstag": "VERB", "disamb": true}]}, {"index": 32, "position": [151, 156], "orth": "forma", "lexemes": [{"lemma": "forma", "mstag": "NOUN", "disamb": true}]}, {"index": 33, "position": [157, 167], "orth": "cruciforme", "lexemes": [{"lemma": "cruciforme", "mstag": "ADJ", "disamb": true}]}, 
{"index": 34, "position": [168, 170], "orth": "en", "lexemes": [{"lemma": "en", "mstag": "ADP", "disamb": true}]}, {"index": 35, "position": [171, 173], "orth": "su", "lexemes": [{"lemma": "su", "mstag": "DET", "disamb": true}]}, {"index": 36, "position": [174, 179], "orth": "corte", "lexemes": [{"lemma": "corte", "mstag": "NOUN", "disamb": true}]}, {"index": 37, "position": [180, 191], "orth": "transversal", "lexemes": [{"lemma": "transversal", "mstag": "ADJ", "disamb": true}]}, {"index": 38, "position": [192, 192], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 39, "position": [193, 196], "orth": "Los", "lexemes": [{"lemma": "el", "mstag": "DET", "disamb": true}]}, {"index": 40, "position": [197, 208], "orth": "especímenes", "lexemes": [{"lemma": "especímén", "mstag": "NOUN", "disamb": true}]}, {"index": 41, "position": [209, 216], "orth": "jóvenes", "lexemes": [{"lemma": "joven", "mstag": "ADJ", "disamb": true}]}, {"index": 42, "position": [217, 223], "orth": "tienen", "lexemes": [{"lemma": "tener", "mstag": "VERB", "disamb": true}]}, {"index": 43, "position": [224, 228], "orth": "ocho", "lexemes": [{"lemma": "ocho", "mstag": "NUM", "disamb": true}]}, {"index": 44, "position": [229, 239], "orth": "tentáculos", "lexemes": [{"lemma": "tentáculo", "mstag": "NOUN", "disamb": true}]}, {"index": 45, "position": [240, 242], "orth": "en", "lexemes": [{"lemma": "en", "mstag": "ADP", "disamb": true}]}, {"index": 46, "position": [243, 245], "orth": "el", "lexemes": [{"lemma": "el", "mstag": "DET", "disamb": true}]}, {"index": 47, "position": [246, 251], "orth": "borde", "lexemes": [{"lemma": "borde", "mstag": "NOUN", "disamb": true}]}, {"index": 48, "position": [252, 256], "orth": "pero", "lexemes": [{"lemma": "pero", "mstag": "CCONJ", "disamb": true}]}, {"index": 49, "position": [257, 260], "orth": "los", "lexemes": [{"lemma": "el", "mstag": "DET", "disamb": true}]}, {"index": 50, "position": [261, 268], "orth": "adultos", 
"lexemes": [{"lemma": "adulto", "mstag": "NOUN", "disamb": true}]}, {"index": 51, "position": [269, 275], "orth": "llegan", "lexemes": [{"lemma": "llegar", "mstag": "VERB", "disamb": true}]}, {"index": 52, "position": [276, 277], "orth": "a", "lexemes": [{"lemma": "a", "mstag": "ADP", "disamb": true}]}, {"index": 53, "position": [278, 283], "orth": "tener", "lexemes": [{"lemma": "tener", "mstag": "VERB", "disamb": true}]}, {"index": 54, "position": [284, 289], "orth": "hasta", "lexemes": [{"lemma": "hasta", "mstag": "ADP", "disamb": true}]}, {"index": 55, "position": [290, 297], "orth": "noventa", "lexemes": [{"lemma": "noventa", "mstag": "NUM", "disamb": true}]}, {"index": 56, "position": [298, 308], "orth": "tentáculos", "lexemes": [{"lemma": "tentáculo", "mstag": "NOUN", "disamb": true}]}, {"index": 57, "position": [309, 309], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 58, "position": [310, 313], "orth": "Los", "lexemes": [{"lemma": "el", "mstag": "DET", "disamb": true}]}, {"index": 59, "position": [314, 320], "orth": "huevos", "lexemes": [{"lemma": "huevo", "mstag": "NOUN", "disamb": true}]}, {"index": 60, "position": [321, 333], "orth": "fertilizados", "lexemes": [{"lemma": "fertilizado", "mstag": "ADJ", "disamb": true}]}, {"index": 61, "position": [334, 336], "orth": "se", "lexemes": [{"lemma": "él", "mstag": "PRON", "disamb": true}]}, {"index": 62, "position": [337, 348], "orth": "desarrollan", "lexemes": [{"lemma": "desarrollar", "mstag": "VERB", "disamb": true}]}, {"index": 63, "position": [349, 351], "orth": "en", "lexemes": [{"lemma": "en", "mstag": "ADP", "disamb": true}]}, {"index": 64, "position": [352, 354], "orth": "el", "lexemes": [{"lemma": "el", "mstag": "DET", "disamb": true}]}, {"index": 65, "position": [355, 363], "orth": "estómago", "lexemes": [{"lemma": "estómago", "mstag": "NOUN", "disamb": true}]}, {"index": 66, "position": [364, 365], "orth": "y", "lexemes": [{"lemma": "y", "mstag": "CCONJ", 
"disamb": true}]}, {"index": 67, "position": [366, 368], "orth": "en", "lexemes": [{"lemma": "en", "mstag": "ADP", "disamb": true}]}, {"index": 68, "position": [369, 378], "orth": "cavidades", "lexemes": [{"lemma": "cavidad", "mstag": "NOUN", "disamb": true}]}, {"index": 69, "position": [379, 381], "orth": "de", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 70, "position": [382, 384], "orth": "la", "lexemes": [{"lemma": "el", "mstag": "DET", "disamb": true}]}, {"index": 71, "position": [385, 390], "orth": "larva", "lexemes": [{"lemma": "larva", "mstag": "NOUN", "disamb": true}]}, {"index": 72, "position": [391, 392], "orth": "(", "lexemes": [{"lemma": "(", "mstag": "PUNCT", "disamb": true}]}, {"index": 73, "position": [393, 399], "orth": "plánula", "lexemes": [{"lemma": "plánular", "mstag": "VERB", "disamb": true}]}, {"index": 74, "position": [400, 400], "orth": ")", "lexemes": [{"lemma": ")", "mstag": "PUNCT", "disamb": true}]}, {"index": 75, "position": [401, 401], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 76, "position": [402, 405], "orth": "Los", "lexemes": [{"lemma": "el", "mstag": "DET", "disamb": true}]}, {"index": 77, "position": [406, 412], "orth": "huevos", "lexemes": [{"lemma": "huevo", "mstag": "NOUN", "disamb": true}]}, {"index": 78, "position": [413, 427], "orth": "posteriormente", "lexemes": [{"lemma": "posteriormente", "mstag": "ADV", "disamb": true}]}, {"index": 79, "position": [428, 430], "orth": "se", "lexemes": [{"lemma": "él", "mstag": "PRON", "disamb": true}]}, {"index": 80, "position": [431, 438], "orth": "plantan", "lexemes": [{"lemma": "plantir", "mstag": "VERB", "disamb": true}]}, {"index": 81, "position": [439, 441], "orth": "en", "lexemes": [{"lemma": "en", "mstag": "ADP", "disamb": true}]}, {"index": 82, "position": [442, 444], "orth": "el", "lexemes": [{"lemma": "el", "mstag": "DET", "disamb": true}]}, {"index": 83, "position": [445, 450], "orth": "fondo", 
"lexemes": [{"lemma": "fondo", "mstag": "NOUN", "disamb": true}]}, {"index": 84, "position": [451, 454], "orth": "del", "lexemes": [{"lemma": "del", "mstag": "ADP", "disamb": true}]}, {"index": 85, "position": [455, 458], "orth": "mar", "lexemes": [{"lemma": "mar", "mstag": "NOUN", "disamb": true}]}, {"index": 86, "position": [459, 461], "orth": "en", "lexemes": [{"lemma": "en", "mstag": "ADP", "disamb": true}]}, {"index": 87, "position": [462, 470], "orth": "colonias", "lexemes": [{"lemma": "colonia", "mstag": "NOUN", "disamb": true}]}, {"index": 88, "position": [471, 473], "orth": "de", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 89, "position": [474, 481], "orth": "pólipos", "lexemes": [{"lemma": "pólipo", "mstag": "NOUN", "disamb": true}]}, {"index": 90, "position": [482, 482], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 91, "position": [483, 485], "orth": "La", "lexemes": [{"lemma": "el", "mstag": "DET", "disamb": true}]}, {"index": 92, "position": [486, 492], "orth": "medusa", "lexemes": [{"lemma": "medusa", "mstag": "NOUN", "disamb": true}]}, {"index": 93, "position": [493, 499], "orth": "incuba", "lexemes": [{"lemma": "incubar", "mstag": "VERB", "disamb": true}]}, {"index": 94, "position": [500, 507], "orth": "después", "lexemes": [{"lemma": "después", "mstag": "ADV", "disamb": true}]}, {"index": 95, "position": [508, 510], "orth": "de", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 96, "position": [511, 514], "orth": "dos", "lexemes": [{"lemma": "dos", "mstag": "NUM", "disamb": true}]}, {"index": 97, "position": [515, 519], "orth": "días", "lexemes": [{"lemma": "día", "mstag": "NOUN", "disamb": true}]}, {"index": 98, "position": [520, 520], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 99, "position": [521, 526], "orth": "Llega", "lexemes": [{"lemma": "llegar", "mstag": "VERB", "disamb": true}]}, {"index": 100, 
"position": [527, 528], "orth": "a", "lexemes": [{"lemma": "a", "mstag": "ADP", "disamb": true}]}, {"index": 101, "position": [529, 532], "orth": "ser", "lexemes": [{"lemma": "ser", "mstag": "AUX", "disamb": true}]}, {"index": 102, "position": [533, 539], "orth": "madura", "lexemes": [{"lemma": "madura", "mstag": "NOUN", "disamb": true}]}, {"index": 103, "position": [540, 551], "orth": "sexualmente", "lexemes": [{"lemma": "sexualmente", "mstag": "ADV", "disamb": true}]}, {"index": 104, "position": [552, 559], "orth": "después", "lexemes": [{"lemma": "después", "mstag": "ADV", "disamb": true}]}, {"index": 105, "position": [560, 562], "orth": "de", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 106, "position": [563, 568], "orth": "pocas", "lexemes": [{"lemma": "poco", "mstag": "DET", "disamb": true}]}, {"index": 107, "position": [569, 576], "orth": "semanas", "lexemes": [{"lemma": "semana", "mstag": "NOUN", "disamb": true}]}, {"index": 108, "position": [577, 578], "orth": "(", "lexemes": [{"lemma": "(", "mstag": "PUNCT", "disamb": true}]}, {"index": 109, "position": [579, 580], "orth": "su", "lexemes": [{"lemma": "su", "mstag": "DET", "disamb": true}]}, {"index": 110, "position": [581, 589], "orth": "duración", "lexemes": [{"lemma": "duración", "mstag": "NOUN", "disamb": true}]}, {"index": 111, "position": [590, 596], "orth": "exacta", "lexemes": [{"lemma": "exacto", "mstag": "ADJ", "disamb": true}]}, {"index": 112, "position": [597, 604], "orth": "depende", "lexemes": [{"lemma": "depender", "mstag": "VERB", "disamb": true}]}, {"index": 113, "position": [605, 607], "orth": "de", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 114, "position": [608, 610], "orth": "la", "lexemes": [{"lemma": "el", "mstag": "DET", "disamb": true}]}, {"index": 115, "position": [611, 622], "orth": "temperatura", "lexemes": [{"lemma": "temperatura", "mstag": "NOUN", "disamb": true}]}, {"index": 116, "position": [623, 625], "orth": "de", 
"lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 117, "position": [626, 629], "orth": "las", "lexemes": [{"lemma": "el", "mstag": "DET", "disamb": true}]}, {"index": 118, "position": [630, 635], "orth": "aguas", "lexemes": [{"lemma": "agua", "mstag": "NOUN", "disamb": true}]}, {"index": 119, "position": [636, 636], "orth": ":", "lexemes": [{"lemma": ":", "mstag": "PUNCT", "disamb": true}]}, {"index": 120, "position": [637, 638], "orth": "a", "lexemes": [{"lemma": "a", "mstag": "ADP", "disamb": true}]}, {"index": 121, "position": [639, 641], "orth": "20", "lexemes": [{"lemma": "20", "mstag": "NUM", "disamb": true}]}, {"index": 122, "position": [642, 644], "orth": "°C", "lexemes": [{"lemma": "°C", "mstag": "PROPN", "disamb": true}]}, {"index": 123, "position": [645, 650], "orth": "entre", "lexemes": [{"lemma": "entre", "mstag": "ADP", "disamb": true}]}, {"index": 124, "position": [651, 662], "orth": "veinticinco", "lexemes": [{"lemma": "veinticinco", "mstag": "NUM", "disamb": true}]}, {"index": 125, "position": [663, 664], "orth": "a", "lexemes": [{"lemma": "a", "mstag": "ADP", "disamb": true}]}, {"index": 126, "position": [665, 672], "orth": "treinta", "lexemes": [{"lemma": "treinta", "mstag": "NUM", "disamb": true}]}, {"index": 127, "position": [673, 677], "orth": "días", "lexemes": [{"lemma": "día", "mstag": "NOUN", "disamb": true}]}, {"index": 128, "position": [678, 679], "orth": "y", "lexemes": [{"lemma": "y", "mstag": "CCONJ", "disamb": true}]}, {"index": 129, "position": [680, 681], "orth": "a", "lexemes": [{"lemma": "a", "mstag": "ADP", "disamb": true}]}, {"index": 130, "position": [682, 684], "orth": "22", "lexemes": [{"lemma": "22", "mstag": "NUM", "disamb": true}]}, {"index": 131, "position": [685, 687], "orth": "°C", "lexemes": [{"lemma": "°C", "mstag": "PROPN", "disamb": true}]}, {"index": 132, "position": [688, 690], "orth": "de", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 133, "position": [691, 
700], "orth": "dieciocho", "lexemes": [{"lemma": "dieciocho", "mstag": "NUM", "disamb": true}]}, {"index": 134, "position": [701, 702], "orth": "a", "lexemes": [{"lemma": "a", "mstag": "ADP", "disamb": true}]}, {"index": 135, "position": [703, 712], "orth": "veintidós", "lexemes": [{"lemma": "veintidós", "mstag": "DET", "disamb": true}]}, {"index": 136, "position": [713, 717], "orth": "días", "lexemes": [{"lemma": "día", "mstag": "NOUN", "disamb": true}]}, {"index": 137, "position": [718, 718], "orth": ")", "lexemes": [{"lemma": ")", "mstag": "PUNCT", "disamb": true}]}, {"index": 138, "position": [719, 719], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 139, "position": [720, 720], "orth": "\n", "lexemes": [{"lemma": "\n", "mstag": "SPACE", "disamb": true}]}], "text": "Tiene un diámetro de 4-5 mm. Su figura es alta y acampanada con paredes finas y uniformes. Su gran estómago (cavidad gastrovascular), rojo vivo, tiene forma cruciforme en su corte transversal. Los especímenes jóvenes tienen ocho tentáculos en el borde pero los adultos llegan a tener hasta noventa tentáculos. Los huevos fertilizados se desarrollan en el estómago y en cavidades de la larva (plánula). Los huevos posteriormente se plantan en el fondo del mar en colonias de pólipos. La medusa incuba después de dos días. Llega a ser madura sexualmente después de pocas semanas (su duración exacta depende de la temperatura de las aguas: a 20 °C entre veinticinco a treinta días y a 22 °C de dieciocho a veintidós días).\n"}
diff --git a/tests/testdata/expected/turritopsis_nutricula_fr.json b/tests/testdata/expected/turritopsis_nutricula_fr.json
new file mode 100644
index 0000000..152444e
--- /dev/null
+++ b/tests/testdata/expected/turritopsis_nutricula_fr.json
@@ -0,0 +1 @@
+{"filename": "df1f4f2c-a675-4c7b-be4f-3e751f969153", "tagset": "ud", "tokens": [{"index": 1, "position": [0, 5], "orth": "Cette", "lexemes": [{"lemma": "ce", "mstag": "DET", "disamb": true}]}, {"index": 2, "position": [6, 12], "orth": "méduse", "lexemes": [{"lemma": "méduse", "mstag": "NOUN", "disamb": true}]}, {"index": 3, "position": [13, 16], "orth": "est", "lexemes": [{"lemma": "être", "mstag": "AUX", "disamb": true}]}, {"index": 4, "position": [17, 24], "orth": "capable", "lexemes": [{"lemma": "capable", "mstag": "ADJ", "disamb": true}]}, {"index": 5, "position": [25, 27], "orth": "d’", "lexemes": [{"lemma": "d’", "mstag": "ADV", "disamb": true}]}, {"index": 6, "position": [28, 35], "orth": "inverser", "lexemes": [{"lemma": "inverser", "mstag": "VERB", "disamb": true}]}, {"index": 7, "position": [36, 39], "orth": "son", "lexemes": [{"lemma": "son", "mstag": "DET", "disamb": true}]}, {"index": 8, "position": [40, 49], "orth": "processus", "lexemes": [{"lemma": "processus", "mstag": "NOUN", "disamb": true}]}, {"index": 9, "position": [50, 52], "orth": "de", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 10, "position": [53, 67], "orth": "vieillissement", "lexemes": [{"lemma": "vieillissement", "mstag": "NOUN", "disamb": true}]}, {"index": 11, "position": [68, 69], "orth": "a", "lexemes": [{"lemma": "avoir", "mstag": "AUX", "disamb": true}]}, {"index": 12, "position": [70, 76], "orth": "priori", "lexemes": [{"lemma": "priori", "mstag": "X", "disamb": true}]}, {"index": 13, "position": [77, 89], "orth": "indéfiniment", "lexemes": [{"lemma": "indéfiniment", "mstag": "ADV", "disamb": true}]}, {"index": 14, "position": [90, 90], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 15, "position": [91, 96], "orth": "grâce", "lexemes": [{"lemma": "grâce", "mstag": "NOUN", "disamb": true}]}, {"index": 16, "position": [97, 99], "orth": "au", "lexemes": [{"lemma": "au", "mstag": "DET", "disamb": true}]}, 
{"index": 17, "position": [100, 109], "orth": "processus", "lexemes": [{"lemma": "processus", "mstag": "NOUN", "disamb": true}]}, {"index": 18, "position": [110, 112], "orth": "de", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 19, "position": [113, 133], "orth": "transdifférenciation", "lexemes": [{"lemma": "transdifférenciation", "mstag": "NOUN", "disamb": true}]}, {"index": 20, "position": [134, 134], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 21, "position": [135, 142], "orth": "Presque", "lexemes": [{"lemma": "presque", "mstag": "ADV", "disamb": true}]}, {"index": 22, "position": [143, 149], "orth": "toutes", "lexemes": [{"lemma": "tout", "mstag": "ADJ", "disamb": true}]}, {"index": 23, "position": [150, 153], "orth": "les", "lexemes": [{"lemma": "le", "mstag": "DET", "disamb": true}]}, {"index": 24, "position": [154, 161], "orth": "méduses", "lexemes": [{"lemma": "méduse", "mstag": "NOUN", "disamb": true}]}, {"index": 25, "position": [162, 169], "orth": "peuvent", "lexemes": [{"lemma": "pouvoir", "mstag": "VERB", "disamb": true}]}, {"index": 26, "position": [170, 172], "orth": "se", "lexemes": [{"lemma": "se", "mstag": "PRON", "disamb": true}]}, {"index": 27, "position": [173, 183], "orth": "multiplier", "lexemes": [{"lemma": "multiplier", "mstag": "VERB", "disamb": true}]}, {"index": 28, "position": [184, 187], "orth": "par", "lexemes": [{"lemma": "par", "mstag": "ADP", "disamb": true}]}, {"index": 29, "position": [188, 195], "orth": "clonage", "lexemes": [{"lemma": "clonage", "mstag": "NOUN", "disamb": true}]}, {"index": 30, "position": [196, 203], "orth": "pendant", "lexemes": [{"lemma": "pendant", "mstag": "ADP", "disamb": true}]}, {"index": 31, "position": [204, 208], "orth": "leur", "lexemes": [{"lemma": "leur", "mstag": "DET", "disamb": true}]}, {"index": 32, "position": [209, 214], "orth": "stade", "lexemes": [{"lemma": "stade", "mstag": "NOUN", "disamb": true}]}, {"index": 33, 
"position": [215, 221], "orth": "polype", "lexemes": [{"lemma": "polyp", "mstag": "ADJ", "disamb": true}]}, {"index": 34, "position": [222, 223], "orth": "(", "lexemes": [{"lemma": "(", "mstag": "PUNCT", "disamb": true}]}, {"index": 35, "position": [224, 226], "orth": "tel", "lexemes": [{"lemma": "tel", "mstag": "ADJ", "disamb": true}]}, {"index": 36, "position": [227, 229], "orth": "le", "lexemes": [{"lemma": "le", "mstag": "DET", "disamb": true}]}, {"index": 37, "position": [230, 244], "orth": "bourgeonnement", "lexemes": [{"lemma": "bourgeonnement", "mstag": "NOUN", "disamb": true}]}, {"index": 38, "position": [245, 248], "orth": "des", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 39, "position": [249, 261], "orth": "hydrozoaires", "lexemes": [{"lemma": "hydrozoaire", "mstag": "NOUN", "disamb": true}]}, {"index": 40, "position": [262, 262], "orth": ")", "lexemes": [{"lemma": ")", "mstag": "PUNCT", "disamb": true}]}, {"index": 41, "position": [263, 263], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 42, "position": [264, 266], "orth": "ce", "lexemes": [{"lemma": "ce", "mstag": "PRON", "disamb": true}]}, {"index": 43, "position": [267, 270], "orth": "qui", "lexemes": [{"lemma": "qui", "mstag": "PRON", "disamb": true}]}, {"index": 44, "position": [271, 274], "orth": "est", "lexemes": [{"lemma": "être", "mstag": "AUX", "disamb": true}]}, {"index": 45, "position": [275, 280], "orth": "aussi", "lexemes": [{"lemma": "aussi", "mstag": "ADV", "disamb": true}]}, {"index": 46, "position": [281, 284], "orth": "une", "lexemes": [{"lemma": "un", "mstag": "DET", "disamb": true}]}, {"index": 47, "position": [285, 293], "orth": "certaine", "lexemes": [{"lemma": "certaine", "mstag": "NOUN", "disamb": true}]}, {"index": 48, "position": [294, 299], "orth": "forme", "lexemes": [{"lemma": "form", "mstag": "ADJ", "disamb": true}]}, {"index": 49, "position": [300, 302], "orth": "d'", "lexemes": [{"lemma": "de", 
"mstag": "ADP", "disamb": true}]}, {"index": 50, "position": [303, 313], "orth": "immortalité", "lexemes": [{"lemma": "immortalité", "mstag": "NOUN", "disamb": true}]}, {"index": 51, "position": [314, 314], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 52, "position": [315, 317], "orth": "Du", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 53, "position": [318, 322], "orth": "fait", "lexemes": [{"lemma": "fait", "mstag": "NOUN", "disamb": true}]}, {"index": 54, "position": [323, 325], "orth": "de", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 55, "position": [326, 329], "orth": "ses", "lexemes": [{"lemma": "son", "mstag": "DET", "disamb": true}]}, {"index": 56, "position": [330, 346], "orth": "caractéristiques", "lexemes": [{"lemma": "caractéristique", "mstag": "NOUN", "disamb": true}]}, {"index": 57, "position": [347, 362], "orth": "exceptionnelles", "lexemes": [{"lemma": "exceptionnel", "mstag": "ADJ", "disamb": true}]}, {"index": 58, "position": [363, 363], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 59, "position": [364, 368], "orth": "elle", "lexemes": [{"lemma": "lui", "mstag": "PRON", "disamb": true}]}, {"index": 60, "position": [369, 373], "orth": "fait", "lexemes": [{"lemma": "faire", "mstag": "VERB", "disamb": true}]}, {"index": 61, "position": [374, 376], "orth": "l’", "lexemes": [{"lemma": "l’", "mstag": "SPACE", "disamb": true}]}, {"index": 62, "position": [377, 381], "orth": "objet", "lexemes": [{"lemma": "objet", "mstag": "ADJ", "disamb": true}]}, {"index": 63, "position": [382, 384], "orth": "d'", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 64, "position": [385, 390], "orth": "études", "lexemes": [{"lemma": "étude", "mstag": "NOUN", "disamb": true}]}, {"index": 65, "position": [391, 394], "orth": "par", "lexemes": [{"lemma": "par", "mstag": "ADP", "disamb": true}]}, {"index": 66, 
"position": [395, 398], "orth": "les", "lexemes": [{"lemma": "le", "mstag": "DET", "disamb": true}]}, {"index": 67, "position": [399, 410], "orth": "biologistes", "lexemes": [{"lemma": "biologiste", "mstag": "NOUN", "disamb": true}]}, {"index": 68, "position": [411, 413], "orth": "et", "lexemes": [{"lemma": "et", "mstag": "CCONJ", "disamb": true}]}, {"index": 69, "position": [414, 417], "orth": "les", "lexemes": [{"lemma": "le", "mstag": "DET", "disamb": true}]}, {"index": 70, "position": [418, 429], "orth": "généticiens", "lexemes": [{"lemma": "généticien", "mstag": "NOUN", "disamb": true}]}, {"index": 71, "position": [430, 430], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 72, "position": [431, 433], "orth": "Le", "lexemes": [{"lemma": "le", "mstag": "DET", "disamb": true}]}, {"index": 73, "position": [434, 443], "orth": "chercheur", "lexemes": [{"lemma": "chercheur", "mstag": "NOUN", "disamb": true}]}, {"index": 74, "position": [444, 452], "orth": "japonais", "lexemes": [{"lemma": "japoner", "mstag": "VERB", "disamb": true}]}, {"index": 75, "position": [453, 457], "orth": "Shin", "lexemes": [{"lemma": "Shin", "mstag": "PROPN", "disamb": true}]}, {"index": 76, "position": [458, 464], "orth": "Kubota", "lexemes": [{"lemma": "Kubota", "mstag": "PROPN", "disamb": true}]}, {"index": 77, "position": [465, 466], "orth": "a", "lexemes": [{"lemma": "avoir", "mstag": "VERB", "disamb": true}]}, {"index": 78, "position": [467, 469], "orth": "d'", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 79, "position": [470, 477], "orth": "ailleurs", "lexemes": [{"lemma": "ailleurs", "mstag": "ADV", "disamb": true}]}, {"index": 80, "position": [478, 485], "orth": "observé", "lexemes": [{"lemma": "observer", "mstag": "VERB", "disamb": true}]}, {"index": 81, "position": [486, 488], "orth": "en", "lexemes": [{"lemma": "en", "mstag": "ADP", "disamb": true}]}, {"index": 82, "position": [489, 493], "orth": "2011", 
"lexemes": [{"lemma": "2011", "mstag": "NUM", "disamb": true}]}, {"index": 83, "position": [494, 496], "orth": "ce", "lexemes": [{"lemma": "ce", "mstag": "DET", "disamb": true}]}, {"index": 84, "position": [497, 511], "orth": "rajeunissement", "lexemes": [{"lemma": "rajeunissement", "mstag": "NOUN", "disamb": true}]}, {"index": 85, "position": [512, 513], "orth": "à", "lexemes": [{"lemma": "à", "mstag": "ADP", "disamb": true}]}, {"index": 86, "position": [514, 517], "orth": "une", "lexemes": [{"lemma": "un", "mstag": "DET", "disamb": true}]}, {"index": 87, "position": [518, 525], "orth": "dizaine", "lexemes": [{"lemma": "dizaine", "mstag": "NOUN", "disamb": true}]}, {"index": 88, "position": [526, 528], "orth": "de", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 89, "position": [529, 537], "orth": "reprises", "lexemes": [{"lemma": "reprise", "mstag": "NOUN", "disamb": true}]}, {"index": 90, "position": [538, 538], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}], "text": "Cette méduse est capable d’inverser son processus de vieillissement a priori indéfiniment, grâce au processus de transdifférenciation. Presque toutes les méduses peuvent se multiplier par clonage pendant leur stade polype (tel le bourgeonnement des hydrozoaires), ce qui est aussi une certaine forme d'immortalité. Du fait de ses caractéristiques exceptionnelles, elle fait l’objet d'études par les biologistes et les généticiens. Le chercheur japonais Shin Kubota a d'ailleurs observé en 2011 ce rajeunissement à une dizaine de reprises."}
diff --git a/tests/testdata/expected/turritopsis_nutricula_pt.json b/tests/testdata/expected/turritopsis_nutricula_pt.json
new file mode 100644
index 0000000..7c5c259
--- /dev/null
+++ b/tests/testdata/expected/turritopsis_nutricula_pt.json
@@ -0,0 +1 @@
+{"filename": "54553d40-690a-4faf-9dbf-2c9476412e02", "tagset": "ud", "tokens": [{"index": 1, "position": [0, 11], "orth": "Turritopsis", "lexemes": [{"lemma": "Turritopsis", "mstag": "NOUN", "disamb": true}]}, {"index": 2, "position": [12, 21], "orth": "nutricula", "lexemes": [{"lemma": "nutriculo", "mstag": "PROPN", "disamb": true}]}, {"index": 3, "position": [22, 23], "orth": "é", "lexemes": [{"lemma": "ser", "mstag": "AUX", "disamb": true}]}, {"index": 4, "position": [24, 26], "orth": "um", "lexemes": [{"lemma": "um", "mstag": "DET", "disamb": true}]}, {"index": 5, "position": [27, 38], "orth": "hidrozoário", "lexemes": [{"lemma": "hidrozoário", "mstag": "NOUN", "disamb": true}]}, {"index": 6, "position": [39, 39], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 7, "position": [40, 42], "orth": "As", "lexemes": [{"lemma": "o", "mstag": "DET", "disamb": true}]}, {"index": 8, "position": [43, 44], "orth": "\"", "lexemes": [{"lemma": "\"", "mstag": "SPACE", "disamb": true}]}, {"index": 9, "position": [45, 55], "orth": "águas-vivas", "lexemes": [{"lemma": "águas-viva", "mstag": "NOUN", "disamb": true}]}, {"index": 10, "position": [56, 64], "orth": "imortais", "lexemes": [{"lemma": "imortal", "mstag": "ADJ", "disamb": true}]}, {"index": 11, "position": [65, 65], "orth": "\"", "lexemes": [{"lemma": "\"", "mstag": "SPACE", "disamb": true}]}, {"index": 12, "position": [66, 66], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 13, "position": [67, 70], "orth": "com", "lexemes": [{"lemma": "com", "mstag": "ADP", "disamb": true}]}, {"index": 14, "position": [71, 73], "orth": "um", "lexemes": [{"lemma": "um", "mstag": "DET", "disamb": true}]}, {"index": 15, "position": [74, 79], "orth": "ciclo", "lexemes": [{"lemma": "ciclo", "mstag": "NOUN", "disamb": true}]}, {"index": 16, "position": [80, 82], "orth": "de", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 17, "position": 
[83, 87], "orth": "vida", "lexemes": [{"lemma": "vida", "mstag": "NOUN", "disamb": true}]}, {"index": 18, "position": [88, 90], "orth": "no", "lexemes": [{"lemma": "em o", "mstag": "ADP", "disamb": true}]}, {"index": 19, "position": [91, 95], "orth": "qual", "lexemes": [{"lemma": "qual", "mstag": "PRON", "disamb": true}]}, {"index": 20, "position": [96, 106], "orth": "reverte-se", "lexemes": [{"lemma": "reverte-se", "mstag": "VERB", "disamb": true}]}, {"index": 21, "position": [107, 109], "orth": "ao", "lexemes": [{"lemma": "a o", "mstag": "ADP", "disamb": true}]}, {"index": 22, "position": [110, 117], "orth": "estágio", "lexemes": [{"lemma": "estágio", "mstag": "NOUN", "disamb": true}]}, {"index": 23, "position": [118, 120], "orth": "de", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 24, "position": [121, 127], "orth": "pólipo", "lexemes": [{"lemma": "pólipo", "mstag": "NOUN", "disamb": true}]}, {"index": 25, "position": [128, 132], "orth": "após", "lexemes": [{"lemma": "após", "mstag": "SCONJ", "disamb": true}]}, {"index": 26, "position": [133, 142], "orth": "chegar-se", "lexemes": [{"lemma": "chegar se", "mstag": "VERB", "disamb": true}]}, {"index": 27, "position": [143, 144], "orth": "a", "lexemes": [{"lemma": "o", "mstag": "DET", "disamb": true}]}, {"index": 28, "position": [145, 155], "orth": "maturidade", "lexemes": [{"lemma": "maturidade", "mstag": "NOUN", "disamb": true}]}, {"index": 29, "position": [156, 162], "orth": "sexual", "lexemes": [{"lemma": "sexual", "mstag": "ADJ", "disamb": true}]}, {"index": 30, "position": [163, 163], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 31, "position": [164, 167], "orth": "são", "lexemes": [{"lemma": "ser", "mstag": "AUX", "disamb": true}]}, {"index": 32, "position": [168, 169], "orth": "o", "lexemes": [{"lemma": "o", "mstag": "DET", "disamb": true}]}, {"index": 33, "position": [170, 175], "orth": "único", "lexemes": [{"lemma": "único", "mstag": 
"ADJ", "disamb": true}]}, {"index": 34, "position": [176, 180], "orth": "caso", "lexemes": [{"lemma": "caso", "mstag": "NOUN", "disamb": true}]}, {"index": 35, "position": [181, 190], "orth": "conhecido", "lexemes": [{"lemma": "conhecer", "mstag": "VERB", "disamb": true}]}, {"index": 36, "position": [191, 193], "orth": "de", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 37, "position": [194, 196], "orth": "um", "lexemes": [{"lemma": "um", "mstag": "DET", "disamb": true}]}, {"index": 38, "position": [197, 203], "orth": "animal", "lexemes": [{"lemma": "animal", "mstag": "NOUN", "disamb": true}]}, {"index": 39, "position": [204, 212], "orth": "ferfóide", "lexemes": [{"lemma": "ferfóide", "mstag": "ADJ", "disamb": true}]}, {"index": 40, "position": [213, 214], "orth": "(", "lexemes": [{"lemma": "(", "mstag": "PUNCT", "disamb": true}]}, {"index": 41, "position": [215, 221], "orth": "exemplo", "lexemes": [{"lemma": "exemplo", "mstag": "NOUN", "disamb": true}]}, {"index": 42, "position": [222, 222], "orth": ":", "lexemes": [{"lemma": ":", "mstag": "PUNCT", "disamb": true}]}, {"index": 43, "position": [223, 234], "orth": "Peixe-Ferfa", "lexemes": [{"lemma": "Peixe-Ferfa", "mstag": "NOUN", "disamb": true}]}, {"index": 44, "position": [235, 235], "orth": ")", "lexemes": [{"lemma": ")", "mstag": "PUNCT", "disamb": true}]}, {"index": 45, "position": [236, 241], "orth": "capaz", "lexemes": [{"lemma": "capaz", "mstag": "ADJ", "disamb": true}]}, {"index": 46, "position": [242, 244], "orth": "de", "lexemes": [{"lemma": "de", "mstag": "SCONJ", "disamb": true}]}, {"index": 47, "position": [245, 253], "orth": "reverter", "lexemes": [{"lemma": "reverter", "mstag": "VERB", "disamb": true}]}, {"index": 48, "position": [254, 267], "orth": "completamente", "lexemes": [{"lemma": "completamente", "mstag": "ADV", "disamb": true}]}, {"index": 49, "position": [268, 269], "orth": "a", "lexemes": [{"lemma": "a", "mstag": "ADP", "disamb": true}]}, {"index": 50, 
"position": [270, 272], "orth": "um", "lexemes": [{"lemma": "um", "mstag": "DET", "disamb": true}]}, {"index": 51, "position": [273, 280], "orth": "estágio", "lexemes": [{"lemma": "estágio", "mstag": "NOUN", "disamb": true}]}, {"index": 52, "position": [281, 283], "orth": "de", "lexemes": [{"lemma": "de", "mstag": "ADP", "disamb": true}]}, {"index": 53, "position": [284, 295], "orth": "imaturidade", "lexemes": [{"lemma": "imaturidade", "mstag": "NOUN", "disamb": true}]}, {"index": 54, "position": [296, 302], "orth": "sexual", "lexemes": [{"lemma": "sexual", "mstag": "ADJ", "disamb": true}]}, {"index": 55, "position": [303, 303], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 56, "position": [304, 308], "orth": "Elas", "lexemes": [{"lemma": "elas", "mstag": "PRON", "disamb": true}]}, {"index": 57, "position": [309, 316], "orth": "estavam", "lexemes": [{"lemma": "estar", "mstag": "AUX", "disamb": true}]}, {"index": 58, "position": [317, 330], "orth": "anteriormente", "lexemes": [{"lemma": "anteriormente", "mstag": "ADV", "disamb": true}]}, {"index": 59, "position": [331, 344], "orth": "classificadas", "lexemes": [{"lemma": "classificar", "mstag": "VERB", "disamb": true}]}, {"index": 60, "position": [345, 350], "orth": "nessa", "lexemes": [{"lemma": "em esse", "mstag": "ADP", "disamb": true}]}, {"index": 61, "position": [351, 358], "orth": "espécie", "lexemes": [{"lemma": "espécie", "mstag": "NOUN", "disamb": true}]}, {"index": 62, "position": [359, 359], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 63, "position": [360, 361], "orth": "e", "lexemes": [{"lemma": "e", "mstag": "CCONJ", "disamb": true}]}, {"index": 64, "position": [362, 365], "orth": "são", "lexemes": [{"lemma": "ser", "mstag": "AUX", "disamb": true}]}, {"index": 65, "position": [366, 379], "orth": "classificadas", "lexemes": [{"lemma": "classificar", "mstag": "VERB", "disamb": true}]}, {"index": 66, "position": [380, 390], 
"orth": "atualmente", "lexemes": [{"lemma": "atualmente", "mstag": "ADV", "disamb": true}]}, {"index": 67, "position": [391, 395], "orth": "como", "lexemes": [{"lemma": "como", "mstag": "ADP", "disamb": true}]}, {"index": 68, "position": [396, 407], "orth": "Turritopsis", "lexemes": [{"lemma": "Turritopsi", "mstag": "NOUN", "disamb": true}]}, {"index": 69, "position": [408, 415], "orth": "dohrnii", "lexemes": [{"lemma": "dohrnii", "mstag": "NOUN", "disamb": true}]}, {"index": 70, "position": [416, 416], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 71, "position": [417, 417], "orth": "\n", "lexemes": [{"lemma": "\n", "mstag": "SPACE", "disamb": true}]}], "text": "Turritopsis nutricula é um hidrozoário. As \"águas-vivas imortais\", com um ciclo de vida no qual reverte-se ao estágio de pólipo após chegar-se a maturidade sexual, são o único caso conhecido de um animal ferfóide (exemplo: Peixe-Ferfa) capaz de reverter completamente a um estágio de imaturidade sexual. Elas estavam anteriormente classificadas nessa espécie, e são classificadas atualmente como Turritopsis dohrnii.\n"}
diff --git a/tests/testdata/input/elasmucha_grisea_ru.txt b/tests/testdata/input/elasmucha_grisea_ru.txt
new file mode 100644
index 0000000..25b41ea
--- /dev/null
+++ b/tests/testdata/input/elasmucha_grisea_ru.txt
@@ -0,0 +1 @@
+Для вида зафиксирована материнская забота (поведение по охране яиц и личинок-нимф). После яйцекладки самка стоит над кладкой яиц и защищает её на протяжении всего развития яиц. Репертуар защитного поведения самок, стоящих над пакетом яиц, включает в себя взмахи крыльями, подёргивание тела, наклон в сторону врага и, наконец, выделение неприятных запахов из ароматических желёз. Иногда самки совместно охраняют свои кладки бок о бок на одном и том же листе берёзы, что увеличивает шансы потомства на выживание. Эти насекомые служат примером самых ранних стадий эусоциальности.
diff --git a/tests/testdata/input/turritopsis_nutricula_de.txt b/tests/testdata/input/turritopsis_nutricula_de.txt
new file mode 100644
index 0000000..47b0595
--- /dev/null
+++ b/tests/testdata/input/turritopsis_nutricula_de.txt
@@ -0,0 +1 @@
+Turritopsis nutricula bildet aufrechte, verzweigte Hydroiden-Kolonien. Die Polypenköpfchen (Hydranthen) sind spindel- bis keulenförmig, ihre fadenförmigen Tentakel sind unregelmäßig über den Hydranthenkörper verteilt. Die Hülle (Periderm) um den Stiel (Hydrocaulus) ist zweilagig. Die Medusenknospen (Gonophoren) entwickeln sich auf den Stielen in einer mit Perisarc eingehüllten Region.
diff --git a/tests/testdata/input/turritopsis_nutricula_es.txt b/tests/testdata/input/turritopsis_nutricula_es.txt
new file mode 100644
index 0000000..9f6898e
--- /dev/null
+++ b/tests/testdata/input/turritopsis_nutricula_es.txt
@@ -0,0 +1 @@
+Tiene un diámetro de 4-5 mm. Su figura es alta y acampanada con paredes finas y uniformes. Su gran estómago (cavidad gastrovascular), rojo vivo, tiene forma cruciforme en su corte transversal. Los especímenes jóvenes tienen ocho tentáculos en el borde pero los adultos llegan a tener hasta noventa tentáculos. Los huevos fertilizados se desarrollan en el estómago y en cavidades de la larva (plánula). Los huevos posteriormente se plantan en el fondo del mar en colonias de pólipos. La medusa incuba después de dos días. Llega a ser madura sexualmente después de pocas semanas (su duración exacta depende de la temperatura de las aguas: a 20 °C entre veinticinco a treinta días y a 22 °C de dieciocho a veintidós días).
diff --git a/tests/testdata/input/turritopsis_nutricula_fr.txt b/tests/testdata/input/turritopsis_nutricula_fr.txt
new file mode 100644
index 0000000..0896689
--- /dev/null
+++ b/tests/testdata/input/turritopsis_nutricula_fr.txt
@@ -0,0 +1 @@
+Cette méduse est capable d’inverser son processus de vieillissement a priori indéfiniment, grâce au processus de transdifférenciation. Presque toutes les méduses peuvent se multiplier par clonage pendant leur stade polype (tel le bourgeonnement des hydrozoaires), ce qui est aussi une certaine forme d'immortalité. Du fait de ses caractéristiques exceptionnelles, elle fait l’objet d'études par les biologistes et les généticiens. Le chercheur japonais Shin Kubota a d'ailleurs observé en 2011 ce rajeunissement à une dizaine de reprises.
\ No newline at end of file
diff --git a/tests/testdata/input/turritopsis_nutricula_pt.txt b/tests/testdata/input/turritopsis_nutricula_pt.txt
new file mode 100644
index 0000000..bc296fe
--- /dev/null
+++ b/tests/testdata/input/turritopsis_nutricula_pt.txt
@@ -0,0 +1 @@
+Turritopsis nutricula é um hidrozoário. As "águas-vivas imortais", com um ciclo de vida no qual reverte-se ao estágio de pólipo após chegar-se a maturidade sexual, são o único caso conhecido de um animal ferfóide (exemplo: Peixe-Ferfa) capaz de reverter completamente a um estágio de imaturidade sexual. Elas estavam anteriormente classificadas nessa espécie, e são classificadas atualmente como Turritopsis dohrnii.
-- 
GitLab