From bf05a41593e943b498055048c2160b22bf3c6bff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pogoda?= <mipo57@e-science.pl> Date: Tue, 7 Mar 2023 08:20:57 +0100 Subject: [PATCH] [WIP] Style + logic updates, added output to txt --- .../wiktorner_jsonl_txt_output.yaml | 15 ++++ config/pipeline/sequential_jsonl.yaml | 3 +- src/detectors/date/utils.py | 82 +++++++++---------- src/pipeline/sequential_jsonl.py | 15 +++- .../__init__.py | 0 ...iktorner_jsonl_txt_output_configuration.py | 20 +++++ .../wiktorner_jsonl.jsonl | 2 + tests/unit/detectors/date/test_pl.py | 25 ++++++ 8 files changed, 119 insertions(+), 43 deletions(-) create mode 100644 config/configuration/wiktorner_jsonl_txt_output.yaml create mode 100644 tests/integration/wiktorner_jsonl_txt_output_configuration/__init__.py create mode 100644 tests/integration/wiktorner_jsonl_txt_output_configuration/test_wiktorner_jsonl_txt_output_configuration.py create mode 100644 tests/integration/wiktorner_jsonl_txt_output_configuration/wiktorner_jsonl.jsonl diff --git a/config/configuration/wiktorner_jsonl_txt_output.yaml b/config/configuration/wiktorner_jsonl_txt_output.yaml new file mode 100644 index 0000000..6d001f2 --- /dev/null +++ b/config/configuration/wiktorner_jsonl_txt_output.yaml @@ -0,0 +1,15 @@ +# @package _global_ + +defaults: + - /paths: default + - /detectors: all_ner_kpwr_ext + - /replacers: tag + - /suppressor: order_based + - /input_parser: wiktor_ner + - /pipeline: sequential_jsonl + - _self_ + +pipeline: + concat_to_txt: true + +language: "pl" \ No newline at end of file diff --git a/config/pipeline/sequential_jsonl.yaml b/config/pipeline/sequential_jsonl.yaml index 4ee4802..033e220 100644 --- a/config/pipeline/sequential_jsonl.yaml +++ b/config/pipeline/sequential_jsonl.yaml @@ -2,4 +2,5 @@ _target_: src.pipeline.sequential_jsonl.SequentialJSONLPipeline input_parser: ${input_parser} detectors: ${detectors} suppressor: ${suppressor} -replacers: ${replacers} \ No newline at end of file +replacers: ${replacers} +concat_to_txt: false \ No newline at end of file diff --git a/src/detectors/date/utils.py b/src/detectors/date/utils.py index 5e0846e..5ca9d02 100644 --- a/src/detectors/date/utils.py +++ b/src/detectors/date/utils.py @@ -6,47 +6,47 @@ def _parse_day_or_month(re_entry) -> List[Tuple[int, int, DateDetection]]: assert re_entry["day_or_month_year"] is not None result = [] - if re_entry["day_month1"] is not None: - if len(re_entry["day_month1"]) == 1: - result.append( - ( - DateDetection.AnnotationPart.TWO_DIGITS_DAY, - "0" + re_entry["day_month1"], - ) - ) - else: - result.append( - (DateDetection.AnnotationPart.TWO_DIGITS_DAY, re_entry["day_month1"]) - ) - result.append((DateDetection.AnnotationPart.OTHER, re_entry["punct1"])) - - if len(re_entry["day_month2"]) == 1: - result.append( - ( - DateDetection.AnnotationPart.TWO_DIGIT_MONTH, - "0" + re_entry["day_month2"], - ) - ) - else: - result.append( - (DateDetection.AnnotationPart.TWO_DIGIT_MONTH, re_entry["day_month2"]) - ) - - result.append((DateDetection.AnnotationPart.OTHER, re_entry["punct1"])) - elif "day_month2" in re_entry: - if len(re_entry["day_month2"]) == 1: - result.append( - ( - DateDetection.AnnotationPart.TWO_DIGIT_MONTH, - "0" + re_entry["day_month2"], - ) - ) - else: - result.append( - (DateDetection.AnnotationPart.TWO_DIGIT_MONTH, re_entry["day_month2"]) - ) - - result.append((DateDetection.AnnotationPart.OTHER, re_entry["punct1"])) + # if re_entry["day_month1"] is not None: + if len(re_entry["day_month1"]) == 1: + result.append( + ( + DateDetection.AnnotationPart.TWO_DIGITS_DAY, + "0" + re_entry["day_month1"], + ) + ) + else: + result.append( + (DateDetection.AnnotationPart.TWO_DIGITS_DAY, re_entry["day_month1"]) + ) + result.append((DateDetection.AnnotationPart.OTHER, re_entry["punct1"])) + + if len(re_entry["day_month2"]) == 1: + result.append( + ( + DateDetection.AnnotationPart.TWO_DIGIT_MONTH, + "0" + re_entry["day_month2"], + ) + ) + else: + result.append( + (DateDetection.AnnotationPart.TWO_DIGIT_MONTH, re_entry["day_month2"]) + ) + + result.append((DateDetection.AnnotationPart.OTHER, re_entry["punct1"])) + # elif "day_month2" in re_entry: + # if len(re_entry["day_month2"]) == 1: + # result.append( + # ( + # DateDetection.AnnotationPart.TWO_DIGIT_MONTH, + # "0" + re_entry["day_month2"], + # ) + # ) + # else: + # result.append( + # (DateDetection.AnnotationPart.TWO_DIGIT_MONTH, re_entry["day_month2"]) + # ) + + # result.append((DateDetection.AnnotationPart.OTHER, re_entry["punct1"])) if "year1" in re_entry: if len(re_entry["year1"]) == 2: diff --git a/src/pipeline/sequential_jsonl.py b/src/pipeline/sequential_jsonl.py index 5f922ec..e605af0 100644 --- a/src/pipeline/sequential_jsonl.py +++ b/src/pipeline/sequential_jsonl.py @@ -14,11 +14,13 @@ class SequentialJSONLPipeline(Pipeline): detectors: Dict[str, Detector], suppressor: Suppressor, replacers: Dict[str, ReplacerInterface], + concat_to_txt: bool = False, ): self._input_parser = input_parser self._detectors = detectors self._suppressor = suppressor self._replacers = replacers + self._concat_to_txt = concat_to_txt def run(self, input_path) -> str: result = [] @@ -45,4 +47,15 @@ class SequentialJSONLPipeline(Pipeline): result.append({"text": replaced_input}) - return "\n".join([json.dumps(item, ensure_ascii=False) for item in result]) + if self._concat_to_txt: + result_text = "" + for item in result: + text = item["text"] + if result_text != "" and result_text.rstrip() == result_text and text.lstrip() == text: + result_text += " " + text + else: + result_text += text + + return result_text + else: + return "\n".join([json.dumps(item, ensure_ascii=False) for item in result]) diff --git a/tests/integration/wiktorner_jsonl_txt_output_configuration/__init__.py b/tests/integration/wiktorner_jsonl_txt_output_configuration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/integration/wiktorner_jsonl_txt_output_configuration/test_wiktorner_jsonl_txt_output_configuration.py b/tests/integration/wiktorner_jsonl_txt_output_configuration/test_wiktorner_jsonl_txt_output_configuration.py new file mode 100644 index 0000000..3cb494f --- /dev/null +++ b/tests/integration/wiktorner_jsonl_txt_output_configuration/test_wiktorner_jsonl_txt_output_configuration.py @@ -0,0 +1,20 @@ +from hydra import initialize, compose +from hydra.utils import instantiate + + +def test_wiktorner_jsonl_txt_output_configuration(): + with initialize(config_path="../../../config", version_base="1.1"): + config = compose( + config_name="config", + overrides=["paths.root_path=../../../", "configuration=wiktorner_jsonl_txt_output"], + ) + pipeline = instantiate(config.pipeline) + + result = pipeline.run( + "./tests/integration/wiktorner_jsonl_txt_output_configuration/wiktorner_jsonl.jsonl" + ) + + assert ( + result + == 'ROZDZIAÅ I. CO NIECO O SAMEJ PIPIDÓWCE Przede wszystkim muszÄ™ uprzedzić z góry czytelników, aby siÄ™ daremnie nie trudzili nad szukaniem wyżej wyrażonego miasteczka na mapach [MIEJSCE] i [MIEJSCE], bo go tam nie znajdÄ…. Nie dlatego, jakoby [MIEJSCE] nie istniaÅ‚a w rzeczywistoÅ›ci i byÅ‚a tylko wytworem fantazji autora, ale po prostu dlatego, że mieszkaÅ„cy owego sÅ‚awnego grodu, urosÅ‚szy z czasem w ambicjÄ™, uważali tÄ™ nazwÄ™ jako ubliżajÄ…cÄ… ich powadze i podali do c. k. namiestnictwa pokornÄ… proÅ›bÄ™ o pozwolenie zamienienia jej na innÄ…. Podobne zamiany nazwisk praktykujÄ… siÄ™ dość czÄ™sto w [MIEJSCE], szczególnie u pojedynczych osób, które nie czujÄ…c siÄ™ na siÅ‚ach uszlachetnienia sobÄ…, swymi czynami wÅ‚asnego nazwiska, chcÄ… nazwiskiem uszlachetnić siebie, i tak np. ROZDZIAÅ I. CO NIECO O SAMEJ PIPIDÓWCE Przede wszystkim muszÄ™ uprzedzić z góry czytelników, aby siÄ™ daremnie nie trudzili nad szukaniem wyżej wyrażonego miasteczka na mapach [MIEJSCE] i [MIEJSCE], bo go tam nie znajdÄ…. Nie dlatego, jakoby [MIEJSCE] nie istniaÅ‚a w rzeczywistoÅ›ci i byÅ‚a tylko wytworem fantazji autora, ale po prostu dlatego, że mieszkaÅ„cy owego sÅ‚awnego grodu, urosÅ‚szy z czasem w ambicjÄ™, uważali tÄ™ nazwÄ™ jako ubliżajÄ…cÄ… ich powadze i podali do c. k. namiestnictwa pokornÄ… proÅ›bÄ™ o pozwolenie zamienienia jej na innÄ…. Podobne zamiany nazwisk praktykujÄ… siÄ™ dość czÄ™sto w [MIEJSCE], szczególnie u pojedynczych osób, które nie czujÄ…c siÄ™ na siÅ‚ach uszlachetnienia sobÄ…, swymi czynami wÅ‚asnego nazwiska, chcÄ… nazwiskiem uszlachetnić siebie, i tak np.' + ) diff --git a/tests/integration/wiktorner_jsonl_txt_output_configuration/wiktorner_jsonl.jsonl b/tests/integration/wiktorner_jsonl_txt_output_configuration/wiktorner_jsonl.jsonl new file mode 100644 index 0000000..ee43222 --- /dev/null +++ b/tests/integration/wiktorner_jsonl_txt_output_configuration/wiktorner_jsonl.jsonl @@ -0,0 +1,2 @@ +{"filename": "bb4a16ff-33de-4478-939d-12db67d750b1","text": "ROZDZIAÅ I. CO NIECO O SAMEJ PIPIDÓWCE Przede wszystkim muszÄ™ uprzedzić z góry czytelników, aby siÄ™ daremnie nie trudzili nad szukaniem wyżej wyrażonego miasteczka na mapach Galicji i Lodomerii, bo go tam nie znajdÄ…. Nie dlatego, jakoby Pipidówka nie istniaÅ‚a w rzeczywistoÅ›ci i byÅ‚a tylko wytworem fantazji autora, ale po prostu dlatego, że mieszkaÅ„cy owego sÅ‚awnego grodu, urosÅ‚szy z czasem w ambicjÄ™, uważali tÄ™ nazwÄ™ jako ubliżajÄ…cÄ… ich powadze i podali do c. k. namiestnictwa pokornÄ… proÅ›bÄ™ o pozwolenie zamienienia jej na innÄ…. Podobne zamiany nazwisk praktykujÄ… siÄ™ dość czÄ™sto w Galicji, szczególnie u pojedynczych osób, które nie czujÄ…c siÄ™ na siÅ‚ach uszlachetnienia sobÄ…, swymi czynami wÅ‚asnego nazwiska, chcÄ… nazwiskiem uszlachetnić siebie, i tak np.","tokens": [{"index": 1,"position": [0,8],"orth": "ROZDZIAÅ","lexemes": [{"lemma": "rozdziaÅ‚","mstag": "subst:sg:nom:m3","disamb": true}]},{"index": 2,"position": [9,10],"orth": "I","lexemes": [{"lemma": "I","mstag": "adj:sg:nom:m3:pos","disamb": true}]},{"index": 3,"position": [10,11],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 4,"position": [12,14],"orth": "CO","lexemes": [{"lemma": "co","mstag": "conj","disamb": true}]},{"index": 5,"position": [15,20],"orth": "NIECO","lexemes": [{"lemma": "nieco","mstag": "adv","disamb": true}]},{"index": 6,"position": [21,22],"orth": "O","lexemes": [{"lemma": "o","mstag": "prep:loc","disamb": true}]},{"index": 7,"position": [23,28],"orth": "SAMEJ","lexemes": [{"lemma": "sam","mstag": "adj:sg:loc:f:pos","disamb": true}]},{"index": 8,"position": [29,38],"orth": "PIPIDÓWCE","lexemes": [{"lemma": "Pipidówka","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 9,"position": [39,45],"orth": "Przede","lexemes": [{"lemma": "przed","mstag": "prep:inst:wok","disamb": true}]},{"index": 10,"position": [46,55],"orth": "wszystkim","lexemes": [{"lemma": "wszystko","mstag": "subst:sg:inst:n","disamb": true}]},{"index": 11,"position": [56,61],"orth": "muszÄ™","lexemes": [{"lemma": "musieć","mstag": "fin:sg:pri:imperf","disamb": true}]},{"index": 12,"position": [62,71],"orth": "uprzedzić","lexemes": [{"lemma": "uprzedzić","mstag": "inf:perf","disamb": true}]},{"index": 13,"position": [72,73],"orth": "z","lexemes": [{"lemma": "z","mstag": "prep:gen:nwok","disamb": true}]},{"index": 14,"position": [74,78],"orth": "góry","lexemes": [{"lemma": "góra","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 15,"position": [79,90],"orth": "czytelników","lexemes": [{"lemma": "czytelnik","mstag": "subst:pl:gen:m1","disamb": true}]},{"index": 16,"position": [90,91],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 17,"position": [92,95],"orth": "aby","lexemes": [{"lemma": "aby","mstag": "comp","disamb": true}]},{"index": 18,"position": [96,99],"orth": "siÄ™","lexemes": [{"lemma": "siÄ™","mstag": "qub","disamb": true}]},{"index": 19,"position": [100,108],"orth": "daremnie","lexemes": [{"lemma": "daremnie","mstag": "adv:pos","disamb": true}]},{"index": 20,"position": [109,112],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 21,"position": [113,121],"orth": "trudzili","lexemes": [{"lemma": "trudzić","mstag": "praet:pl:m1:imperf","disamb": true}]},{"index": 22,"position": [122,125],"orth": "nad","lexemes": [{"lemma": "nad","mstag": "prep:inst:nwok","disamb": true}]},{"index": 23,"position": [126,135],"orth": "szukaniem","lexemes": [{"lemma": "szukać","mstag": "ger:sg:inst:n:imperf:aff","disamb": true}]},{"index": 24,"position": [136,141],"orth": "wyżej","lexemes": [{"lemma": "wysoko","mstag": "adv:com","disamb": true}]},{"index": 25,"position": [142,152],"orth": "wyrażonego","lexemes": [{"lemma": "wyrazić","mstag": "ppas:sg:gen:n:perf:aff","disamb": true}]},{"index": 26,"position": [153,163],"orth": "miasteczka","lexemes": [{"lemma": "miasteczko","mstag": "subst:sg:gen:n","disamb": true}]},{"index": 27,"position": [164,166],"orth": "na","lexemes": [{"lemma": "na","mstag": "prep:loc","disamb": true}]},{"index": 28,"position": [167,173],"orth": "mapach","lexemes": [{"lemma": "mapa","mstag": "subst:pl:loc:f","disamb": true}]},{"index": 29,"position": [174,181],"orth": "Galicji","lexemes": [{"lemma": "Galicja","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 30,"position": [182,183],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 31,"position": [184,193],"orth": "Lodomerii","lexemes": [{"lemma": "Lodomerii","mstag": "ign","disamb": true}]},{"index": 32,"position": [193,194],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 33,"position": [195,197],"orth": "bo","lexemes": [{"lemma": "bo","mstag": "comp","disamb": true}]},{"index": 34,"position": [198,200],"orth": "go","lexemes": [{"lemma": "on","mstag": "ppron3:sg:gen:m1:ter:nakc:npraep","disamb": true}]},{"index": 35,"position": [201,204],"orth": "tam","lexemes": [{"lemma": "tam","mstag": "adv","disamb": true}]},{"index": 36,"position": [205,208],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 37,"position": [209,215],"orth": "znajdÄ…","lexemes": [{"lemma": "znaleźć","mstag": "fin:pl:ter:perf","disamb": true}]},{"index": 38,"position": [215,216],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 39,"position": [217,220],"orth": "Nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 40,"position": [221,228],"orth": "dlatego","lexemes": [{"lemma": "dlatego","mstag": "adv","disamb": true}]},{"index": 41,"position": [228,229],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 42,"position": [230,236],"orth": "jakoby","lexemes": [{"lemma": "jakoby","mstag": "comp","disamb": true}]},{"index": 43,"position": [237,246],"orth": "Pipidówka","lexemes": [{"lemma": "Pipidówka","mstag": "subst:sg:nom:f","disamb": true}]},{"index": 44,"position": [247,250],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 45,"position": [251,259],"orth": "istniaÅ‚a","lexemes": [{"lemma": "istnieć","mstag": "praet:sg:f:imperf","disamb": true}]},{"index": 46,"position": [260,261],"orth": "w","lexemes": [{"lemma": "w","mstag": "prep:loc:nwok","disamb": true}]},{"index": 47,"position": [262,276],"orth": "rzeczywistoÅ›ci","lexemes": [{"lemma": "rzeczywistość","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 48,"position": [277,278],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 49,"position": [279,283],"orth": "byÅ‚a","lexemes": [{"lemma": "być","mstag": "praet:sg:f:imperf","disamb": true}]},{"index": 50,"position": [284,289],"orth": "tylko","lexemes": [{"lemma": "tylko","mstag": "qub","disamb": true}]},{"index": 51,"position": [290,298],"orth": "wytworem","lexemes": [{"lemma": "wytwór","mstag": "subst:sg:inst:m3","disamb": true}]},{"index": 52,"position": [299,307],"orth": "fantazji","lexemes": [{"lemma": "fantazja","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 53,"position": [308,314],"orth": "autora","lexemes": [{"lemma": "autor","mstag": "subst:sg:gen:m1","disamb": true}]},{"index": 54,"position": [314,315],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 55,"position": [316,319],"orth": "ale","lexemes": [{"lemma": "ale","mstag": "conj","disamb": true}]},{"index": 56,"position": [320,322],"orth": "po","lexemes": [{"lemma": "po","mstag": "prep:acc","disamb": true}]},{"index": 57,"position": [323,329],"orth": "prostu","lexemes": [{"lemma": "prosty","mstag": "adjp","disamb": true}]},{"index": 58,"position": [330,337],"orth": "dlatego","lexemes": [{"lemma": "dlatego","mstag": "adv","disamb": true}]},{"index": 59,"position": [337,338],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 60,"position": [339,341],"orth": "że","lexemes": [{"lemma": "że","mstag": "comp","disamb": true}]},{"index": 61,"position": [342,352],"orth": "mieszkaÅ„cy","lexemes": [{"lemma": "mieszkaniec","mstag": "subst:pl:nom:m1","disamb": true}]},{"index": 62,"position": [353,358],"orth": "owego","lexemes": [{"lemma": "ów","mstag": "adj:sg:gen:m3:pos","disamb": true}]},{"index": 63,"position": [359,367],"orth": "sÅ‚awnego","lexemes": [{"lemma": "sÅ‚awny","mstag": "adj:sg:gen:m3:pos","disamb": true}]},{"index": 64,"position": [368,373],"orth": "grodu","lexemes": [{"lemma": "gród","mstag": "subst:sg:gen:m3","disamb": true}]},{"index": 65,"position": [373,374],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 66,"position": [375,383],"orth": "urosÅ‚szy","lexemes": [{"lemma": "urosÅ‚szy","mstag": "ign","disamb": true}]},{"index": 67,"position": [384,385],"orth": "z","lexemes": [{"lemma": "z","mstag": "prep:inst:nwok","disamb": true}]},{"index": 68,"position": [386,392],"orth": "czasem","lexemes": [{"lemma": "czas","mstag": "subst:sg:inst:m3","disamb": true}]},{"index": 69,"position": [393,394],"orth": "w","lexemes": [{"lemma": "w","mstag": "prep:acc:nwok","disamb": true}]},{"index": 70,"position": [395,402],"orth": "ambicjÄ™","lexemes": [{"lemma": "ambicja","mstag": "subst:sg:acc:f","disamb": true}]},{"index": 71,"position": [402,403],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 72,"position": [404,411],"orth": "uważali","lexemes": [{"lemma": "uważać","mstag": "praet:pl:m1:imperf","disamb": true}]},{"index": 73,"position": [412,414],"orth": "tÄ™","lexemes": [{"lemma": "ten","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 74,"position": [415,420],"orth": "nazwÄ™","lexemes": [{"lemma": "nazwa","mstag": "subst:sg:acc:f","disamb": true}]},{"index": 75,"position": [421,425],"orth": "jako","lexemes": [{"lemma": "jako","mstag": "conj","disamb": true}]},{"index": 76,"position": [426,436],"orth": "ubliżajÄ…cÄ…","lexemes": [{"lemma": "ubliżajÄ…cy","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 77,"position": [437,440],"orth": "ich","lexemes": [{"lemma": "on","mstag": "ppron3:pl:gen:m1:ter:akc:npraep","disamb": true}]},{"index": 78,"position": [441,448],"orth": "powadze","lexemes": [{"lemma": "powaga","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 79,"position": [449,450],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 80,"position": [451,457],"orth": "podali","lexemes": [{"lemma": "podać","mstag": "praet:pl:m1:perf","disamb": true}]},{"index": 81,"position": [458,460],"orth": "do","lexemes": [{"lemma": "do","mstag": "prep:gen","disamb": true}]},{"index": 82,"position": [461,462],"orth": "c","lexemes": [{"lemma": "c","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 83,"position": [462,463],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 84,"position": [464,465],"orth": "k","lexemes": [{"lemma": "K","mstag": "brev:pun","disamb": true}]},{"index": 85,"position": [465,466],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 86,"position": [467,480],"orth": "namiestnictwa","lexemes": [{"lemma": "namiestnictwo","mstag": "subst:sg:gen:n","disamb": true}]},{"index": 87,"position": [481,488],"orth": "pokornÄ…","lexemes": [{"lemma": "pokorny","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 88,"position": [489,495],"orth": "proÅ›bÄ™","lexemes": [{"lemma": "proÅ›ba","mstag": "subst:sg:acc:f","disamb": true}]},{"index": 89,"position": [496,497],"orth": "o","lexemes": [{"lemma": "o","mstag": "prep:acc","disamb": true}]},{"index": 90,"position": [498,508],"orth": "pozwolenie","lexemes": [{"lemma": "pozwolić","mstag": "ger:sg:acc:n:perf:aff","disamb": true}]},{"index": 91,"position": [509,520],"orth": "zamienienia","lexemes": [{"lemma": "zamienić","mstag": "ger:sg:gen:n:perf:aff","disamb": true}]},{"index": 92,"position": [521,524],"orth": "jej","lexemes": [{"lemma": "on","mstag": "ppron3:sg:gen:f:ter:akc:npraep","disamb": true}]},{"index": 93,"position": [525,527],"orth": "na","lexemes": [{"lemma": "na","mstag": "prep:acc","disamb": true}]},{"index": 94,"position": [528,532],"orth": "innÄ…","lexemes": [{"lemma": "inny","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 95,"position": [532,533],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 96,"position": [534,541],"orth": "Podobne","lexemes": [{"lemma": "podobny","mstag": "adj:pl:nom:f:pos","disamb": true}]},{"index": 97,"position": [542,549],"orth": "zamiany","lexemes": [{"lemma": "zamiana","mstag": "subst:pl:nom:f","disamb": true}]},{"index": 98,"position": [550,557],"orth": "nazwisk","lexemes": [{"lemma": "nazwisko","mstag": "subst:pl:gen:n","disamb": true}]},{"index": 99,"position": [558,568],"orth": "praktykujÄ…","lexemes": [{"lemma": "praktykować","mstag": "fin:pl:ter:imperf","disamb": true}]},{"index": 100,"position": [569,572],"orth": "siÄ™","lexemes": [{"lemma": "siÄ™","mstag": "qub","disamb": true}]},{"index": 101,"position": [573,577],"orth": "dość","lexemes": [{"lemma": "dość","mstag": "qub","disamb": true}]},{"index": 102,"position": [578,584],"orth": "czÄ™sto","lexemes": [{"lemma": "czÄ™sto","mstag": "adv:pos","disamb": true}]},{"index": 103,"position": [585,586],"orth": "w","lexemes": [{"lemma": "w","mstag": "prep:loc:nwok","disamb": true}]},{"index": 104,"position": [587,594],"orth": "Galicji","lexemes": [{"lemma": "Galicja","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 105,"position": [594,595],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 106,"position": [596,607],"orth": "szczególnie","lexemes": [{"lemma": "szczególnie","mstag": "qub","disamb": true}]},{"index": 107,"position": [608,609],"orth": "u","lexemes": [{"lemma": "u","mstag": "prep:gen","disamb": true}]},{"index": 108,"position": [610,622],"orth": "pojedynczych","lexemes": [{"lemma": "pojedynczy","mstag": "adj:pl:gen:f:pos","disamb": true}]},{"index": 109,"position": [623,627],"orth": "osób","lexemes": [{"lemma": "osoba","mstag": "subst:pl:gen:f","disamb": true}]},{"index": 110,"position": [627,628],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 111,"position": [629,634],"orth": "które","lexemes": [{"lemma": "który","mstag": "adj:pl:nom:f:pos","disamb": true}]},{"index": 112,"position": [635,638],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 113,"position": [639,645],"orth": "czujÄ…c","lexemes": [{"lemma": "czuć","mstag": "pcon:imperf","disamb": true}]},{"index": 114,"position": [646,649],"orth": "siÄ™","lexemes": [{"lemma": "siÄ™","mstag": "qub","disamb": true}]},{"index": 115,"position": [650,652],"orth": "na","lexemes": [{"lemma": "na","mstag": "prep:loc","disamb": true}]},{"index": 116,"position": [653,659],"orth": "siÅ‚ach","lexemes": [{"lemma": "siÅ‚a","mstag": "subst:pl:loc:f","disamb": true}]},{"index": 117,"position": [660,675],"orth": "uszlachetnienia","lexemes": [{"lemma": "uszlachetnić","mstag": "ger:sg:gen:n:perf:aff","disamb": true}]},{"index": 118,"position": [676,680],"orth": "sobÄ…","lexemes": [{"lemma": "siebie","mstag": "siebie:inst","disamb": true}]},{"index": 119,"position": [680,681],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 120,"position": [682,687],"orth": "swymi","lexemes": [{"lemma": "swój","mstag": "adj:pl:inst:m3:pos","disamb": true}]},{"index": 121,"position": [688,695],"orth": "czynami","lexemes": [{"lemma": "czyn","mstag": "subst:pl:inst:m3","disamb": true}]},{"index": 122,"position": [696,704],"orth": "wÅ‚asnego","lexemes": [{"lemma": "wÅ‚asny","mstag": "adj:sg:gen:n:pos","disamb": true}]},{"index": 123,"position": [705,713],"orth": "nazwiska","lexemes": [{"lemma": "nazwisko","mstag": "subst:sg:gen:n","disamb": true}]},{"index": 124,"position": [713,714],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 125,"position": [715,719],"orth": "chcÄ…","lexemes": [{"lemma": "chcieć","mstag": "fin:pl:ter:imperf","disamb": true}]},{"index": 126,"position": [720,730],"orth": "nazwiskiem","lexemes": [{"lemma": "nazwisko","mstag": "subst:sg:inst:n","disamb": true}]},{"index": 127,"position": [731,743],"orth": "uszlachetnić","lexemes": [{"lemma": "uszlachetnić","mstag": "inf:perf","disamb": true}]},{"index": 128,"position": [744,750],"orth": "siebie","lexemes": [{"lemma": "siebie","mstag": "siebie:acc","disamb": true}]},{"index": 129,"position": [750,751],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 130,"position": [752,753],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 131,"position": [754,757],"orth": "tak","lexemes": [{"lemma": "tak","mstag": "adv:pos","disamb": true}]},{"index": 132,"position": [758,760],"orth": "np","lexemes": [{"lemma": "na przykÅ‚ad","mstag": "brev:pun","disamb": true}]},{"index": 133,"position": [760,761],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]}],"entities": [{"text": "Galicji","type": "nam_loc_historical_region","tokens": [28,29],"positions": [174,181]},{"text": "Lodomerii","type": "nam_loc_gpe_admin1","tokens": [30,31],"positions": [184,193]},{"text": "Pipidówka","type": "nam_loc_gpe_city","tokens": [42,43],"positions": [237,246]},{"text": "Galicji","type": "nam_loc_gpe_admin1","tokens": [103,104],"positions": [587,594]}]} +{"filename": "bb4a16ff-33de-4478-939d-12db67d750b1","text": "ROZDZIAÅ I. CO NIECO O SAMEJ PIPIDÓWCE Przede wszystkim muszÄ™ uprzedzić z góry czytelników, aby siÄ™ daremnie nie trudzili nad szukaniem wyżej wyrażonego miasteczka na mapach Galicji i Lodomerii, bo go tam nie znajdÄ…. Nie dlatego, jakoby Pipidówka nie istniaÅ‚a w rzeczywistoÅ›ci i byÅ‚a tylko wytworem fantazji autora, ale po prostu dlatego, że mieszkaÅ„cy owego sÅ‚awnego grodu, urosÅ‚szy z czasem w ambicjÄ™, uważali tÄ™ nazwÄ™ jako ubliżajÄ…cÄ… ich powadze i podali do c. k. namiestnictwa pokornÄ… proÅ›bÄ™ o pozwolenie zamienienia jej na innÄ…. Podobne zamiany nazwisk praktykujÄ… siÄ™ dość czÄ™sto w Galicji, szczególnie u pojedynczych osób, które nie czujÄ…c siÄ™ na siÅ‚ach uszlachetnienia sobÄ…, swymi czynami wÅ‚asnego nazwiska, chcÄ… nazwiskiem uszlachetnić siebie, i tak np.","tokens": [{"index": 1,"position": [0,8],"orth": "ROZDZIAÅ","lexemes": [{"lemma": "rozdziaÅ‚","mstag": "subst:sg:nom:m3","disamb": true}]},{"index": 2,"position": [9,10],"orth": "I","lexemes": [{"lemma": "I","mstag": "adj:sg:nom:m3:pos","disamb": true}]},{"index": 3,"position": [10,11],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 4,"position": [12,14],"orth": "CO","lexemes": [{"lemma": "co","mstag": "conj","disamb": true}]},{"index": 5,"position": [15,20],"orth": "NIECO","lexemes": [{"lemma": "nieco","mstag": "adv","disamb": true}]},{"index": 6,"position": [21,22],"orth": "O","lexemes": [{"lemma": "o","mstag": "prep:loc","disamb": true}]},{"index": 7,"position": [23,28],"orth": "SAMEJ","lexemes": [{"lemma": "sam","mstag": "adj:sg:loc:f:pos","disamb": true}]},{"index": 8,"position": [29,38],"orth": "PIPIDÓWCE","lexemes": [{"lemma": "Pipidówka","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 9,"position": [39,45],"orth": "Przede","lexemes": [{"lemma": "przed","mstag": "prep:inst:wok","disamb": true}]},{"index": 10,"position": [46,55],"orth": "wszystkim","lexemes": [{"lemma": "wszystko","mstag": "subst:sg:inst:n","disamb": true}]},{"index": 11,"position": [56,61],"orth": "muszÄ™","lexemes": [{"lemma": "musieć","mstag": "fin:sg:pri:imperf","disamb": true}]},{"index": 12,"position": [62,71],"orth": "uprzedzić","lexemes": [{"lemma": "uprzedzić","mstag": "inf:perf","disamb": true}]},{"index": 13,"position": [72,73],"orth": "z","lexemes": [{"lemma": "z","mstag": "prep:gen:nwok","disamb": true}]},{"index": 14,"position": [74,78],"orth": "góry","lexemes": [{"lemma": "góra","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 15,"position": [79,90],"orth": "czytelników","lexemes": [{"lemma": "czytelnik","mstag": "subst:pl:gen:m1","disamb": true}]},{"index": 16,"position": [90,91],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 17,"position": [92,95],"orth": "aby","lexemes": [{"lemma": "aby","mstag": "comp","disamb": true}]},{"index": 18,"position": [96,99],"orth": "siÄ™","lexemes": [{"lemma": "siÄ™","mstag": "qub","disamb": true}]},{"index": 19,"position": [100,108],"orth": "daremnie","lexemes": [{"lemma": "daremnie","mstag": "adv:pos","disamb": true}]},{"index": 20,"position": [109,112],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 21,"position": [113,121],"orth": "trudzili","lexemes": [{"lemma": "trudzić","mstag": "praet:pl:m1:imperf","disamb": true}]},{"index": 22,"position": [122,125],"orth": "nad","lexemes": [{"lemma": "nad","mstag": "prep:inst:nwok","disamb": true}]},{"index": 23,"position": [126,135],"orth": "szukaniem","lexemes": [{"lemma": "szukać","mstag": "ger:sg:inst:n:imperf:aff","disamb": true}]},{"index": 24,"position": [136,141],"orth": "wyżej","lexemes": [{"lemma": "wysoko","mstag": "adv:com","disamb": true}]},{"index": 25,"position": [142,152],"orth": "wyrażonego","lexemes": [{"lemma": "wyrazić","mstag": "ppas:sg:gen:n:perf:aff","disamb": true}]},{"index": 26,"position": [153,163],"orth": "miasteczka","lexemes": [{"lemma": "miasteczko","mstag": "subst:sg:gen:n","disamb": true}]},{"index": 27,"position": [164,166],"orth": "na","lexemes": [{"lemma": "na","mstag": "prep:loc","disamb": true}]},{"index": 28,"position": [167,173],"orth": "mapach","lexemes": [{"lemma": "mapa","mstag": "subst:pl:loc:f","disamb": true}]},{"index": 29,"position": [174,181],"orth": "Galicji","lexemes": [{"lemma": "Galicja","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 30,"position": [182,183],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 31,"position": [184,193],"orth": "Lodomerii","lexemes": [{"lemma": "Lodomerii","mstag": "ign","disamb": true}]},{"index": 32,"position": [193,194],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 33,"position": [195,197],"orth": "bo","lexemes": [{"lemma": "bo","mstag": "comp","disamb": true}]},{"index": 34,"position": [198,200],"orth": "go","lexemes": [{"lemma": "on","mstag": "ppron3:sg:gen:m1:ter:nakc:npraep","disamb": true}]},{"index": 35,"position": [201,204],"orth": "tam","lexemes": [{"lemma": "tam","mstag": "adv","disamb": true}]},{"index": 36,"position": [205,208],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 37,"position": [209,215],"orth": "znajdÄ…","lexemes": [{"lemma": "znaleźć","mstag": "fin:pl:ter:perf","disamb": true}]},{"index": 38,"position": [215,216],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 39,"position": [217,220],"orth": "Nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 40,"position": [221,228],"orth": "dlatego","lexemes": [{"lemma": "dlatego","mstag": "adv","disamb": true}]},{"index": 41,"position": [228,229],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 42,"position": [230,236],"orth": "jakoby","lexemes": [{"lemma": "jakoby","mstag": "comp","disamb": true}]},{"index": 43,"position": [237,246],"orth": "Pipidówka","lexemes": [{"lemma": "Pipidówka","mstag": "subst:sg:nom:f","disamb": true}]},{"index": 44,"position": [247,250],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 45,"position": [251,259],"orth": "istniaÅ‚a","lexemes": [{"lemma": "istnieć","mstag": "praet:sg:f:imperf","disamb": true}]},{"index": 46,"position": [260,261],"orth": "w","lexemes": [{"lemma": "w","mstag": "prep:loc:nwok","disamb": true}]},{"index": 47,"position": [262,276],"orth": "rzeczywistoÅ›ci","lexemes": [{"lemma": "rzeczywistość","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 48,"position": [277,278],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 49,"position": [279,283],"orth": "byÅ‚a","lexemes": [{"lemma": "być","mstag": "praet:sg:f:imperf","disamb": true}]},{"index": 50,"position": [284,289],"orth": "tylko","lexemes": [{"lemma": "tylko","mstag": "qub","disamb": true}]},{"index": 51,"position": [290,298],"orth": "wytworem","lexemes": [{"lemma": "wytwór","mstag": "subst:sg:inst:m3","disamb": true}]},{"index": 52,"position": [299,307],"orth": "fantazji","lexemes": [{"lemma": "fantazja","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 53,"position": [308,314],"orth": "autora","lexemes": [{"lemma": "autor","mstag": "subst:sg:gen:m1","disamb": true}]},{"index": 54,"position": [314,315],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 55,"position": [316,319],"orth": "ale","lexemes": [{"lemma": "ale","mstag": "conj","disamb": true}]},{"index": 56,"position": [320,322],"orth": "po","lexemes": [{"lemma": "po","mstag": "prep:acc","disamb": true}]},{"index": 57,"position": [323,329],"orth": "prostu","lexemes": [{"lemma": "prosty","mstag": "adjp","disamb": true}]},{"index": 58,"position": [330,337],"orth": "dlatego","lexemes": [{"lemma": "dlatego","mstag": "adv","disamb": true}]},{"index": 59,"position": [337,338],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 60,"position": [339,341],"orth": "że","lexemes": [{"lemma": "że","mstag": "comp","disamb": true}]},{"index": 61,"position": [342,352],"orth": "mieszkaÅ„cy","lexemes": [{"lemma": "mieszkaniec","mstag": "subst:pl:nom:m1","disamb": true}]},{"index": 62,"position": [353,358],"orth": "owego","lexemes": [{"lemma": "ów","mstag": "adj:sg:gen:m3:pos","disamb": true}]},{"index": 63,"position": [359,367],"orth": "sÅ‚awnego","lexemes": [{"lemma": "sÅ‚awny","mstag": "adj:sg:gen:m3:pos","disamb": true}]},{"index": 64,"position": [368,373],"orth": "grodu","lexemes": [{"lemma": "gród","mstag": "subst:sg:gen:m3","disamb": true}]},{"index": 65,"position": [373,374],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 66,"position": [375,383],"orth": "urosÅ‚szy","lexemes": [{"lemma": "urosÅ‚szy","mstag": "ign","disamb": true}]},{"index": 67,"position": [384,385],"orth": "z","lexemes": [{"lemma": "z","mstag": "prep:inst:nwok","disamb": true}]},{"index": 68,"position": [386,392],"orth": "czasem","lexemes": [{"lemma": "czas","mstag": "subst:sg:inst:m3","disamb": true}]},{"index": 69,"position": [393,394],"orth": "w","lexemes": [{"lemma": "w","mstag": "prep:acc:nwok","disamb": true}]},{"index": 70,"position": [395,402],"orth": "ambicjÄ™","lexemes": [{"lemma": "ambicja","mstag": "subst:sg:acc:f","disamb": true}]},{"index": 71,"position": [402,403],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 72,"position": [404,411],"orth": "uważali","lexemes": [{"lemma": "uważać","mstag": "praet:pl:m1:imperf","disamb": true}]},{"index": 73,"position": [412,414],"orth": "tÄ™","lexemes": [{"lemma": "ten","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 74,"position": [415,420],"orth": "nazwÄ™","lexemes": [{"lemma": "nazwa","mstag": "subst:sg:acc:f","disamb": true}]},{"index": 75,"position": [421,425],"orth": "jako","lexemes": [{"lemma": "jako","mstag": "conj","disamb": true}]},{"index": 76,"position": [426,436],"orth": "ubliżajÄ…cÄ…","lexemes": [{"lemma": "ubliżajÄ…cy","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 77,"position": [437,440],"orth": "ich","lexemes": [{"lemma": "on","mstag": "ppron3:pl:gen:m1:ter:akc:npraep","disamb": true}]},{"index": 78,"position": [441,448],"orth": "powadze","lexemes": [{"lemma": "powaga","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 79,"position": [449,450],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 80,"position": [451,457],"orth": "podali","lexemes": [{"lemma": "podać","mstag": "praet:pl:m1:perf","disamb": true}]},{"index": 81,"position": [458,460],"orth": "do","lexemes": [{"lemma": "do","mstag": "prep:gen","disamb": true}]},{"index": 82,"position": [461,462],"orth": "c","lexemes": [{"lemma": "c","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 83,"position": [462,463],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 84,"position": [464,465],"orth": "k","lexemes": [{"lemma": "K","mstag": "brev:pun","disamb": true}]},{"index": 85,"position": [465,466],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 86,"position": [467,480],"orth": "namiestnictwa","lexemes": [{"lemma": "namiestnictwo","mstag": "subst:sg:gen:n","disamb": true}]},{"index": 87,"position": [481,488],"orth": "pokornÄ…","lexemes": [{"lemma": "pokorny","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 88,"position": [489,495],"orth": "proÅ›bÄ™","lexemes": [{"lemma": "proÅ›ba","mstag": "subst:sg:acc:f","disamb": true}]},{"index": 89,"position": [496,497],"orth": "o","lexemes": [{"lemma": "o","mstag": "prep:acc","disamb": true}]},{"index": 90,"position": [498,508],"orth": "pozwolenie","lexemes": [{"lemma": "pozwolić","mstag": "ger:sg:acc:n:perf:aff","disamb": true}]},{"index": 91,"position": [509,520],"orth": "zamienienia","lexemes": [{"lemma": "zamienić","mstag": "ger:sg:gen:n:perf:aff","disamb": true}]},{"index": 92,"position": [521,524],"orth": "jej","lexemes": [{"lemma": "on","mstag": "ppron3:sg:gen:f:ter:akc:npraep","disamb": true}]},{"index": 93,"position": [525,527],"orth": "na","lexemes": [{"lemma": "na","mstag": "prep:acc","disamb": true}]},{"index": 94,"position": [528,532],"orth": "innÄ…","lexemes": [{"lemma": "inny","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 95,"position": [532,533],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 96,"position": [534,541],"orth": "Podobne","lexemes": [{"lemma": "podobny","mstag": "adj:pl:nom:f:pos","disamb": true}]},{"index": 97,"position": [542,549],"orth": "zamiany","lexemes": [{"lemma": "zamiana","mstag": "subst:pl:nom:f","disamb": true}]},{"index": 98,"position": [550,557],"orth": "nazwisk","lexemes": [{"lemma": "nazwisko","mstag": "subst:pl:gen:n","disamb": true}]},{"index": 99,"position": [558,568],"orth": "praktykujÄ…","lexemes": [{"lemma": "praktykować","mstag": "fin:pl:ter:imperf","disamb": true}]},{"index": 100,"position": [569,572],"orth": "siÄ™","lexemes": [{"lemma": "siÄ™","mstag": "qub","disamb": true}]},{"index": 101,"position": [573,577],"orth": "dość","lexemes": [{"lemma": "dość","mstag": "qub","disamb": true}]},{"index": 102,"position": [578,584],"orth": "czÄ™sto","lexemes": [{"lemma": "czÄ™sto","mstag": "adv:pos","disamb": true}]},{"index": 103,"position": [585,586],"orth": "w","lexemes": [{"lemma": "w","mstag": "prep:loc:nwok","disamb": true}]},{"index": 104,"position": [587,594],"orth": "Galicji","lexemes": [{"lemma": "Galicja","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 105,"position": [594,595],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 106,"position": [596,607],"orth": "szczególnie","lexemes": [{"lemma": "szczególnie","mstag": "qub","disamb": true}]},{"index": 107,"position": [608,609],"orth": "u","lexemes": [{"lemma": "u","mstag": "prep:gen","disamb": true}]},{"index": 108,"position": [610,622],"orth": "pojedynczych","lexemes": [{"lemma": "pojedynczy","mstag": "adj:pl:gen:f:pos","disamb": true}]},{"index": 109,"position": [623,627],"orth": "osób","lexemes": [{"lemma": "osoba","mstag": "subst:pl:gen:f","disamb": true}]},{"index": 110,"position": [627,628],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 111,"position": [629,634],"orth": "które","lexemes": [{"lemma": "który","mstag": "adj:pl:nom:f:pos","disamb": true}]},{"index": 112,"position": [635,638],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 113,"position": [639,645],"orth": "czujÄ…c","lexemes": [{"lemma": "czuć","mstag": "pcon:imperf","disamb": true}]},{"index": 114,"position": [646,649],"orth": "siÄ™","lexemes": [{"lemma": "siÄ™","mstag": "qub","disamb": true}]},{"index": 115,"position": [650,652],"orth": "na","lexemes": [{"lemma": "na","mstag": "prep:loc","disamb": true}]},{"index": 116,"position": [653,659],"orth": "siÅ‚ach","lexemes": [{"lemma": "siÅ‚a","mstag": "subst:pl:loc:f","disamb": true}]},{"index": 117,"position": [660,675],"orth": "uszlachetnienia","lexemes": [{"lemma": "uszlachetnić","mstag": "ger:sg:gen:n:perf:aff","disamb": true}]},{"index": 118,"position": [676,680],"orth": "sobÄ…","lexemes": [{"lemma": "siebie","mstag": "siebie:inst","disamb": true}]},{"index": 119,"position": [680,681],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 120,"position": [682,687],"orth": "swymi","lexemes": [{"lemma": "swój","mstag": "adj:pl:inst:m3:pos","disamb": true}]},{"index": 121,"position": [688,695],"orth": "czynami","lexemes": [{"lemma": "czyn","mstag": "subst:pl:inst:m3","disamb": true}]},{"index": 122,"position": [696,704],"orth": "wÅ‚asnego","lexemes": [{"lemma": "wÅ‚asny","mstag": "adj:sg:gen:n:pos","disamb": true}]},{"index": 123,"position": [705,713],"orth": "nazwiska","lexemes": [{"lemma": "nazwisko","mstag": "subst:sg:gen:n","disamb": true}]},{"index": 124,"position": [713,714],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 125,"position": [715,719],"orth": "chcÄ…","lexemes": [{"lemma": "chcieć","mstag": "fin:pl:ter:imperf","disamb": true}]},{"index": 126,"position": [720,730],"orth": "nazwiskiem","lexemes": [{"lemma": "nazwisko","mstag": "subst:sg:inst:n","disamb": true}]},{"index": 127,"position": [731,743],"orth": "uszlachetnić","lexemes": [{"lemma": "uszlachetnić","mstag": "inf:perf","disamb": true}]},{"index": 128,"position": [744,750],"orth": "siebie","lexemes": [{"lemma": "siebie","mstag": "siebie:acc","disamb": true}]},{"index": 129,"position": [750,751],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 130,"position": [752,753],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 131,"position": [754,757],"orth": "tak","lexemes": [{"lemma": "tak","mstag": "adv:pos","disamb": true}]},{"index": 132,"position": [758,760],"orth": "np","lexemes": [{"lemma": "na przykÅ‚ad","mstag": "brev:pun","disamb": true}]},{"index": 133,"position": [760,761],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]}],"entities": [{"text": "Galicji","type": "nam_loc_historical_region","tokens": [28,29],"positions": [174,181]},{"text": "Lodomerii","type": "nam_loc_gpe_admin1","tokens": [30,31],"positions": [184,193]},{"text": "Pipidówka","type": "nam_loc_gpe_city","tokens": [42,43],"positions": [237,246]},{"text": "Galicji","type": "nam_loc_gpe_admin1","tokens": [103,104],"positions": [587,594]}]} \ No newline at end of file diff --git a/tests/unit/detectors/date/test_pl.py b/tests/unit/detectors/date/test_pl.py index 9ddcc58..2c9c31c 100644 --- a/tests/unit/detectors/date/test_pl.py +++ b/tests/unit/detectors/date/test_pl.py @@ -34,3 +34,28 @@ def test_detect_dates_pl(): (7, 16, DateDetection(format_date1)), (34, 49, DateDetection(format_date2)), ] + +def test_date_with_different_punctuations(): + # There is discussion about this wheter we should even detect such cases + # as a dates... However, for now we do and if we find cases where that is + # problematic, this definitly could be changed. + + detector = DateDetector("pl") + + text = "1.01,2022" + found_dates = detector.detect(text, dict()) + + format_date = [ + ( + DateDetection.AnnotationPart.TWO_DIGITS_DAY, + "01", + ), + (DateDetection.AnnotationPart.OTHER, "."), + (DateDetection.AnnotationPart.TWO_DIGIT_MONTH, "01"), + (DateDetection.AnnotationPart.OTHER, ","), + (DateDetection.AnnotationPart.FOUR_DIGIT_YEAR, "2022"), + ] + + assert found_dates == [ + (7, 16, DateDetection(format_date)), + ] \ No newline at end of file -- GitLab