Skip to content
Snippets Groups Projects
Commit bf05a415 authored by Michał Pogoda's avatar Michał Pogoda
Browse files

[WIP] Style + logic updates, added output to txt

parent 831cf50b
Branches
3 merge requests!10Anonimizer v2,!8Add txt output format,!7Better coverage
# @package _global_
defaults:
- /paths: default
- /detectors: all_ner_kpwr_ext
- /replacers: tag
- /suppressor: order_based
- /input_parser: wiktor_ner
- /pipeline: sequential_jsonl
- _self_
pipeline:
concat_to_txt: true
language: "pl"
\ No newline at end of file
......@@ -2,4 +2,5 @@ _target_: src.pipeline.sequential_jsonl.SequentialJSONLPipeline
input_parser: ${input_parser}
detectors: ${detectors}
suppressor: ${suppressor}
replacers: ${replacers}
\ No newline at end of file
replacers: ${replacers}
concat_to_txt: false
\ No newline at end of file
......@@ -6,47 +6,47 @@ def _parse_day_or_month(re_entry) -> List[Tuple[int, int, DateDetection]]:
assert re_entry["day_or_month_year"] is not None
result = []
if re_entry["day_month1"] is not None:
if len(re_entry["day_month1"]) == 1:
result.append(
(
DateDetection.AnnotationPart.TWO_DIGITS_DAY,
"0" + re_entry["day_month1"],
)
)
else:
result.append(
(DateDetection.AnnotationPart.TWO_DIGITS_DAY, re_entry["day_month1"])
)
result.append((DateDetection.AnnotationPart.OTHER, re_entry["punct1"]))
if len(re_entry["day_month2"]) == 1:
result.append(
(
DateDetection.AnnotationPart.TWO_DIGIT_MONTH,
"0" + re_entry["day_month2"],
)
)
else:
result.append(
(DateDetection.AnnotationPart.TWO_DIGIT_MONTH, re_entry["day_month2"])
)
result.append((DateDetection.AnnotationPart.OTHER, re_entry["punct1"]))
elif "day_month2" in re_entry:
if len(re_entry["day_month2"]) == 1:
result.append(
(
DateDetection.AnnotationPart.TWO_DIGIT_MONTH,
"0" + re_entry["day_month2"],
)
)
else:
result.append(
(DateDetection.AnnotationPart.TWO_DIGIT_MONTH, re_entry["day_month2"])
)
result.append((DateDetection.AnnotationPart.OTHER, re_entry["punct1"]))
# if re_entry["day_month1"] is not None:
if len(re_entry["day_month1"]) == 1:
result.append(
(
DateDetection.AnnotationPart.TWO_DIGITS_DAY,
"0" + re_entry["day_month1"],
)
)
else:
result.append(
(DateDetection.AnnotationPart.TWO_DIGITS_DAY, re_entry["day_month1"])
)
result.append((DateDetection.AnnotationPart.OTHER, re_entry["punct1"]))
if len(re_entry["day_month2"]) == 1:
result.append(
(
DateDetection.AnnotationPart.TWO_DIGIT_MONTH,
"0" + re_entry["day_month2"],
)
)
else:
result.append(
(DateDetection.AnnotationPart.TWO_DIGIT_MONTH, re_entry["day_month2"])
)
result.append((DateDetection.AnnotationPart.OTHER, re_entry["punct1"]))
# elif "day_month2" in re_entry:
# if len(re_entry["day_month2"]) == 1:
# result.append(
# (
# DateDetection.AnnotationPart.TWO_DIGIT_MONTH,
# "0" + re_entry["day_month2"],
# )
# )
# else:
# result.append(
# (DateDetection.AnnotationPart.TWO_DIGIT_MONTH, re_entry["day_month2"])
# )
# result.append((DateDetection.AnnotationPart.OTHER, re_entry["punct1"]))
if "year1" in re_entry:
if len(re_entry["year1"]) == 2:
......
......@@ -14,11 +14,13 @@ class SequentialJSONLPipeline(Pipeline):
detectors: Dict[str, Detector],
suppressor: Suppressor,
replacers: Dict[str, ReplacerInterface],
concat_to_txt: bool = False,
):
self._input_parser = input_parser
self._detectors = detectors
self._suppressor = suppressor
self._replacers = replacers
self._concat_to_txt = concat_to_txt
def run(self, input_path) -> str:
result = []
......@@ -45,4 +47,15 @@ class SequentialJSONLPipeline(Pipeline):
result.append({"text": replaced_input})
return "\n".join([json.dumps(item, ensure_ascii=False) for item in result])
if self._concat_to_txt:
result_text = ""
for item in result:
text = item["text"]
if result_text != "" and result_text.rstrip() == result_text and text.lstrip() == text:
result_text += " " + text
else:
result_text += text
return result_text
else:
return "\n".join([json.dumps(item, ensure_ascii=False) for item in result])
from hydra import initialize, compose
from hydra.utils import instantiate
def test_wiktorner_jsonl_txt_output_configuration():
with initialize(config_path="../../../config", version_base="1.1"):
config = compose(
config_name="config",
overrides=["paths.root_path=../../../", "configuration=wiktorner_jsonl_txt_output"],
)
pipeline = instantiate(config.pipeline)
result = pipeline.run(
"./tests/integration/wiktorner_jsonl_txt_output_configuration/wiktorner_jsonl.jsonl"
)
assert (
result
== 'ROZDZIAŁ I. CO NIECO O SAMEJ PIPIDÓWCE Przede wszystkim muszę uprzedzić z góry czytelników, aby się daremnie nie trudzili nad szukaniem wyżej wyrażonego miasteczka na mapach [MIEJSCE] i [MIEJSCE], bo go tam nie znajdą. Nie dlatego, jakoby [MIEJSCE] nie istniała w rzeczywistości i była tylko wytworem fantazji autora, ale po prostu dlatego, że mieszkańcy owego sławnego grodu, urosłszy z czasem w ambicję, uważali tę nazwę jako ubliżającą ich powadze i podali do c. k. namiestnictwa pokorną prośbę o pozwolenie zamienienia jej na inną. Podobne zamiany nazwisk praktykują się dość często w [MIEJSCE], szczególnie u pojedynczych osób, które nie czując się na siłach uszlachetnienia sobą, swymi czynami własnego nazwiska, chcą nazwiskiem uszlachetnić siebie, i tak np. ROZDZIAŁ I. CO NIECO O SAMEJ PIPIDÓWCE Przede wszystkim muszę uprzedzić z góry czytelników, aby się daremnie nie trudzili nad szukaniem wyżej wyrażonego miasteczka na mapach [MIEJSCE] i [MIEJSCE], bo go tam nie znajdą. Nie dlatego, jakoby [MIEJSCE] nie istniała w rzeczywistości i była tylko wytworem fantazji autora, ale po prostu dlatego, że mieszkańcy owego sławnego grodu, urosłszy z czasem w ambicję, uważali tę nazwę jako ubliżającą ich powadze i podali do c. k. namiestnictwa pokorną prośbę o pozwolenie zamienienia jej na inną. Podobne zamiany nazwisk praktykują się dość często w [MIEJSCE], szczególnie u pojedynczych osób, które nie czując się na siłach uszlachetnienia sobą, swymi czynami własnego nazwiska, chcą nazwiskiem uszlachetnić siebie, i tak np.'
)
{"filename": "bb4a16ff-33de-4478-939d-12db67d750b1","text": "ROZDZIAŁ I. CO NIECO O SAMEJ PIPIDÓWCE Przede wszystkim muszę uprzedzić z góry czytelników, aby się daremnie nie trudzili nad szukaniem wyżej wyrażonego miasteczka na mapach Galicji i Lodomerii, bo go tam nie znajdą. Nie dlatego, jakoby Pipidówka nie istniała w rzeczywistości i była tylko wytworem fantazji autora, ale po prostu dlatego, że mieszkańcy owego sławnego grodu, urosłszy z czasem w ambicję, uważali tę nazwę jako ubliżającą ich powadze i podali do c. k. namiestnictwa pokorną prośbę o pozwolenie zamienienia jej na inną. Podobne zamiany nazwisk praktykują się dość często w Galicji, szczególnie u pojedynczych osób, które nie czując się na siłach uszlachetnienia sobą, swymi czynami własnego nazwiska, chcą nazwiskiem uszlachetnić siebie, i tak np.","tokens": [{"index": 1,"position": [0,8],"orth": "ROZDZIAŁ","lexemes": [{"lemma": "rozdział","mstag": "subst:sg:nom:m3","disamb": true}]},{"index": 2,"position": [9,10],"orth": "I","lexemes": [{"lemma": "I","mstag": "adj:sg:nom:m3:pos","disamb": true}]},{"index": 3,"position": [10,11],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 4,"position": [12,14],"orth": "CO","lexemes": [{"lemma": "co","mstag": "conj","disamb": true}]},{"index": 5,"position": [15,20],"orth": "NIECO","lexemes": [{"lemma": "nieco","mstag": "adv","disamb": true}]},{"index": 6,"position": [21,22],"orth": "O","lexemes": [{"lemma": "o","mstag": "prep:loc","disamb": true}]},{"index": 7,"position": [23,28],"orth": "SAMEJ","lexemes": [{"lemma": "sam","mstag": "adj:sg:loc:f:pos","disamb": true}]},{"index": 8,"position": [29,38],"orth": "PIPIDÓWCE","lexemes": [{"lemma": "Pipidówka","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 9,"position": [39,45],"orth": "Przede","lexemes": [{"lemma": "przed","mstag": "prep:inst:wok","disamb": true}]},{"index": 10,"position": [46,55],"orth": "wszystkim","lexemes": [{"lemma": "wszystko","mstag": "subst:sg:inst:n","disamb": true}]},{"index": 11,"position": [56,61],"orth": "muszę","lexemes": [{"lemma": "musieć","mstag": "fin:sg:pri:imperf","disamb": true}]},{"index": 12,"position": [62,71],"orth": "uprzedzić","lexemes": [{"lemma": "uprzedzić","mstag": "inf:perf","disamb": true}]},{"index": 13,"position": [72,73],"orth": "z","lexemes": [{"lemma": "z","mstag": "prep:gen:nwok","disamb": true}]},{"index": 14,"position": [74,78],"orth": "góry","lexemes": [{"lemma": "góra","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 15,"position": [79,90],"orth": "czytelników","lexemes": [{"lemma": "czytelnik","mstag": "subst:pl:gen:m1","disamb": true}]},{"index": 16,"position": [90,91],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 17,"position": [92,95],"orth": "aby","lexemes": [{"lemma": "aby","mstag": "comp","disamb": true}]},{"index": 18,"position": [96,99],"orth": "się","lexemes": [{"lemma": "się","mstag": "qub","disamb": true}]},{"index": 19,"position": [100,108],"orth": "daremnie","lexemes": [{"lemma": "daremnie","mstag": "adv:pos","disamb": true}]},{"index": 20,"position": [109,112],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 21,"position": [113,121],"orth": "trudzili","lexemes": [{"lemma": "trudzić","mstag": "praet:pl:m1:imperf","disamb": true}]},{"index": 22,"position": [122,125],"orth": "nad","lexemes": [{"lemma": "nad","mstag": "prep:inst:nwok","disamb": true}]},{"index": 23,"position": [126,135],"orth": "szukaniem","lexemes": [{"lemma": "szukać","mstag": "ger:sg:inst:n:imperf:aff","disamb": true}]},{"index": 24,"position": [136,141],"orth": "wyżej","lexemes": [{"lemma": "wysoko","mstag": "adv:com","disamb": true}]},{"index": 25,"position": [142,152],"orth": "wyrażonego","lexemes": [{"lemma": "wyrazić","mstag": "ppas:sg:gen:n:perf:aff","disamb": true}]},{"index": 26,"position": [153,163],"orth": "miasteczka","lexemes": [{"lemma": "miasteczko","mstag": "subst:sg:gen:n","disamb": true}]},{"index": 27,"position": [164,166],"orth": "na","lexemes": [{"lemma": "na","mstag": "prep:loc","disamb": true}]},{"index": 28,"position": [167,173],"orth": "mapach","lexemes": [{"lemma": "mapa","mstag": "subst:pl:loc:f","disamb": true}]},{"index": 29,"position": [174,181],"orth": "Galicji","lexemes": [{"lemma": "Galicja","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 30,"position": [182,183],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 31,"position": [184,193],"orth": "Lodomerii","lexemes": [{"lemma": "Lodomerii","mstag": "ign","disamb": true}]},{"index": 32,"position": [193,194],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 33,"position": [195,197],"orth": "bo","lexemes": [{"lemma": "bo","mstag": "comp","disamb": true}]},{"index": 34,"position": [198,200],"orth": "go","lexemes": [{"lemma": "on","mstag": "ppron3:sg:gen:m1:ter:nakc:npraep","disamb": true}]},{"index": 35,"position": [201,204],"orth": "tam","lexemes": [{"lemma": "tam","mstag": "adv","disamb": true}]},{"index": 36,"position": [205,208],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 37,"position": [209,215],"orth": "znajdą","lexemes": [{"lemma": "znaleźć","mstag": "fin:pl:ter:perf","disamb": true}]},{"index": 38,"position": [215,216],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 39,"position": [217,220],"orth": "Nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 40,"position": [221,228],"orth": "dlatego","lexemes": [{"lemma": "dlatego","mstag": "adv","disamb": true}]},{"index": 41,"position": [228,229],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 42,"position": [230,236],"orth": "jakoby","lexemes": [{"lemma": "jakoby","mstag": "comp","disamb": true}]},{"index": 43,"position": [237,246],"orth": "Pipidówka","lexemes": [{"lemma": "Pipidówka","mstag": "subst:sg:nom:f","disamb": true}]},{"index": 44,"position": [247,250],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 45,"position": [251,259],"orth": "istniała","lexemes": [{"lemma": "istnieć","mstag": "praet:sg:f:imperf","disamb": true}]},{"index": 46,"position": [260,261],"orth": "w","lexemes": [{"lemma": "w","mstag": "prep:loc:nwok","disamb": true}]},{"index": 47,"position": [262,276],"orth": "rzeczywistości","lexemes": [{"lemma": "rzeczywistość","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 48,"position": [277,278],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 49,"position": [279,283],"orth": "była","lexemes": [{"lemma": "być","mstag": "praet:sg:f:imperf","disamb": true}]},{"index": 50,"position": [284,289],"orth": "tylko","lexemes": [{"lemma": "tylko","mstag": "qub","disamb": true}]},{"index": 51,"position": [290,298],"orth": "wytworem","lexemes": [{"lemma": "wytwór","mstag": "subst:sg:inst:m3","disamb": true}]},{"index": 52,"position": [299,307],"orth": "fantazji","lexemes": [{"lemma": "fantazja","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 53,"position": [308,314],"orth": "autora","lexemes": [{"lemma": "autor","mstag": "subst:sg:gen:m1","disamb": true}]},{"index": 54,"position": [314,315],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 55,"position": [316,319],"orth": "ale","lexemes": [{"lemma": "ale","mstag": "conj","disamb": true}]},{"index": 56,"position": [320,322],"orth": "po","lexemes": [{"lemma": "po","mstag": "prep:acc","disamb": true}]},{"index": 57,"position": [323,329],"orth": "prostu","lexemes": [{"lemma": "prosty","mstag": "adjp","disamb": true}]},{"index": 58,"position": [330,337],"orth": "dlatego","lexemes": [{"lemma": "dlatego","mstag": "adv","disamb": true}]},{"index": 59,"position": [337,338],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 60,"position": [339,341],"orth": "że","lexemes": [{"lemma": "że","mstag": "comp","disamb": true}]},{"index": 61,"position": [342,352],"orth": "mieszkańcy","lexemes": [{"lemma": "mieszkaniec","mstag": "subst:pl:nom:m1","disamb": true}]},{"index": 62,"position": [353,358],"orth": "owego","lexemes": [{"lemma": "ów","mstag": "adj:sg:gen:m3:pos","disamb": true}]},{"index": 63,"position": [359,367],"orth": "sławnego","lexemes": [{"lemma": "sławny","mstag": "adj:sg:gen:m3:pos","disamb": true}]},{"index": 64,"position": [368,373],"orth": "grodu","lexemes": [{"lemma": "gród","mstag": "subst:sg:gen:m3","disamb": true}]},{"index": 65,"position": [373,374],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 66,"position": [375,383],"orth": "urosłszy","lexemes": [{"lemma": "urosłszy","mstag": "ign","disamb": true}]},{"index": 67,"position": [384,385],"orth": "z","lexemes": [{"lemma": "z","mstag": "prep:inst:nwok","disamb": true}]},{"index": 68,"position": [386,392],"orth": "czasem","lexemes": [{"lemma": "czas","mstag": "subst:sg:inst:m3","disamb": true}]},{"index": 69,"position": [393,394],"orth": "w","lexemes": [{"lemma": "w","mstag": "prep:acc:nwok","disamb": true}]},{"index": 70,"position": [395,402],"orth": "ambicję","lexemes": [{"lemma": "ambicja","mstag": "subst:sg:acc:f","disamb": true}]},{"index": 71,"position": [402,403],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 72,"position": [404,411],"orth": "uważali","lexemes": [{"lemma": "uważać","mstag": "praet:pl:m1:imperf","disamb": true}]},{"index": 73,"position": [412,414],"orth": "tę","lexemes": [{"lemma": "ten","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 74,"position": [415,420],"orth": "nazwę","lexemes": [{"lemma": "nazwa","mstag": "subst:sg:acc:f","disamb": true}]},{"index": 75,"position": [421,425],"orth": "jako","lexemes": [{"lemma": "jako","mstag": "conj","disamb": true}]},{"index": 76,"position": [426,436],"orth": "ubliżającą","lexemes": [{"lemma": "ubliżający","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 77,"position": [437,440],"orth": "ich","lexemes": [{"lemma": "on","mstag": "ppron3:pl:gen:m1:ter:akc:npraep","disamb": true}]},{"index": 78,"position": [441,448],"orth": "powadze","lexemes": [{"lemma": "powaga","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 79,"position": [449,450],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 80,"position": [451,457],"orth": "podali","lexemes": [{"lemma": "podać","mstag": "praet:pl:m1:perf","disamb": true}]},{"index": 81,"position": [458,460],"orth": "do","lexemes": [{"lemma": "do","mstag": "prep:gen","disamb": true}]},{"index": 82,"position": [461,462],"orth": "c","lexemes": [{"lemma": "c","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 83,"position": [462,463],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 84,"position": [464,465],"orth": "k","lexemes": [{"lemma": "K","mstag": "brev:pun","disamb": true}]},{"index": 85,"position": [465,466],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 86,"position": [467,480],"orth": "namiestnictwa","lexemes": [{"lemma": "namiestnictwo","mstag": "subst:sg:gen:n","disamb": true}]},{"index": 87,"position": [481,488],"orth": "pokorną","lexemes": [{"lemma": "pokorny","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 88,"position": [489,495],"orth": "prośbę","lexemes": [{"lemma": "prośba","mstag": "subst:sg:acc:f","disamb": true}]},{"index": 89,"position": [496,497],"orth": "o","lexemes": [{"lemma": "o","mstag": "prep:acc","disamb": true}]},{"index": 90,"position": [498,508],"orth": "pozwolenie","lexemes": [{"lemma": "pozwolić","mstag": "ger:sg:acc:n:perf:aff","disamb": true}]},{"index": 91,"position": [509,520],"orth": "zamienienia","lexemes": [{"lemma": "zamienić","mstag": "ger:sg:gen:n:perf:aff","disamb": true}]},{"index": 92,"position": [521,524],"orth": "jej","lexemes": [{"lemma": "on","mstag": "ppron3:sg:gen:f:ter:akc:npraep","disamb": true}]},{"index": 93,"position": [525,527],"orth": "na","lexemes": [{"lemma": "na","mstag": "prep:acc","disamb": true}]},{"index": 94,"position": [528,532],"orth": "inną","lexemes": [{"lemma": "inny","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 95,"position": [532,533],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 96,"position": [534,541],"orth": "Podobne","lexemes": [{"lemma": "podobny","mstag": "adj:pl:nom:f:pos","disamb": true}]},{"index": 97,"position": [542,549],"orth": "zamiany","lexemes": [{"lemma": "zamiana","mstag": "subst:pl:nom:f","disamb": true}]},{"index": 98,"position": [550,557],"orth": "nazwisk","lexemes": [{"lemma": "nazwisko","mstag": "subst:pl:gen:n","disamb": true}]},{"index": 99,"position": [558,568],"orth": "praktykują","lexemes": [{"lemma": "praktykować","mstag": "fin:pl:ter:imperf","disamb": true}]},{"index": 100,"position": [569,572],"orth": "się","lexemes": [{"lemma": "się","mstag": "qub","disamb": true}]},{"index": 101,"position": [573,577],"orth": "dość","lexemes": [{"lemma": "dość","mstag": "qub","disamb": true}]},{"index": 102,"position": [578,584],"orth": "często","lexemes": [{"lemma": "często","mstag": "adv:pos","disamb": true}]},{"index": 103,"position": [585,586],"orth": "w","lexemes": [{"lemma": "w","mstag": "prep:loc:nwok","disamb": true}]},{"index": 104,"position": [587,594],"orth": "Galicji","lexemes": [{"lemma": "Galicja","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 105,"position": [594,595],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 106,"position": [596,607],"orth": "szczególnie","lexemes": [{"lemma": "szczególnie","mstag": "qub","disamb": true}]},{"index": 107,"position": [608,609],"orth": "u","lexemes": [{"lemma": "u","mstag": "prep:gen","disamb": true}]},{"index": 108,"position": [610,622],"orth": "pojedynczych","lexemes": [{"lemma": "pojedynczy","mstag": "adj:pl:gen:f:pos","disamb": true}]},{"index": 109,"position": [623,627],"orth": "osób","lexemes": [{"lemma": "osoba","mstag": "subst:pl:gen:f","disamb": true}]},{"index": 110,"position": [627,628],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 111,"position": [629,634],"orth": "które","lexemes": [{"lemma": "który","mstag": "adj:pl:nom:f:pos","disamb": true}]},{"index": 112,"position": [635,638],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 113,"position": [639,645],"orth": "czując","lexemes": [{"lemma": "czuć","mstag": "pcon:imperf","disamb": true}]},{"index": 114,"position": [646,649],"orth": "się","lexemes": [{"lemma": "się","mstag": "qub","disamb": true}]},{"index": 115,"position": [650,652],"orth": "na","lexemes": [{"lemma": "na","mstag": "prep:loc","disamb": true}]},{"index": 116,"position": [653,659],"orth": "siłach","lexemes": [{"lemma": "siła","mstag": "subst:pl:loc:f","disamb": true}]},{"index": 117,"position": [660,675],"orth": "uszlachetnienia","lexemes": [{"lemma": "uszlachetnić","mstag": "ger:sg:gen:n:perf:aff","disamb": true}]},{"index": 118,"position": [676,680],"orth": "sobą","lexemes": [{"lemma": "siebie","mstag": "siebie:inst","disamb": true}]},{"index": 119,"position": [680,681],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 120,"position": [682,687],"orth": "swymi","lexemes": [{"lemma": "swój","mstag": "adj:pl:inst:m3:pos","disamb": true}]},{"index": 121,"position": [688,695],"orth": "czynami","lexemes": [{"lemma": "czyn","mstag": "subst:pl:inst:m3","disamb": true}]},{"index": 122,"position": [696,704],"orth": "własnego","lexemes": [{"lemma": "własny","mstag": "adj:sg:gen:n:pos","disamb": true}]},{"index": 123,"position": [705,713],"orth": "nazwiska","lexemes": [{"lemma": "nazwisko","mstag": "subst:sg:gen:n","disamb": true}]},{"index": 124,"position": [713,714],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 125,"position": [715,719],"orth": "chcą","lexemes": [{"lemma": "chcieć","mstag": "fin:pl:ter:imperf","disamb": true}]},{"index": 126,"position": [720,730],"orth": "nazwiskiem","lexemes": [{"lemma": "nazwisko","mstag": "subst:sg:inst:n","disamb": true}]},{"index": 127,"position": [731,743],"orth": "uszlachetnić","lexemes": [{"lemma": "uszlachetnić","mstag": "inf:perf","disamb": true}]},{"index": 128,"position": [744,750],"orth": "siebie","lexemes": [{"lemma": "siebie","mstag": "siebie:acc","disamb": true}]},{"index": 129,"position": [750,751],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 130,"position": [752,753],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 131,"position": [754,757],"orth": "tak","lexemes": [{"lemma": "tak","mstag": "adv:pos","disamb": true}]},{"index": 132,"position": [758,760],"orth": "np","lexemes": [{"lemma": "na przykład","mstag": "brev:pun","disamb": true}]},{"index": 133,"position": [760,761],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]}],"entities": [{"text": "Galicji","type": "nam_loc_historical_region","tokens": [28,29],"positions": [174,181]},{"text": "Lodomerii","type": "nam_loc_gpe_admin1","tokens": [30,31],"positions": [184,193]},{"text": "Pipidówka","type": "nam_loc_gpe_city","tokens": [42,43],"positions": [237,246]},{"text": "Galicji","type": "nam_loc_gpe_admin1","tokens": [103,104],"positions": [587,594]}]}
{"filename": "bb4a16ff-33de-4478-939d-12db67d750b1","text": "ROZDZIAŁ I. CO NIECO O SAMEJ PIPIDÓWCE Przede wszystkim muszę uprzedzić z góry czytelników, aby się daremnie nie trudzili nad szukaniem wyżej wyrażonego miasteczka na mapach Galicji i Lodomerii, bo go tam nie znajdą. Nie dlatego, jakoby Pipidówka nie istniała w rzeczywistości i była tylko wytworem fantazji autora, ale po prostu dlatego, że mieszkańcy owego sławnego grodu, urosłszy z czasem w ambicję, uważali tę nazwę jako ubliżającą ich powadze i podali do c. k. namiestnictwa pokorną prośbę o pozwolenie zamienienia jej na inną. Podobne zamiany nazwisk praktykują się dość często w Galicji, szczególnie u pojedynczych osób, które nie czując się na siłach uszlachetnienia sobą, swymi czynami własnego nazwiska, chcą nazwiskiem uszlachetnić siebie, i tak np.","tokens": [{"index": 1,"position": [0,8],"orth": "ROZDZIAŁ","lexemes": [{"lemma": "rozdział","mstag": "subst:sg:nom:m3","disamb": true}]},{"index": 2,"position": [9,10],"orth": "I","lexemes": [{"lemma": "I","mstag": "adj:sg:nom:m3:pos","disamb": true}]},{"index": 3,"position": [10,11],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 4,"position": [12,14],"orth": "CO","lexemes": [{"lemma": "co","mstag": "conj","disamb": true}]},{"index": 5,"position": [15,20],"orth": "NIECO","lexemes": [{"lemma": "nieco","mstag": "adv","disamb": true}]},{"index": 6,"position": [21,22],"orth": "O","lexemes": [{"lemma": "o","mstag": "prep:loc","disamb": true}]},{"index": 7,"position": [23,28],"orth": "SAMEJ","lexemes": [{"lemma": "sam","mstag": "adj:sg:loc:f:pos","disamb": true}]},{"index": 8,"position": [29,38],"orth": "PIPIDÓWCE","lexemes": [{"lemma": "Pipidówka","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 9,"position": [39,45],"orth": "Przede","lexemes": [{"lemma": "przed","mstag": "prep:inst:wok","disamb": true}]},{"index": 10,"position": [46,55],"orth": "wszystkim","lexemes": [{"lemma": "wszystko","mstag": "subst:sg:inst:n","disamb": true}]},{"index": 11,"position": [56,61],"orth": "muszę","lexemes": [{"lemma": "musieć","mstag": "fin:sg:pri:imperf","disamb": true}]},{"index": 12,"position": [62,71],"orth": "uprzedzić","lexemes": [{"lemma": "uprzedzić","mstag": "inf:perf","disamb": true}]},{"index": 13,"position": [72,73],"orth": "z","lexemes": [{"lemma": "z","mstag": "prep:gen:nwok","disamb": true}]},{"index": 14,"position": [74,78],"orth": "góry","lexemes": [{"lemma": "góra","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 15,"position": [79,90],"orth": "czytelników","lexemes": [{"lemma": "czytelnik","mstag": "subst:pl:gen:m1","disamb": true}]},{"index": 16,"position": [90,91],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 17,"position": [92,95],"orth": "aby","lexemes": [{"lemma": "aby","mstag": "comp","disamb": true}]},{"index": 18,"position": [96,99],"orth": "się","lexemes": [{"lemma": "się","mstag": "qub","disamb": true}]},{"index": 19,"position": [100,108],"orth": "daremnie","lexemes": [{"lemma": "daremnie","mstag": "adv:pos","disamb": true}]},{"index": 20,"position": [109,112],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 21,"position": [113,121],"orth": "trudzili","lexemes": [{"lemma": "trudzić","mstag": "praet:pl:m1:imperf","disamb": true}]},{"index": 22,"position": [122,125],"orth": "nad","lexemes": [{"lemma": "nad","mstag": "prep:inst:nwok","disamb": true}]},{"index": 23,"position": [126,135],"orth": "szukaniem","lexemes": [{"lemma": "szukać","mstag": "ger:sg:inst:n:imperf:aff","disamb": true}]},{"index": 24,"position": [136,141],"orth": "wyżej","lexemes": [{"lemma": "wysoko","mstag": "adv:com","disamb": true}]},{"index": 25,"position": [142,152],"orth": "wyrażonego","lexemes": [{"lemma": "wyrazić","mstag": "ppas:sg:gen:n:perf:aff","disamb": true}]},{"index": 26,"position": [153,163],"orth": "miasteczka","lexemes": [{"lemma": "miasteczko","mstag": "subst:sg:gen:n","disamb": true}]},{"index": 27,"position": [164,166],"orth": "na","lexemes": [{"lemma": "na","mstag": "prep:loc","disamb": true}]},{"index": 28,"position": [167,173],"orth": "mapach","lexemes": [{"lemma": "mapa","mstag": "subst:pl:loc:f","disamb": true}]},{"index": 29,"position": [174,181],"orth": "Galicji","lexemes": [{"lemma": "Galicja","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 30,"position": [182,183],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 31,"position": [184,193],"orth": "Lodomerii","lexemes": [{"lemma": "Lodomerii","mstag": "ign","disamb": true}]},{"index": 32,"position": [193,194],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 33,"position": [195,197],"orth": "bo","lexemes": [{"lemma": "bo","mstag": "comp","disamb": true}]},{"index": 34,"position": [198,200],"orth": "go","lexemes": [{"lemma": "on","mstag": "ppron3:sg:gen:m1:ter:nakc:npraep","disamb": true}]},{"index": 35,"position": [201,204],"orth": "tam","lexemes": [{"lemma": "tam","mstag": "adv","disamb": true}]},{"index": 36,"position": [205,208],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 37,"position": [209,215],"orth": "znajdą","lexemes": [{"lemma": "znaleźć","mstag": "fin:pl:ter:perf","disamb": true}]},{"index": 38,"position": [215,216],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 39,"position": [217,220],"orth": "Nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 40,"position": [221,228],"orth": "dlatego","lexemes": [{"lemma": "dlatego","mstag": "adv","disamb": true}]},{"index": 41,"position": [228,229],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 42,"position": [230,236],"orth": "jakoby","lexemes": [{"lemma": "jakoby","mstag": "comp","disamb": true}]},{"index": 43,"position": [237,246],"orth": "Pipidówka","lexemes": [{"lemma": "Pipidówka","mstag": "subst:sg:nom:f","disamb": true}]},{"index": 44,"position": [247,250],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 45,"position": [251,259],"orth": "istniała","lexemes": [{"lemma": "istnieć","mstag": "praet:sg:f:imperf","disamb": true}]},{"index": 46,"position": [260,261],"orth": "w","lexemes": [{"lemma": "w","mstag": "prep:loc:nwok","disamb": true}]},{"index": 47,"position": [262,276],"orth": "rzeczywistości","lexemes": [{"lemma": "rzeczywistość","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 48,"position": [277,278],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 49,"position": [279,283],"orth": "była","lexemes": [{"lemma": "być","mstag": "praet:sg:f:imperf","disamb": true}]},{"index": 50,"position": [284,289],"orth": "tylko","lexemes": [{"lemma": "tylko","mstag": "qub","disamb": true}]},{"index": 51,"position": [290,298],"orth": "wytworem","lexemes": [{"lemma": "wytwór","mstag": "subst:sg:inst:m3","disamb": true}]},{"index": 52,"position": [299,307],"orth": "fantazji","lexemes": [{"lemma": "fantazja","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 53,"position": [308,314],"orth": "autora","lexemes": [{"lemma": "autor","mstag": "subst:sg:gen:m1","disamb": true}]},{"index": 54,"position": [314,315],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 55,"position": [316,319],"orth": "ale","lexemes": [{"lemma": "ale","mstag": "conj","disamb": true}]},{"index": 56,"position": [320,322],"orth": "po","lexemes": [{"lemma": "po","mstag": "prep:acc","disamb": true}]},{"index": 57,"position": [323,329],"orth": "prostu","lexemes": [{"lemma": "prosty","mstag": "adjp","disamb": true}]},{"index": 58,"position": [330,337],"orth": "dlatego","lexemes": [{"lemma": "dlatego","mstag": "adv","disamb": true}]},{"index": 59,"position": [337,338],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 60,"position": [339,341],"orth": "że","lexemes": [{"lemma": "że","mstag": "comp","disamb": true}]},{"index": 61,"position": [342,352],"orth": "mieszkańcy","lexemes": [{"lemma": "mieszkaniec","mstag": "subst:pl:nom:m1","disamb": true}]},{"index": 62,"position": [353,358],"orth": "owego","lexemes": [{"lemma": "ów","mstag": "adj:sg:gen:m3:pos","disamb": true}]},{"index": 63,"position": [359,367],"orth": "sławnego","lexemes": [{"lemma": "sławny","mstag": "adj:sg:gen:m3:pos","disamb": true}]},{"index": 64,"position": [368,373],"orth": "grodu","lexemes": [{"lemma": "gród","mstag": "subst:sg:gen:m3","disamb": true}]},{"index": 65,"position": [373,374],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 66,"position": [375,383],"orth": "urosłszy","lexemes": [{"lemma": "urosłszy","mstag": "ign","disamb": true}]},{"index": 67,"position": [384,385],"orth": "z","lexemes": [{"lemma": "z","mstag": "prep:inst:nwok","disamb": true}]},{"index": 68,"position": [386,392],"orth": "czasem","lexemes": [{"lemma": "czas","mstag": "subst:sg:inst:m3","disamb": true}]},{"index": 69,"position": [393,394],"orth": "w","lexemes": [{"lemma": "w","mstag": "prep:acc:nwok","disamb": true}]},{"index": 70,"position": [395,402],"orth": "ambicję","lexemes": [{"lemma": "ambicja","mstag": "subst:sg:acc:f","disamb": true}]},{"index": 71,"position": [402,403],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 72,"position": [404,411],"orth": "uważali","lexemes": [{"lemma": "uważać","mstag": "praet:pl:m1:imperf","disamb": true}]},{"index": 73,"position": [412,414],"orth": "tę","lexemes": [{"lemma": "ten","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 74,"position": [415,420],"orth": "nazwę","lexemes": [{"lemma": "nazwa","mstag": "subst:sg:acc:f","disamb": true}]},{"index": 75,"position": [421,425],"orth": "jako","lexemes": [{"lemma": "jako","mstag": "conj","disamb": true}]},{"index": 76,"position": [426,436],"orth": "ubliżającą","lexemes": [{"lemma": "ubliżający","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 77,"position": [437,440],"orth": "ich","lexemes": [{"lemma": "on","mstag": "ppron3:pl:gen:m1:ter:akc:npraep","disamb": true}]},{"index": 78,"position": [441,448],"orth": "powadze","lexemes": [{"lemma": "powaga","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 79,"position": [449,450],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 80,"position": [451,457],"orth": "podali","lexemes": [{"lemma": "podać","mstag": "praet:pl:m1:perf","disamb": true}]},{"index": 81,"position": [458,460],"orth": "do","lexemes": [{"lemma": "do","mstag": "prep:gen","disamb": true}]},{"index": 82,"position": [461,462],"orth": "c","lexemes": [{"lemma": "c","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 83,"position": [462,463],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 84,"position": [464,465],"orth": "k","lexemes": [{"lemma": "K","mstag": "brev:pun","disamb": true}]},{"index": 85,"position": [465,466],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 86,"position": [467,480],"orth": "namiestnictwa","lexemes": [{"lemma": "namiestnictwo","mstag": "subst:sg:gen:n","disamb": true}]},{"index": 87,"position": [481,488],"orth": "pokorną","lexemes": [{"lemma": "pokorny","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 88,"position": [489,495],"orth": "prośbę","lexemes": [{"lemma": "prośba","mstag": "subst:sg:acc:f","disamb": true}]},{"index": 89,"position": [496,497],"orth": "o","lexemes": [{"lemma": "o","mstag": "prep:acc","disamb": true}]},{"index": 90,"position": [498,508],"orth": "pozwolenie","lexemes": [{"lemma": "pozwolić","mstag": "ger:sg:acc:n:perf:aff","disamb": true}]},{"index": 91,"position": [509,520],"orth": "zamienienia","lexemes": [{"lemma": "zamienić","mstag": "ger:sg:gen:n:perf:aff","disamb": true}]},{"index": 92,"position": [521,524],"orth": "jej","lexemes": [{"lemma": "on","mstag": "ppron3:sg:gen:f:ter:akc:npraep","disamb": true}]},{"index": 93,"position": [525,527],"orth": "na","lexemes": [{"lemma": "na","mstag": "prep:acc","disamb": true}]},{"index": 94,"position": [528,532],"orth": "inną","lexemes": [{"lemma": "inny","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 95,"position": [532,533],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 96,"position": [534,541],"orth": "Podobne","lexemes": [{"lemma": "podobny","mstag": "adj:pl:nom:f:pos","disamb": true}]},{"index": 97,"position": [542,549],"orth": "zamiany","lexemes": [{"lemma": "zamiana","mstag": "subst:pl:nom:f","disamb": true}]},{"index": 98,"position": [550,557],"orth": "nazwisk","lexemes": [{"lemma": "nazwisko","mstag": "subst:pl:gen:n","disamb": true}]},{"index": 99,"position": [558,568],"orth": "praktykują","lexemes": [{"lemma": "praktykować","mstag": "fin:pl:ter:imperf","disamb": true}]},{"index": 100,"position": [569,572],"orth": "się","lexemes": [{"lemma": "się","mstag": "qub","disamb": true}]},{"index": 101,"position": [573,577],"orth": "dość","lexemes": [{"lemma": "dość","mstag": "qub","disamb": true}]},{"index": 102,"position": [578,584],"orth": "często","lexemes": [{"lemma": "często","mstag": "adv:pos","disamb": true}]},{"index": 103,"position": [585,586],"orth": "w","lexemes": [{"lemma": "w","mstag": "prep:loc:nwok","disamb": true}]},{"index": 104,"position": [587,594],"orth": "Galicji","lexemes": [{"lemma": "Galicja","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 105,"position": [594,595],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 106,"position": [596,607],"orth": "szczególnie","lexemes": [{"lemma": "szczególnie","mstag": "qub","disamb": true}]},{"index": 107,"position": [608,609],"orth": "u","lexemes": [{"lemma": "u","mstag": "prep:gen","disamb": true}]},{"index": 108,"position": [610,622],"orth": "pojedynczych","lexemes": [{"lemma": "pojedynczy","mstag": "adj:pl:gen:f:pos","disamb": true}]},{"index": 109,"position": [623,627],"orth": "osób","lexemes": [{"lemma": "osoba","mstag": "subst:pl:gen:f","disamb": true}]},{"index": 110,"position": [627,628],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 111,"position": [629,634],"orth": "które","lexemes": [{"lemma": "który","mstag": "adj:pl:nom:f:pos","disamb": true}]},{"index": 112,"position": [635,638],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 113,"position": [639,645],"orth": "czując","lexemes": [{"lemma": "czuć","mstag": "pcon:imperf","disamb": true}]},{"index": 114,"position": [646,649],"orth": "się","lexemes": [{"lemma": "się","mstag": "qub","disamb": true}]},{"index": 115,"position": [650,652],"orth": "na","lexemes": [{"lemma": "na","mstag": "prep:loc","disamb": true}]},{"index": 116,"position": [653,659],"orth": "siłach","lexemes": [{"lemma": "siła","mstag": "subst:pl:loc:f","disamb": true}]},{"index": 117,"position": [660,675],"orth": "uszlachetnienia","lexemes": [{"lemma": "uszlachetnić","mstag": "ger:sg:gen:n:perf:aff","disamb": true}]},{"index": 118,"position": [676,680],"orth": "sobą","lexemes": [{"lemma": "siebie","mstag": "siebie:inst","disamb": true}]},{"index": 119,"position": [680,681],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 120,"position": [682,687],"orth": "swymi","lexemes": [{"lemma": "swój","mstag": "adj:pl:inst:m3:pos","disamb": true}]},{"index": 121,"position": [688,695],"orth": "czynami","lexemes": [{"lemma": "czyn","mstag": "subst:pl:inst:m3","disamb": true}]},{"index": 122,"position": [696,704],"orth": "własnego","lexemes": [{"lemma": "własny","mstag": "adj:sg:gen:n:pos","disamb": true}]},{"index": 123,"position": [705,713],"orth": "nazwiska","lexemes": [{"lemma": "nazwisko","mstag": "subst:sg:gen:n","disamb": true}]},{"index": 124,"position": [713,714],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 125,"position": [715,719],"orth": "chcą","lexemes": [{"lemma": "chcieć","mstag": "fin:pl:ter:imperf","disamb": true}]},{"index": 126,"position": [720,730],"orth": "nazwiskiem","lexemes": [{"lemma": "nazwisko","mstag": "subst:sg:inst:n","disamb": true}]},{"index": 127,"position": [731,743],"orth": "uszlachetnić","lexemes": [{"lemma": "uszlachetnić","mstag": "inf:perf","disamb": true}]},{"index": 128,"position": [744,750],"orth": "siebie","lexemes": [{"lemma": "siebie","mstag": "siebie:acc","disamb": true}]},{"index": 129,"position": [750,751],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 130,"position": [752,753],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 131,"position": [754,757],"orth": "tak","lexemes": [{"lemma": "tak","mstag": "adv:pos","disamb": true}]},{"index": 132,"position": [758,760],"orth": "np","lexemes": [{"lemma": "na przykład","mstag": "brev:pun","disamb": true}]},{"index": 133,"position": [760,761],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]}],"entities": [{"text": "Galicji","type": "nam_loc_historical_region","tokens": [28,29],"positions": [174,181]},{"text": "Lodomerii","type": "nam_loc_gpe_admin1","tokens": [30,31],"positions": [184,193]},{"text": "Pipidówka","type": "nam_loc_gpe_city","tokens": [42,43],"positions": [237,246]},{"text": "Galicji","type": "nam_loc_gpe_admin1","tokens": [103,104],"positions": [587,594]}]}
\ No newline at end of file
......@@ -34,3 +34,28 @@ def test_detect_dates_pl():
(7, 16, DateDetection(format_date1)),
(34, 49, DateDetection(format_date2)),
]
def test_date_with_different_punctuations():
# There is discussion about this wheter we should even detect such cases
# as a dates... However, for now we do and if we find cases where that is
# problematic, this definitly could be changed.
detector = DateDetector("pl")
text = "1.01,2022"
found_dates = detector.detect(text, dict())
format_date = [
(
DateDetection.AnnotationPart.TWO_DIGITS_DAY,
"01",
),
(DateDetection.AnnotationPart.OTHER, "."),
(DateDetection.AnnotationPart.TWO_DIGIT_MONTH, "01"),
(DateDetection.AnnotationPart.OTHER, ","),
(DateDetection.AnnotationPart.FOUR_DIGIT_YEAR, "2022"),
]
assert found_dates == [
(7, 16, DateDetection(format_date)),
]
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment