Implement support for wiktorner

e25eadab · Michał Pogoda · 63784ace · e25eadab · e25eadab · e25eadab
Commit e25eadab authored Feb 9, 2023 by Michał Pogoda
--- a/config/config.yaml
+++ b/config/config.yaml
 defaults:
-  - paths: default
-  - detectors: all
-  - replacers: tag
-  - suppressor: order_based
-  - input_parser: ccl
-  - pipeline: default
+  - configuration: ccl
  - _self_
\ No newline at end of file
-
-language: "pl"
\ No newline at end of file
--- a/config/configuration/ccl.yaml
+++ b/config/configuration/ccl.yaml
+# @package _global_
+
+defaults:
+  - /paths: default
+  - /detectors: all
+  - /replacers: tag
+  - /suppressor: order_based
+  - /input_parser: ccl
+  - /pipeline: default
+  - _self_
+
+language: "pl"
\ No newline at end of file
--- a/config/configuration/wiktorner_jsonl.yaml
+++ b/config/configuration/wiktorner_jsonl.yaml
+# @package _global_
+
+defaults:
+  - /paths: default
+  - /detectors: all_ner_kpwr_ext
+  - /replacers: tag
+  - /suppressor: order_based
+  - /input_parser: wiktor_ner
+  - /pipeline: sequential_jsonl
+  - _self_
+
+language: "pl"
\ No newline at end of file
--- a/config/detectors/all.yaml
+++ b/config/detectors/all.yaml
 defaults:
  - date
  - email
-  - ner
+  - ner_n5
  - phone
  - url
  - user
+  - number
\ No newline at end of file
--- a/config/detectors/all_ner_kpwr_ext.yaml
+++ b/config/detectors/all_ner_kpwr_ext.yaml
+defaults:
+  - date
+  - email
+  - ner_kpwr_ext
+  - phone
+  - url
+  - user
+  - number
\ No newline at end of file
--- a/config/detectors/ner.yaml
+++ b/config/detectors/ner.yaml
-ner:
-  _target_: src.detectors.ner.NerDetector
-  language: ${language}
\ No newline at end of file
--- a/config/detectors/ner_kpwr_ext.yaml
+++ b/config/detectors/ner_kpwr_ext.yaml
+ner:
+  _target_: src.detectors.ner.NerDetector
+  language: ${language}
+  detection_mapping:
+    "nam_liv_person_first": "name"
+    "nam_liv_person_last": "surname"
+    "nam_fac_road": "street_name"
+    "nam_loc_gpe_city": "city"
+    "nam_loc_gpe_country": "country"
+    "nam_loc_gpe_admin1": "country" # TODO: Implement better mapping for this
+    "nam_loc_historical_region": "country" # TODO: Implement better mapping for this
--- a/config/detectors/ner_n5.yaml
+++ b/config/detectors/ner_n5.yaml
+ner:
+  _target_: src.detectors.ner.NerDetector
+  language: ${language}
+  detection_mapping:
+    "person_first_nam": "name"
+    "person_last_nam": "surname"
+    "road_nam": "street_name"
+    "city_nam": "city"
+    "country_nam": "country"
\ No newline at end of file
--- a/config/input_parser/wiktor_ner.yaml
+++ b/config/input_parser/wiktor_ner.yaml
+_target_: src.input_parsers.wiktor_ner.WiktorNERInputParser
\ No newline at end of file
--- a/config/pipeline/sequential_jsonl.yaml
+++ b/config/pipeline/sequential_jsonl.yaml
+_target_: src.pipeline.sequential_jsonl.SequentialJSONLPipeline
+input_parser: ${input_parser}
+detectors: ${detectors}
+suppressor: ${suppressor}
+replacers: ${replacers}
\ No newline at end of file
--- a/config/replacers/ner.yaml
+++ b/config/replacers/ner.yaml
 ner:
  _target_: src.replacers.ner_replacer.NERReplacer
  dictionary:
-    _target_: src.dictionaries.morphosyntactic.pl_ner.PlNERMorphosyntacticDictionary
+    _target_: src.dictionaries.morphosyntactic.ner_file.NERFileMorphosyntacticDictionary
    dictionary_path: ${paths.dictionaries_path}/pl_dict.txt
      
\ No newline at end of file
--- a/dictionaries/pl_dict.txt
+++ b/dictionaries/pl_dict.txt
--- a/example_inputs/wiktorner_jsonl.jsonl
+++ b/example_inputs/wiktorner_jsonl.jsonl
+{"filename": "bb4a16ff-33de-4478-939d-12db67d750b1","text": "ROZDZIAŁ I. CO NIECO O SAMEJ PIPIDÓWCE Przede wszystkim muszę uprzedzić z góry czytelników, aby się daremnie nie trudzili nad szukaniem wyżej wyrażonego miasteczka na mapach Galicji i Lodomerii, bo go tam nie znajdą. Nie dlatego, jakoby Pipidówka nie istniała w rzeczywistości i była tylko wytworem fantazji autora, ale po prostu dlatego, że mieszkańcy owego sławnego grodu, urosłszy z czasem w ambicję, uważali tę nazwę jako ubliżającą ich powadze i podali do c. k. namiestnictwa pokorną prośbę o pozwolenie zamienienia jej na inną. Podobne zamiany nazwisk praktykują się dość często w Galicji, szczególnie u pojedynczych osób, które nie czując się na siłach uszlachetnienia sobą, swymi czynami własnego nazwiska, chcą nazwiskiem uszlachetnić siebie, i tak np.","tokens": [{"index": 1,"position": [0,8],"orth": "ROZDZIAŁ","lexemes": [{"lemma": "rozdział","mstag": "subst:sg:nom:m3","disamb": true}]},{"index": 2,"position": [9,10],"orth": "I","lexemes": [{"lemma": "I","mstag": "adj:sg:nom:m3:pos","disamb": true}]},{"index": 3,"position": [10,11],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 4,"position": [12,14],"orth": "CO","lexemes": [{"lemma": "co","mstag": "conj","disamb": true}]},{"index": 5,"position": [15,20],"orth": "NIECO","lexemes": [{"lemma": "nieco","mstag": "adv","disamb": true}]},{"index": 6,"position": [21,22],"orth": "O","lexemes": [{"lemma": "o","mstag": "prep:loc","disamb": true}]},{"index": 7,"position": [23,28],"orth": "SAMEJ","lexemes": [{"lemma": "sam","mstag": "adj:sg:loc:f:pos","disamb": true}]},{"index": 8,"position": [29,38],"orth": "PIPIDÓWCE","lexemes": [{"lemma": "Pipidówka","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 9,"position": [39,45],"orth": "Przede","lexemes": [{"lemma": "przed","mstag": "prep:inst:wok","disamb": true}]},{"index": 10,"position": [46,55],"orth": "wszystkim","lexemes": [{"lemma": "wszystko","mstag": "subst:sg:inst:n","disamb": true}]},{"index": 11,"position": [56,61],"orth": "muszę","lexemes": [{"lemma": "musieć","mstag": "fin:sg:pri:imperf","disamb": true}]},{"index": 12,"position": [62,71],"orth": "uprzedzić","lexemes": [{"lemma": "uprzedzić","mstag": "inf:perf","disamb": true}]},{"index": 13,"position": [72,73],"orth": "z","lexemes": [{"lemma": "z","mstag": "prep:gen:nwok","disamb": true}]},{"index": 14,"position": [74,78],"orth": "góry","lexemes": [{"lemma": "góra","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 15,"position": [79,90],"orth": "czytelników","lexemes": [{"lemma": "czytelnik","mstag": "subst:pl:gen:m1","disamb": true}]},{"index": 16,"position": [90,91],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 17,"position": [92,95],"orth": "aby","lexemes": [{"lemma": "aby","mstag": "comp","disamb": true}]},{"index": 18,"position": [96,99],"orth": "się","lexemes": [{"lemma": "się","mstag": "qub","disamb": true}]},{"index": 19,"position": [100,108],"orth": "daremnie","lexemes": [{"lemma": "daremnie","mstag": "adv:pos","disamb": true}]},{"index": 20,"position": [109,112],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 21,"position": [113,121],"orth": "trudzili","lexemes": [{"lemma": "trudzić","mstag": "praet:pl:m1:imperf","disamb": true}]},{"index": 22,"position": [122,125],"orth": "nad","lexemes": [{"lemma": "nad","mstag": "prep:inst:nwok","disamb": true}]},{"index": 23,"position": [126,135],"orth": "szukaniem","lexemes": [{"lemma": "szukać","mstag": "ger:sg:inst:n:imperf:aff","disamb": true}]},{"index": 24,"position": [136,141],"orth": "wyżej","lexemes": [{"lemma": "wysoko","mstag": "adv:com","disamb": true}]},{"index": 25,"position": [142,152],"orth": "wyrażonego","lexemes": [{"lemma": "wyrazić","mstag": "ppas:sg:gen:n:perf:aff","disamb": true}]},{"index": 26,"position": [153,163],"orth": "miasteczka","lexemes": [{"lemma": "miasteczko","mstag": "subst:sg:gen:n","disamb": true}]},{"index": 27,"position": [164,166],"orth": "na","lexemes": [{"lemma": "na","mstag": "prep:loc","disamb": true}]},{"index": 28,"position": [167,173],"orth": "mapach","lexemes": [{"lemma": "mapa","mstag": "subst:pl:loc:f","disamb": true}]},{"index": 29,"position": [174,181],"orth": "Galicji","lexemes": [{"lemma": "Galicja","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 30,"position": [182,183],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 31,"position": [184,193],"orth": "Lodomerii","lexemes": [{"lemma": "Lodomerii","mstag": "ign","disamb": true}]},{"index": 32,"position": [193,194],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 33,"position": [195,197],"orth": "bo","lexemes": [{"lemma": "bo","mstag": "comp","disamb": true}]},{"index": 34,"position": [198,200],"orth": "go","lexemes": [{"lemma": "on","mstag": "ppron3:sg:gen:m1:ter:nakc:npraep","disamb": true}]},{"index": 35,"position": [201,204],"orth": "tam","lexemes": [{"lemma": "tam","mstag": "adv","disamb": true}]},{"index": 36,"position": [205,208],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 37,"position": [209,215],"orth": "znajdą","lexemes": [{"lemma": "znaleźć","mstag": "fin:pl:ter:perf","disamb": true}]},{"index": 38,"position": [215,216],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 39,"position": [217,220],"orth": "Nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 40,"position": [221,228],"orth": "dlatego","lexemes": [{"lemma": "dlatego","mstag": "adv","disamb": true}]},{"index": 41,"position": [228,229],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 42,"position": [230,236],"orth": "jakoby","lexemes": [{"lemma": "jakoby","mstag": "comp","disamb": true}]},{"index": 43,"position": [237,246],"orth": "Pipidówka","lexemes": [{"lemma": "Pipidówka","mstag": "subst:sg:nom:f","disamb": true}]},{"index": 44,"position": [247,250],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 45,"position": [251,259],"orth": "istniała","lexemes": [{"lemma": "istnieć","mstag": "praet:sg:f:imperf","disamb": true}]},{"index": 46,"position": [260,261],"orth": "w","lexemes": [{"lemma": "w","mstag": "prep:loc:nwok","disamb": true}]},{"index": 47,"position": [262,276],"orth": "rzeczywistości","lexemes": [{"lemma": "rzeczywistość","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 48,"position": [277,278],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 49,"position": [279,283],"orth": "była","lexemes": [{"lemma": "być","mstag": "praet:sg:f:imperf","disamb": true}]},{"index": 50,"position": [284,289],"orth": "tylko","lexemes": [{"lemma": "tylko","mstag": "qub","disamb": true}]},{"index": 51,"position": [290,298],"orth": "wytworem","lexemes": [{"lemma": "wytwór","mstag": "subst:sg:inst:m3","disamb": true}]},{"index": 52,"position": [299,307],"orth": "fantazji","lexemes": [{"lemma": "fantazja","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 53,"position": [308,314],"orth": "autora","lexemes": [{"lemma": "autor","mstag": "subst:sg:gen:m1","disamb": true}]},{"index": 54,"position": [314,315],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 55,"position": [316,319],"orth": "ale","lexemes": [{"lemma": "ale","mstag": "conj","disamb": true}]},{"index": 56,"position": [320,322],"orth": "po","lexemes": [{"lemma": "po","mstag": "prep:acc","disamb": true}]},{"index": 57,"position": [323,329],"orth": "prostu","lexemes": [{"lemma": "prosty","mstag": "adjp","disamb": true}]},{"index": 58,"position": [330,337],"orth": "dlatego","lexemes": [{"lemma": "dlatego","mstag": "adv","disamb": true}]},{"index": 59,"position": [337,338],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 60,"position": [339,341],"orth": "że","lexemes": [{"lemma": "że","mstag": "comp","disamb": true}]},{"index": 61,"position": [342,352],"orth": "mieszkańcy","lexemes": [{"lemma": "mieszkaniec","mstag": "subst:pl:nom:m1","disamb": true}]},{"index": 62,"position": [353,358],"orth": "owego","lexemes": [{"lemma": "ów","mstag": "adj:sg:gen:m3:pos","disamb": true}]},{"index": 63,"position": [359,367],"orth": "sławnego","lexemes": [{"lemma": "sławny","mstag": "adj:sg:gen:m3:pos","disamb": true}]},{"index": 64,"position": [368,373],"orth": "grodu","lexemes": [{"lemma": "gród","mstag": "subst:sg:gen:m3","disamb": true}]},{"index": 65,"position": [373,374],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 66,"position": [375,383],"orth": "urosłszy","lexemes": [{"lemma": "urosłszy","mstag": "ign","disamb": true}]},{"index": 67,"position": [384,385],"orth": "z","lexemes": [{"lemma": "z","mstag": "prep:inst:nwok","disamb": true}]},{"index": 68,"position": [386,392],"orth": "czasem","lexemes": [{"lemma": "czas","mstag": "subst:sg:inst:m3","disamb": true}]},{"index": 69,"position": [393,394],"orth": "w","lexemes": [{"lemma": "w","mstag": "prep:acc:nwok","disamb": true}]},{"index": 70,"position": [395,402],"orth": "ambicję","lexemes": [{"lemma": "ambicja","mstag": "subst:sg:acc:f","disamb": true}]},{"index": 71,"position": [402,403],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 72,"position": [404,411],"orth": "uważali","lexemes": [{"lemma": "uważać","mstag": "praet:pl:m1:imperf","disamb": true}]},{"index": 73,"position": [412,414],"orth": "tę","lexemes": [{"lemma": "ten","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 74,"position": [415,420],"orth": "nazwę","lexemes": [{"lemma": "nazwa","mstag": "subst:sg:acc:f","disamb": true}]},{"index": 75,"position": [421,425],"orth": "jako","lexemes": [{"lemma": "jako","mstag": "conj","disamb": true}]},{"index": 76,"position": [426,436],"orth": "ubliżającą","lexemes": [{"lemma": "ubliżający","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 77,"position": [437,440],"orth": "ich","lexemes": [{"lemma": "on","mstag": "ppron3:pl:gen:m1:ter:akc:npraep","disamb": true}]},{"index": 78,"position": [441,448],"orth": "powadze","lexemes": [{"lemma": "powaga","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 79,"position": [449,450],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 80,"position": [451,457],"orth": "podali","lexemes": [{"lemma": "podać","mstag": "praet:pl:m1:perf","disamb": true}]},{"index": 81,"position": [458,460],"orth": "do","lexemes": [{"lemma": "do","mstag": "prep:gen","disamb": true}]},{"index": 82,"position": [461,462],"orth": "c","lexemes": [{"lemma": "c","mstag": "subst:sg:gen:f","disamb": true}]},{"index": 83,"position": [462,463],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 84,"position": [464,465],"orth": "k","lexemes": [{"lemma": "K","mstag": "brev:pun","disamb": true}]},{"index": 85,"position": [465,466],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 86,"position": [467,480],"orth": "namiestnictwa","lexemes": [{"lemma": "namiestnictwo","mstag": "subst:sg:gen:n","disamb": true}]},{"index": 87,"position": [481,488],"orth": "pokorną","lexemes": [{"lemma": "pokorny","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 88,"position": [489,495],"orth": "prośbę","lexemes": [{"lemma": "prośba","mstag": "subst:sg:acc:f","disamb": true}]},{"index": 89,"position": [496,497],"orth": "o","lexemes": [{"lemma": "o","mstag": "prep:acc","disamb": true}]},{"index": 90,"position": [498,508],"orth": "pozwolenie","lexemes": [{"lemma": "pozwolić","mstag": "ger:sg:acc:n:perf:aff","disamb": true}]},{"index": 91,"position": [509,520],"orth": "zamienienia","lexemes": [{"lemma": "zamienić","mstag": "ger:sg:gen:n:perf:aff","disamb": true}]},{"index": 92,"position": [521,524],"orth": "jej","lexemes": [{"lemma": "on","mstag": "ppron3:sg:gen:f:ter:akc:npraep","disamb": true}]},{"index": 93,"position": [525,527],"orth": "na","lexemes": [{"lemma": "na","mstag": "prep:acc","disamb": true}]},{"index": 94,"position": [528,532],"orth": "inną","lexemes": [{"lemma": "inny","mstag": "adj:sg:acc:f:pos","disamb": true}]},{"index": 95,"position": [532,533],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]},{"index": 96,"position": [534,541],"orth": "Podobne","lexemes": [{"lemma": "podobny","mstag": "adj:pl:nom:f:pos","disamb": true}]},{"index": 97,"position": [542,549],"orth": "zamiany","lexemes": [{"lemma": "zamiana","mstag": "subst:pl:nom:f","disamb": true}]},{"index": 98,"position": [550,557],"orth": "nazwisk","lexemes": [{"lemma": "nazwisko","mstag": "subst:pl:gen:n","disamb": true}]},{"index": 99,"position": [558,568],"orth": "praktykują","lexemes": [{"lemma": "praktykować","mstag": "fin:pl:ter:imperf","disamb": true}]},{"index": 100,"position": [569,572],"orth": "się","lexemes": [{"lemma": "się","mstag": "qub","disamb": true}]},{"index": 101,"position": [573,577],"orth": "dość","lexemes": [{"lemma": "dość","mstag": "qub","disamb": true}]},{"index": 102,"position": [578,584],"orth": "często","lexemes": [{"lemma": "często","mstag": "adv:pos","disamb": true}]},{"index": 103,"position": [585,586],"orth": "w","lexemes": [{"lemma": "w","mstag": "prep:loc:nwok","disamb": true}]},{"index": 104,"position": [587,594],"orth": "Galicji","lexemes": [{"lemma": "Galicja","mstag": "subst:sg:loc:f","disamb": true}]},{"index": 105,"position": [594,595],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 106,"position": [596,607],"orth": "szczególnie","lexemes": [{"lemma": "szczególnie","mstag": "qub","disamb": true}]},{"index": 107,"position": [608,609],"orth": "u","lexemes": [{"lemma": "u","mstag": "prep:gen","disamb": true}]},{"index": 108,"position": [610,622],"orth": "pojedynczych","lexemes": [{"lemma": "pojedynczy","mstag": "adj:pl:gen:f:pos","disamb": true}]},{"index": 109,"position": [623,627],"orth": "osób","lexemes": [{"lemma": "osoba","mstag": "subst:pl:gen:f","disamb": true}]},{"index": 110,"position": [627,628],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 111,"position": [629,634],"orth": "które","lexemes": [{"lemma": "który","mstag": "adj:pl:nom:f:pos","disamb": true}]},{"index": 112,"position": [635,638],"orth": "nie","lexemes": [{"lemma": "nie","mstag": "qub","disamb": true}]},{"index": 113,"position": [639,645],"orth": "czując","lexemes": [{"lemma": "czuć","mstag": "pcon:imperf","disamb": true}]},{"index": 114,"position": [646,649],"orth": "się","lexemes": [{"lemma": "się","mstag": "qub","disamb": true}]},{"index": 115,"position": [650,652],"orth": "na","lexemes": [{"lemma": "na","mstag": "prep:loc","disamb": true}]},{"index": 116,"position": [653,659],"orth": "siłach","lexemes": [{"lemma": "siła","mstag": "subst:pl:loc:f","disamb": true}]},{"index": 117,"position": [660,675],"orth": "uszlachetnienia","lexemes": [{"lemma": "uszlachetnić","mstag": "ger:sg:gen:n:perf:aff","disamb": true}]},{"index": 118,"position": [676,680],"orth": "sobą","lexemes": [{"lemma": "siebie","mstag": "siebie:inst","disamb": true}]},{"index": 119,"position": [680,681],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 120,"position": [682,687],"orth": "swymi","lexemes": [{"lemma": "swój","mstag": "adj:pl:inst:m3:pos","disamb": true}]},{"index": 121,"position": [688,695],"orth": "czynami","lexemes": [{"lemma": "czyn","mstag": "subst:pl:inst:m3","disamb": true}]},{"index": 122,"position": [696,704],"orth": "własnego","lexemes": [{"lemma": "własny","mstag": "adj:sg:gen:n:pos","disamb": true}]},{"index": 123,"position": [705,713],"orth": "nazwiska","lexemes": [{"lemma": "nazwisko","mstag": "subst:sg:gen:n","disamb": true}]},{"index": 124,"position": [713,714],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 125,"position": [715,719],"orth": "chcą","lexemes": [{"lemma": "chcieć","mstag": "fin:pl:ter:imperf","disamb": true}]},{"index": 126,"position": [720,730],"orth": "nazwiskiem","lexemes": [{"lemma": "nazwisko","mstag": "subst:sg:inst:n","disamb": true}]},{"index": 127,"position": [731,743],"orth": "uszlachetnić","lexemes": [{"lemma": "uszlachetnić","mstag": "inf:perf","disamb": true}]},{"index": 128,"position": [744,750],"orth": "siebie","lexemes": [{"lemma": "siebie","mstag": "siebie:acc","disamb": true}]},{"index": 129,"position": [750,751],"orth": ",","lexemes": [{"lemma": ",","mstag": "interp","disamb": true}]},{"index": 130,"position": [752,753],"orth": "i","lexemes": [{"lemma": "i","mstag": "conj","disamb": true}]},{"index": 131,"position": [754,757],"orth": "tak","lexemes": [{"lemma": "tak","mstag": "adv:pos","disamb": true}]},{"index": 132,"position": [758,760],"orth": "np","lexemes": [{"lemma": "na przykład","mstag": "brev:pun","disamb": true}]},{"index": 133,"position": [760,761],"orth": ".","lexemes": [{"lemma": ".","mstag": "interp","disamb": true}]}],"entities": [{"text": "Galicji","type": "nam_loc_historical_region","tokens": [28,29],"positions": [174,181]},{"text": "Lodomerii","type": "nam_loc_gpe_admin1","tokens": [30,31],"positions": [184,193]},{"text": "Pipidówka","type": "nam_loc_gpe_city","tokens": [42,43],"positions": [237,246]},{"text": "Galicji","type": "nam_loc_gpe_admin1","tokens": [103,104],"positions": [587,594]}]}
\ No newline at end of file
--- a/notebooks/.gitignore
+++ b/notebooks/.gitignore
+tmp.ipynb
\ No newline at end of file
--- a/src/detections/__init__.py
+++ b/src/detections/__init__.py
 from src.detections.detection import *
 from src.detections.date import *
+from src.utils.subclasses import get_sublcasses
+
+DETECTION_CLASSES_MAP = {
+    detection_class.TYPE_NAME: detection_class
+    for detection_class in get_sublcasses(Detection)
+}
\ No newline at end of file
--- a/src/detections/date.py
+++ b/src/detections/date.py
@@ -2,6 +2,7 @@ from src.detections.detection import Detection
 from typing import List, Tuple, Optional

 class DateDetection(Detection):
+    TYPE_NAME = "date"
    class AnnotationPart:
        TWO_DIGITS_DAY = "DD"
        ONE_DIGIT_DAY = "D"
@@ -19,7 +20,7 @@ class DateDetection(Detection):
        :type format: Optional[List[Tuple[str, str]]]
        """
        
-        super().__init__("date")
+        super().__init__()
        
        self.format = format
        

--- a/src/detections/detection.py
+++ b/src/detections/detection.py
@@ -3,8 +3,9 @@ from typing import Optional

 @dataclass
 class Detection:
-    def __init__(self, type_name: str) -> None:
-        self._type_name = type_name
+    TYPE_NAME = "detection"
+    def __init__(self) -> None:
+        pass
            
    def __hash__(self) -> int:
        return (type(self), *(self.__dict__.values())).__hash__()
@@ -19,53 +20,78 @@ class MorphosyntacticInfoMixin:
        return self._morpho_tag
        
 class NameDetection(MorphosyntacticInfoMixin, Detection):
+    TYPE_NAME = "name"
+    
    def __init__(self, morpho_tag: Optional[str] = None) -> None:
-        super().__init__(morpho_tag=morpho_tag, type_name="name")
+        super().__init__(morpho_tag=morpho_tag)
        
 class SurnameDetection(MorphosyntacticInfoMixin, Detection):
+    TYPE_NAME = "surname"
+    
    def __init__(self, morpho_tag: Optional[str] = None) -> None:
-        super().__init__(morpho_tag=morpho_tag, type_name="surname")
+        super().__init__(morpho_tag=morpho_tag)
        
 class StreetNameDetection(MorphosyntacticInfoMixin, Detection):
+    TYPE_NAME = "street_name"
+    
    def __init__(self, morpho_tag: Optional[str] = None) -> None:
-        super().__init__(morpho_tag=morpho_tag, type_name="street_name")
+        super().__init__(morpho_tag=morpho_tag)
        
 class CityDetection(MorphosyntacticInfoMixin, Detection):
+    TYPE_NAME = "city"
+    
    def __init__(self, morpho_tag: Optional[str] = None) -> None:
-        super().__init__(morpho_tag=morpho_tag, type_name="city")
+        super().__init__(morpho_tag=morpho_tag)
        
 class CountryDetection(MorphosyntacticInfoMixin, Detection):
+    TYPE_NAME = "country"
+    
    def __init__(self, morpho_tag: Optional[str] = None) -> None:
-        super().__init__(morpho_tag=morpho_tag, type_name="country")
+        super().__init__(morpho_tag=morpho_tag)
        
 class UrlDetection(Detection):
+    TYPE_NAME = "url"
+    
    def __init__(self) -> None:
-        super().__init__("url")
+        super().__init__()
        
 class UserDetection(Detection):
+    TYPE_NAME = "user"
+    
    def __init__(self) -> None:
-        super().__init__("user")
+        super().__init__()
        
 class EmailDetection(Detection):
+    TYPE_NAME = "email"
+    
    def __init__(self) -> None:
-        super().__init__("email")
+        super().__init__()
        
 class NumberDetection(Detection):
+    TYPE_NAME = "number"
+    
    def __init__(self) -> None:
-        super().__init__("number")        
+        super().__init__()        
 class PhoneNumberDetection(NumberDetection):
+    TYPE_NAME = "phone_number"
+    
    def __init__(self) -> None:
        super().__init__()
-        self._type_name = "phone_number"
    
 class TINDetection(Detection): # Tax Identification Number
+    TYPE_NAME = "tin"
+    
    def __init__(self) -> None:
-        super().__init__("tin")
+        super().__init__()
        
 class KRSDetection(Detection): # National Court Register
+    TYPE_NAME = "krs"
+    
    def __init__(self) -> None:
-        super().__init__("krs")
+        super().__init__()
        
 class OtherDetection(Detection): # Non standard entity
+    TYPE_NAME = "other"
+    
    def __init__(self) -> None:
-        super().__init__("other")
\ No newline at end of file
+        super().__init__()
\ No newline at end of file
--- a/src/detectors/ner/ner.py
+++ b/src/detectors/ner/ner.py
 from typing import List, Dict, Any, Tuple
-from src.detectors.ner.pl_liner_n5 import detect_ner_pl_liner_n5
 from src.detectors.interface import Detector
-from src.detections import Detection
-from src.annotations import Annotation
-
+from src.detections import Detection, MorphosyntacticInfoMixin
+from src.annotations import Annotation, NerAnnotation, MorphosyntacticAnnotation
+from src.detections import DETECTION_CLASSES_MAP

 class NerDetector(Detector):
-    def __init__(self, language: str = "pl") -> None:
+    def __init__(self, detection_mapping: Dict[str, str], language: str = "pl") -> None:
        self._language = language
+        self._detection_mapping = detection_mapping
        
    def detect(
        self, text: str, annotations: List[Tuple[int, int, Annotation]]
    ) -> List[Tuple[int, int, str]]:
-        return detect_ner(annotations, self._language)
        
+        morpho_tags = dict()
+        ner_detections = []
        
-def detect_ner(
-    annotations: List[Tuple[int, int, Annotation]], language: str
-) -> List[Tuple[int, int, str]]:
-    if language == "pl":
-        return detect_ner_pl_liner_n5(annotations)
-    else:
-        raise NotImplementedError(f"Language {language} is not supported.")
+        for annotation in annotations:
+            start, end, annotation = annotation
+            if isinstance(annotation, MorphosyntacticAnnotation):
+                morpho_tags[(start, end)] = annotation.morphosyntactic_tag
+            elif isinstance(annotation, NerAnnotation):
+                ner_type = annotation.ner_type
+                
+                if ner_type in self._detection_mapping:
+                    detection_class = DETECTION_CLASSES_MAP[self._detection_mapping[ner_type]]
+                    ner_detections.append((start, end, detection_class))
+                 
+        result = []   
+        for start, end, ner_detection in ner_detections:
+            kwargs = dict()
+            if issubclass(ner_detection, MorphosyntacticInfoMixin):
+                if (start, end) in morpho_tags:
+                    kwargs["morpho_tag"] = morpho_tags[(start, end)]
+                
+            result.append((start, end, ner_detection(**kwargs)))                                    
+                
+        return result
--- a/src/detectors/ner/pl_liner_n5.py
+++ b/src/detectors/ner/pl_liner_n5.py
-from typing import List, Tuple, Dict
-from src.utils.utils import subdict
-from src.detections import OtherDetection, Detection
-from src.mappings.ner_pl_n5_mapping import NER_PL_N5_MAPPING
-from src.annotations import Annotation, NerAnnotation, MorphosyntacticAnnotation
-
-def detect_ner_pl_liner_n5(
-    annotations: List[Tuple[int, int, Annotation]],
-) -> List[Tuple[int, int, str]]:
-    """
-    Detects ner entities in the text based on liner_n5 NER ontology.
-
-    :param ner_annotations: a dictionary of NER annotations
-    :type ner_annotations: Dict[str, List[Tuple[int, int, str]]]
-    :return: a list of tuples containing (start, end, entity_type)
-    :rtype: List[Tuple[int, int, Annotation]]
-    """
-    
-    ner_anotations = []
-    ner_annotation_tags = dict()
-    for annotation in annotations:
-        if issubclass(annotation[2].__class__, NerAnnotation):
-            if annotation[2].ner_type in NER_PL_N5_MAPPING.keys():
-                ner_anotations.append(annotation)
-        if issubclass(annotation[2].__class__, MorphosyntacticAnnotation):
-            ner_annotation_tags[(annotation[0], annotation[1])] = annotation[2].morphosyntactic_tag
-                
-    return [
-        (start, end, NER_PL_N5_MAPPING.get(ner_annotation.ner_type, OtherDetection)(morpho_tag=ner_annotation_tags.get((start, end), None)))
-        for start, end, ner_annotation in ner_anotations
-    ]
--- a/src/dictionaries/morphosyntactic/ner_file.py
+++ b/src/dictionaries/morphosyntactic/ner_file.py
+from typing import Dict, List, Optional, Type
+from collections import defaultdict
+from src.detections import Detection, MorphosyntacticInfoMixin, DETECTION_CLASSES_MAP
+from src.dictionaries.morphosyntactic.interface import MorphosyntacticDictionary
+import random
+
+class NERFileMorphosyntacticDictionary(MorphosyntacticDictionary):
+    def __init__(
+        self,
+        dictionary_path: Optional[str] = None,
+        always_replace=True,
+    ) -> None:
+        super().__init__()
+        self._dictionary = None
+        self._always_replace = always_replace
+        
+        self._from_file(dictionary_path)
+        
+    def _from_file(
+        self, path_to_dictionary: str
+    ) -> None:
+        replacement_dictionary = defaultdict(lambda: defaultdict(dict))
+        with open(path_to_dictionary, "r", encoding="utf-8") as file:
+            for line in file:
+                line = line.strip()
+                ner_tag, word, lemma, morpho_tag = line.split("\t")
+                replacement_dictionary[ner_tag][morpho_tag][lemma] = word
+
+        self._dictionary = replacement_dictionary
+
+    def get_supported_detection_classes(self) -> List[Type[Detection]]:
+        """
+        Returns a list of supported detection classes
+        """
+        return [
+            DETECTION_CLASSES_MAP[name]
+            for name in self._dictionary.keys()
+        ]
+
+    def get_random_replacement(self, original_entry: Detection) -> Optional[str]:
+        original_entry_type = type(original_entry)
+        original_entry_type_name = original_entry_type.TYPE_NAME
+
+        result = None
+
+        if issubclass(original_entry_type, MorphosyntacticInfoMixin):
+            morpho_tag = original_entry.morpho_tag
+
+            if (
+                original_entry_type_name in self._dictionary
+                and morpho_tag in self._dictionary[original_entry_type_name]
+            ):
+                result = random.choice(
+                    list(self._dictionary[original_entry_type_name][morpho_tag].values())
+                )
+
+        if result is None and self._always_replace:
+            random_type = random.choice(list(self._dictionary.keys()))
+            random_tag = random.choice(list(self._dictionary[random_type].keys()))
+
+            result = random.choice(
+                list(self._dictionary[random_type][random_tag].values())
+            )
+
+        return result
\ No newline at end of file