Skip to content
Snippets Groups Projects
Commit 010d4760 authored by Michał Pogoda
Browse files

Support toggling of first morpho subtag removal

parent f99a6a2e
3 merge requests: !10 Anonimizer v2, !9 Fix infancy errors based on Magdalena's report, !7 Better coverage
......@@ -7,9 +7,13 @@ from src.dictionaries.morphosyntactic.ner_file import NERFileMorphosyntacticDict
class NERFileNKJPMorphosyntacticDictionary(NERFileMorphosyntacticDictionary):
def __init__(
self, dictionary_path: Optional[str] = None, always_replace=True
self,
dictionary_path: Optional[str] = None,
always_replace=True,
remove_first_morpho_subtag=True
) -> None:
super().__init__(dictionary_path, always_replace)
self._remove_first_morpho_subtag = remove_first_morpho_subtag
def get_random_replacement(self, original_entry: Detection) -> Optional[str]:
original_entry_type = type(original_entry)
......@@ -19,7 +23,10 @@ class NERFileNKJPMorphosyntacticDictionary(NERFileMorphosyntacticDictionary):
if issubclass(original_entry_type, MorphosyntacticInfoMixin):
# THAT IS A HACK FOR NOW FOR CORRUPTED NKJP TAGS IN DICTIONARY
morpho_tag = ":".join(original_entry.morpho_tag.split(":")[1:])
if self._remove_first_morpho_subtag:
morpho_tag = ":".join(original_entry.morpho_tag.split(":")[1:])
else:
morpho_tag = original_entry.morpho_tag
if (
original_entry_type_name in self._dictionary
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment