Make stripping of the first morpho subtag optional in the NKJP NER dictionary.

NERFileNKJPMorphosyntacticDictionary.__init__ gains a keyword argument
`remove_first_morpho_subtag` (default True), stored on the instance as
`_remove_first_morpho_subtag`. In get_random_replacement the first
colon-separated subtag of `original_entry.morpho_tag` is dropped only when
that flag is true; otherwise the tag is used unchanged. The default keeps
the previous behaviour, which always dropped the first subtag.

diff --git a/src/dictionaries/morphosyntactic/ner_file_nkjp.py b/src/dictionaries/morphosyntactic/ner_file_nkjp.py
index 987128cb39c758774706a191a64769cbca1b7bd7..bee31631e1d1b238cf418bed011a2521428736e1 100644
--- a/src/dictionaries/morphosyntactic/ner_file_nkjp.py
+++ b/src/dictionaries/morphosyntactic/ner_file_nkjp.py
@@ -7,9 +7,13 @@ from src.dictionaries.morphosyntactic.ner_file import NERFileMorphosyntacticDict
 
 class NERFileNKJPMorphosyntacticDictionary(NERFileMorphosyntacticDictionary):
     def __init__(
-        self, dictionary_path: Optional[str] = None, always_replace=True
+        self,
+        dictionary_path: Optional[str] = None,
+        always_replace=True,
+        remove_first_morpho_subtag=True
     ) -> None:
         super().__init__(dictionary_path, always_replace)
+        self._remove_first_morpho_subtag = remove_first_morpho_subtag
 
     def get_random_replacement(self, original_entry: Detection) -> Optional[str]:
         original_entry_type = type(original_entry)
@@ -19,7 +23,10 @@ class NERFileNKJPMorphosyntacticDictionary(NERFileMorphosyntacticDictionary):
 
         if issubclass(original_entry_type, MorphosyntacticInfoMixin):
             # THAT IS A HACK FOR NOW FOR CORRUPTED NKJP TAGS IN DICTIONARY
-            morpho_tag = ":".join(original_entry.morpho_tag.split(":")[1:])
+            if self._remove_first_morpho_subtag:
+                morpho_tag = ":".join(original_entry.morpho_tag.split(":")[1:])
+            else:
+                morpho_tag = original_entry.morpho_tag
 
             if (
                 original_entry_type_name in self._dictionary