From 010d4760acecc411e3752a00cd548a87357878d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pogoda?= <mipo57@e-science.pl>
Date: Fri, 31 Mar 2023 10:09:45 +0200
Subject: [PATCH] Support toggling of first morpho subtag removal

---
 src/dictionaries/morphosyntactic/ner_file_nkjp.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/dictionaries/morphosyntactic/ner_file_nkjp.py b/src/dictionaries/morphosyntactic/ner_file_nkjp.py
index 987128c..bee3163 100644
--- a/src/dictionaries/morphosyntactic/ner_file_nkjp.py
+++ b/src/dictionaries/morphosyntactic/ner_file_nkjp.py
@@ -7,9 +7,13 @@ from src.dictionaries.morphosyntactic.ner_file import NERFileMorphosyntacticDict
 
 class NERFileNKJPMorphosyntacticDictionary(NERFileMorphosyntacticDictionary):
     def __init__(
-        self, dictionary_path: Optional[str] = None, always_replace=True
+        self,
+        dictionary_path: Optional[str] = None,
+        always_replace=True,
+        remove_first_morpho_subtag=True
     ) -> None:
         super().__init__(dictionary_path, always_replace)
+        self._remove_first_morpho_subtag = remove_first_morpho_subtag
 
     def get_random_replacement(self, original_entry: Detection) -> Optional[str]:
         original_entry_type = type(original_entry)
@@ -19,7 +23,10 @@ class NERFileNKJPMorphosyntacticDictionary(NERFileMorphosyntacticDictionary):
 
         if issubclass(original_entry_type, MorphosyntacticInfoMixin):
             # THAT IS A HACK FOR NOW FOR CORRUPTED NKJP TAGS IN DICTIONARY
-            morpho_tag = ":".join(original_entry.morpho_tag.split(":")[1:])
+            if self._remove_first_morpho_subtag:
+                morpho_tag = ":".join(original_entry.morpho_tag.split(":")[1:])
+            else:
+                morpho_tag = original_entry.morpho_tag
 
             if (
                 original_entry_type_name in self._dictionary
-- 
GitLab