From 010d4760acecc411e3752a00cd548a87357878d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pogoda?= <mipo57@e-science.pl>
Date: Fri, 31 Mar 2023 10:09:45 +0200
Subject: [PATCH] Support toggling of first morpho subtag removal

---
 src/dictionaries/morphosyntactic/ner_file_nkjp.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/dictionaries/morphosyntactic/ner_file_nkjp.py b/src/dictionaries/morphosyntactic/ner_file_nkjp.py
index 987128c..bee3163 100644
--- a/src/dictionaries/morphosyntactic/ner_file_nkjp.py
+++ b/src/dictionaries/morphosyntactic/ner_file_nkjp.py
@@ -7,9 +7,13 @@ from src.dictionaries.morphosyntactic.ner_file import NERFileMorphosyntacticDict
 
 class NERFileNKJPMorphosyntacticDictionary(NERFileMorphosyntacticDictionary):
     def __init__(
-        self, dictionary_path: Optional[str] = None, always_replace=True
+        self,
+        dictionary_path: Optional[str] = None,
+        always_replace: bool = True,
+        remove_first_morpho_subtag: bool = True,  # False: look up morpho_tag unchanged
     ) -> None:
         super().__init__(dictionary_path, always_replace)
+        self._remove_first_morpho_subtag = remove_first_morpho_subtag
 
     def get_random_replacement(self, original_entry: Detection) -> Optional[str]:
         original_entry_type = type(original_entry)
@@ -19,7 +23,10 @@ class NERFileNKJPMorphosyntacticDictionary(NERFileMorphosyntacticDictionary):
 
         if issubclass(original_entry_type, MorphosyntacticInfoMixin):
             # THAT IS A HACK FOR NOW FOR CORRUPTED NKJP TAGS IN DICTIONARY
-            morpho_tag = ":".join(original_entry.morpho_tag.split(":")[1:])
+            if self._remove_first_morpho_subtag:
+                morpho_tag = ":".join(original_entry.morpho_tag.split(":")[1:])
+            else:
+                morpho_tag = original_entry.morpho_tag
 
             if (
                 original_entry_type_name in self._dictionary
-- 
GitLab