From 63784aceda776eece84c0b7aee7a0fb8b34f81c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pogoda?= <mipo57@e-science.pl>
Date: Thu, 26 Jan 2023 15:00:20 +0100
Subject: [PATCH] Working Polish version

---
 config/config.yaml                            |  1 +
 config/detectors/number.yaml                  |  2 +
 config/paths/default.yaml                     |  3 +-
 config/replacers/ner.yaml                     |  3 +-
 config/replacers/number.yaml                  |  2 +
 config/replacers/pseudo.yaml                  |  1 +
 dictionaries/pl_dict.txt                      |  1 +
 .../marek_kowalski_pojechal_do_wroclawia.ccl  | 84 ++++++++++++++++---
 src/annotations/__init__.py                   |  1 +
 src/annotations/annotations.py                | 14 ++++
 src/detections/detection.py                   | 14 ++--
 src/detectors/ner/ner.py                      |  7 +-
 src/detectors/ner/pl_liner_n5.py              | 24 +++---
 src/detectors/number/__init__.py              |  1 +
 src/detectors/number/number.py                | 25 ++++++
 src/dictionaries/morphosyntactic/interface.py |  8 +-
 src/dictionaries/morphosyntactic/pl_ner.py    | 29 +++++--
 src/input_parsers/ccl.py                      | 27 +++---
 src/replacers/ner_replacer.py                 |  4 +
 src/replacers/number_replacer.py              | 43 ++++++++++
 tests/detectors/ner/test_pl_liner_n5.py       | 28 ++++---
 tests/input_parsers/test_ccl.py               | 26 +++---
 22 files changed, 272 insertions(+), 76 deletions(-)
 create mode 100644 config/detectors/number.yaml
 create mode 100644 config/replacers/number.yaml
 create mode 100644 src/annotations/__init__.py
 create mode 100644 src/annotations/annotations.py
 create mode 100644 src/detectors/number/__init__.py
 create mode 100644 src/detectors/number/number.py
 create mode 100644 src/replacers/number_replacer.py

diff --git a/config/config.yaml b/config/config.yaml
index d4c077e..071d64b 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -1,4 +1,5 @@
 defaults:
+  - paths: default
   - detectors: all
   - replacers: tag
   - suppressor: order_based
diff --git a/config/detectors/number.yaml b/config/detectors/number.yaml
new file mode 100644
index 0000000..e7714ca
--- /dev/null
+++ b/config/detectors/number.yaml
@@ -0,0 +1,2 @@
+number:
+  _target_: src.detectors.number.NumberDetector
\ No newline at end of file
diff --git a/config/paths/default.yaml b/config/paths/default.yaml
index 657a167..f903ce8 100644
--- a/config/paths/default.yaml
+++ b/config/paths/default.yaml
@@ -1 +1,2 @@
-dictionaries_path: dictionaries
\ No newline at end of file
+root_path: ./
+dictionaries_path: ${paths.root_path}/dictionaries
\ No newline at end of file
diff --git a/config/replacers/ner.yaml b/config/replacers/ner.yaml
index 8d20018..e8e6cb9 100644
--- a/config/replacers/ner.yaml
+++ b/config/replacers/ner.yaml
@@ -2,4 +2,5 @@ ner:
   _target_: src.replacers.ner_replacer.NERReplacer
   dictionary:
     _target_: src.dictionaries.morphosyntactic.pl_ner.PlNERMorphosyntacticDictionary
-    dictionary_path: ${paths.dictionaries_path}/pl_dict.txt
\ No newline at end of file
+    dictionary_path: ${paths.dictionaries_path}/pl_dict.txt
+      
\ No newline at end of file
diff --git a/config/replacers/number.yaml b/config/replacers/number.yaml
new file mode 100644
index 0000000..0d494fd
--- /dev/null
+++ b/config/replacers/number.yaml
@@ -0,0 +1,2 @@
+number:
+  _target_: src.replacers.number_replacer.NumberReplacer
\ No newline at end of file
diff --git a/config/replacers/pseudo.yaml b/config/replacers/pseudo.yaml
index 5c4a301..4538b57 100644
--- a/config/replacers/pseudo.yaml
+++ b/config/replacers/pseudo.yaml
@@ -3,4 +3,5 @@ defaults:
   - email
   - ner
   - user
+  - number
   - tag # Fallback to tag replacement if no other replacement is found
\ No newline at end of file
diff --git a/dictionaries/pl_dict.txt b/dictionaries/pl_dict.txt
index ad0cb84..281114c 100644
--- a/dictionaries/pl_dict.txt
+++ b/dictionaries/pl_dict.txt
@@ -2456,6 +2456,7 @@ nam_loc_gpe_city	Helsinek	Helsinki	pl:gen:n
 nam_loc_gpe_city	Helsinkami	Helsinki	pl:inst:n
 nam_loc_gpe_city	Helsinkach	Helsinki	pl:loc:n
 nam_loc_gpe_city	Helsinkom	Helsinki	pl:dat:n
+nam_liv_person	Henryk	Henryk	sg:nom:m1
 nam_liv_person	Henryka	Henryk	sg:acc:m3
 nam_liv_person	Henrykowi	Henryk	sg:dat:m3
 nam_liv_person	Henrykiem	Henryk	sg:inst:m3
diff --git a/example_inputs/marek_kowalski_pojechal_do_wroclawia.ccl b/example_inputs/marek_kowalski_pojechal_do_wroclawia.ccl
index f1459ba..453acb9 100644
--- a/example_inputs/marek_kowalski_pojechal_do_wroclawia.ccl
+++ b/example_inputs/marek_kowalski_pojechal_do_wroclawia.ccl
@@ -4,9 +4,8 @@
  <chunk type="p" id="ch1">
   <sentence id="s1">
    <tok>
-    <orth>Marek</orth>
-    <lex disamb="1"><base>Marek</base><ctag>subst:sg:nom:m1</ctag></lex>
-    <lex disamb="1"><base>marek</base><ctag>subst:sg:nom:m1</ctag></lex>
+    <orth>Jan</orth>
+    <lex disamb="1"><base>Jan</base><ctag>subst:sg:nom:m1</ctag></lex>
     <ann chan="person_first_nam" head="1">1</ann>
     <ann chan="person_last_nam">0</ann>
     <ann chan="city_nam">0</ann>
@@ -19,34 +18,95 @@
     <ann chan="city_nam">0</ann>
    </tok>
    <tok>
-    <orth>pojechał</orth>
-    <lex disamb="1"><base>pojechać</base><ctag>praet:sg:m1:perf</ctag></lex>
+    <orth>(</orth>
+    <lex disamb="1"><base>(</base><ctag>interp</ctag></lex>
     <ann chan="person_first_nam">0</ann>
     <ann chan="person_last_nam">0</ann>
     <ann chan="city_nam">0</ann>
    </tok>
+   <ns/>
    <tok>
-    <orth>do</orth>
-    <lex disamb="1"><base>do</base><ctag>prep:gen</ctag></lex>
+    <orth>numer</orth>
+    <lex disamb="1"><base>numer</base><ctag>subst:sg:nom:m3</ctag></lex>
     <ann chan="person_first_nam">0</ann>
     <ann chan="person_last_nam">0</ann>
     <ann chan="city_nam">0</ann>
    </tok>
    <tok>
-    <orth>Wrocławia</orth>
-    <lex disamb="1"><base>Wrocław</base><ctag>subst:sg:gen:m3</ctag></lex>
+    <orth>telefonu</orth>
+    <lex disamb="1"><base>telefon</base><ctag>subst:sg:gen:m3</ctag></lex>
     <ann chan="person_first_nam">0</ann>
     <ann chan="person_last_nam">0</ann>
-    <ann chan="city_nam" head="1">1</ann>
+    <ann chan="city_nam">0</ann>
+   </tok>
+   <tok>
+    <orth>123</orth>
+    <lex disamb="1"><base>123</base><ctag>num:pl:nom:m1:rec</ctag></lex>
+    <ann chan="person_first_nam">0</ann>
+    <ann chan="person_last_nam">0</ann>
+    <ann chan="city_nam">0</ann>
+   </tok>
+   <ns/>
+   <tok>
+    <orth>-</orth>
+    <lex disamb="1"><base>-</base><ctag>interp</ctag></lex>
+    <ann chan="person_first_nam">0</ann>
+    <ann chan="person_last_nam">0</ann>
+    <ann chan="city_nam">0</ann>
+   </tok>
+   <ns/>
+   <tok>
+    <orth>456</orth>
+    <lex disamb="1"><base>456</base><ctag>num:pl:nom:m1:rec</ctag></lex>
+    <ann chan="person_first_nam">0</ann>
+    <ann chan="person_last_nam">0</ann>
+    <ann chan="city_nam">0</ann>
    </tok>
    <ns/>
    <tok>
-    <orth>.</orth>
-    <lex disamb="1"><base>.</base><ctag>interp</ctag></lex>
+    <orth>-</orth>
+    <lex disamb="1"><base>-</base><ctag>interp</ctag></lex>
     <ann chan="person_first_nam">0</ann>
     <ann chan="person_last_nam">0</ann>
     <ann chan="city_nam">0</ann>
    </tok>
+   <ns/>
+   <tok>
+    <orth>789</orth>
+    <lex disamb="1"><base>789</base><ctag>num:pl:nom:m1:rec</ctag></lex>
+    <ann chan="person_first_nam">0</ann>
+    <ann chan="person_last_nam">0</ann>
+    <ann chan="city_nam">0</ann>
+   </tok>
+   <ns/>
+   <tok>
+    <orth>)</orth>
+    <lex disamb="1"><base>)</base><ctag>interp</ctag></lex>
+    <ann chan="person_first_nam">0</ann>
+    <ann chan="person_last_nam">0</ann>
+    <ann chan="city_nam">0</ann>
+   </tok>
+   <tok>
+    <orth>miesza</orth>
+    <lex disamb="1"><base>mieszać</base><ctag>fin:sg:ter:imperf</ctag></lex>
+    <ann chan="person_first_nam">0</ann>
+    <ann chan="person_last_nam">0</ann>
+    <ann chan="city_nam">0</ann>
+   </tok>
+   <tok>
+    <orth>we</orth>
+    <lex disamb="1"><base>w</base><ctag>prep:acc:wok</ctag></lex>
+    <ann chan="person_first_nam">0</ann>
+    <ann chan="person_last_nam">0</ann>
+    <ann chan="city_nam">0</ann>
+   </tok>
+   <tok>
+    <orth>Wrocławiu</orth>
+    <lex disamb="1"><base>Wrocław</base><ctag>subst:sg:loc:m3</ctag></lex>
+    <ann chan="person_first_nam">0</ann>
+    <ann chan="person_last_nam">0</ann>
+    <ann chan="city_nam" head="1">1</ann>
+   </tok>
   </sentence>
  </chunk>
 </chunkList>
\ No newline at end of file
diff --git a/src/annotations/__init__.py b/src/annotations/__init__.py
new file mode 100644
index 0000000..d09a852
--- /dev/null
+++ b/src/annotations/__init__.py
@@ -0,0 +1 @@
+from src.annotations.annotations import *
\ No newline at end of file
diff --git a/src/annotations/annotations.py b/src/annotations/annotations.py
new file mode 100644
index 0000000..fab8b10
--- /dev/null
+++ b/src/annotations/annotations.py
@@ -0,0 +1,14 @@
+from dataclasses import dataclass
+
+@dataclass(unsafe_hash=True)
+class Annotation:
+    """Base class for span annotations; subclasses compare and hash by value."""
+
+@dataclass(unsafe_hash=True)
+class MorphosyntacticAnnotation(Annotation):
+    """Annotation carrying the morphosyntactic tag of a span."""
+    morphosyntactic_tag: str
+
+@dataclass(unsafe_hash=True)
+class NerAnnotation(Annotation):
+    ner_type: str  # NER type name, e.g. the CCL annotation channel
\ No newline at end of file
diff --git a/src/detections/detection.py b/src/detections/detection.py
index 59d712d..79abbde 100644
--- a/src/detections/detection.py
+++ b/src/detections/detection.py
@@ -7,7 +7,7 @@ class Detection:
         self._type_name = type_name
         
     def __hash__(self) -> int:
-        return tuple(self.__dict__.values()).__hash__()
+        return (type(self), *(self.__dict__.values())).__hash__()
     
 class MorphosyntacticInfoMixin:
     def __init__(self, morpho_tag: str, *args, **kwargs) -> None:
@@ -38,10 +38,6 @@ class CountryDetection(MorphosyntacticInfoMixin, Detection):
     def __init__(self, morpho_tag: Optional[str] = None) -> None:
         super().__init__(morpho_tag=morpho_tag, type_name="country")
         
-class PhoneNumberDetection(Detection):
-    def __init__(self) -> None:
-        super().__init__("phone_number")
-        
 class UrlDetection(Detection):
     def __init__(self) -> None:
         super().__init__("url")
@@ -54,6 +50,14 @@ class EmailDetection(Detection):
     def __init__(self) -> None:
         super().__init__("email")
         
+class NumberDetection(Detection):
+    def __init__(self) -> None:
+        super().__init__("number")        
+class PhoneNumberDetection(NumberDetection):
+    def __init__(self) -> None:
+        super().__init__()
+        self._type_name = "phone_number"
+    
 class TINDetection(Detection): # Tax Identification Number
     def __init__(self) -> None:
         super().__init__("tin")
diff --git a/src/detectors/ner/ner.py b/src/detectors/ner/ner.py
index 6c4ae8a..1a4fdad 100644
--- a/src/detectors/ner/ner.py
+++ b/src/detectors/ner/ner.py
@@ -2,6 +2,7 @@ from typing import List, Dict, Any, Tuple
 from src.detectors.ner.pl_liner_n5 import detect_ner_pl_liner_n5
 from src.detectors.interface import Detector
 from src.detections import Detection
+from src.annotations import Annotation
 
 
 class NerDetector(Detector):
@@ -9,15 +10,15 @@ class NerDetector(Detector):
         self._language = language
 
     def detect(
-        self, text: str, annotations: Dict[str, List[Tuple[int, int, Any]]]
+        self, text: str, annotations: List[Tuple[int, int, Annotation]]
     ) -> List[Tuple[int, int, str]]:
         return detect_ner(annotations, self._language)
 
 
 def detect_ner(
-    ccl_annotations: Dict[str, List[Tuple[int, int, Detection]]], language: str
+    annotations: List[Tuple[int, int, Annotation]], language: str
 ) -> List[Tuple[int, int, str]]:
     if language == "pl":
-        return detect_ner_pl_liner_n5(ccl_annotations)
+        return detect_ner_pl_liner_n5(annotations)
     else:
         raise NotImplementedError(f"Language {language} is not supported.")
diff --git a/src/detectors/ner/pl_liner_n5.py b/src/detectors/ner/pl_liner_n5.py
index d51cfa9..f11b67a 100644
--- a/src/detectors/ner/pl_liner_n5.py
+++ b/src/detectors/ner/pl_liner_n5.py
@@ -2,9 +2,10 @@ from typing import List, Tuple, Dict
 from src.utils.utils import subdict
 from src.detections import OtherDetection, Detection
 from src.mappings.ner_pl_n5_mapping import NER_PL_N5_MAPPING
+from src.annotations import Annotation, NerAnnotation, MorphosyntacticAnnotation
 
 def detect_ner_pl_liner_n5(
-    ccl_annotations: Dict[str, List[Tuple[int, int, Detection]]]
+    annotations: List[Tuple[int, int, Annotation]],
 ) -> List[Tuple[int, int, str]]:
     """
     Detects ner entities in the text based on liner_n5 NER ontology.
@@ -14,14 +15,17 @@ def detect_ner_pl_liner_n5(
     :return: a list of tuples containing (start, end, entity_type)
     :rtype: List[Tuple[int, int, Annotation]]
     """
-    names = subdict(
-        ccl_annotations,
-        list(NER_PL_N5_MAPPING.keys()),
-        all_must_be_present=False,
-    )
-
+    
+    ner_anotations = []
+    ner_annotation_tags = dict()
+    for annotation in annotations:
+        if issubclass(annotation[2].__class__, NerAnnotation):
+            if annotation[2].ner_type in NER_PL_N5_MAPPING.keys():
+                ner_anotations.append(annotation)
+        if issubclass(annotation[2].__class__, MorphosyntacticAnnotation):
+            ner_annotation_tags[(annotation[0], annotation[1])] = annotation[2].morphosyntactic_tag
+                
     return [
-        (start, end, NER_PL_N5_MAPPING.get(entity_type, OtherDetection)())
-        for entity_type, entity in names.items()
-        for start, end, _ in entity
+        (start, end, NER_PL_N5_MAPPING.get(ner_annotation.ner_type, OtherDetection)(morpho_tag=ner_annotation_tags.get((start, end), None)))
+        for start, end, ner_annotation in ner_anotations
     ]
diff --git a/src/detectors/number/__init__.py b/src/detectors/number/__init__.py
new file mode 100644
index 0000000..7d72f52
--- /dev/null
+++ b/src/detectors/number/__init__.py
@@ -0,0 +1 @@
+from src.detectors.number.number import NumberDetector
\ No newline at end of file
diff --git a/src/detectors/number/number.py b/src/detectors/number/number.py
new file mode 100644
index 0000000..3fbfa55
--- /dev/null
+++ b/src/detectors/number/number.py
@@ -0,0 +1,25 @@
+import regex as re
+from typing import List, Dict, Any, Tuple
+from src.detections import NumberDetection
+from src.detectors.interface import Detector
+
+NUMBER_REGEX = re.compile(
+    r"\d+[^a-zA-Z\d]*\d*",  # digits, then any chars that are neither ASCII letters nor digits, then optional digits
+    re.I,
+)
+
+class NumberDetector(Detector):
+    """Detector that reports every NUMBER_REGEX match in the text as a NumberDetection."""
+
+    def __init__(self) -> None:
+        super().__init__()
+
+    def detect(
+        self, text: str, annotations: List[Tuple[int, int, Any]]
+    ) -> List[Tuple[int, int, NumberDetection]]:
+        """Return a (start, end, NumberDetection) tuple for each regex match in text."""
+        # Build the result from the match iterator; the annotations are not consulted.
+        return [
+            (match.start(), match.end(), NumberDetection())
+            for match in NUMBER_REGEX.finditer(text)
+        ]
\ No newline at end of file
diff --git a/src/dictionaries/morphosyntactic/interface.py b/src/dictionaries/morphosyntactic/interface.py
index 3f8a66b..f8d9fa7 100644
--- a/src/dictionaries/morphosyntactic/interface.py
+++ b/src/dictionaries/morphosyntactic/interface.py
@@ -1,7 +1,13 @@
 from src.detections import Detection
-from typing import Optional
+from typing import Optional, List, Type
 
 class MorphosyntacticDictionary:
+    def get_supported_detection_classes(self) -> List[Type[Detection]]:
+        """
+        Returns a list of supported detection classes
+        """
+        raise NotImplementedError()
+    
     def get_random_replacement(self, original_entry: Detection) -> Optional[str]:
         """
         Returns a random replacement for the original entry
diff --git a/src/dictionaries/morphosyntactic/pl_ner.py b/src/dictionaries/morphosyntactic/pl_ner.py
index d25beae..d3b1160 100644
--- a/src/dictionaries/morphosyntactic/pl_ner.py
+++ b/src/dictionaries/morphosyntactic/pl_ner.py
@@ -4,12 +4,26 @@ from src.detections import Detection, OtherDetection, MorphosyntacticInfoMixin
 from src.dictionaries.morphosyntactic.interface import MorphosyntacticDictionary
 import random
 
+from src.detections import (
+    NameDetection,
+    SurnameDetection,
+    StreetNameDetection,
+    CityDetection,
+    CountryDetection,
+)
+
+NER_PL_N5_MAPPING = {
+    "nam_liv_person": NameDetection,
+    "nam_liv_person_last": SurnameDetection,
+    "nam_fac_road": StreetNameDetection,
+    "nam_loc_gpe_city": CityDetection,
+    "nam_org_group_team": CountryDetection,
+}
 
 class PlNERMorphosyntacticDictionary(MorphosyntacticDictionary):
     def __init__(
         self,
         dictionary_path: Optional[str] = None,
-        annotation_mapping: Optional[Dict[str, Type[Detection]]] = None,
         list: Optional[List[Tuple[Detection, str, str, str]]] = None,
         always_replace=True,
     ) -> None:
@@ -18,8 +32,7 @@ class PlNERMorphosyntacticDictionary(MorphosyntacticDictionary):
         self._always_replace = always_replace
         
         if dictionary_path is not None:
-            assert annotation_mapping is not None
-            self._from_file(dictionary_path, annotation_mapping)
+            self._from_file(dictionary_path, NER_PL_N5_MAPPING)
         elif list is not None:
             self._from_list(list)
         else:
@@ -36,6 +49,12 @@ class PlNERMorphosyntacticDictionary(MorphosyntacticDictionary):
         self._dictionary = defaultdict(lambda: defaultdict(dict))
         for annotation, word, lemma, morpho_tag in list:
             self._dictionary[annotation][morpho_tag][lemma] = word
+            
+    def get_supported_detection_classes(self) -> List[Type[Detection]]:
+        """
+        Returns a list of supported detection classes
+        """
+        return list(self._dictionary.keys())
 
     def get_random_replacement(self, original_entry: Detection) -> Optional[str]:
         original_entry_type = type(original_entry)
@@ -43,7 +62,7 @@ class PlNERMorphosyntacticDictionary(MorphosyntacticDictionary):
         result = None
 
         if issubclass(original_entry_type, MorphosyntacticInfoMixin):
-            morpho_tag = original_entry.morpho_tag
+            morpho_tag = ":".join(original_entry.morpho_tag.split(":")[1:])
 
             if (
                 original_entry_type in self._dictionary
@@ -103,7 +122,7 @@ def load_pl_ner_replacements_dictionary(
             ner_tag, word, lemma, morpho_tag = line.split("\t")
 
             if ner_mapping is not None:
-                ner_tag = ner_mapping.get(ner_tag, OtherDetection)()
+                ner_tag = ner_mapping.get(ner_tag, OtherDetection)
 
             replacement_dictionary[ner_tag][morpho_tag][lemma] = word
 
diff --git a/src/input_parsers/ccl.py b/src/input_parsers/ccl.py
index 7b8bb7e..7847043 100644
--- a/src/input_parsers/ccl.py
+++ b/src/input_parsers/ccl.py
@@ -3,12 +3,13 @@ from lxml import etree
 from collections import defaultdict
 # from src.annotation_types_old import 
 from src.input_parsers.interface import InputParser
+from src.annotations import Annotation, MorphosyntacticAnnotation, NerAnnotation
 
 class CCLInputParser(InputParser):
     def __init__(self) -> None:
         super().__init__()
         
-    def parse(self, path_to_input: str) -> Tuple[str, Dict[str, List[Tuple[int, int, str]]]]:
+    def parse(self, path_to_input: str) -> List[Tuple[int, int, Annotation]]:
         """Parse CCL string into text and annotations.
 
         Annotations are returned as a dictionary with channel name as a key and list of tuples.
@@ -17,16 +18,19 @@ class CCLInputParser(InputParser):
             path_to_input (str): Path to file containing CCL.
 
         Returns:
-            Tuple[str, Dict[str, List[Tuple[int, int, str]]]]: Text and annotations.
+            Tuple[str, List[Tuple[int, int, Annotation]]]: Text and annotations.
         """
         with open(path_to_input, 'r') as f:
             ccl = f.read()
             
         ccl_tree = etree.fromstring(ccl.strip().encode('utf-8'))
         
-        results = defaultdict(list)
+        results = []
         text = ""
         
+        ner_annotations = []
+        morphosyntactic_annotations = []
+        
         # First token is assumed to not have space before it
         last_was_ns = True
         
@@ -43,7 +47,7 @@ class CCLInputParser(InputParser):
                 for lex in token.xpath('./lex'):
                     if lex.attrib['disamb'] == "1":
                         ctag = lex.xpath('./ctag')[0]
-                        # results[AnnotationTypes.MORPHOSYNTACTIC_TAG].append((start, end, ctag.text))
+                        morphosyntactic_annotations.append((start, end, MorphosyntacticAnnotation(ctag.text)))
                         
                         break
                     
@@ -56,20 +60,17 @@ class CCLInputParser(InputParser):
                     is_head = "head" in ann.attrib and ann.attrib['head'] == "1"
                     
                     if is_head:
-                        results[channel].append((start, end, word))
-                    else:
-                        if last_was_ns:
-                            new_word = results[channel][-1][2] + word
-                        else:
-                            new_word = results[channel][-1][2] + " " + word
-                            
-                        old_start = results[channel][-1][0]
+                        ner_annotations.append((start, end, NerAnnotation(channel)))
+                    else:                            
+                        old_start = ner_annotations[-1][0]
                             
-                        results[channel][-1] = (old_start, end, new_word)
+                        ner_annotations[-1] = (old_start, end, ner_annotations[-1][2])
                             
                 last_was_ns = False
                 text += word
             elif token.tag == 'ns':
                 last_was_ns = True
                 
+        results = ner_annotations + morphosyntactic_annotations
+                
         return text, results
\ No newline at end of file
diff --git a/src/replacers/ner_replacer.py b/src/replacers/ner_replacer.py
index 214f0b7..6804451 100644
--- a/src/replacers/ner_replacer.py
+++ b/src/replacers/ner_replacer.py
@@ -20,6 +20,10 @@ class NERReplacer(ReplacerInterface):
         already_replaced = dict()
 
         for item in detections:
+            if type(item[2]) not in self._dictionary.get_supported_detection_classes():
+                not_processed.append(item)
+                continue
+            
             start, end, detection = item
             
             key = (text[start:end], type(detection))
diff --git a/src/replacers/number_replacer.py b/src/replacers/number_replacer.py
new file mode 100644
index 0000000..d0c8f9e
--- /dev/null
+++ b/src/replacers/number_replacer.py
@@ -0,0 +1,43 @@
+from typing import List, Tuple
+from src.detections import (
+    Detection,
+    NumberDetection,
+)
+from src.string_replacements import replace_and_update
+from src.replacers.interface import ReplacerInterface
+import random
+import string
+
+def randomize_digits_in_text(text: str) -> str:
+    """Return text with each digit character replaced by a random ASCII digit.
+
+    All other characters are kept as they are, so the length is unchanged.
+    """
+    # join() avoids rebuilding the string on every character.
+    return "".join(
+        random.choice(string.digits) if char.isdigit() else char
+        for char in text
+    )
+
+class NumberReplacer(ReplacerInterface):
+    """Replaces number detections with digit-randomized copies of their text."""
+
+    def __init__(self):
+        pass
+
+    def replace(self, text: str, detections: List[Tuple[int, int, Detection]]) -> Tuple[str, List[Tuple[int, int, Detection]]]:
+        """Randomize NumberDetection spans (same source text, same replacement); pass the rest on."""
+        substitutions = []
+        skipped = []
+        cache = {}
+        for entry in detections:
+            begin, stop, found = entry
+            if not isinstance(found, NumberDetection):
+                skipped.append(entry)
+                continue
+            original = text[begin:stop]
+            if original not in cache:
+                cache[original] = randomize_digits_in_text(original)
+            substitutions.append((begin, stop, cache[original]))
+
+        return replace_and_update(text, substitutions, skipped)
\ No newline at end of file
diff --git a/tests/detectors/ner/test_pl_liner_n5.py b/tests/detectors/ner/test_pl_liner_n5.py
index 7af941c..08f77a9 100644
--- a/tests/detectors/ner/test_pl_liner_n5.py
+++ b/tests/detectors/ner/test_pl_liner_n5.py
@@ -1,23 +1,29 @@
+from src.annotations import NerAnnotation, MorphosyntacticAnnotation
 from src.detections import NameDetection, SurnameDetection, CityDetection
 from src.detectors.ner import NerDetector
 
 def test_detect_names_pl_liner_n5():
     detector = NerDetector("pl")
     
-    ccl_annotations = {
-        'person_first_nam': [(10, 16, 'Marian'), (100, 109, 'Magdalena')],
-        'person_last_nam': [(30, 35, 'Nowak')],
-        'city_nam': [(50, 59, 'Wrocławiu')],
-        'some_other_annotation': [(120, 124, 'zowd')],
-    }
+    annotations = [
+        (10, 16, NerAnnotation("person_first_nam")),
+        (100, 109, NerAnnotation("person_first_nam")),
+        (30, 35, NerAnnotation("person_last_nam")),
+        (50, 59, NerAnnotation("city_nam")),
+        (120, 124, NerAnnotation("some_other_annotation")),
+        (10, 16, MorphosyntacticAnnotation("1")),
+        (100, 109, MorphosyntacticAnnotation("2")),
+        (30, 35, MorphosyntacticAnnotation("3")),
+        (120, 124, MorphosyntacticAnnotation("some_other_morphosyntactic_annotation")),
+    ]
     
-    result = detector.detect("", ccl_annotations)
+    result = detector.detect("", annotations)
     
     expected = [
-        (10, 16, NameDetection()), 
-        (100, 109, NameDetection()),
-        (30, 35, SurnameDetection()),
-        (50, 59, CityDetection()),
+        (10, 16, NameDetection(morpho_tag="1")), 
+        (100, 109, NameDetection(morpho_tag="2")),
+        (30, 35, SurnameDetection(morpho_tag="3")),
+        (50, 59, CityDetection(morpho_tag=None)),
     ]
     
     assert set(result) == set(expected)
\ No newline at end of file
diff --git a/tests/input_parsers/test_ccl.py b/tests/input_parsers/test_ccl.py
index ec78647..498e68c 100644
--- a/tests/input_parsers/test_ccl.py
+++ b/tests/input_parsers/test_ccl.py
@@ -1,6 +1,7 @@
 # from src.annotation_types_old import AnnotationTypes
 from src.input_parsers.ccl import CCLInputParser
 from tempfile import NamedTemporaryFile
+from src.annotations import NerAnnotation, MorphosyntacticAnnotation
 
 example_ccl = """<?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE chunkList SYSTEM "ccl.dtd">
@@ -50,7 +51,6 @@ example_ccl = """<?xml version="1.0" encoding="UTF-8"?>
 </chunkList>
 """
 
-
 def test_ccl_input_parser():
     parser = CCLInputParser()
     
@@ -60,16 +60,14 @@ def test_ccl_input_parser():
         text, annotations = parser.parse(f.name)
 
     assert text == "Marek Kowalski pojechał do Wrocławia."
-
-    # assert set(annotations.keys()) == set(["nam_liv", "nam_loc", AnnotationTypes.MORPHOSYNTACTIC_TAG])
-
-    assert annotations["nam_liv"] == [(0, 14, "Marek Kowalski")]
-    assert annotations["nam_loc"] == [(27, 36, "Wrocławia")]
-    # assert annotations[AnnotationTypes.MORPHOSYNTACTIC_TAG] == [
-    #     (0, 5, "subst:sg:nom:m1"),
-    #     (6, 14, "subst:sg:nom:m1"),
-    #     (15, 23, "praet:sg:m1:perf"),
-    #     (24, 26, "prep:gen"),
-    #     (27, 36, "subst:sg:gen:m3"),
-    #     (36, 37, "interp"),
-    # ]
+    assert len(annotations) == 8
+    
+    assert (0, 14, NerAnnotation("nam_liv")) in annotations
+    assert (27, 36, NerAnnotation("nam_loc")) in annotations
+    
+    assert (0, 5, MorphosyntacticAnnotation("subst:sg:nom:m1")) in annotations
+    assert (6, 14, MorphosyntacticAnnotation("subst:sg:nom:m1")) in annotations
+    assert (15, 23, MorphosyntacticAnnotation("praet:sg:m1:perf")) in annotations
+    assert (24, 26, MorphosyntacticAnnotation("prep:gen")) in annotations
+    assert (27, 36, MorphosyntacticAnnotation("subst:sg:gen:m3")) in annotations
+    assert (36, 37, MorphosyntacticAnnotation("interp")) in annotations
\ No newline at end of file
-- 
GitLab