# ner_replacer.py -- committed by Michał Pogoda
from typing import List, Tuple
from src.annotations import (
    Annotation,
)
from src.string_replacements import replace_and_update
from src.replacers.interface import ReplacerInterface
from src.dictionaries.morphosyntactic import MorphosyntacticDictionary


class NERReplacer(ReplacerInterface):
    """Replacer that swaps detected spans for values drawn from a dictionary.

    Within a single ``replace`` call, spans that share the same surface text
    and the same annotation class are always given the same replacement.
    """

    def __init__(self, dictionary: MorphosyntacticDictionary):
        """Keep a reference to the dictionary used to draw replacements."""
        self._dictionary = dictionary

    def replace(
        self, text: str, detections: List[Tuple[int, int, Annotation]]
    ) -> Tuple[str, List[Tuple[int, int, Annotation]]]:
        """Substitute detected spans of ``text`` with dictionary replacements.

        Args:
            text: The text the detections refer to.
            detections: ``(start, end, annotation)`` triples; ``start`` and
                ``end`` are used to slice ``text``.

        Returns:
            Whatever ``replace_and_update`` returns for the collected
            substitutions and the skipped detections.
            NOTE(review): presumably the rewritten text plus the skipped
            detections with adjusted offsets -- confirm against
            ``replace_and_update``.
        """
        # Maps (surface text, annotation class) -> chosen replacement, so a
        # repeated entity is queried once and replaced consistently.
        chosen_by_key = {}
        substitutions = []
        skipped = []

        for entry in detections:
            begin, finish, annotation = entry
            cache_key = (text[begin:finish], type(annotation))

            if cache_key in chosen_by_key:
                chosen = chosen_by_key[cache_key]
            else:
                chosen = self._dictionary.get_random_replacement(annotation)
                # A None result is cached too, so the dictionary is not
                # queried again for the same key.
                chosen_by_key[cache_key] = chosen

            if chosen is not None:
                substitutions.append((begin, finish, chosen))
            else:
                # No replacement available: hand the detection on untouched.
                skipped.append(entry)

        return replace_and_update(text, substitutions, skipped)