# date_replacer.py
from typing import List, Tuple
from src.detections import (
    Detection,
    DateDetection,
)
from src.string_replacements import replace_and_update
from src.replacers.interface import ReplacerInterface
import random

# TODO: Add support for other languages
# Month number (1-12) -> Polish month name as it is written inside a date.
# The tuple is ordered January..December; enumerate() supplies the 1-based keys.
months_map = dict(
    enumerate(
        (
            "stycznia",
            "lutego",
            "marca",
            "kwietnia",
            "maja",
            "czerwca",
            "lipca",
            "sierpnia",
            "września",
            "października",
            "listopada",
            "grudnia",
        ),
        start=1,
    )
)

class DateReplacer(ReplacerInterface):
    """Replace detected dates in a text with randomly generated dates.

    Each generated date follows the format recorded on the detection
    (``detection.format``); when no format is recorded, a ``DD.MM.YYYY``
    layout is used.
    """

    def __init__(self):
        pass

    def replace(
        self, text: str, detections: List[Tuple[int, int, Detection]]
    ) -> Tuple[str, List[Tuple[int, int, Detection]]]:
        """Substitute every ``DateDetection`` span in ``text`` with a random date.

        Args:
            text: The text containing the detected spans.
            detections: ``(start, end, detection)`` triples; ``start``/``end``
                are used to slice ``text``.

        Returns:
            Whatever ``replace_and_update`` returns for the collected
            replacements and the detections this replacer did not handle
            (presumably the rewritten text plus the remaining detections
            with adjusted offsets -- confirm against
            ``src.string_replacements``).
        """
        replacements = []
        not_processed = []
        # Original date text -> generated replacement, so identical date
        # strings within one call are always replaced by the same value.
        already_replaced = dict()

        for item in detections:
            start, end, detection = item
            if not isinstance(detection, DateDetection):
                # Not a date: hand it back untouched for other replacers.
                not_processed.append(item)
                continue

            original = text[start:end]
            if original not in already_replaced:
                already_replaced[original] = self._random_date(detection.format)
            replacements.append((start, end, already_replaced[original]))

        return replace_and_update(text, replacements, not_processed)

    @staticmethod
    def _random_date(date_format) -> str:
        """Build one random date string laid out according to ``date_format``.

        ``date_format`` is a sequence of ``(AnnotationPart, original_text)``
        entries, or ``None`` to fall back to ``DD.MM.YYYY``.
        """
        part = DateDetection.AnnotationPart
        if date_format is None:
            date_format = [
                (part.TWO_DIGITS_DAY, "01"),
                (part.OTHER, "."),
                (part.TWO_DIGIT_MONTH, "01"),
                (part.OTHER, "."),
                (part.FOUR_DIGIT_YEAR, "2020"),
            ]

        pieces = []
        for entry in date_format:
            kind = entry[0]
            # Days are drawn from 1..28, presumably so the day is valid
            # whatever month ends up next to it.
            if kind == part.TWO_DIGITS_DAY:
                pieces.append(str(random.randint(1, 28)).zfill(2))
            elif kind == part.ONE_DIGIT_DAY:
                pieces.append(str(random.randint(1, 28)))
            elif kind == part.TWO_DIGIT_MONTH:
                pieces.append(str(random.randint(1, 12)).zfill(2))
            elif kind == part.ONE_DIGIT_MONTH:
                pieces.append(str(random.randint(1, 12)))
            elif kind == part.FOUR_DIGIT_YEAR:
                pieces.append(str(random.randint(1900, 2020)))
            elif kind == part.TWO_DIGIT_YEAR:
                pieces.append(str(random.randint(0, 99)).zfill(2))
            elif kind == part.TEXT_MONTH:
                pieces.append(months_map[random.randint(1, 12)])
            elif kind == part.OTHER:
                # Separators and other literal text are copied verbatim.
                pieces.append(entry[1])
            # Any other part kind contributes nothing to the output.

        return "".join(pieces)