Skip to content
Snippets Groups Projects
date_replacer.py 3.77 KiB
Newer Older
Michał Pogoda's avatar
Michał Pogoda committed
from typing import List, Tuple
from src.annotations import (
    Annotation,
    DateAnnotation,
)
from src.string_replacements import replace_and_update
from src.replacers.interface import ReplacerInterface
import random

# TODO: Add support for other languages
# Polish month names (genitive form, as written in dates such as
# "3 maja 2020"), keyed by calendar month number 1-12.
months_map = dict(
    enumerate(
        (
            "stycznia",
            "lutego",
            "marca",
            "kwietnia",
            "maja",
            "czerwca",
            "lipca",
            "sierpnia",
            "września",
            "października",
            "listopada",
            "grudnia",
        ),
        start=1,
    )
)

class DateReplacer(ReplacerInterface):
    """Replace detected dates with randomly generated dates of the same layout.

    Every detection whose annotation is a ``DateAnnotation`` is swapped for a
    random date rendered with the same sequence of parts (day, month, year,
    separators) as the original. All other detections are passed through
    untouched for later processing.
    """

    def __init__(self):
        """Create the replacer; it keeps no per-instance state."""
        # NOTE(review): the parent initializer is intentionally not called,
        # matching the previous behavior - confirm ReplacerInterface needs none.

    def replace(self, text: str, detections: List[Tuple[int, int, Annotation]]) -> Tuple[str, List[Tuple[int, int, Annotation]]]:
        """Substitute every date detection in ``text`` with a random date.

        Args:
            text: The text the detections refer to.
            detections: ``(start, end, annotation)`` triples; ``start``/``end``
                are used as slice bounds into ``text``.

        Returns:
            The result of ``replace_and_update(text, replacements,
            not_processed)``, where ``replacements`` holds
            ``(start, end, new_text)`` for each date detection and
            ``not_processed`` holds the non-date detections in their
            original order.
        """
        replacements = []
        not_processed = []
        # Original date text -> generated replacement, so that the same date
        # string occurring several times within one call is replaced
        # consistently.
        already_replaced = {}

        for item in detections:
            start, end, detection = item

            if not isinstance(detection, DateAnnotation):
                not_processed.append(item)
                continue

            original = text[start:end]
            if original not in already_replaced:
                already_replaced[original] = self._random_date(detection.format)

            replacements.append((start, end, already_replaced[original]))

        return replace_and_update(text, replacements, not_processed)

    @staticmethod
    def _default_format():
        """Return the fallback layout ``DD.MM.YYYY`` used when none is given."""
        parts = DateAnnotation.AnnotationPart
        # Only the values of OTHER parts are ever emitted; the remaining
        # values are placeholders.
        return [
            (parts.TWO_DIGITS_DAY, "01"),
            (parts.OTHER, "."),
            (parts.TWO_DIGIT_MONTH, "01"),
            (parts.OTHER, "."),
            (parts.FOUR_DIGIT_YEAR, "2020"),
        ]

    @staticmethod
    def _random_date(date_format):
        """Render a random date following ``date_format``.

        Args:
            date_format: Sequence of ``(part_kind, original_value)`` entries,
                or ``None`` to use the default ``DD.MM.YYYY`` layout.

        Returns:
            The generated date string. Entries whose kind is not recognised
            contribute nothing to the output.
        """
        if date_format is None:
            date_format = DateReplacer._default_format()

        parts = DateAnnotation.AnnotationPart
        pieces = []
        for entry in date_format:
            kind = entry[0]
            if kind == parts.TWO_DIGITS_DAY:
                # Days are capped at 28 so the day is valid in every month.
                pieces.append(str(random.randint(1, 28)).zfill(2))
            elif kind == parts.ONE_DIGIT_DAY:
                pieces.append(str(random.randint(1, 28)))
            elif kind == parts.TWO_DIGIT_MONTH:
                pieces.append(str(random.randint(1, 12)).zfill(2))
            elif kind == parts.ONE_DIGIT_MONTH:
                pieces.append(str(random.randint(1, 12)))
            elif kind == parts.FOUR_DIGIT_YEAR:
                pieces.append(str(random.randint(1900, 2020)))
            elif kind == parts.TWO_DIGIT_YEAR:
                pieces.append(str(random.randint(0, 99)).zfill(2))
            elif kind == parts.TEXT_MONTH:
                pieces.append(months_map[random.randint(1, 12)])
            elif kind == parts.OTHER:
                # Separators and other literal text are copied verbatim.
                pieces.append(entry[1])

        return "".join(pieces)