Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from typing import List, Tuple
from src.annotations import (
Annotation,
DateAnnotation,
)
from src.string_replacements import replace_and_update
from src.replacers.interface import ReplacerInterface
import random
# TODO: Add support for other languages
# Polish month names in the genitive case (the form used in written dates,
# e.g. "3 maja"), keyed by month number 1-12.
months_map = dict(
    enumerate(
        (
            "stycznia",
            "lutego",
            "marca",
            "kwietnia",
            "maja",
            "czerwca",
            "lipca",
            "sierpnia",
            "września",
            "października",
            "listopada",
            "grudnia",
        ),
        start=1,
    )
)
class DateReplacer(ReplacerInterface):
    """Replace detected dates in a text with randomly generated ones.

    Each ``DateAnnotation`` detection is substituted by a random date that
    follows the layout described by the detection's ``format`` (or
    ``DD.MM.YYYY`` when the detection carries no format). Identical source
    strings within one ``replace`` call receive the same substitute.
    """

    def __init__(self):
        """Create the replacer; it keeps no state between calls."""
        pass

    @staticmethod
    def _default_format():
        """Return the fallback layout (``DD.MM.YYYY``) for format-less dates."""
        part = DateAnnotation.AnnotationPart
        # Only the text of OTHER entries is ever emitted; the sample values
        # of the remaining entries are ignored by ``_render_part``.
        return [
            (part.TWO_DIGITS_DAY, "01"),
            (part.OTHER, "."),
            (part.TWO_DIGIT_MONTH, "01"),
            (part.OTHER, "."),
            (part.FOUR_DIGIT_YEAR, "2020"),
        ]

    @staticmethod
    def _render_part(entry):
        """Return the replacement text for one ``(part, text)`` format entry.

        Numeric and textual date parts are drawn at random, ``OTHER`` parts
        are copied verbatim, and unrecognised parts produce an empty string.
        """
        part = DateAnnotation.AnnotationPart
        kind = entry[0]
        # Days are capped at 28 so that any day/month combination is valid.
        if kind == part.TWO_DIGITS_DAY:
            return str(random.randint(1, 28)).zfill(2)
        if kind == part.ONE_DIGIT_DAY:
            return str(random.randint(1, 28))
        if kind == part.TWO_DIGIT_MONTH:
            return str(random.randint(1, 12)).zfill(2)
        if kind == part.ONE_DIGIT_MONTH:
            return str(random.randint(1, 12))
        if kind == part.FOUR_DIGIT_YEAR:
            return str(random.randint(1900, 2020))
        if kind == part.TWO_DIGIT_YEAR:
            return str(random.randint(0, 99)).zfill(2)
        if kind == part.TEXT_MONTH:
            return months_map[random.randint(1, 12)]
        if kind == part.OTHER:
            return entry[1]
        return ""

    def replace(self, text: str, detections: List[Tuple[int, int, Annotation]]) -> Tuple[str, List[Tuple[int, int, Annotation]]]:
        """Substitute every detected date in ``text`` with a random date.

        Args:
            text: The text the detection offsets refer to.
            detections: ``(start, end, annotation)`` triples; only those whose
                annotation is a ``DateAnnotation`` are handled here.

        Returns:
            The result of ``replace_and_update(text, replacements,
            not_processed)``, where ``replacements`` holds the
            ``(start, end, new_text)`` triples built here and
            ``not_processed`` the detections that are not dates.
        """
        replacements = []
        not_processed = []
        # Maps an original date string to its substitute so that repeated
        # occurrences of the same date stay consistent within this call.
        already_replaced = {}

        for item in detections:
            start, end, detection = item
            if not isinstance(detection, DateAnnotation):
                not_processed.append(item)
                continue

            original = text[start:end]
            if original not in already_replaced:
                date_format = detection.format
                if date_format is None:
                    date_format = self._default_format()
                already_replaced[original] = "".join(
                    self._render_part(entry) for entry in date_format
                )
            replacements.append((start, end, already_replaced[original]))

        return replace_and_update(text, replacements, not_processed)