Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from typing import List, Tuple
from src.annotations import DateAnnotation, Optional
def _parse_day_or_month(re_entry) -> List[Tuple[int, int, DateAnnotation]]:
assert re_entry["day_or_month_year"] is not None
result = []
if re_entry["day_month1"] is not None:
if len(re_entry["day_month1"]) == 1:
result.append((DateAnnotation.AnnotationPart.TWO_DIGITS_DAY, "0" + re_entry["day_month1"]))
else:
result.append((DateAnnotation.AnnotationPart.TWO_DIGITS_DAY, re_entry["day_month1"]))
result.append((DateAnnotation.AnnotationPart.OTHER, re_entry["punct1"]))
if len(re_entry["day_month2"]) == 1:
result.append((DateAnnotation.AnnotationPart.TWO_DIGIT_MONTH, "0" + re_entry["day_month2"]))
else:
result.append((DateAnnotation.AnnotationPart.TWO_DIGIT_MONTH, re_entry["day_month2"]))
result.append((DateAnnotation.AnnotationPart.OTHER, re_entry["punct1"]))
elif "day_month2" in re_entry:
if len(re_entry["day_month2"]) == 1:
result.append((DateAnnotation.AnnotationPart.TWO_DIGIT_MONTH, "0" + re_entry["day_month2"]))
else:
result.append((DateAnnotation.AnnotationPart.TWO_DIGIT_MONTH, re_entry["day_month2"]))
result.append((DateAnnotation.AnnotationPart.OTHER, re_entry["punct1"]))
if "year1" in re_entry:
if len(re_entry["year1"]) == 2:
result.append((DateAnnotation.AnnotationPart.TWO_DIGIT_YEAR, re_entry["year1"]))
else:
result.append((DateAnnotation.AnnotationPart.FOUR_DIGIT_YEAR, re_entry["year1"]))
return result
def _parse_year_month_or_day(re_entry) -> List[Tuple[int, int, DateAnnotation]]:
assert re_entry["year_month_or_day"] is not None
result = []
if "year2" in re_entry:
if len(re_entry["year2"]) == 2:
result.append((DateAnnotation.AnnotationPart.TWO_DIGIT_YEAR, re_entry["year2"]))
else:
result.append((DateAnnotation.AnnotationPart.FOUR_DIGIT_YEAR, re_entry["year2"]))
result.append((DateAnnotation.AnnotationPart.OTHER, re_entry["punct3"]))
if "day_month3" in re_entry:
if len(re_entry["day_month3"]) == 1:
result.append((DateAnnotation.AnnotationPart.TWO_DIGITS_DAY, "0" + re_entry["day_month3"]))
else:
result.append((DateAnnotation.AnnotationPart.TWO_DIGITS_DAY, re_entry["day_month3"]))
result.append((DateAnnotation.AnnotationPart.OTHER, re_entry["punct4"]))
if "day_month4" in re_entry:
if len(re_entry["day_month4"]) == 1:
result.append((DateAnnotation.AnnotationPart.TWO_DIGIT_MONTH, "0" + re_entry["day_month4"]))
else:
result.append((DateAnnotation.AnnotationPart.TWO_DIGIT_MONTH, re_entry["day_month4"]))
result.append((DateAnnotation.AnnotationPart.OTHER, re_entry["punct4"]))
return result
def _parse_month_in_words(re_entry) -> List[Tuple[DateAnnotation.AnnotationPart, str]]:
assert re_entry["month_in_words"] is not None
result = []
if re_entry["day1"] is not None:
if len(re_entry["day1"]) == 1:
result.append((DateAnnotation.AnnotationPart.TWO_DIGITS_DAY, "0" + re_entry["day1"]))
else:
result.append((DateAnnotation.AnnotationPart.TWO_DIGITS_DAY, re_entry["day1"]))
result.append((DateAnnotation.AnnotationPart.OTHER, re_entry["punct5"]))
if re_entry["month"] is not None:
result.append((DateAnnotation.AnnotationPart.TEXT_MONTH, re_entry["month"]))
if re_entry["day1"] is None:
result.append((DateAnnotation.AnnotationPart.OTHER, re_entry["punct7"]))
else:
result.append((DateAnnotation.AnnotationPart.OTHER, re_entry["punct6"]))
if re_entry["day2"] is not None:
if len(re_entry["day2"]) == 1:
result.append((DateAnnotation.AnnotationPart.TWO_DIGITS_DAY, "0" + re_entry["day2"]))
else:
result.append((DateAnnotation.AnnotationPart.TWO_DIGITS_DAY, re_entry["day2"]))
result.append((DateAnnotation.AnnotationPart.OTHER, re_entry["punct6"]))
if re_entry["year3"] is not None:
if len(re_entry["year3"]) == 2:
result.append((DateAnnotation.AnnotationPart.TWO_DIGIT_YEAR, re_entry["year3"]))
else:
result.append((DateAnnotation.AnnotationPart.FOUR_DIGIT_YEAR, re_entry["year3"]))
return result
def _parse_date_to_format(re_entry) -> Optional[List[Tuple[DateAnnotation.AnnotationPart, str]]]:
if re_entry["day_or_month_year"] is not None:
result = _parse_day_or_month(re_entry)
elif re_entry["year_month_or_day"] is not None:
result = _parse_year_month_or_day(re_entry)
elif re_entry["month_in_words"] is not None:
result = _parse_month_in_words(re_entry)
else:
result = None
return result