"""Split a regex date match into typed annotation parts.

Each parser below turns the named groups of one date pattern into a list of
``(DateDetection.AnnotationPart, text)`` pairs, in the order the pieces are
emitted by the code. One-character day/month values are left-padded with "0".
"""
from typing import List, Optional, Tuple

from src.detections import DateDetection

# One list entry per emitted fragment: the fragment kind and its text.
# NOTE(review): separator texts are taken straight from the match, so they are
# presumably ``None`` when the corresponding group did not participate.
_DateParts = List[Tuple[DateDetection.AnnotationPart, str]]


def _group(re_entry, name):
    """Return ``re_entry[name]``, or ``None`` when *name* cannot be looked up.

    The ``in`` operator is not usable for this check: ``re.Match`` objects do
    not support it, and on a ``groupdict()`` every group name is present even
    when its value is ``None``. Looking the value up and treating
    ``KeyError`` (dict) / ``IndexError`` (``re.Match``) as "absent" works for
    both kinds of container.
    """
    try:
        return re_entry[name]
    except (KeyError, IndexError):
        return None


def _padded(kind, digits):
    """Return ``(kind, digits)``, prefixing a one-character *digits* with "0"."""
    if len(digits) == 1:
        digits = "0" + digits
    return kind, digits


def _year_part(year):
    """Return *year* tagged as a two-digit year if it has length 2, else four-digit."""
    parts = DateDetection.AnnotationPart
    kind = parts.TWO_DIGIT_YEAR if len(year) == 2 else parts.FOUR_DIGIT_YEAR
    return kind, year


def _parse_day_or_month(re_entry) -> _DateParts:
    """Parse a match of the ``day_or_month_year`` alternative.

    Emits day, separator, month, separator when ``day_month1`` is set;
    otherwise month, separator when ``day_month2`` is set. A year part is
    appended last when ``year1`` is set.

    Args:
        re_entry: Container of named groups supporting ``re_entry[name]``
            (presumably an ``re.Match`` or its ``groupdict()`` - confirm
            against the caller).

    Returns:
        The list of ``(AnnotationPart, text)`` pairs.
    """
    assert re_entry["day_or_month_year"] is not None
    parts = DateDetection.AnnotationPart
    result = []
    if re_entry["day_month1"] is not None:
        result.append(_padded(parts.TWO_DIGITS_DAY, re_entry["day_month1"]))
        result.append((parts.OTHER, re_entry["punct1"]))
        result.append(_padded(parts.TWO_DIGIT_MONTH, re_entry["day_month2"]))
        # NOTE(review): "punct1" is emitted a second time here rather than a
        # distinct second-separator group - confirm against the pattern.
        result.append((parts.OTHER, re_entry["punct1"]))
    elif _group(re_entry, "day_month2") is not None:
        result.append(_padded(parts.TWO_DIGIT_MONTH, re_entry["day_month2"]))
        result.append((parts.OTHER, re_entry["punct1"]))
    year = _group(re_entry, "year1")
    if year is not None:
        result.append(_year_part(year))
    return result


def _parse_year_month_or_day(re_entry) -> _DateParts:
    """Parse a match of the ``year_month_or_day`` alternative.

    Emits, for each group that is set: year + ``punct3``, then ``day_month3``
    (tagged as day) + ``punct4``, then ``day_month4`` (tagged as month) +
    ``punct4``.

    Args:
        re_entry: Container of named groups supporting ``re_entry[name]``.

    Returns:
        The list of ``(AnnotationPart, text)`` pairs.
    """
    assert re_entry["year_month_or_day"] is not None
    parts = DateDetection.AnnotationPart
    result = []
    year = _group(re_entry, "year2")
    if year is not None:
        result.append(_year_part(year))
        result.append((parts.OTHER, re_entry["punct3"]))
    day = _group(re_entry, "day_month3")
    if day is not None:
        result.append(_padded(parts.TWO_DIGITS_DAY, day))
        result.append((parts.OTHER, re_entry["punct4"]))
    month = _group(re_entry, "day_month4")
    if month is not None:
        result.append(_padded(parts.TWO_DIGIT_MONTH, month))
        # NOTE(review): this appends a separator after the last component, so
        # the list ends with an OTHER part - confirm this is intended.
        result.append((parts.OTHER, re_entry["punct4"]))
    return result


def _parse_month_in_words(re_entry) -> _DateParts:
    """Parse a match of the ``month_in_words`` alternative.

    Emits an optional leading day (``day1`` + ``punct5``), the textual month
    followed by one separator, an optional trailing day (``day2`` +
    ``punct6``) and an optional year (``year3``).

    Args:
        re_entry: Container of named groups supporting ``re_entry[name]``.

    Returns:
        The list of ``(AnnotationPart, text)`` pairs.
    """
    assert re_entry["month_in_words"] is not None
    parts = DateDetection.AnnotationPart
    result = []
    leading_day = re_entry["day1"]
    if leading_day is not None:
        result.append(_padded(parts.TWO_DIGITS_DAY, leading_day))
        result.append((parts.OTHER, re_entry["punct5"]))
    if re_entry["month"] is not None:
        result.append((parts.TEXT_MONTH, re_entry["month"]))
        # The separator after the month comes from "punct7" when there was no
        # leading day and from "punct6" otherwise.
        separator = "punct7" if leading_day is None else "punct6"
        result.append((parts.OTHER, re_entry[separator]))
    trailing_day = re_entry["day2"]
    if trailing_day is not None:
        result.append(_padded(parts.TWO_DIGITS_DAY, trailing_day))
        result.append((parts.OTHER, re_entry["punct6"]))
    year = re_entry["year3"]
    if year is not None:
        result.append(_year_part(year))
    return result


def _parse_date_to_format(re_entry) -> Optional[_DateParts]:
    """Dispatch *re_entry* to the parser of the alternative that matched.

    The alternatives are tried in the order ``day_or_month_year``,
    ``year_month_or_day``, ``month_in_words``; the first one whose group is
    not ``None`` decides which parser runs.

    Args:
        re_entry: Container of named groups supporting ``re_entry[name]``.

    Returns:
        The parsed ``(AnnotationPart, text)`` pairs, or ``None`` when none of
        the three alternative groups is set.
    """
    if re_entry["day_or_month_year"] is not None:
        return _parse_day_or_month(re_entry)
    if re_entry["year_month_or_day"] is not None:
        return _parse_year_month_or_day(re_entry)
    if re_entry["month_in_words"] is not None:
        return _parse_month_in_words(re_entry)
    return None