"""Convert matched date regex groups into (annotation part, text) sequences."""

from typing import List, Optional, Tuple

from src.detections import DateDetection

# Shape shared by every parser in this module: an ordered list of
# (annotation part, text) pairs.
_DateParts = List[Tuple[DateDetection.AnnotationPart, str]]


def _zero_pad_part(
    kind: DateDetection.AnnotationPart, digits: str
) -> Tuple[DateDetection.AnnotationPart, str]:
    """Pair *kind* with *digits*, prefixing a single-character value with "0"."""
    if len(digits) == 1:
        digits = "0" + digits
    return (kind, digits)


def _year_part(digits: str) -> Tuple[DateDetection.AnnotationPart, str]:
    """Tag *digits* as a two-digit year when its length is 2, else four-digit."""
    if len(digits) == 2:
        kind = DateDetection.AnnotationPart.TWO_DIGIT_YEAR
    else:
        # Any length other than 2 is tagged as a four-digit year.
        kind = DateDetection.AnnotationPart.FOUR_DIGIT_YEAR
    return (kind, digits)


def _separator_part(text: str) -> Tuple[DateDetection.AnnotationPart, str]:
    """Tag *text* as an ``OTHER`` part."""
    return (DateDetection.AnnotationPart.OTHER, text)


def _parse_day_or_month(re_entry) -> _DateParts:
    """Build the parts for a match of the ``day_or_month_year`` group.

    Args:
        re_entry: Object indexable by group name and supporting ``in``
            (presumably the ``groupdict()`` of a regex match -- confirm
            against the caller).

    Returns:
        Day, separator, month, separator and, when the ``year1`` key is
        present, the year. Both separators are read from ``punct1``.

    Raises:
        AssertionError: If ``re_entry["day_or_month_year"]`` is None.
    """
    assert re_entry["day_or_month_year"] is not None
    kinds = DateDetection.AnnotationPart
    result = [
        _zero_pad_part(kinds.TWO_DIGITS_DAY, re_entry["day_month1"]),
        _separator_part(re_entry["punct1"]),
        _zero_pad_part(kinds.TWO_DIGIT_MONTH, re_entry["day_month2"]),
        _separator_part(re_entry["punct1"]),
    ]
    # Key-presence test only: a key that is present but mapped to None makes
    # len() inside _year_part raise TypeError.
    if "year1" in re_entry:
        result.append(_year_part(re_entry["year1"]))
    return result


def _parse_year_month_or_day(re_entry) -> _DateParts:
    """Build the parts for a match of the ``year_month_or_day`` group.

    Args:
        re_entry: Object indexable by group name and supporting ``in``
            (presumably the ``groupdict()`` of a regex match -- confirm
            against the caller).

    Returns:
        For each of ``year2``, ``day_month3`` and ``day_month4`` whose key is
        present: the component followed by its separator (``punct3`` after
        the year, ``punct4`` after each of the other two).

    Raises:
        AssertionError: If ``re_entry["year_month_or_day"]`` is None.
    """
    assert re_entry["year_month_or_day"] is not None
    kinds = DateDetection.AnnotationPart
    result = []
    if "year2" in re_entry:
        result.append(_year_part(re_entry["year2"]))
        result.append(_separator_part(re_entry["punct3"]))
    # NOTE(review): the group right after the year is tagged as the day and
    # the following one as the month, and a separator is emitted after the
    # last component as well -- confirm this matches the regex's intent.
    if "day_month3" in re_entry:
        result.append(_zero_pad_part(kinds.TWO_DIGITS_DAY, re_entry["day_month3"]))
        result.append(_separator_part(re_entry["punct4"]))
    if "day_month4" in re_entry:
        result.append(_zero_pad_part(kinds.TWO_DIGIT_MONTH, re_entry["day_month4"]))
        result.append(_separator_part(re_entry["punct4"]))
    return result


def _parse_month_in_words(re_entry) -> _DateParts:
    """Build the parts for a match of the ``month_in_words`` group.

    Args:
        re_entry: Object indexable by group name; groups that did not take
            part in the match are expected to be None.

    Returns:
        The non-None components in the order day1, month, day2, year3, each
        day/month followed by a separator. The separator after the month is
        ``punct7`` when ``day1`` is None and ``punct6`` otherwise.

    Raises:
        AssertionError: If ``re_entry["month_in_words"]`` is None.
    """
    assert re_entry["month_in_words"] is not None
    kinds = DateDetection.AnnotationPart
    result = []
    if re_entry["day1"] is not None:
        result.append(_zero_pad_part(kinds.TWO_DIGITS_DAY, re_entry["day1"]))
        result.append(_separator_part(re_entry["punct5"]))
    if re_entry["month"] is not None:
        result.append((kinds.TEXT_MONTH, re_entry["month"]))
        # The group holding the separator after the month depends on whether
        # a day preceded it.
        separator_key = "punct7" if re_entry["day1"] is None else "punct6"
        result.append(_separator_part(re_entry[separator_key]))
    if re_entry["day2"] is not None:
        result.append(_zero_pad_part(kinds.TWO_DIGITS_DAY, re_entry["day2"]))
        result.append(_separator_part(re_entry["punct6"]))
    if re_entry["year3"] is not None:
        result.append(_year_part(re_entry["year3"]))
    return result


def _parse_date_to_format(
    re_entry,
) -> Optional[List[Tuple[DateDetection.AnnotationPart, str]]]:
    """Dispatch *re_entry* to the parser for whichever top-level group matched.

    The groups are tested in the order ``day_or_month_year``,
    ``year_month_or_day``, ``month_in_words``; the first one that is not None
    wins.

    Returns:
        The list produced by the selected parser, or None when all three
        groups are None.
    """
    if re_entry["day_or_month_year"] is not None:
        return _parse_day_or_month(re_entry)
    if re_entry["year_month_or_day"] is not None:
        return _parse_year_month_or_day(re_entry)
    if re_entry["month_in_words"] is not None:
        return _parse_month_in_words(re_entry)
    return None