from src.annotations import DateAnnotation
from src.detectors.date.date import DateDetector


def test_detect_dates_en():
    """Check that the English ``DateDetector`` finds both dates in a sentence.

    The sample text holds one all-numeric date and one date with a spelled-out
    month. Each expected match is a ``(start, end, DateAnnotation)`` tuple whose
    annotation lists the date's parts in reading order.
    """
    Part = DateAnnotation.AnnotationPart

    # Check en-us
    sample = "On 1.01.2022, I sold my cat. On April 5, 2021, I bought a dog."
    detected = DateDetector("en").detect(sample, {})

    # "1.01.2022": the day appears as "1" in the text but is expected as "01".
    numeric_parts = [
        (Part.TWO_DIGITS_DAY, "01"),  # Only supports two digits for now
        (Part.OTHER, "."),
        (Part.TWO_DIGIT_MONTH, "01"),
        (Part.OTHER, "."),
        (Part.FOUR_DIGIT_YEAR, "2022"),
    ]
    # "April 5, 2021": the day appears as "5" in the text but is expected as "05".
    month_name_parts = [
        (Part.TEXT_MONTH, "April"),
        (Part.OTHER, " "),
        (Part.TWO_DIGITS_DAY, "05"),  # Only supports two digits for now
        (Part.OTHER, ", "),
        (Part.FOUR_DIGIT_YEAR, "2021"),
    ]

    expected = [
        (3, 12, DateAnnotation(numeric_parts)),
        (32, 45, DateAnnotation(month_name_parts)),
    ]
    assert detected == expected

    # Check en-gb
    # TODO: The following test fails. Fix it.
    # text = "On 1.01.2022 I sold the cat. On 5th April 2021 I bought a dog."
    # found_dates = detect_dates_en(text)
    # assert found_dates == [(3,12, DateAnnotation()), (32,46, DateAnnotation())]