# (Diff-viewer navigation labels "Newer" / "Older" — extraction residue, not part of the test.)
from src.annotations import DateAnnotation
from src.detectors.date.date import DateDetector
# Check en-us: the detector should find both a numeric and a textual date.
# NOTE(review): `detector` was used below without ever being bound in this
# file, although DateDetector is imported. This assumes DateDetector can be
# constructed without arguments — confirm against src.detectors.date.date.
detector = DateDetector()

text = "On 1.01.2022, I sold my cat. On April 5, 2021, I bought a dog."
# The second argument is passed as an empty dict.
found_dates = detector.detect(text, {})

# Expected parts for "1.01.2022" (text[3:12]).
format_date1 = [
    # The text has a one-digit day ("1"); the expected part is zero-padded
    # because only two-digit days are supported for now.
    (DateAnnotation.AnnotationPart.TWO_DIGITS_DAY, "01"),
    (DateAnnotation.AnnotationPart.OTHER, "."),
    (DateAnnotation.AnnotationPart.TWO_DIGIT_MONTH, "01"),
    (DateAnnotation.AnnotationPart.OTHER, "."),
    (DateAnnotation.AnnotationPart.FOUR_DIGIT_YEAR, "2022"),
]

# Expected parts for "April 5, 2021" (text[32:45]).
format_date2 = [
    (DateAnnotation.AnnotationPart.TEXT_MONTH, "April"),
    (DateAnnotation.AnnotationPart.OTHER, " "),
    # Same as above: the one-digit day "5" is expected as "05" for now.
    (DateAnnotation.AnnotationPart.TWO_DIGITS_DAY, "05"),
    (DateAnnotation.AnnotationPart.OTHER, ", "),
    (DateAnnotation.AnnotationPart.FOUR_DIGIT_YEAR, "2021"),
]

# Each expected entry is (start offset, end offset, annotation).
expected_dates = [
    (3, 12, DateAnnotation(format_date1)),
    (32, 45, DateAnnotation(format_date2)),
]
assert found_dates == expected_dates

# Check en-gb
# TODO: The following en-gb check currently fails; fix it and re-enable.
# NOTE(review): it still calls `detect_dates_en`, which is neither defined nor
# imported in this file, and builds DateAnnotation() without parts — update
# both when re-enabling.
# text = "On 1.01.2022 I sold the cat. On 5th April 2021 I bought a dog."
# found_dates = detect_dates_en(text)
# assert found_dates == [(3,12, DateAnnotation()), (32,46, DateAnnotation())]