"""Tests for the English-language date detector."""
from src.annotations import DateAnnotation
from src.detectors.date.date import DateDetector

def test_detect_dates_en():
    """Check that ``DateDetector("en")`` finds and annotates dates in text.

    The sample sentence contains a numeric date (``1.01.2022``) and a date
    with a textual month (``April 5, 2021``). The detector is expected to
    return one ``(start, end, DateAnnotation)`` tuple per date, in order of
    appearance; the offsets correspond to ``text[3:12]`` and ``text[32:45]``.
    """
    detector = DateDetector("en")

    # Check en-us
    text = "On 1.01.2022, I sold my cat. On April 5, 2021, I bought a dog."
    found_dates = detector.detect(text, {})

    # Expected parts for "1.01.2022". The text has a single-digit day, but
    # the expected annotation carries it zero-padded.
    format_date1 = [
        (DateAnnotation.AnnotationPart.TWO_DIGITS_DAY, "01"),  # Only supports two digits for now
        (DateAnnotation.AnnotationPart.OTHER, "."),
        (DateAnnotation.AnnotationPart.TWO_DIGIT_MONTH, "01"),
        (DateAnnotation.AnnotationPart.OTHER, "."),
        (DateAnnotation.AnnotationPart.FOUR_DIGIT_YEAR, "2022"),
    ]

    # Expected parts for "April 5, 2021"; the day is again zero-padded.
    format_date2 = [
        (DateAnnotation.AnnotationPart.TEXT_MONTH, "April"),
        (DateAnnotation.AnnotationPart.OTHER, " "),
        (DateAnnotation.AnnotationPart.TWO_DIGITS_DAY, "05"),  # Only supports two digits for now
        (DateAnnotation.AnnotationPart.OTHER, ", "),
        (DateAnnotation.AnnotationPart.FOUR_DIGIT_YEAR, "2021"),
    ]

    assert found_dates == [
        (3, 12, DateAnnotation(format_date1)),
        (32, 45, DateAnnotation(format_date2)),
    ]

    # Check en-gb
    # TODO: Following test fails. Fix it.
    # text = "On 1.01.2022 I sold the cat. On 5th April 2021 I bought a dog."
    # found_dates = detect_dates_en(text)
    # assert found_dates == [(3,12, DateAnnotation()), (32,46, DateAnnotation())]