Newer
Older

Michał Pogoda
committed
from src.detections import DateDetection
text = "W dniu 1.01.2022 sprzedałem kota. 5 kwietnia 2021 roku kupiłem psa."
(
DateDetection.AnnotationPart.TWO_DIGITS_DAY,
"01",
), # Only supports two digits for now

Michał Pogoda
committed
(DateDetection.AnnotationPart.OTHER, "."),
(DateDetection.AnnotationPart.TWO_DIGIT_MONTH, "01"),
(DateDetection.AnnotationPart.OTHER, "."),
(DateDetection.AnnotationPart.FOUR_DIGIT_YEAR, "2022"),
(
DateDetection.AnnotationPart.TWO_DIGITS_DAY,
"05",
), # Only supports two digits for now

Michał Pogoda
committed
(DateDetection.AnnotationPart.OTHER, " "),
(DateDetection.AnnotationPart.TEXT_MONTH, "kwietnia"),
(DateDetection.AnnotationPart.OTHER, " "),
(DateDetection.AnnotationPart.FOUR_DIGIT_YEAR, "2021"),
assert found_dates == [
(7, 16, DateDetection(format_date1)),
(34, 49, DateDetection(format_date2)),
]
def test_date_with_different_punctuations():
    """Check that a date with mixed separators ("." and ",") is detected.

    The whole input string is the date, so the single expected detection
    must span the text from its first to its last character.
    """
    # There is discussion about whether we should even detect such cases
    # as dates... However, for now we do, and if we find cases where that
    # is problematic, this definitely could be changed.
    detector = DateDetector("pl")

    text = "1.01,2022"
    found_dates = detector.detect(text, {})

    format_date = [
        # The text has a single-digit day ("1"), but the expected annotation
        # is the two-digit form "01".
        (DateDetection.AnnotationPart.TWO_DIGITS_DAY, "01"),
        (DateDetection.AnnotationPart.OTHER, "."),
        (DateDetection.AnnotationPart.TWO_DIGIT_MONTH, "01"),
        (DateDetection.AnnotationPart.OTHER, ","),
        (DateDetection.AnnotationPart.FOUR_DIGIT_YEAR, "2022"),
    ]

    # The text is only 9 characters long and consists solely of the date,
    # so the detection covers offsets 0..9 (start inclusive, end exclusive).
    assert found_dates == [
        (0, 9, DateDetection(format_date)),
    ]