Skip to content
Snippets Groups Projects
Commit 1eeb5a98 authored by Michał Pogoda's avatar Michał Pogoda
Browse files

[WIP] - Refactoring + unit testing

parent c7a903ad
No related branches found
No related tags found
2 merge requests: !10 Anonimizer v2, !7 Better coverage
Pipeline #6757 failed
from src.detectors.date.en import detect_dates_en
def test_detect_dates_en():
    """Check that ``detect_dates_en`` reports numeric and US-style written dates.

    Each expected entry is a ``(start, end, text)`` triple whose offsets index
    into the sample sentence.
    """
    # Check en-us
    sample = "On 1.01.2022, I sold my cat. On April 5, 2021, I bought a dog."
    expected = [(3, 12, "1.01.2022"), (32, 45, "April 5, 2021")]
    assert detect_dates_en(sample) == expected

    # Check en-gb
    # TODO: The following British-style case currently fails. Fix it and re-enable.
    # sample = "On 1.01.2022 I sold the cat. On 5th April 2021 I bought a dog."
    # expected = [(3, 12, "1.01.2022"), (32, 46, "5th April 2021")]
    # assert detect_dates_en(sample) == expected
\ No newline at end of file
from src.detectors.date.pl import detect_dates_pl
def test_detect_dates_pl():
    """Check that ``detect_dates_pl`` reports a numeric and a written Polish date.

    Each expected entry is a ``(start, end, text)`` triple whose offsets index
    into the sample sentence.
    """
    sample = "W dniu 1.01.2022 sprzedałem kota. 5 kwietnia 2021 roku kupiłem psa."
    expected = [(7, 16, "1.01.2022"), (34, 49, "5 kwietnia 2021")]
    assert detect_dates_pl(sample) == expected
\ No newline at end of file
from src.detectors.date.ru import detect_dates_ru
def test_detect_dates_ru():
    """Check that ``detect_dates_ru`` reports a numeric and a written Russian date.

    The test was previously named ``test_detect_dates_pl`` (copied from the
    Polish test), which made failures show up under the wrong language in
    pytest reports. Each expected entry is a ``(start, end, text)`` triple
    whose offsets index into the sample sentence.
    """
    text = "1.01.2022 я продал кошку. 5 апреля 2021 я купил собаку."
    found_dates = detect_dates_ru(text)
    assert found_dates == [(0, 9, "1.01.2022"), (26, 39, "5 апреля 2021")]
\ No newline at end of file
from src.detectors.email import detect_emails
def test_detect_emails():
    """Check that ``detect_emails`` reports both addresses in an English sentence.

    Each expected entry is a ``(start, end, text)`` triple whose offsets index
    into the sample sentence.
    """
    sample = "My email is arkadiusz@borek.pw. My friend's email is arkadiusz.dump@pwr.edu.pl"
    expected = [
        (12, 30, "arkadiusz@borek.pw"),
        (53, 78, "arkadiusz.dump@pwr.edu.pl"),
    ]
    assert detect_emails(sample, "en") == expected
\ No newline at end of file
from src.detectors.phone.phone import detect_phone_numbers
def test_detect_phone_numbers():
    """Check that ``detect_phone_numbers`` reports a prefixed and a bare number.

    Each expected entry is a ``(start, end, text)`` triple whose offsets index
    into the sample sentence.
    """
    sample = "My phone number is +48 123 456 789. My friend's number is 123456789."
    expected = [
        (19, 34, '+48 123 456 789'),
        (58, 67, '123456789'),
    ]
    assert detect_phone_numbers(sample, "en") == expected
\ No newline at end of file
from src.detectors.url import detect_urls
def test_detect_urls():
    """Check that ``detect_urls`` reports a bare ``www`` host and a full https URL.

    Each expected entry is a ``(start, end, text)`` triple whose offsets index
    into the sample sentence.
    """
    sample = "This is a test for www.google.com. Make sure to go to https://www.google.com"
    expected = [
        (19, 33, 'www.google.com'),
        (54, 76, 'https://www.google.com'),
    ]
    assert detect_urls(sample, "en") == expected
def test_detect_urls_pl():
    """Check that the language argument changes how ``m.in`` is treated.

    The same text is run through ``detect_urls`` once as Polish and once as
    English, and the two result lists are compared against separate
    expectations.
    """
    sample = "m.in. https://www.google.com"
    polish_hits = detect_urls(sample, "pl")
    english_hits = detect_urls(sample, "en")

    real_url = (6, 28, 'https://www.google.com')

    # "m.in." is a valid Polish abbreviation of "między innymi", so with
    # lang="pl" it should not be detected as a URL.
    assert polish_hits == [real_url]
    # With lang="en" the same token is expected to be reported as a URL.
    assert english_hits == [(0, 4, "m.in"), real_url]
\ No newline at end of file
from src.detectors.user.user import detect_users
def test_detect_users():
    """Check that ``detect_users`` reports both ``@``-prefixed handles.

    Each expected entry is a ``(start, end, text)`` triple whose offsets index
    into the sample sentence; the sentence-ending period is not part of a match.
    """
    sample = "My username is @john_smith. My friend's username is @jane_doe."
    expected = [
        (15, 26, '@john_smith'),
        (52, 61, '@jane_doe'),
    ]
    assert detect_users(sample, "en") == expected
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment