diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index dcf62b24c2ed0209c98e341819289b8bf1316aa1..3ada971beb52a5aaa08b8e2c516134e40e5b8c00 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -4,6 +4,7 @@ cache: - .tox stages: - check_style + - test - build before_script: - pip install tox==2.9.1 @@ -18,6 +19,11 @@ docstyle: script: - tox -v -e docstyle +auto_tests: + stage: test + script: + - tox -v -e tests + build_image: stage: build image: 'docker:18.09.7' diff --git a/requirements.dev.txt b/requirements.dev.txt new file mode 100644 index 0000000000000000000000000000000000000000..28706bebc1f8bed1a34a0cb39b24af9c9cfdd037 --- /dev/null +++ b/requirements.dev.txt @@ -0,0 +1 @@ +pytest==7.3.2 \ No newline at end of file diff --git a/src/detections/__init__.py b/src/detections/__init__.py index 23a39783d6006b9a3345989f4b8b200f112ed3cb..635f68157b330c8f12adece7f7a4cf297d275464 100644 --- a/src/detections/__init__.py +++ b/src/detections/__init__.py @@ -1,7 +1,6 @@ from src.detections.date import DateDetection from src.detections.detection import ( Detection, - DetectionType, CityDetection, CountryDetection, EmailDetection, @@ -23,7 +22,7 @@ from src.detections.detection import ( UrlDetection, UserDetection, ) -from src.utils.subclasses import get_sublcasses +from src.utils.utils import get_sublcasses DETECTION_CLASSES_MAP = { detection_class.TYPE_NAME: detection_class @@ -32,7 +31,6 @@ DETECTION_CLASSES_MAP = { __all__ = [ "Detection", - "DetectionType", "CityDetection", "CountryDetection", "EmailDetection", diff --git a/src/detectors/date/utils.py b/src/detectors/date/utils.py index 05fc9fd83b94bd19fd87a54cb7a0ec7262ebef92..d2d2a58f7de70959e88afabd655ce870deb26734 100644 --- a/src/detectors/date/utils.py +++ b/src/detectors/date/utils.py @@ -1,8 +1,8 @@ """Utility scripts for parsing date detections.""" -from typing import List, Tuple +from typing import List, Tuple, Optional -from src.detections import DateDetection, Optional +from src.detections import DateDetection def _parse_day_or_month(re_entry) -> List[Tuple[int, int, DateDetection]]: diff --git a/src/detectors/ner/ner.py b/src/detectors/ner/ner.py index 6b4e9005b11e330a9d8efa6a732e0bbd5e088687..f5d65314e7d7fa38df1b211502a3cffb66f83afc 100644 --- a/src/detectors/ner/ner.py +++ b/src/detectors/ner/ner.py @@ -59,6 +59,8 @@ class NerDetector(Detector): if issubclass(ner_detection, MorphosyntacticInfoMixin): if start in morpho_tags: kwargs["morpho_tag"] = morpho_tags[start] + else: + kwargs["morpho_tag"] = None result.append((start, end, ner_detection(**kwargs))) diff --git a/src/detectors/url/common.py b/src/detectors/url/common.py index 085c7010462dddf0c7d70b5de7fdeae8d06eb27e..b9092caa83ca428e4a43eb1cc0fffb941c191902 100644 --- a/src/detectors/url/common.py +++ b/src/detectors/url/common.py @@ -1,12 +1,12 @@ """Helper module for detecting URLs.""" -from typing import List, AnyStr +from typing import List import regex as re -from regex import compile, Pattern +from regex.regex import Pattern -def generate_url_regex(exeptions: List[str]) -> Pattern[AnyStr @ compile]: +def generate_url_regex(exeptions: List[str]) -> Pattern: """Returns a regex for detecting urls. Args: diff --git a/src/suppressors/__init__.py b/src/suppressors/__init__.py index 7a5f31c202c9416c5fec760c8f49640cd71bfc3d..c42ebd8d77df119279b5fc83a55675a4f0e46b68 100644 --- a/src/suppressors/__init__.py +++ b/src/suppressors/__init__.py @@ -1,3 +1,3 @@ -from src.suppressors.order_based import suppress_order_based +from src.suppressors.order_based import OrderBasedSuppressor -__all__ = [suppress_order_based] +__all__ = [OrderBasedSuppressor] diff --git a/src/utils/__init__.py b/src/utils/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..3698ae218d8ea2c7262c94b7de7afe6d2e281cea 100644 --- a/src/utils/__init__.py +++ b/src/utils/__init__.py @@ -0,0 +1,11 @@ +from src.utils.utils import ( + consume, + subdict, + get_sublcasses, +) + +__all__ = [ + "consume", + "subdict", + "get_sublcasses", +] diff --git a/tests/unit/pipeline/test_default.py b/tests/unit/pipeline/test_default.py index f0e026dbae4d446ce6cf6c6ab58728cae8f80283..fd32364a60f4ea60c7cc653dc1251b8c85cccb26 100644 --- a/tests/unit/pipeline/test_default.py +++ b/tests/unit/pipeline/test_default.py @@ -17,7 +17,7 @@ class _MockInputParser(InputParser): class _MockDetector(Detector): def detect(self, text, annotations): - return [(0, 3, NameDetection())] + return [(0, 3, NameDetection("morpho_tag"))] class _MockSuppressor(Suppressor): diff --git a/tests/unit/pipeline/test_sequential_jsonl.py b/tests/unit/pipeline/test_sequential_jsonl.py index b09fa6f449a1b2047a588044738ccf030bc09c70..1bd5ff4dc3579e7ed852db5c08f8d46d689848cc 100644 --- a/tests/unit/pipeline/test_sequential_jsonl.py +++ b/tests/unit/pipeline/test_sequential_jsonl.py @@ -17,7 +17,7 @@ class _MockInputParser(InputParser): class _MockDetector(Detector): def detect(self, text, annotations): - return [(0, 3, NameDetection())] + return [(0, 3, NameDetection("morpho_tag"))] class _MockSuppressor(Suppressor): diff --git a/tests/unit/replacers/test_date_replacer.py b/tests/unit/replacers/test_date_replacer.py index e4cca6a96413594a966c3d9a55eb4cf235bebcfa..9402ff18148f6e51a541b77860fbdf3be0e8331f 100644 --- a/tests/unit/replacers/test_date_replacer.py +++ b/tests/unit/replacers/test_date_replacer.py @@ -8,10 +8,10 @@ def test_date_replacer(): """Test date replacer.""" text = "Ala Brzeszczot urodziła sie 05.05.2005 we Wrocławiu" detections = [ - (0, 3, NameDetection()), - (4, 14, SurnameDetection()), + (0, 3, NameDetection("subst:sg:nom:f")), + (4, 14, SurnameDetection("subst:sg:nom.acc:m3")), (28, 38, DateDetection()), - (42, 51, CityDetection()), + (42, 51, CityDetection("subst:sg:loc:m3")), ] replacer = DateReplacer() @@ -21,9 +21,9 @@ def test_date_replacer(): expected_text_beggining = "Ala Brzeszczot urodziła sie " expected_text_ending = " we Wrocławiu" exptected_detections_left = [ - (0, 3, NameDetection()), - (4, 14, SurnameDetection()), - (len(result[0]) - 9, len(result[0]), CityDetection()), + (0, 3, NameDetection("subst:sg:nom:f")), + (4, 14, SurnameDetection("subst:sg:nom.acc:m3")), + (len(result[0]) - 9, len(result[0]), CityDetection("subst:sg:loc:m3")), ] assert result[0].startswith(expected_text_beggining) diff --git a/tests/unit/replacers/test_email_replacer.py b/tests/unit/replacers/test_email_replacer.py index 1bda3cc3a55b03a2fa819f817811169f3427e412..30c76986de4fe22bbd37dc7e5ce7cfb59522983a 100644 --- a/tests/unit/replacers/test_email_replacer.py +++ b/tests/unit/replacers/test_email_replacer.py @@ -10,7 +10,7 @@ def test_email_replacer(): detections = [ (0, 7, EmailDetection()), (21, 31, DateDetection()), - (35, 44, CityDetection()), + (35, 44, CityDetection("subst:sg:loc:m3")), ] replacer = EmailReplacer() @@ -19,7 +19,7 @@ def test_email_replacer(): expected_text_ending = " urodziła sie 05.05.2005 we Wrocławiu" exptected_detections_left = [ (len(result[0]) - 23, len(result[0]) - 13, DateDetection()), - (len(result[0]) - 9, len(result[0]), CityDetection()), + (len(result[0]) - 9, len(result[0]), CityDetection("subst:sg:loc:m3")), ] assert result[0].endswith(expected_text_ending) diff --git a/tests/unit/replacers/test_tag_replacer.py b/tests/unit/replacers/test_tag_replacer.py index f8dd7a2325801def1c92c475567038f103fea229..ee62de927ec9f3fc7cff5ca432f925b98717bcbb 100644 --- a/tests/unit/replacers/test_tag_replacer.py +++ b/tests/unit/replacers/test_tag_replacer.py @@ -8,10 +8,10 @@ def test_replace_with_tags(): """Test replace with tags.""" text = "Ala Brzeszczot urodziła sie 05.05.2005 we Wrocławiu" detections = [ - (0, 3, NameDetection()), - (4, 14, SurnameDetection()), + (0, 3, NameDetection("subst:sg:nom:f")), + (4, 14, SurnameDetection("subst:sg:nom.acc:m3")), (28, 38, DateDetection()), - (42, 51, CityDetection()), + (42, 51, CityDetection("subst:sg:loc:m3")), ] replacer = TagReplacer() diff --git a/tests/unit/replacers/test_user_replacer.py b/tests/unit/replacers/test_user_replacer.py index ded256aa21d9f9ab8d0efaef541fbcc8c6db7180..259e45c3e22d9ab364f526f9be6ff4d83e03550b 100644 --- a/tests/unit/replacers/test_user_replacer.py +++ b/tests/unit/replacers/test_user_replacer.py @@ -10,7 +10,7 @@ def test_user_replacer(): detections = [ (0, 7, UserDetection()), (21, 31, DateDetection()), - (35, 44, CityDetection()), + (35, 44, CityDetection("subst:sg:loc:m3")), ] replacer = UserReplacer() @@ -19,7 +19,7 @@ def test_user_replacer(): expected_text_ending = " urodziła sie 05.05.2005 we Wrocławiu" exptected_detections_left = [ (len(result[0]) - 23, len(result[0]) - 13, DateDetection()), - (len(result[0]) - 9, len(result[0]), CityDetection()), + (len(result[0]) - 9, len(result[0]), CityDetection("subst:sg:loc:m3")), ] assert result[0].endswith(expected_text_ending) diff --git a/tests/unit/suppressors/test_order_based.py b/tests/unit/suppressors/test_order_based.py index 8374dc7c6b9e33cc9b81436f0a61965c7c99cf68..c6f167fded4762df8907aa9d62232e4b07c8ff37 100644 --- a/tests/unit/suppressors/test_order_based.py +++ b/tests/unit/suppressors/test_order_based.py @@ -1,17 +1,19 @@ """Tests for order_based suppressor.""" -from src.suppressors.order_based import suppress_order_based +from src.suppressors.order_based import OrderBasedSuppressor def test_supress_order_based(): - """Test test_supress_order_based.""" + """Test OrderBasedSuppressor.""" + suppressor = OrderBasedSuppressor() + annotations = [ (10, 16, "Marian"), (10, 18, "Marianna"), (30, 35, "Nowak"), (50, 59, "Wrocławiu"), ] - result = suppress_order_based(annotations) + result = suppressor.suppress(annotations) expected = [ (10, 16, "Marian"), (30, 35, "Nowak"), diff --git a/tox.ini b/tox.ini index 00b9e8da1e5fe63894ac2697cf26896ba15420a9..a18820008861af822883689de353c8b9555f8b99 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = pep8,docstyle +envlist = pep8,docstyle,tests skipsdist = True [testenv:pep8] @@ -16,6 +16,14 @@ basepython = python3.8 commands = pydocstyle --verbose {posargs} +[testenv:tests] +deps = + -r requirements.txt + -r requirements.dev.txt +basepython = python3.8 +commands = + pytest tests/ + [flake8] # W504 skipped because it is overeager and unnecessary ignore = W504,E203,W503 @@ -24,7 +32,6 @@ exclude = .git,.venv,.tox,dist,doc,*egg,build,venv import-order-style = pep8 max-line-length = 88 - [pydocstyle] # D104 Missing docstring in public package # D203 1 blank line required before class docstring