From 039a1d8a232fbbf0c7a363c920c7d61fae3d4132 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pogoda?= <mipo57@e-science.pl>
Date: Mon, 19 Jun 2023 08:26:50 +0200
Subject: [PATCH] unit testing

---
 .gitlab-ci.yml                               |  6 ++++++
 requirements.dev.txt                         |  1 +
 src/detections/__init__.py                   |  4 +---
 src/detectors/date/utils.py                  |  4 ++--
 src/detectors/ner/ner.py                     |  2 ++
 src/detectors/url/common.py                  |  6 +++---
 src/suppressors/__init__.py                  |  4 ++--
 src/utils/__init__.py                        | 11 +++++++++++
 tests/unit/pipeline/test_default.py          |  2 +-
 tests/unit/pipeline/test_sequential_jsonl.py |  2 +-
 tests/unit/replacers/test_date_replacer.py   | 12 ++++++------
 tests/unit/replacers/test_email_replacer.py  |  4 ++--
 tests/unit/replacers/test_tag_replacer.py    |  6 +++---
 tests/unit/replacers/test_user_replacer.py   |  4 ++--
 tests/unit/suppressors/test_order_based.py   |  8 +++++---
 tox.ini                                      | 11 +++++++++--
 16 files changed, 57 insertions(+), 30 deletions(-)
 create mode 100644 requirements.dev.txt

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index dcf62b2..3ada971 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -4,6 +4,7 @@ cache:
     - .tox
 stages:
   - check_style
+  - test
   - build
 before_script:
   - pip install tox==2.9.1
@@ -18,6 +19,11 @@ docstyle:
   script:
     - tox -v -e docstyle
 
+auto_tests:
+  stage: test
+  script:
+    - tox -v -e tests
+
 build_image:
   stage: build
   image: 'docker:18.09.7'
diff --git a/requirements.dev.txt b/requirements.dev.txt
new file mode 100644
index 0000000..28706be
--- /dev/null
+++ b/requirements.dev.txt
@@ -0,0 +1 @@
+pytest==7.3.2
diff --git a/src/detections/__init__.py b/src/detections/__init__.py
index 23a3978..635f681 100644
--- a/src/detections/__init__.py
+++ b/src/detections/__init__.py
@@ -1,7 +1,6 @@
 from src.detections.date import DateDetection
 from src.detections.detection import (
     Detection,
-    DetectionType,
     CityDetection,
     CountryDetection,
     EmailDetection,
@@ -23,7 +22,7 @@ from src.detections.detection import (
     UrlDetection,
     UserDetection,
 )
-from src.utils.subclasses import get_sublcasses
+from src.utils.utils import get_sublcasses
 
 DETECTION_CLASSES_MAP = {
     detection_class.TYPE_NAME: detection_class
@@ -32,7 +31,6 @@ DETECTION_CLASSES_MAP = {
 
 __all__ = [
     "Detection",
-    "DetectionType",
     "CityDetection",
     "CountryDetection",
     "EmailDetection",
diff --git a/src/detectors/date/utils.py b/src/detectors/date/utils.py
index 05fc9fd..d2d2a58 100644
--- a/src/detectors/date/utils.py
+++ b/src/detectors/date/utils.py
@@ -1,8 +1,8 @@
 """Utility scripts for parsing date detections."""
 
-from typing import List, Tuple
+from typing import List, Tuple, Optional
 
-from src.detections import DateDetection, Optional
+from src.detections import DateDetection
 
 
 def _parse_day_or_month(re_entry) -> List[Tuple[int, int, DateDetection]]:
diff --git a/src/detectors/ner/ner.py b/src/detectors/ner/ner.py
index 6b4e900..f5d6531 100644
--- a/src/detectors/ner/ner.py
+++ b/src/detectors/ner/ner.py
@@ -59,6 +59,8 @@ class NerDetector(Detector):
             if issubclass(ner_detection, MorphosyntacticInfoMixin):
                 if start in morpho_tags:
                     kwargs["morpho_tag"] = morpho_tags[start]
+                else:
+                    kwargs["morpho_tag"] = None
 
             result.append((start, end, ner_detection(**kwargs)))
 
diff --git a/src/detectors/url/common.py b/src/detectors/url/common.py
index 085c701..b9092ca 100644
--- a/src/detectors/url/common.py
+++ b/src/detectors/url/common.py
@@ -1,12 +1,12 @@
 """Helper module for detecting URLs."""
 
-from typing import List, AnyStr
+from typing import List
 
 import regex as re
-from regex import compile, Pattern
+from regex.regex import Pattern
 
 
-def generate_url_regex(exeptions: List[str]) -> Pattern[AnyStr @ compile]:
+def generate_url_regex(exeptions: List[str]) -> Pattern:
     """Returns a regex for detecting urls.
 
     Args:
diff --git a/src/suppressors/__init__.py b/src/suppressors/__init__.py
index 7a5f31c..c42ebd8 100644
--- a/src/suppressors/__init__.py
+++ b/src/suppressors/__init__.py
@@ -1,3 +1,3 @@
-from src.suppressors.order_based import suppress_order_based
+from src.suppressors.order_based import OrderBasedSuppressor
 
-__all__ = [suppress_order_based]
+__all__ = ["OrderBasedSuppressor"]
diff --git a/src/utils/__init__.py b/src/utils/__init__.py
index e69de29..3698ae2 100644
--- a/src/utils/__init__.py
+++ b/src/utils/__init__.py
@@ -0,0 +1,11 @@
+from src.utils.utils import (
+    consume,
+    subdict,
+    get_sublcasses,
+)
+
+__all__ = [
+    "consume",
+    "subdict",
+    "get_sublcasses",
+]
diff --git a/tests/unit/pipeline/test_default.py b/tests/unit/pipeline/test_default.py
index f0e026d..fd32364 100644
--- a/tests/unit/pipeline/test_default.py
+++ b/tests/unit/pipeline/test_default.py
@@ -17,7 +17,7 @@ class _MockInputParser(InputParser):
 
 class _MockDetector(Detector):
     def detect(self, text, annotations):
-        return [(0, 3, NameDetection())]
+        return [(0, 3, NameDetection("morpho_tag"))]
 
 
 class _MockSuppressor(Suppressor):
diff --git a/tests/unit/pipeline/test_sequential_jsonl.py b/tests/unit/pipeline/test_sequential_jsonl.py
index b09fa6f..1bd5ff4 100644
--- a/tests/unit/pipeline/test_sequential_jsonl.py
+++ b/tests/unit/pipeline/test_sequential_jsonl.py
@@ -17,7 +17,7 @@ class _MockInputParser(InputParser):
 
 class _MockDetector(Detector):
     def detect(self, text, annotations):
-        return [(0, 3, NameDetection())]
+        return [(0, 3, NameDetection("morpho_tag"))]
 
 
 class _MockSuppressor(Suppressor):
diff --git a/tests/unit/replacers/test_date_replacer.py b/tests/unit/replacers/test_date_replacer.py
index e4cca6a..9402ff1 100644
--- a/tests/unit/replacers/test_date_replacer.py
+++ b/tests/unit/replacers/test_date_replacer.py
@@ -8,10 +8,10 @@ def test_date_replacer():
     """Test date replacer."""
     text = "Ala Brzeszczot urodziła sie 05.05.2005 we Wrocławiu"
     detections = [
-        (0, 3, NameDetection()),
-        (4, 14, SurnameDetection()),
+        (0, 3, NameDetection("subst:sg:nom:f")),
+        (4, 14, SurnameDetection("subst:sg:nom.acc:m3")),
         (28, 38, DateDetection()),
-        (42, 51, CityDetection()),
+        (42, 51, CityDetection("subst:sg:loc:m3")),
     ]
 
     replacer = DateReplacer()
@@ -21,9 +21,9 @@ def test_date_replacer():
     expected_text_beggining = "Ala Brzeszczot urodziła sie "
     expected_text_ending = " we Wrocławiu"
     exptected_detections_left = [
-        (0, 3, NameDetection()),
-        (4, 14, SurnameDetection()),
-        (len(result[0]) - 9, len(result[0]), CityDetection()),
+        (0, 3, NameDetection("subst:sg:nom:f")),
+        (4, 14, SurnameDetection("subst:sg:nom.acc:m3")),
+        (len(result[0]) - 9, len(result[0]), CityDetection("subst:sg:loc:m3")),
     ]
 
     assert result[0].startswith(expected_text_beggining)
diff --git a/tests/unit/replacers/test_email_replacer.py b/tests/unit/replacers/test_email_replacer.py
index 1bda3cc..30c7698 100644
--- a/tests/unit/replacers/test_email_replacer.py
+++ b/tests/unit/replacers/test_email_replacer.py
@@ -10,7 +10,7 @@ def test_email_replacer():
     detections = [
         (0, 7, EmailDetection()),
         (21, 31, DateDetection()),
-        (35, 44, CityDetection()),
+        (35, 44, CityDetection("subst:sg:loc:m3")),
     ]
 
     replacer = EmailReplacer()
@@ -19,7 +19,7 @@ def test_email_replacer():
     expected_text_ending = " urodziła sie 05.05.2005 we Wrocławiu"
     exptected_detections_left = [
         (len(result[0]) - 23, len(result[0]) - 13, DateDetection()),
-        (len(result[0]) - 9, len(result[0]), CityDetection()),
+        (len(result[0]) - 9, len(result[0]), CityDetection("subst:sg:loc:m3")),
     ]
 
     assert result[0].endswith(expected_text_ending)
diff --git a/tests/unit/replacers/test_tag_replacer.py b/tests/unit/replacers/test_tag_replacer.py
index f8dd7a2..ee62de9 100644
--- a/tests/unit/replacers/test_tag_replacer.py
+++ b/tests/unit/replacers/test_tag_replacer.py
@@ -8,10 +8,10 @@ def test_replace_with_tags():
     """Test replace with tags."""
     text = "Ala Brzeszczot urodziła sie 05.05.2005 we Wrocławiu"
     detections = [
-        (0, 3, NameDetection()),
-        (4, 14, SurnameDetection()),
+        (0, 3, NameDetection("subst:sg:nom:f")),
+        (4, 14, SurnameDetection("subst:sg:nom.acc:m3")),
         (28, 38, DateDetection()),
-        (42, 51, CityDetection()),
+        (42, 51, CityDetection("subst:sg:loc:m3")),
     ]
 
     replacer = TagReplacer()
diff --git a/tests/unit/replacers/test_user_replacer.py b/tests/unit/replacers/test_user_replacer.py
index ded256a..259e45c 100644
--- a/tests/unit/replacers/test_user_replacer.py
+++ b/tests/unit/replacers/test_user_replacer.py
@@ -10,7 +10,7 @@ def test_user_replacer():
     detections = [
         (0, 7, UserDetection()),
         (21, 31, DateDetection()),
-        (35, 44, CityDetection()),
+        (35, 44, CityDetection("subst:sg:loc:m3")),
     ]
 
     replacer = UserReplacer()
@@ -19,7 +19,7 @@ def test_user_replacer():
     expected_text_ending = " urodziła sie 05.05.2005 we Wrocławiu"
     exptected_detections_left = [
         (len(result[0]) - 23, len(result[0]) - 13, DateDetection()),
-        (len(result[0]) - 9, len(result[0]), CityDetection()),
+        (len(result[0]) - 9, len(result[0]), CityDetection("subst:sg:loc:m3")),
     ]
 
     assert result[0].endswith(expected_text_ending)
diff --git a/tests/unit/suppressors/test_order_based.py b/tests/unit/suppressors/test_order_based.py
index 8374dc7..c6f167f 100644
--- a/tests/unit/suppressors/test_order_based.py
+++ b/tests/unit/suppressors/test_order_based.py
@@ -1,17 +1,19 @@
 """Tests for order_based suppressor."""
 
-from src.suppressors.order_based import suppress_order_based
+from src.suppressors.order_based import OrderBasedSuppressor
 
 
 def test_supress_order_based():
-    """Test test_supress_order_based."""
+    """Test OrderBasedSuppressor."""
+    suppressor = OrderBasedSuppressor()
+
     annotations = [
         (10, 16, "Marian"),
         (10, 18, "Marianna"),
         (30, 35, "Nowak"),
         (50, 59, "Wrocławiu"),
     ]
-    result = suppress_order_based(annotations)
+    result = suppressor.suppress(annotations)
     expected = [
         (10, 16, "Marian"),
         (30, 35, "Nowak"),
diff --git a/tox.ini b/tox.ini
index 00b9e8d..a188200 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = pep8,docstyle
+envlist = pep8,docstyle,tests
 skipsdist = True
 
 [testenv:pep8]
@@ -16,6 +16,14 @@ basepython = python3.8
 commands =
     pydocstyle --verbose {posargs}
 
+[testenv:tests]
+deps =
+    -rrequirements.txt
+    -rrequirements.dev.txt
+basepython = python3.8
+commands =
+    pytest tests/
+
 [flake8]
 # W504 skipped because it is overeager and unnecessary
 ignore = W504,E203,W503
@@ -24,7 +32,6 @@ exclude = .git,.venv,.tox,dist,doc,*egg,build,venv
 import-order-style = pep8
 max-line-length = 88
 
-
 [pydocstyle]
 # D104 Missing docstring in public package
 # D203 1 blank line required before class docstring
-- 
GitLab