From 8864e2687c1cdf1bd563953f262402e7c9bbaa76 Mon Sep 17 00:00:00 2001 From: Jakub-Goluch <99048106+Jakub-Goluch@users.noreply.github.com> Date: Mon, 26 Jun 2023 21:50:17 +0200 Subject: [PATCH] Add feature to check whether document has a valid json/jsonl format, add tests --- src/easymatcher_worker.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/easymatcher_worker.py b/src/easymatcher_worker.py index 8d79c8d..5e59e71 100644 --- a/src/easymatcher_worker.py +++ b/src/easymatcher_worker.py @@ -20,9 +20,12 @@ class EasymatcherWorker(nlp_ws.NLPWorker): It relies on the use of an easymatcher tool which can be found he under - https://gitlab.clarin-pl.eu/knowledge-extraction/tools/easymatcher """ + @staticmethod - def is_jsonl(document_path: str | Path) -> bool: - """Validates whether text file has json/jsonl structure and has "text" keyword""" + def is_jsonl( + document_path: str | Path + ) -> bool: + """Validates whether text file has json/jsonl structure and has "text" keyword.""" try: with open(document_path, 'r', encoding="utf-8") as file: for line in file: -- GitLab