Skip to content
Snippets Groups Projects
Commit 8864e268 authored by Jakub-Goluch's avatar Jakub-Goluch
Browse files

Add feature to check whether document has a valid json/jsonl format, add tests

parent 73c0b676
Branches
1 merge request!4Resolve "Read not only .txt files"
Pipeline #11286 failed with stages
in 3 minutes and 9 seconds
......@@ -20,9 +20,12 @@ class EasymatcherWorker(nlp_ws.NLPWorker):
It relies on the use of an easymatcher tool which can be found here -
https://gitlab.clarin-pl.eu/knowledge-extraction/tools/easymatcher
"""
@staticmethod
def is_jsonl(document_path: str | Path) -> bool:
"""Validates whether text file has json/jsonl structure and has "text" keyword"""
def is_jsonl(
document_path: str | Path
) -> bool:
"""Validates whether text file has json/jsonl structure and has "text" keyword."""
try:
with open(document_path, 'r', encoding="utf-8") as file:
for line in file:
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment