Skip to content
Snippets Groups Projects

Resolve "Read not only .txt files"

Files

+ 23
10
@@ -23,21 +23,30 @@ class EasymatcherWorker(nlp_ws.NLPWorker):
@staticmethod
def prepare_and_append_document(
file_path: str | Path, document_path: str | Path
file_path: str | Path, document_path: str | Path
) -> None:
"""Formats and appends plain texts into jsonl file."""
document = {}
with open(document_path, "r", encoding="utf-8") as _df:
document["text"] = _df.read()
if str(document_path).endswith(".jsonl"):
Please register or sign in to reply
with open(file_path, "a", encoding="utf-8") as _f:
with open(document_path, "r", encoding="utf-8") as _df:
for line in _df:
line_data = json.loads(line)
document['text'] = line_data['text']
# document['label'] = []
_f.write(json.dumps(document) + "\n")
else:
with open(document_path, "r", encoding="utf-8") as _df:
document["text"] = _df.read()
with open(file_path, "a", encoding="utf-8") as _f:
_f.write(f"{json.dumps(document)}\n")
with open(file_path, "a", encoding="utf-8") as _f:
_f.write(f"{json.dumps(document)}\n")
def process(
self,
input_path: str,
task_options: dict[str, str | int | float],
output_path: str,
self,
input_path: str,
task_options: dict[str, str | int | float],
output_path: str,
) -> None:
"""Called for each request made to the worker.
@@ -68,7 +77,11 @@ class EasymatcherWorker(nlp_ws.NLPWorker):
if os.path.isdir(input_path):
for file in os.listdir(input_path):
if file.endswith(".txt"):
if file.endswith(".jsonl"):
EasymatcherWorker.prepare_and_append_document(
tmpf.name, Path(input_path) / file
)
elif file.endswith(".txt"):
EasymatcherWorker.prepare_and_append_document(
tmpf.name, Path(input_path) / file
)
Loading