Skip to content
Snippets Groups Projects
Commit 6a94b42e authored by Konrad Wojtasik
Browse files

Update easymatcher_worker.py

parent 3130c89d
1 merge request: !4 Resolve "Read not only .txt files"
Pipeline #11438 failed with stages
in 3 minutes and 8 seconds
...@@ -47,9 +47,9 @@ class EasymatcherWorker(nlp_ws.NLPWorker): ...@@ -47,9 +47,9 @@ class EasymatcherWorker(nlp_ws.NLPWorker):
with open(document_path, "r", encoding="utf-8") as _df: with open(document_path, "r", encoding="utf-8") as _df:
for line in _df: for line in _df:
line_data = json.loads(line) line_data = json.loads(line)
document['text'] = line_data['text'] # document['text'] = line_data['text']
# document['label'] = [] # document['label'] = []
_f.write(json.dumps(document) + "\n") _f.write(json.dumps(line_data) + "\n")
else: else:
with open(document_path, "r", encoding="utf-8") as _df: with open(document_path, "r", encoding="utf-8") as _df:
document["text"] = _df.read() document["text"] = _df.read()
...@@ -107,5 +107,7 @@ class EasymatcherWorker(nlp_ws.NLPWorker): ...@@ -107,5 +107,7 @@ class EasymatcherWorker(nlp_ws.NLPWorker):
os.unlink(tmpf.name) os.unlink(tmpf.name)
with open(output_path, "w", encoding="utf-8") as _f: with open(output_path, "w", encoding="utf-8") as _f:
for out_document in out_documents: for out_document, document in zip(out_documents, documents):
_f.write(json.dumps(out_document) + "\n") # We want to keep content of the original labeled documents
document['label'] = out_document['label']
_f.write(json.dumps(document) + "\n")
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment