Update easymatcher_worker.py

6a94b42e · Konrad Wojtasik · 3130c89d · 6a94b42e
Commit 6a94b42e authored 2 years ago by Konrad Wojtasik
--- a/src/easymatcher_worker.py
+++ b/src/easymatcher_worker.py
@@ -47,9 +47,9 @@ class EasymatcherWorker(nlp_ws.NLPWorker):
                with open(document_path, "r", encoding="utf-8") as _df:
                    for line in _df:
                        line_data = json.loads(line)
-                        document['text'] = line_data['text']
+                        # document['text'] = line_data['text'] 
                        # document['label'] = []
-                        _f.write(json.dumps(document) + "\n")
+                        _f.write(json.dumps(line_data) + "\n")
        else:
            with open(document_path, "r", encoding="utf-8") as _df:
                document["text"] = _df.read()
@@ -107,5 +107,7 @@ class EasymatcherWorker(nlp_ws.NLPWorker):
        os.unlink(tmpf.name)
        with open(output_path, "w", encoding="utf-8") as _f:
-            for out_document in out_documents:
+            for out_document, document in zip(out_documents, documents):
-                _f.write(json.dumps(out_document) + "\n")
+                # Keep the content of the original labeled document; only its 'label' field is replaced with the one from out_document
+                document['label'] = out_document['label']
+                _f.write(json.dumps(document) + "\n")