From acb98ee6b413017e3fbdd10e0698875b9867bc84 Mon Sep 17 00:00:00 2001
From: Maja Jablonska <majajjablonska@gmail.com>
Date: Mon, 27 Nov 2023 22:23:23 +1100
Subject: [PATCH] Add a corrected misc column

---
 combo/data/api.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/combo/data/api.py b/combo/data/api.py
index b36b616..ab00da5 100644
--- a/combo/data/api.py
+++ b/combo/data/api.py
@@ -86,7 +86,7 @@ def serialize_field(field: Any) -> str:
     return "{}".format(field)
 
 def serialize_token_list(tokenlist: conllu.models.TokenList) -> str:
-    KEYS_ORDER = ['idx', 'text', 'lemma', 'upostag', 'xpostag', 'entity_type', 'feats', 'head', 'deprel', 'deps', 'misc']
+    KEYS_ORDER = ['idx', 'text', 'lemma', 'upostag', 'xpostag', 'feats', 'head', 'deprel', 'deps']
     lines = []
 
     if tokenlist.metadata:
@@ -99,6 +99,18 @@ def serialize_token_list(tokenlist: conllu.models.TokenList) -> str:
 
     for token_data in tokenlist:
         line = '\t'.join(serialize_field(token_data[k]) for k in KEYS_ORDER)
+        serialized_misc = serialize_field(token_data['misc'])
+        serialized_entity_type = serialize_field(token_data['entity_type'])
+        if serialized_misc == '_' and serialized_entity_type == '_':
+            serialized_last_column = '_'
+        elif serialized_misc == '_':
+            serialized_last_column = serialized_entity_type
+        elif serialized_entity_type == '_':
+            serialized_last_column = serialized_misc
+        else:
+            serialized_last_column = serialized_entity_type + ' | ' + serialized_misc
+
+        line += '\t' + serialized_last_column
         lines.append(line)
 
     return '\n'.join(lines) + "\n\n"
-- 
GitLab