Skip to content
Snippets Groups Projects
Commit acb98ee6 authored by Maja Jablonska
Browse files

Add a corrected misc column

parent 142eead6
1 merge request: !46 "Merge COMBO 3.0 into master"
...@@ -86,7 +86,7 @@ def serialize_field(field: Any) -> str: ...@@ -86,7 +86,7 @@ def serialize_field(field: Any) -> str:
return "{}".format(field) return "{}".format(field)
def serialize_token_list(tokenlist: conllu.models.TokenList) -> str: def serialize_token_list(tokenlist: conllu.models.TokenList) -> str:
KEYS_ORDER = ['idx', 'text', 'lemma', 'upostag', 'xpostag', 'entity_type', 'feats', 'head', 'deprel', 'deps', 'misc'] KEYS_ORDER = ['idx', 'text', 'lemma', 'upostag', 'xpostag', 'feats', 'head', 'deprel', 'deps']
lines = [] lines = []
if tokenlist.metadata: if tokenlist.metadata:
...@@ -99,6 +99,18 @@ def serialize_token_list(tokenlist: conllu.models.TokenList) -> str: ...@@ -99,6 +99,18 @@ def serialize_token_list(tokenlist: conllu.models.TokenList) -> str:
for token_data in tokenlist: for token_data in tokenlist:
line = '\t'.join(serialize_field(token_data[k]) for k in KEYS_ORDER) line = '\t'.join(serialize_field(token_data[k]) for k in KEYS_ORDER)
serialized_misc = serialize_field(token_data['misc'])
serialized_entity_type = serialize_field(token_data['entity_type'])
if serialized_misc == '_' and serialized_entity_type == '_':
serialized_last_column = '_'
elif serialized_misc == '_':
serialized_last_column = serialized_entity_type
elif serialized_entity_type == '_':
serialized_last_column = serialized_misc
else:
serialized_last_column = serialized_entity_type + ' | ' + serialized_misc
line += '\t' + serialized_last_column
lines.append(line) lines.append(line)
return '\n'.join(lines) + "\n\n" return '\n'.join(lines) + "\n\n"
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment