Skip to content
Snippets Groups Projects
Commit 19956de0 authored by bmatysiak
Browse files

Adjust write_json_as_tei to new schema

parent dfc1448e
1 merge request: !10 "Clarin json"
Pipeline #13791 passed with stages
in 2 minutes and 27 seconds
...@@ -457,14 +457,15 @@ def write_json_as_tei(json_parts, output_path, _log: logging.Logger): ...@@ -457,14 +457,15 @@ def write_json_as_tei(json_parts, output_path, _log: logging.Logger):
indent_lvl, output_file indent_lvl, output_file
) )
sent_present = "sentence" in json_part sent_present = "spans" in json_part and \
"sentence" in json_part["spans"]
p_sent = 0 p_sent = 0
i_token_in_sent = 1 i_token_in_sent = 1
for token in json_part["tokens"]: for token in json_part["tokens"]:
if sent_present and token['position'][0] == \ if sent_present and token['start'] == \
json_part['sentence'][p_sent]['position'][0]: json_part['spans']['sentence'][p_sent]['start']:
indent_lvl = _write_opening_tag( indent_lvl = _write_opening_tag(
's', [f'{i_json_part+1}.{p_sent+1}'], 's', [f'{i_json_part+1}.{p_sent+1}'],
indent_lvl, output_file indent_lvl, output_file
...@@ -627,8 +628,8 @@ def write_json_as_tei(json_parts, output_path, _log: logging.Logger): ...@@ -627,8 +628,8 @@ def write_json_as_tei(json_parts, output_path, _log: logging.Logger):
segment_id += 1 segment_id += 1
i_token_in_sent += 1 i_token_in_sent += 1
if sent_present and token['position'][1] == \ if sent_present and token['stop'] == \
json_part['sentence'][p_sent]['position'][1]: json_part['spans']['sentence'][p_sent]['stop']:
indent_lvl = _write_closing_tag( indent_lvl = _write_closing_tag(
'</s>', indent_lvl, output_file '</s>', indent_lvl, output_file
) )
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment