Skip to content
Snippets Groups Projects
Commit 39b3f106 authored by Jarema Radom
Browse files

fix for bpe related decoding

parent 2b98e2b1
No related branches found
No related tags found
1 merge request: !16 "S3 synchronization and CI"
Pipeline #3398 passed
......@@ -35,6 +35,8 @@ def decode(tokens, labels_decoded, tokenizer, bpe=False):
for label, token in zip(labels_decoded, tokens):
if bpe:
token_str = tokenizer.decode(token)
if token_str.startswith(" "):
token_str = token_str[1:]
else:
token_str = tokenizer.convert_ids_to_tokens([token])[0]
if token_str == "[PAD]":
......@@ -43,7 +45,6 @@ def decode(tokens, labels_decoded, tokenizer, bpe=False):
word.append(token_str.replace("##", ""))
else:
if len(word) > 0:
if not bpe or word_end != ' ':
word.append(word_end)
text_recovered.append("".join(word))
word = []
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment