From 03b004d3f508de8caa2e7b00c92eca3ba6c6ad74 Mon Sep 17 00:00:00 2001 From: Marek Maziarz <marek.maziarz@pwr.edu.pl> Date: Wed, 2 Oct 2024 17:46:27 +0200 Subject: [PATCH] wrong method pack_sentences_to_max_tokens evocation, blank lines --- src/winer_worker.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/winer_worker.py b/src/winer_worker.py index 0793276..c0265e4 100644 --- a/src/winer_worker.py +++ b/src/winer_worker.py @@ -71,14 +71,14 @@ class WinerWorker(nlp_ws.NLPWorker): packed_plain_inputs = [] packed_tokenized_inputs = [] packed_sent_starts = [] - + current_plain_inputs = [] current_tokenized_inputs = [] current_sent_start = [] current_token_count = 0 for sentence, sentence_tokens, sent_start in zip( - plain_inputs, tokenized_inputs, sent_starts): + plain_inputs, tokenized_inputs, sent_starts): if current_token_count + len(sentence_tokens) <= max_tokens: current_plain_inputs.append(sentence) @@ -89,7 +89,7 @@ class WinerWorker(nlp_ws.NLPWorker): packed_plain_inputs.append(' '.join(current_plain_inputs)) packed_tokenized_inputs.append(current_tokenized_inputs) packed_sent_starts.append(current_sent_start[0]) - + # Reset for a new batch current_plain_inputs = [] current_tokenized_inputs = [] @@ -101,9 +101,8 @@ class WinerWorker(nlp_ws.NLPWorker): packed_plain_inputs.append(' '.join(current_plain_inputs)) packed_tokenized_inputs.append(current_tokenized_inputs) packed_sent_starts.append(current_sent_start[0]) - - return packed_plain_inputs, packed_tokenized_inputs, packed_sent_starts + return packed_plain_inputs, packed_tokenized_inputs, packed_sent_starts def process( self, @@ -136,7 +135,7 @@ class WinerWorker(nlp_ws.NLPWorker): packed_tokenized_inputs, packed_sent_starts ) = ( - pack_sentences_to_max_tokens( + self.pack_sentences_to_max_tokens( plain_inputs, tokenized_inputs, sent_starts, @@ -165,4 +164,4 @@ class WinerWorker(nlp_ws.NLPWorker): # Clean the memory gc.collect() - torch.cuda.empty_cache() + torch.cuda.empty_cache() \ No newline at end of file -- GitLab