Skip to content
Snippets Groups Projects
Commit ebc3b872 authored by Michał Marcińczuk's avatar Michał Marcińczuk
Browse files

Add separator between sentences.

parent f46ac273
1 merge request: !41 "Dev v07"
...@@ -37,6 +37,13 @@ class SequenceFeatures(InputFeatures): ...@@ -37,6 +37,13 @@ class SequenceFeatures(InputFeatures):
self.valid_ids = [] self.valid_ids = []
self.append(0, 0, LABEL_IGNORE_ID, 0) # adding <s> self.append(0, 0, LABEL_IGNORE_ID, 0) # adding <s>
def add_cls(self):
    """Append the sequence-start token (id 0) to this feature sequence.

    The token is appended with input mask 0, label ``LABEL_IGNORE_ID`` and
    valid id 0.
    """
    # Token id 0 is the start-of-sequence marker ("<s>"), not "</s>".
    # NOTE(review): the ids 0 / 2 look like a RoBERTa-style vocabulary --
    # confirm against the tokenizer actually in use.
    self.append(0, 0, LABEL_IGNORE_ID, 0)  # adding <s>
def add_separator(self):
    """Append the separator token (id 2) unless it is already the last token.

    Calling this repeatedly never produces two consecutive separators. When
    a separator is added it carries input mask 0, label ``LABEL_IGNORE_ID``
    and valid id 0.
    """
    # An empty sequence has no trailing separator either; guard the [-1]
    # lookup so it cannot raise IndexError.
    if not self.input_ids or self.input_ids[-1] != 2:
        self.append(2, 0, LABEL_IGNORE_ID, 0)  # adding </s>
def append(self, token_id: int, input_mask: int, label_id: int, valid_id: int): def append(self, token_id: int, input_mask: int, label_id: int, valid_id: int):
self.input_ids.append(token_id) self.input_ids.append(token_id)
self.input_mask.append(input_mask) self.input_mask.append(input_mask)
...@@ -50,7 +57,7 @@ class SequenceFeatures(InputFeatures): ...@@ -50,7 +57,7 @@ class SequenceFeatures(InputFeatures):
self.valid_ids.extend(token.valid_ids) self.valid_ids.extend(token.valid_ids)
def close_and_fill(self, max_length=128): def close_and_fill(self, max_length=128):
self.append(2, 0, LABEL_IGNORE_ID, 0) # adding </s> self.add_separator()
while len(self.input_ids) < max_length: while len(self.input_ids) < max_length:
self.append(1, 0, LABEL_IGNORE_ID, 0) # adding padding self.append(1, 0, LABEL_IGNORE_ID, 0) # adding padding
...@@ -99,6 +106,8 @@ def convert_examples_to_features_sq(examples: List[InputExample], label_list: Li ...@@ -99,6 +106,8 @@ def convert_examples_to_features_sq(examples: List[InputExample], label_list: Li
features.append(sf) features.append(sf)
sf = SequenceFeatures() sf = SequenceFeatures()
sf.add_token(tf) sf.add_token(tf)
if tf in tokend_ending_sequence:
sf.add_separator()
if sf.length() > 1: if sf.length() > 1:
sf.close_and_fill(max_seq_length) sf.close_and_fill(max_seq_length)
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment