Skip to content
Snippets Groups Projects
Commit f085d6da authored by Maja Jabłońska, committed by Martyna Wiącek
Browse files

Add a TokenCountBatchSamplerTest

parent efc32998
1 merge request: !46 "Merge COMBO 3.0 into master"
"""Sampler tests."""
import unittest
from combo.data import TokenCountBatchSampler, Instance
from combo.data.fields.text_field import TextField
from combo.data.tokenizers import TokenizerToken
class TokenCountBatchSamplerTest(unittest.TestCase):
    """Unit test for the batches produced by ``TokenCountBatchSampler``."""

    def setUp(self) -> None:
        """Build one ``Instance`` per sentence, tokenized on whitespace.

        The three sentences split into 5, 1 and 4 tokens respectively.
        """
        self.sentences = [
            "First sentence makes full batch.",
            "Short",
            "This ends first batch",
        ]
        self.dataset = [
            Instance(
                {"sentence": TextField([TokenizerToken(word) for word in text.split()], {})}
            )
            for text in self.sentences
        ]

    def test_batches(self):
        """An unshuffled sampler with ``word_batch_size=2`` yields two batches."""
        # Arrange: shuffling is disabled so the batch order can be asserted.
        batch_sampler = TokenCountBatchSampler(
            self.dataset,
            word_batch_size=2,
            shuffle_dataset=False,
        )

        # Act: query the reported length first, then exhaust the sampler.
        batch_count = len(batch_sampler)
        batches = list(batch_sampler)

        # Assert
        self.assertEqual(2, batch_count)
        # Per the original author's note: sorting by length combined with
        # word_batch_size places instances 1 and 2 together in the first batch.
        expected_batches = [[1, 2], [0]]
        self.assertListEqual(expected_batches, batches)
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment