diff --git a/new_datasets/iterate_hf_dataset.py b/new_datasets/iterate_hf_dataset.py index a28023787070a5c9c7eb0d122f5d9b946bee1fa0..5d8caf2b60eb8e75bdf22c01dfd1d6e1386caf62 100644 --- a/new_datasets/iterate_hf_dataset.py +++ b/new_datasets/iterate_hf_dataset.py @@ -8,10 +8,10 @@ def iterate_dataset(dataset_path: str, dataset_name: Optional[str], cache_dir: s dataset_dict = datasets.load_dataset(dataset_path, dataset_name if len(dataset_name) > 0 else None, cache_dir=cache_dir) dataset = dataset_dict[split_name] - for it in dataset[:items]: - print(it) counter = 0 for it in dataset: + if items > counter: + print(it) counter += 1 print(f'all_items {counter}')