Skip to content
Snippets Groups Projects
Commit db1eced8 authored by Marcin Wątroba's avatar Marcin Wątroba
Browse files

download_dataset command

parent 4228616a
No related merge requests found
import argparse
from typing import Optional
import datasets
def iterate_dataset(dataset_path: str, dataset_name: Optional[str], cache_dir: str, split_name: str, items: int):
    """Load one split of a dataset, print its first examples, and count all of them.

    Args:
        dataset_path: Path or identifier handed to ``datasets.load_dataset``.
        dataset_name: Optional configuration name; ``None`` or an empty
            string both mean "no configuration".
        cache_dir: Directory ``datasets`` should use as its cache.
        split_name: Key of the split to read from the loaded dataset dict.
        items: Maximum number of leading examples to print.
    """
    dataset_dict = datasets.load_dataset(
        dataset_path,
        # `or None` covers both None and "" without calling len() on None.
        dataset_name or None,
        # Must be a keyword: the third positional parameter is `data_dir`.
        cache_dir=cache_dir,
    )
    dataset = dataset_dict[split_name]

    # Iterate rows instead of slicing: `dataset[:items]` yields a dict of
    # columns, so looping over it would print column names, not examples.
    for index, item in enumerate(dataset):
        if index >= items:
            break
        print(item)

    # The full pass over the split is kept on purpose rather than replaced by
    # len(); presumably it forces every example to be read - TODO confirm.
    counter = sum(1 for _ in dataset)
    print(f'all_items {counter}')
if __name__ == '__main__':
    # Command-line entry point: parse the dataset location and preview size,
    # then hand them to iterate_dataset.
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset_path", required=True)
    # Defaults to "" so an omitted name is treated as "no configuration".
    parser.add_argument("--dataset_name", default="")
    parser.add_argument("--cache_dir")
    parser.add_argument("--split_name", required=True)
    # Let argparse do the int conversion so bad input gives a usage error.
    parser.add_argument("--items", type=int, required=True)
    args = parser.parse_args()
    # split_name was previously missing from this call, which shifted `items`
    # into the split_name slot and raised TypeError for the missing argument.
    iterate_dataset(args.dataset_path, args.dataset_name, args.cache_dir, args.split_name, args.items)
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment