from datasets import Dataset, load_dataset

from new_experiment.new_dependency_provider import get_experiment_repository, get_minio_client, \
    get_minio_audio_record_repository
from new_experiment.pipeline.dataset_importer.fleurs_dataset_importer import FleursDatasetImporter
from new_experiment.pipeline.dataset_importer.minds14_dataset_importer import Minds14DatasetImporter
from new_experiment.utils.hf_dataset_importer import HfDatasetImporter


def import_single_dataset(importer: HfDatasetImporter, dataset: Dataset):
    """Hand ``dataset`` to ``importer`` by calling its ``process_dataset`` method.

    Args:
        importer: The importer that will consume the dataset.
        dataset: The already-loaded dataset split to process.
    """
    importer.process_dataset(dataset)


def import_fleurs_dataset(dataset_lang: str, experiment_dataset_name: str):
    """Import the ``'test'`` split of ``google/fleurs`` with a ``FleursDatasetImporter``.

    Args:
        dataset_lang: Passed as the second argument to ``load_dataset``.
        experiment_dataset_name: Used to obtain the experiment repository and
            also passed to the importer as its third argument.
    """
    # The importer (and its repositories) is built before the dataset is loaded.
    fleurs_importer = FleursDatasetImporter(
        get_experiment_repository(experiment_dataset_name),
        get_minio_audio_record_repository(),
        experiment_dataset_name,
    )
    test_split = load_dataset('google/fleurs', dataset_lang)['test']
    import_single_dataset(fleurs_importer, test_split)


def import_minds14_dataset(dataset_lang: str, experiment_dataset_name: str):
    """Import the ``'train'`` split of ``PolyAI/minds14`` with a ``Minds14DatasetImporter``.

    Args:
        dataset_lang: Passed as the second argument to ``load_dataset``.
        experiment_dataset_name: Used to obtain the experiment repository and
            also passed to the importer as its third argument.
    """
    # The importer (and its repositories) is built before the dataset is loaded.
    minds14_importer = Minds14DatasetImporter(
        get_experiment_repository(experiment_dataset_name),
        get_minio_audio_record_repository(),
        experiment_dataset_name,
    )
    train_split = load_dataset('PolyAI/minds14', dataset_lang)['train']
    import_single_dataset(minds14_importer, train_split)


def import_voxpopuli_dataset(dataset_lang: str, experiment_dataset_name: str):
    """Import the ``'test'`` split of ``facebook/voxpopuli``.

    Args:
        dataset_lang: Passed as the second argument to ``load_dataset``.
        experiment_dataset_name: Used to obtain the experiment repository and
            also passed to the importer as its third argument.
    """
    # NOTE(review): this builds a Minds14DatasetImporter for VoxPopuli data —
    # confirm that reuse is intentional and not a copy-paste from
    # import_minds14_dataset.
    voxpopuli_importer = Minds14DatasetImporter(
        get_experiment_repository(experiment_dataset_name),
        get_minio_audio_record_repository(),
        experiment_dataset_name,
    )
    test_split = load_dataset('facebook/voxpopuli', dataset_lang)['test']
    import_single_dataset(voxpopuli_importer, test_split)