# Patch summary -- three files under new_experiment/pipeline/ are touched.
#
# 1. import_datasets.py (modified, one-line change in import_voxpopuli_dataset):
#    the argument that follows the closing ")" of the inner call changes from
#    load_dataset('facebook/voxpopuli', dataset_lang) to
#    load_dataset('facebook/voxpopuli', dataset_lang)['test'], i.e. only the
#    entry stored under the key 'test' is passed on instead of the whole object.
#    NOTE(review): 'test' is presumably a split name of the object returned by
#    load_dataset -- confirm against the loader's documentation. The enclosing
#    call and the rest of the function lie outside this hunk.
#
# 2. import_minds14.py (new file): when run as __main__, calls
#    import_minds14_dataset once per locale, in this order: nl-NL, fr-FR, de-DE,
#    it-IT, pl-PL, es-ES, en-US, storing each under '<lang>_minds14'.
#    NOTE(review): import_minds14_dataset is imported from import_datasets, but
#    its definition is not part of this patch -- confirm that it exists there.
#
# 3. import_voxpopuli.py (new file): when run as __main__, calls
#    import_voxpopuli_dataset once per language code, in this order: nl, fr, de,
#    it, pl, es, en, storing each under '<lang>_voxpopuli'.
#
# Both scripts issue their calls back to back with no error handling, so an
# exception raised by one import prevents the remaining ones from running.
#
# NOTE(review): the line breaks of this patch appear to have been collapsed;
# the patch text below is kept verbatim.
diff --git a/new_experiment/pipeline/import_datasets.py b/new_experiment/pipeline/import_datasets.py index d1497ce4c8864501e352a6eb69bad092421ef53f..07bcb2a228abad344b34caf6f7186814af40f8ea 100644 --- a/new_experiment/pipeline/import_datasets.py +++ b/new_experiment/pipeline/import_datasets.py @@ -32,4 +32,4 @@ def import_voxpopuli_dataset(dataset_lang: str, experiment_dataset_name: str): get_experiment_repository(experiment_dataset_name), get_minio_audio_record_repository(), experiment_dataset_name, - ), load_dataset('facebook/voxpopuli', dataset_lang)) + ), load_dataset('facebook/voxpopuli', dataset_lang)['test']) diff --git a/new_experiment/pipeline/import_minds14.py b/new_experiment/pipeline/import_minds14.py new file mode 100644 index 0000000000000000000000000000000000000000..3501909ce2433d42d7932dd929453e333880007d --- /dev/null +++ b/new_experiment/pipeline/import_minds14.py @@ -0,0 +1,10 @@ +from new_experiment.pipeline.import_datasets import import_minds14_dataset + +if __name__ == '__main__': + import_minds14_dataset('nl-NL', 'nl_minds14') + import_minds14_dataset('fr-FR', 'fr_minds14') + import_minds14_dataset('de-DE', 'de_minds14') + import_minds14_dataset('it-IT', 'it_minds14') + import_minds14_dataset('pl-PL', 'pl_minds14') + import_minds14_dataset('es-ES', 'es_minds14') + import_minds14_dataset('en-US', 'en_minds14') diff --git a/new_experiment/pipeline/import_voxpopuli.py b/new_experiment/pipeline/import_voxpopuli.py new file mode 100644 index 0000000000000000000000000000000000000000..1ecfb6d2b077e1315ae5c0f3fc2661de44d99dc9 --- /dev/null +++ b/new_experiment/pipeline/import_voxpopuli.py @@ -0,0 +1,10 @@ +from new_experiment.pipeline.import_datasets import import_voxpopuli_dataset + +if __name__ == '__main__': + import_voxpopuli_dataset('nl', 'nl_voxpopuli') + import_voxpopuli_dataset('fr', 'fr_voxpopuli') + import_voxpopuli_dataset('de', 'de_voxpopuli') + import_voxpopuli_dataset('it', 'it_voxpopuli') + import_voxpopuli_dataset('pl', 
'pl_voxpopuli') + import_voxpopuli_dataset('es', 'es_voxpopuli') + import_voxpopuli_dataset('en', 'en_voxpopuli')