From 95f583d66a322b1e574546ffe6f9098f229e0902 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20W=C4=85troba?= <markowanga@gmail.com> Date: Thu, 12 Jan 2023 00:00:40 +0100 Subject: [PATCH] Add minds14 import --- new_experiment/pipeline/import_datasets.py | 2 +- new_experiment/pipeline/import_minds14.py | 10 ++++++++++ new_experiment/pipeline/import_voxpopuli.py | 10 ++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 new_experiment/pipeline/import_minds14.py create mode 100644 new_experiment/pipeline/import_voxpopuli.py diff --git a/new_experiment/pipeline/import_datasets.py b/new_experiment/pipeline/import_datasets.py index d1497ce..07bcb2a 100644 --- a/new_experiment/pipeline/import_datasets.py +++ b/new_experiment/pipeline/import_datasets.py @@ -32,4 +32,4 @@ def import_voxpopuli_dataset(dataset_lang: str, experiment_dataset_name: str): get_experiment_repository(experiment_dataset_name), get_minio_audio_record_repository(), experiment_dataset_name, - ), load_dataset('facebook/voxpopuli', dataset_lang)) + ), load_dataset('facebook/voxpopuli', dataset_lang)['test']) diff --git a/new_experiment/pipeline/import_minds14.py b/new_experiment/pipeline/import_minds14.py new file mode 100644 index 0000000..3501909 --- /dev/null +++ b/new_experiment/pipeline/import_minds14.py @@ -0,0 +1,10 @@ +from new_experiment.pipeline.import_datasets import import_minds14_dataset + +if __name__ == '__main__': + import_minds14_dataset('nl-NL', 'nl_minds14') + import_minds14_dataset('fr-FR', 'fr_minds14') + import_minds14_dataset('de-DE', 'de_minds14') + import_minds14_dataset('it-IT', 'it_minds14') + import_minds14_dataset('pl-PL', 'pl_minds14') + import_minds14_dataset('es-ES', 'es_minds14') + import_minds14_dataset('en-US', 'en_minds14') diff --git a/new_experiment/pipeline/import_voxpopuli.py b/new_experiment/pipeline/import_voxpopuli.py new file mode 100644 index 0000000..1ecfb6d --- /dev/null +++ b/new_experiment/pipeline/import_voxpopuli.py @@ -0,0 +1,10 @@ +from new_experiment.pipeline.import_datasets import import_voxpopuli_dataset + +if __name__ == '__main__': + import_voxpopuli_dataset('nl', 'nl_voxpopuli') + import_voxpopuli_dataset('fr', 'fr_voxpopuli') + import_voxpopuli_dataset('de', 'de_voxpopuli') + import_voxpopuli_dataset('it', 'it_voxpopuli') + import_voxpopuli_dataset('pl', 'pl_voxpopuli') + import_voxpopuli_dataset('es', 'es_voxpopuli') + import_voxpopuli_dataset('en', 'en_voxpopuli') -- GitLab