From 89b3540c5b6cad2531b2d9623bf28a93e12ff58a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20W=C4=85troba?= <markowanga@gmail.com> Date: Sun, 15 Jan 2023 12:41:52 +0100 Subject: [PATCH] download_dataset command --- new_datasets/import_datasets/upload_audio.py | 6 +++--- new_experiment/add_to_queue_pipeline.py | 6 +----- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/new_datasets/import_datasets/upload_audio.py b/new_datasets/import_datasets/upload_audio.py index 51db020..0a2e08e 100644 --- a/new_datasets/import_datasets/upload_audio.py +++ b/new_datasets/import_datasets/upload_audio.py @@ -22,7 +22,7 @@ def get_minio_client() -> Minio: def upload_single_commonvoice(lang: str, dataset_app_name: str): - ds = load_dataset("mozilla-foundation/common_voice_11_0", lang) + ds = load_dataset("mozilla-foundation/common_voice_11_0", lang, cache_dir='/mnt/disk2/huggingface') ds = ds['test'] minio = get_minio_client() index = 1 @@ -97,5 +97,5 @@ def upload_common_voice(): if __name__ == '__main__': # upload_fleurs() - upload_minds() - # upload_common_voice() + # upload_minds() + upload_common_voice() diff --git a/new_experiment/add_to_queue_pipeline.py b/new_experiment/add_to_queue_pipeline.py index c6e2f36..29eb7d1 100644 --- a/new_experiment/add_to_queue_pipeline.py +++ b/new_experiment/add_to_queue_pipeline.py @@ -8,11 +8,7 @@ from pika.adapters.blocking_connection import BlockingChannel COMMANDS = ['run_word_wer_classic_pipeline', 'run_word_wer_embedding_pipeline', 'run_spacy_dep_tag_wer_pipeline', 'run_spacy_ner_wer_pipeline', 'run_spacy_pos_wer_pipeline'] -LANGUAGES = [ - 'nl', 'fr', 'de', - 'it', - 'pl', 'es', 'en' -] +LANGUAGES = ['nl', 'fr', 'de', 'it', 'pl', 'es', 'en'] WHISPER_ASR_MODEL = ['tiny', 'base', 'small', 'medium', 'large-v2'] DATASETS = [ 'google_fleurs', -- GitLab