From 89b3540c5b6cad2531b2d9623bf28a93e12ff58a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20W=C4=85troba?= <markowanga@gmail.com>
Date: Sun, 15 Jan 2023 12:41:52 +0100
Subject: [PATCH] download_dataset command

---
 new_datasets/import_datasets/upload_audio.py | 6 +++---
 new_experiment/add_to_queue_pipeline.py      | 6 +-----
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/new_datasets/import_datasets/upload_audio.py b/new_datasets/import_datasets/upload_audio.py
index 51db020..0a2e08e 100644
--- a/new_datasets/import_datasets/upload_audio.py
+++ b/new_datasets/import_datasets/upload_audio.py
@@ -22,7 +22,7 @@ def get_minio_client() -> Minio:
 
 
 def upload_single_commonvoice(lang: str, dataset_app_name: str):
-    ds = load_dataset("mozilla-foundation/common_voice_11_0", lang)
+    ds = load_dataset("mozilla-foundation/common_voice_11_0", lang, cache_dir='/mnt/disk2/huggingface')
     ds = ds['test']
     minio = get_minio_client()
     index = 1
@@ -97,5 +97,5 @@ def upload_common_voice():
 
 if __name__ == '__main__':
     # upload_fleurs()
-    upload_minds()
-    # upload_common_voice()
+    # upload_minds()
+    upload_common_voice()
diff --git a/new_experiment/add_to_queue_pipeline.py b/new_experiment/add_to_queue_pipeline.py
index c6e2f36..29eb7d1 100644
--- a/new_experiment/add_to_queue_pipeline.py
+++ b/new_experiment/add_to_queue_pipeline.py
@@ -8,11 +8,7 @@ from pika.adapters.blocking_connection import BlockingChannel
 
 COMMANDS = ['run_word_wer_classic_pipeline', 'run_word_wer_embedding_pipeline', 'run_spacy_dep_tag_wer_pipeline',
             'run_spacy_ner_wer_pipeline', 'run_spacy_pos_wer_pipeline']
-LANGUAGES = [
-    'nl', 'fr', 'de',
-    'it',
-    'pl', 'es', 'en'
-]
+LANGUAGES = ['nl', 'fr', 'de', 'it', 'pl', 'es', 'en']
 WHISPER_ASR_MODEL = ['tiny', 'base', 'small', 'medium', 'large-v2']
 DATASETS = [
     'google_fleurs',
-- 
GitLab