Skip to content
Snippets Groups Projects
Commit 89b3540c authored by Marcin Wątroba
Browse files

download_dataset command

parent 7e677472
No related merge requests found
...@@ -22,7 +22,7 @@ def get_minio_client() -> Minio: ...@@ -22,7 +22,7 @@ def get_minio_client() -> Minio:
def upload_single_commonvoice(lang: str, dataset_app_name: str): def upload_single_commonvoice(lang: str, dataset_app_name: str):
ds = load_dataset("mozilla-foundation/common_voice_11_0", lang) ds = load_dataset("mozilla-foundation/common_voice_11_0", lang, cache_dir='/mnt/disk2/huggingface')
ds = ds['test'] ds = ds['test']
minio = get_minio_client() minio = get_minio_client()
index = 1 index = 1
...@@ -97,5 +97,5 @@ def upload_common_voice(): ...@@ -97,5 +97,5 @@ def upload_common_voice():
if __name__ == '__main__': if __name__ == '__main__':
# upload_fleurs() # upload_fleurs()
upload_minds() # upload_minds()
# upload_common_voice() upload_common_voice()
...@@ -8,11 +8,7 @@ from pika.adapters.blocking_connection import BlockingChannel ...@@ -8,11 +8,7 @@ from pika.adapters.blocking_connection import BlockingChannel
COMMANDS = ['run_word_wer_classic_pipeline', 'run_word_wer_embedding_pipeline', 'run_spacy_dep_tag_wer_pipeline', COMMANDS = ['run_word_wer_classic_pipeline', 'run_word_wer_embedding_pipeline', 'run_spacy_dep_tag_wer_pipeline',
'run_spacy_ner_wer_pipeline', 'run_spacy_pos_wer_pipeline'] 'run_spacy_ner_wer_pipeline', 'run_spacy_pos_wer_pipeline']
LANGUAGES = [ LANGUAGES = ['nl', 'fr', 'de', 'it', 'pl', 'es', 'en']
'nl', 'fr', 'de',
'it',
'pl', 'es', 'en'
]
WHISPER_ASR_MODEL = ['tiny', 'base', 'small', 'medium', 'large-v2'] WHISPER_ASR_MODEL = ['tiny', 'base', 'small', 'medium', 'large-v2']
DATASETS = [ DATASETS = [
'google_fleurs', 'google_fleurs',
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment