Skip to content
Snippets Groups Projects
Commit 584babb2 authored by Marcin Wątroba
Browse files

download_dataset command

parent f9aed5c1
Branches
No related merge requests found
......@@ -8,7 +8,9 @@ from pika.adapters.blocking_connection import BlockingChannel
def get_all_datasets() -> List[str]:
return [it.object_name[:-1] for it in get_minio_client().list_objects('dataset-audio', '')]
# return [it.object_name[:-1] for it in get_minio_client().list_objects('dataset-audio', '')]
language_codes = ['nl', 'fr', 'de', 'it', 'pl', 'es', 'en']
return [f'{it}_common_voice' for it in language_codes]
def get_dataset_items_id(dataset_name: str) -> List[str]:
......@@ -32,11 +34,7 @@ def add_to_queue(dataset: str, asr_name: str, item_id: str, channel: BlockingCha
def add_whisper(channel: BlockingChannel):
whisper_asr_variant_list = [
# 'tiny',
# 'base',
'small', 'medium', 'large-v2'
]
whisper_asr_variant_list = ['tiny', 'base', 'small', 'medium', 'large-v2']
for whisper_variant in whisper_asr_variant_list:
asr_name = f'whisper_{whisper_variant}'
for dataset in get_all_datasets():
......
......@@ -86,13 +86,13 @@ def upload_minds():
def upload_common_voice():
upload_single_commonvoice('nl', 'nl_common_voice')
upload_single_commonvoice('fr', 'fr_common_voice')
upload_single_commonvoice('de', 'de_common_voice')
upload_single_commonvoice('it', 'it_common_voice')
# upload_single_commonvoice('nl', 'nl_common_voice')
# upload_single_commonvoice('fr', 'fr_common_voice')
# upload_single_commonvoice('de', 'de_common_voice')
# upload_single_commonvoice('it', 'it_common_voice')
upload_single_commonvoice('pl', 'pl_common_voice')
upload_single_commonvoice('es', 'es_common_voice')
upload_single_commonvoice('en', 'en_common_voice')
# upload_single_commonvoice('es', 'es_common_voice')
# upload_single_commonvoice('en', 'en_common_voice')
if __name__ == '__main__':
......
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment