"""Enqueue Whisper ASR jobs for every record of the configured datasets.

For each Whisper variant, dataset and record id, one JSON message is
published to the RabbitMQ queue named ``asr__whisper_<variant>``.
"""
import datetime
import json
from typing import List

import pika
from minio import Minio
from pika.adapters.blocking_connection import BlockingChannel

from new_experiment.new_dependency_provider import get_experiment_repository


def get_all_datasets() -> List[str]:
    """Return the dataset names, one ``<language>_common_voice`` per language code."""
    # The list is hard-coded; the previous bucket-listing variant is kept for reference:
    # return [it.object_name[:-1] for it in get_minio_client().list_objects('dataset-audio', '')]
    language_codes = ['nl', 'fr', 'de', 'it', 'pl', 'es', 'en']
    return [f'{it}_common_voice' for it in language_codes]


def get_dataset_items_id(dataset_name: str) -> List[str]:
    """Return every record id of the experiment repository for ``dataset_name`` as a list."""
    return list(get_experiment_repository(dataset_name).get_all_record_ids())


def get_minio_client() -> Minio:
    """Build a MinIO client for the benchmark storage host.

    NOTE(review): credentials are hard-coded in source; consider loading them
    from the environment or a secrets store and rotating these values.
    """
    return Minio('minio-asr-benchmarks.theliver.pl', 'minio_user', 'eUxzEQbyYPdzrLxuvvethSbk18kB2s7G')


def add_to_queue(dataset: str, asr_name: str, item_id: str, channel: BlockingChannel) -> None:
    """Publish one persistent job message for ``item_id`` to the queue of ``asr_name``.

    Args:
        dataset: Dataset name stored under the ``dataset`` key of the message.
        asr_name: ASR system name; the target queue is ``asr__<asr_name>``.
        item_id: Record id stored under the ``item_id`` key of the message.
        channel: Open channel used to declare the queue and publish.
    """
    queue_name = f'asr__{asr_name}'
    message_dict = {'dataset': dataset, 'item_id': item_id}
    # Progress log: timestamp, target queue and payload.
    print(datetime.datetime.now().isoformat(), f'{queue_name} {message_dict}')
    message_bytes = json.dumps(message_dict).encode('utf-8')
    # Declared on every call so the durable queue exists before publishing.
    # NOTE(review): presumably a no-op once the queue exists - confirm against pika/AMQP docs.
    channel.queue_declare(queue=queue_name, durable=True)
    # Empty exchange name with the queue name as routing key.
    channel.basic_publish(
        exchange='',
        routing_key=queue_name,
        body=message_bytes,
        properties=pika.BasicProperties(
            delivery_mode=pika.spec.PERSISTENT_DELIVERY_MODE
        ),
    )


def add_whisper(channel: BlockingChannel) -> None:
    """Enqueue every record of every dataset once per Whisper variant."""
    whisper_asr_variant_list = ['tiny', 'base', 'small', 'medium', 'large-v2']
    for whisper_variant in whisper_asr_variant_list:
        asr_name = f'whisper_{whisper_variant}'
        for dataset in get_all_datasets():
            for item_id in get_dataset_items_id(dataset):
                add_to_queue(dataset, asr_name, item_id, channel)


def main() -> None:
    """Connect to RabbitMQ, enqueue all Whisper jobs, and close the connection."""
    # NOTE(review): broker credentials are hard-coded in the URL; consider
    # loading them from the environment or a secrets store.
    parameters = pika.URLParameters(
        'amqps://rabbit_user:kz6m4972OUHFmtUcPOHx4kF3Lj6yw7lo@rabbit-asr-benchmarks.theliver.pl:5671/')
    connection = pika.BlockingConnection(parameters=parameters)
    try:
        channel = connection.channel()
        add_whisper(channel)
    finally:
        # Close even when enqueueing fails; skip if the connection already
        # dropped so the original exception is not masked by a close error.
        if connection.is_open:
            connection.close()


if __name__ == '__main__':
    main()