from pathlib import Path
from typing import Set

from minio import Minio
from urllib3 import HTTPResponse

from experiment.dataset_helper import DatasetHelper
from new_experiment.utils.minio_audio_record_repository import MinioRecordRepository
from new_experiment.utils.property_helper import PropertyHelper
from sziszapangma.integration.repository.experiment_repository import ExperimentRepository


class LoadedRemoteDatasetHelper(DatasetHelper):
    """Dataset helper that resolves records through remote repositories.

    Record ids are read from an ``ExperimentRepository``; audio files are
    fetched through a ``MinioRecordRepository`` and kept in a per-dataset
    directory under ``~/.cache/asr_benchmark``.
    """

    # Source of the record ids known for this dataset.
    _experiment_repository: ExperimentRepository
    # Repository used to store a record's audio file at a local path.
    _minio_record_repository: MinioRecordRepository
    # Dataset name; used both as the cache sub-directory and as an argument
    # to ``MinioRecordRepository.save_file``.
    _dataset_name: str

    def __init__(
        self,
        experiment_repository: ExperimentRepository,
        minio_record_repository: MinioRecordRepository,
        dataset_name: str,
    ):
        """Store the collaborating repositories and the dataset name.

        Args:
            experiment_repository: Repository queried for record ids.
            minio_record_repository: Repository used to save audio files
                locally.
            dataset_name: Name of the dataset handled by this helper.
        """
        self._experiment_repository = experiment_repository
        self._minio_record_repository = minio_record_repository
        self._dataset_name = dataset_name

    def get_all_records(self) -> Set[str]:
        """Return the ids of all records that have the gold-transcript property.

        The property name comes from
        ``PropertyHelper.get_gold_transcript_words()``.
        """
        gold_transcript_property = PropertyHelper.get_gold_transcript_words()
        return self._experiment_repository.get_all_record_ids_for_property(
            gold_transcript_property
        )

    def get_path(self, record_id: str) -> str:
        """Return the local POSIX path of the ``.wav`` file for *record_id*.

        If the file is not yet present in the local cache it is first
        requested from the MinIO record repository.

        Args:
            record_id: Identifier of the record whose audio is needed.

        Returns:
            The cached file path as a POSIX-style string.
        """
        record_path = Path.home() / f'.cache/asr_benchmark/{self._dataset_name}/{record_id}.wav'
        if not record_path.exists():
            # On a fresh machine the cache directory may not exist yet, and
            # it is not visible here whether ``save_file`` creates it, so
            # make sure the target directory is there before downloading.
            record_path.parent.mkdir(parents=True, exist_ok=True)
            self._minio_record_repository.save_file(record_path, self._dataset_name, record_id)
        return record_path.as_posix()