from datasets import Dataset, load_dataset

from new_experiment.new_dependency_provider import get_experiment_repository, get_minio_client, \
    get_minio_audio_record_repository
from new_experiment.pipeline.dataset_importer.fleurs_dataset_importer import FleursDatasetImporter
from new_experiment.pipeline.dataset_importer.minds14_dataset_importer import Minds14DatasetImporter
from new_experiment.utils.hf_dataset_importer import HfDatasetImporter


def import_single_dataset(importer: HfDatasetImporter, dataset: Dataset):
    """Run ``importer`` over ``dataset``.

    Args:
        importer: Object whose ``process_dataset`` method is invoked.
        dataset: Dataset handed to ``process_dataset`` unchanged.

    Whatever ``process_dataset`` returns is discarded; this function
    itself returns ``None``.
    """
    process = importer.process_dataset
    process(dataset)


def import_fleurs_dataset(dataset_lang: str, experiment_dataset_name: str):
    """Import the ``test`` split of ``google/fleurs`` with ``FleursDatasetImporter``.

    Args:
        dataset_lang: Passed as the second positional argument to
            ``load_dataset`` (the dataset configuration name).
        experiment_dataset_name: Passed to ``get_experiment_repository``
            and, unchanged, as the importer's third constructor argument.
    """
    # The importer is built before the dataset is loaded, so the
    # repositories are created first and the (potentially slow) download
    # only happens afterwards.
    experiment_repository = get_experiment_repository(experiment_dataset_name)
    audio_record_repository = get_minio_audio_record_repository()
    importer = FleursDatasetImporter(
        experiment_repository,
        audio_record_repository,
        experiment_dataset_name,
    )

    test_split = load_dataset('google/fleurs', dataset_lang)['test']
    import_single_dataset(importer, test_split)


def import_minds14_dataset(dataset_lang: str, experiment_dataset_name: str):
    """Import the ``train`` split of ``PolyAI/minds14`` with ``Minds14DatasetImporter``.

    Args:
        dataset_lang: Passed as the second positional argument to
            ``load_dataset`` (the dataset configuration name).
        experiment_dataset_name: Passed to ``get_experiment_repository``
            and, unchanged, as the importer's third constructor argument.
    """
    # The importer is built before the dataset is loaded, so the
    # repositories are created first and the download happens afterwards.
    experiment_repository = get_experiment_repository(experiment_dataset_name)
    audio_record_repository = get_minio_audio_record_repository()
    importer = Minds14DatasetImporter(
        experiment_repository,
        audio_record_repository,
        experiment_dataset_name,
    )

    # Unlike the other importers in this module, this one reads 'train'.
    train_split = load_dataset('PolyAI/minds14', dataset_lang)['train']
    import_single_dataset(importer, train_split)


def import_voxpopuli_dataset(dataset_lang: str, experiment_dataset_name: str):
    """Import the ``test`` split of ``facebook/voxpopuli``.

    Args:
        dataset_lang: Passed as the second positional argument to
            ``load_dataset`` (the dataset configuration name).
        experiment_dataset_name: Passed to ``get_experiment_repository``
            and, unchanged, as the importer's third constructor argument.
    """
    experiment_repository = get_experiment_repository(experiment_dataset_name)
    audio_record_repository = get_minio_audio_record_repository()
    # NOTE(review): this uses Minds14DatasetImporter for VoxPopuli data.
    # That looks like a copy-paste from import_minds14_dataset; confirm
    # whether a VoxPopuli-specific importer should be used instead, or
    # whether the MInDS-14 importer is known to handle this dataset.
    importer = Minds14DatasetImporter(
        experiment_repository,
        audio_record_repository,
        experiment_dataset_name,
    )

    test_split = load_dataset('facebook/voxpopuli', dataset_lang)['test']
    import_single_dataset(importer, test_split)