import argparse

from experiment.const_pipeline_names import GOLD_TRANSCRIPT
from experiment.experiment_dependency_provider import get_record_provider, get_repository
from new_experiment.new_dependency_provider import get_experiment_repository, get_minio_audio_record_repository
from new_experiment.utils.loaded_remote_dataset_helper import LoadedRemoteDatasetHelper
from new_experiment.utils.property_helper import PropertyHelper
from sziszapangma.core.transformer.fasttext_embedding_transformer import FasttextEmbeddingTransformer
from sziszapangma.core.transformer.web_embedding_transformer import WebEmbeddingTransformer
from sziszapangma.integration.experiment_manager import ExperimentManager
from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask
from sziszapangma.integration.task.embedding_wer_metrics_task import EmbeddingWerMetricsTask


def run_word_wer_embedding_pipeline(dataset_name: str, asr_name: str):
    """Run the embedding-based word WER metrics task for one dataset/ASR pair.

    A single ``EmbeddingWerMetricsTask`` is configured and executed through an
    ``ExperimentManager`` over the records yielded by
    ``LoadedRemoteDatasetHelper``.

    Args:
        dataset_name: Name used to obtain the experiment repository and to
            build the record iterator. Its first two characters are handed to
            ``FasttextEmbeddingTransformer`` (presumably a language code such
            as ``'de'`` -- confirm against the transformer's constructor).
        asr_name: Name of the ASR system; used to derive the input and output
            property names via ``PropertyHelper``.
    """
    experiment_repository = get_experiment_repository(dataset_name)
    audio_record_repository = get_minio_audio_record_repository()
    record_ids = LoadedRemoteDatasetHelper(
        experiment_repository,
        audio_record_repository,
        dataset_name,
    )

    # The task is always re-run (require_update=True), regardless of any
    # metrics that may already be stored for this ASR.
    embedding_wer_task = EmbeddingWerMetricsTask(
        task_name=f'EmbeddingWerMetricsTask___{dataset_name}___{asr_name}',
        asr_property_name=PropertyHelper.asr_result(asr_name),
        gold_transcript_property_name=PropertyHelper.get_gold_transcript_words(),
        metrics_property_name=PropertyHelper.word_wer_embeddings_metrics(asr_name),
        require_update=True,
        embedding_transformer=FasttextEmbeddingTransformer(dataset_name[:2]),
        alignment_property_name=PropertyHelper.word_wer_embeddings_alignment(asr_name),
    )

    manager = ExperimentManager(
        record_id_iterator=record_ids,
        processing_tasks=[embedding_wer_task],
        experiment_repository=experiment_repository,
    )
    manager.process()


if __name__ == '__main__':
    # Read the dataset and ASR names from the command line. Both flags are
    # optional and default to the previously hard-coded values, so running the
    # script without arguments behaves exactly as before.
    parser = argparse.ArgumentParser(
        description='Compute embedding-based word WER metrics for one dataset/ASR pair.'
    )
    parser.add_argument(
        '--dataset-name',
        default='de_minds14',
        help='dataset to process (default: %(default)s)',
    )
    parser.add_argument(
        '--asr-name',
        default='whisper_tiny',
        help='ASR system whose results are evaluated (default: %(default)s)',
    )
    cli_args = parser.parse_args()
    run_word_wer_embedding_pipeline(cli_args.dataset_name, cli_args.asr_name)
