from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask
from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask
from experiment.voicelab.voicelab_dependency import get_record_provider, get_repository, GOLD_TRANSCRIPT, \
    GOLD_TRANSCRIPT_SPACY, TECHMO_POLISH_ASR, WORD_TECHMO_METRICS_WER, WORD_TECHMO_ALIGNMENT_WER, TECHMO_SPACY, \
    POS_TECHMO_METRICS_WER, POS_TECHMO_ALIGNMENT_WER, WORD_TECHMO_METRICS_WER_EMBEDDINGS, \
    WORD_TECHMO_ALIGNMENT_WER_EMBEDDINGS
from sziszapangma.core.transformer.web_embedding_transformer import WebEmbeddingTransformer
from sziszapangma.integration.asr_processor import AsrPathCacheClient
from sziszapangma.integration.experiment_manager import ExperimentManager
from sziszapangma.integration.task.asr_task import AsrTask
from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask
from sziszapangma.integration.task.embedding_wer_metrics_task import EmbeddingWerMetricsTask


def run_voicelab_experiment() -> None:
    """Assemble the Techmo processing pipeline for the VoiceLab set and run it.

    An ``ExperimentManager`` is built over the object returned by
    ``get_record_provider()`` (used as record id iterator, record path
    provider and relation manager provider) and the repository returned by
    ``get_repository()``. The tasks are registered in this order:

    1. ``AsrTask`` - stores its result under ``TECHMO_POLISH_ASR``, using an
       ``AsrPathCacheClient`` pointed at the ``experiment_data/cached_asr``
       directory.
    2. ``ClassicWerMetricTask`` - compares ``TECHMO_POLISH_ASR`` with
       ``GOLD_TRANSCRIPT`` and stores word-level metrics and alignment.
    3. ``AsrSpacyTokenPosProcessingTask`` - reads ``TECHMO_POLISH_ASR`` and
       stores its result under ``TECHMO_SPACY``.
    4. ``SpacyPosWerProcessingTask`` - compares ``TECHMO_SPACY`` with
       ``GOLD_TRANSCRIPT_SPACY`` and stores POS-level metrics and alignment.
    5. ``EmbeddingWerMetricsTask`` - compares ``TECHMO_POLISH_ASR`` with
       ``GOLD_TRANSCRIPT`` using a ``WebEmbeddingTransformer``.

    Finally ``ExperimentManager.process()`` is called. Nothing is returned.
    """
    record_provider = get_record_provider()
    experiment_processor = ExperimentManager(
        record_id_iterator=record_provider,
        processing_tasks=[
            AsrTask(
                task_name='techmo_polish_task',
                asr_processor=AsrPathCacheClient(
                    'experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo',
                    record_provider,
                    record_provider
                ),
                asr_property_name=TECHMO_POLISH_ASR,
                require_update=False,
                record_path_provider=record_provider
            ),
            ClassicWerMetricTask(
                task_name='techmo_word_wer_processing',
                asr_property_name=TECHMO_POLISH_ASR,
                gold_transcript_property_name=GOLD_TRANSCRIPT,
                metrics_property_name=WORD_TECHMO_METRICS_WER,
                require_update=False,
                alignment_property_name=WORD_TECHMO_ALIGNMENT_WER
            ),
            AsrSpacyTokenPosProcessingTask(
                task_name='techmo_spacy_task',
                input_property_name=TECHMO_POLISH_ASR,
                spacy_property_name=TECHMO_SPACY,
                # NOTE(review): this is the only task with require_update=True;
                # presumably it forces recomputation on every run - confirm
                # that this is intentional.
                require_update=True
            ),
            SpacyPosWerProcessingTask(
                task_name='techmo_pos_wer_processing',
                require_update=False,
                gold_transcript_pos_property_name=GOLD_TRANSCRIPT_SPACY,
                asr_pos_property_name=TECHMO_SPACY,
                pos_alignment_wer=POS_TECHMO_ALIGNMENT_WER,
                pos_metrics_wer=POS_TECHMO_METRICS_WER
            ),
            EmbeddingWerMetricsTask(
                task_name='EmbeddingWerMetricsTask',
                # Use the shared constant (not a repeated string literal) so
                # this task always reads the property the AsrTask above writes.
                asr_property_name=TECHMO_POLISH_ASR,
                gold_transcript_property_name=GOLD_TRANSCRIPT,
                metrics_property_name=WORD_TECHMO_METRICS_WER_EMBEDDINGS,
                require_update=False,
                # NOTE(review): the third argument looks like a credential
                # committed to source - consider loading it from configuration
                # or the environment instead.
                embedding_transformer=WebEmbeddingTransformer('pl', 'http://localhost:5003', 'fjsd-mkwe-oius-m9h2'),
                alignment_property_name=WORD_TECHMO_ALIGNMENT_WER_EMBEDDINGS
            )
        ],
        experiment_repository=get_repository(),
        relation_manager_provider=record_provider
    )
    experiment_processor.process()


def example_run() -> None:
    """Run the VoiceLab experiment by delegating to ``run_voicelab_experiment``."""
    run_voicelab_experiment()


# Run the experiment only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    example_run()