# pipeline_process_wikineural_ner_wer.py (1.51 KiB)
# Committed by Marcin Wątroba.
from experiment.sentence_wer_processor.wikineural_multilingual_ner_transformers_wer_processor_base import \
    WikineuralMultilingualNerTransformersWerProcessorBase
from new_experiment.new_dependency_provider import get_experiment_repository, get_minio_audio_record_repository
from new_experiment.utils.loaded_remote_dataset_helper import LoadedRemoteDatasetHelper

from new_experiment.utils.property_helper import PropertyHelper
from sziszapangma.integration.experiment_manager import ExperimentManager

# Label passed as the second argument to PropertyHelper.ner_alignment() and
# PropertyHelper.ner_metrics() when this module builds its property names.
_WIKINEURAL = 'wikineural'


def run_wikineural_ner_pipeline(dataset_name: str, asr_name: str):
    """Run the Wikineural NER WER processing task for one dataset/ASR pair.

    Builds an ``ExperimentManager`` whose only processing task is a
    ``WikineuralMultilingualNerTransformersWerProcessorBase`` and calls its
    ``process()`` method.

    Args:
        dataset_name: Name used to obtain the experiment repository and the
            record provider; it is also embedded in the task name.
        asr_name: Name used to derive the ASR result, NER alignment and NER
            metrics property names; it is also embedded in the task name.
    """
    experiment_repository = get_experiment_repository(dataset_name)
    audio_repository = get_minio_audio_record_repository()
    records = LoadedRemoteDatasetHelper(experiment_repository, audio_repository, dataset_name)

    # Property names are resolved in the same order the processor receives them.
    gold_property = PropertyHelper.get_gold_transcript_raw()
    asr_property = PropertyHelper.asr_result(asr_name)
    alignment_property = PropertyHelper.ner_alignment(asr_name, _WIKINEURAL)
    metrics_property = PropertyHelper.ner_metrics(asr_name, _WIKINEURAL)
    task_label = f'WikineuralMultilingualNerTransformersWerProcessorBase___{dataset_name}___{asr_name}'

    ner_wer_task = WikineuralMultilingualNerTransformersWerProcessorBase(
        gold_transcript_property_name=gold_property,
        asr_property_name=asr_property,
        alignment_property_name=alignment_property,
        wer_property_name=metrics_property,
        task_name=task_label,
        # NOTE(review): presumably skips records that already have results — confirm
        # against the processor base class.
        require_update=False,
    )

    manager = ExperimentManager(
        record_id_iterator=records,
        processing_tasks=[ner_wer_task],
        experiment_repository=experiment_repository,
    )
    manager.process()