Skip to content
Snippets Groups Projects
pipeline_process_spacy_ner_wer.py 1.43 KiB
Newer Older
Marcin Wątroba's avatar
Marcin Wątroba committed
import argparse

from experiment.const_pipeline_names import GOLD_TRANSCRIPT
from experiment.experiment_dependency_provider import get_record_provider, get_repository
from experiment.sentence_wer_processor.spacy_ner_sentence_wer_processor import SpacyNerSentenceWerProcessor
from sziszapangma.integration.experiment_manager import ExperimentManager


def run_spacy_pos_wer_pipeline(dataset_name: str, asr_name: str):
    """Run the spaCy NER sentence-WER processing task for one dataset/ASR pair.

    Builds an ``ExperimentManager`` holding a single
    ``SpacyNerSentenceWerProcessor`` task and calls its ``process()`` method.

    NOTE: despite the ``pos`` in this function's name, the task configured
    here is the NER processor.

    Args:
        dataset_name: Name passed to ``get_record_provider`` and
            ``get_repository``; also embedded in the task name.
        asr_name: Prefix for the property names the task is configured with
            (``<asr_name>__result``, ``<asr_name>__spacy_ner_alignment``,
            ``<asr_name>__spacy_ner_metrics``); also embedded in the task
            name.
    """
    # The same provider object is used both as the record id iterator and as
    # the relation manager provider.
    provider = get_record_provider(dataset_name)

    ner_wer_task = SpacyNerSentenceWerProcessor(
        model_name='pl_core_news_lg',
        gold_transcript_property_name=GOLD_TRANSCRIPT,
        asr_property_name=f'{asr_name}__result',
        alignment_property_name=f'{asr_name}__spacy_ner_alignment',
        wer_property_name=f'{asr_name}__spacy_ner_metrics',
        task_name=f'SpacyNerSentenceWerProcessor___{dataset_name}___{asr_name}',
        require_update=False,
    )
    repository = get_repository(dataset_name)

    manager = ExperimentManager(
        record_id_iterator=provider,
        processing_tasks=[ner_wer_task],
        experiment_repository=repository,
        relation_manager_provider=provider,
    )
    manager.process()


if __name__ == '__main__':
    # Command-line entry point: parse the dataset and ASR names and run the
    # pipeline for that pair.
    parser = argparse.ArgumentParser(
        description='Run the spaCy NER sentence WER pipeline for one dataset and ASR.'
    )
    # Both flags are required: their values are interpolated into property and
    # task names, so an omitted flag would otherwise be passed on as None and
    # yield names such as 'None__result' instead of failing fast.
    parser.add_argument(
        "--dataset",
        required=True,
        help="dataset name used to obtain the record provider and repository",
    )
    parser.add_argument(
        "--asr",
        required=True,
        help="ASR name used as the prefix of the '<asr>__result' property",
    )
    args = parser.parse_args()
    run_spacy_pos_wer_pipeline(args.dataset, args.asr)