import argparse

from experiment.const_pipeline_names import GOLD_TRANSCRIPT
from experiment.experiment_dependency_provider import get_record_provider, get_repository
from experiment.sentence_wer_processor.spacy_pos_sentence_wer_processor import SpacyPosSentenceWerProcessor
from sziszapangma.integration.experiment_manager import ExperimentManager


def run_spacy_pos_wer_pipeline(dataset_name: str, asr_name: str) -> None:
    """Run the spaCy POS sentence-WER task for one dataset / ASR pair.

    Builds an ``ExperimentManager`` with a single
    ``SpacyPosSentenceWerProcessor`` task and calls ``process()`` on it.

    Args:
        dataset_name: Dataset identifier passed to ``get_record_provider``
            and ``get_repository``. Its first two characters are used as the
            prefix of the spaCy model name (``<prefix>_core_news_lg``) --
            presumably a language code such as ``pl``; verify against the
            dataset naming scheme.
        asr_name: ASR identifier used as the prefix of the property names
            read (``<asr_name>__result``) and written
            (``<asr_name>__spacy_pos_alignment``,
            ``<asr_name>__spacy_pos_metrics``).
    """
    record_provider = get_record_provider(dataset_name)
    # The spaCy model name is derived from the leading two characters of the
    # dataset name.
    language_code = dataset_name[:2]
    experiment_processor = ExperimentManager(
        record_id_iterator=record_provider,
        processing_tasks=[
            SpacyPosSentenceWerProcessor(
                model_name=f'{language_code}_core_news_lg',
                gold_transcript_property_name=GOLD_TRANSCRIPT,
                asr_property_name=f'{asr_name}__result',
                alignment_property_name=f'{asr_name}__spacy_pos_alignment',
                wer_property_name=f'{asr_name}__spacy_pos_metrics',
                task_name=f'SpacySentenceWerProcessorBase___{dataset_name}___{asr_name}',
                require_update=False
            )
        ],
        experiment_repository=get_repository(dataset_name),
    )
    experiment_processor.process()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Both options are mandatory: without them the pipeline would receive
    # None and either crash on ``dataset_name[:2]`` or build property names
    # such as "None__result".
    parser.add_argument(
        "--asr",
        required=True,
        help="ASR name used as the prefix of the result/alignment/metrics property names",
    )
    parser.add_argument(
        "--dataset",
        required=True,
        help="dataset name; its first two characters select the spaCy model",
    )
    args = parser.parse_args()
    run_spacy_pos_wer_pipeline(args.dataset, args.asr)