from experiment.experiment_dependency_provider import get_record_provider, get_repository
from experiment.sentence_wer_processor.spacy_pos_sentence_wer_processor import SpacyPosSentenceWerProcessor
from new_experiment.new_dependency_provider import get_experiment_repository, get_minio_audio_record_repository
from new_experiment.utils.get_spacy_model_name import get_spacy_model_name
from new_experiment.utils.loaded_remote_dataset_helper import LoadedRemoteDatasetHelper
from new_experiment.utils.property_helper import PropertyHelper
from sziszapangma.integration.experiment_manager import ExperimentManager


def run_spacy_pos_wer_pipeline(dataset_name: str, asr_name: str):
    """Run a single ``SpacyPosSentenceWerProcessor`` task for one dataset/ASR pair.

    The function wires together the experiment repository for
    ``dataset_name``, a ``LoadedRemoteDatasetHelper`` used as the record id
    iterator, and one ``SpacyPosSentenceWerProcessor`` task, then hands them
    to an ``ExperimentManager`` and calls its ``process()`` method.

    Args:
        dataset_name: Name of the dataset. Its first two characters are used
            as the language code passed to ``get_spacy_model_name``
            (e.g. ``'de_minds14'`` -> ``'de'``).
        asr_name: Name of the ASR system; used to derive the property names
            via ``PropertyHelper`` and as part of the task name.
    """
    experiment_repository = get_experiment_repository(dataset_name)
    record_ids = LoadedRemoteDatasetHelper(
        experiment_repository,
        get_minio_audio_record_repository(),
        dataset_name,
    )

    # The language code is taken as the two-character prefix of the dataset name.
    spacy_model = get_spacy_model_name(dataset_name[:2])

    # NOTE(review): require_update=True presumably forces the task to be
    # recomputed even when results already exist -- confirm against the
    # processor implementation.
    pos_wer_task = SpacyPosSentenceWerProcessor(
        model_name=spacy_model,
        gold_transcript_property_name=PropertyHelper.get_gold_transcript_raw(),
        asr_property_name=PropertyHelper.asr_result(asr_name),
        alignment_property_name=PropertyHelper.pos_alignment(asr_name, spacy_model),
        wer_property_name=PropertyHelper.pos_metrics(asr_name, spacy_model),
        task_name=f'SpacyPosSentenceWerProcessor___{dataset_name}___{asr_name}',
        require_update=True
    )

    manager = ExperimentManager(
        record_id_iterator=record_ids,
        processing_tasks=[pos_wer_task],
        experiment_repository=experiment_repository,
    )
    manager.process()


if __name__ == '__main__':
    # Script entry point: runs the pipeline for a fixed dataset/ASR pair.
    run_spacy_pos_wer_pipeline('de_minds14', 'whisper_tiny')