"""Constants and factory helpers for the voicelab_cbiz_testset_20220322 experiment."""

from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider
from sziszapangma.integration.path_filter import ExtensionPathFilter
from sziszapangma.integration.repository.multi_files_experiment_repository import (
    MultiFilesExperimentRepository,
)

# Root directory handed to the path filter in get_record_provider().
DATASET_DIRECTORY = 'experiment_data/dataset/voicelab_cbiz_testset_20220322'

# String identifiers for the reference transcript.
# NOTE(review): presumably these are keys under which pipeline results are
# stored -- confirm against the pipeline definition that consumes them.
GOLD_TRANSCRIPT = 'gold_transcript'
GOLD_TRANSCRIPT_SPACY = 'gold_transcript_spacy'

# String identifiers carrying the "techmo" name.
TECHMO_POLISH_ASR = 'techmo_polish_asr'
WORD_TECHMO_METRICS_WER = 'word_techmo_metrics_wer'
WORD_TECHMO_ALIGNMENT_WER = 'word_techmo_alignment_wer'
TECHMO_SPACY = 'techmo_spacy'
POS_TECHMO_ALIGNMENT_WER = 'pos_techmo_alignment_wer'
POS_TECHMO_METRICS_WER = 'pos_techmo_metrics_wer'
WORD_TECHMO_METRICS_WER_EMBEDDINGS = 'word_techmo_metrics_wer_embeddings'
WORD_TECHMO_ALIGNMENT_WER_EMBEDDINGS = 'word_techmo_alignment_wer_embeddings'
TAG_SPACY_TECHMO_METRICS_WER_EMBEDDINGS = 'tag_spacy_techmo_metrics_wer_embeddings'
TAG_SPACY_TECHMO_ALIGNMENT_WER_EMBEDDINGS = 'tag_spacy_techmo_alignment_wer_embeddings'

# String identifiers carrying the "ajn" name (same layout as the techmo group).
AJN_POLISH_ASR = 'ajn_polish_asr'
WORD_AJN_METRICS_WER = 'word_ajn_metrics_wer'
WORD_AJN_ALIGNMENT_WER = 'word_ajn_alignment_wer'
AJN_SPACY = 'ajn_spacy'
POS_AJN_ALIGNMENT_WER = 'pos_ajn_alignment_wer'
POS_AJN_METRICS_WER = 'pos_ajn_metrics_wer'
WORD_AJN_METRICS_WER_EMBEDDINGS = 'word_ajn_metrics_wer_embeddings'
WORD_AJN_ALIGNMENT_WER_EMBEDDINGS = 'word_ajn_alignment_wer_embeddings'
TAG_SPACY_AJN_METRICS_WER_EMBEDDINGS = 'tag_spacy_ajn_metrics_wer_embeddings'
TAG_SPACY_AJN_ALIGNMENT_WER_EMBEDDINGS = 'tag_spacy_ajn_alignment_wer_embeddings'

# Arguments passed to MultiFilesExperimentRepository in get_repository().
PIPELINE_DATA_DIRECTORY = 'experiment_data/pipeline'
EXPERIMENT_NAME = 'asr_benchmark_voicelab_cbiz_testset_20220322'

# Passed to the record provider as ``relation_manager_root_path``.
RELATION_MANAGER_ROOT_PATH = (
    'experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322'
)


def get_record_provider() -> VoicelabTelcoRecordProvider:
    """Build the record provider used by this experiment.

    Returns:
        A ``VoicelabTelcoRecordProvider`` constructed with an
        ``ExtensionPathFilter`` (root ``DATASET_DIRECTORY``, extension
        ``'wav'``) and ``RELATION_MANAGER_ROOT_PATH`` as the relation
        manager root path.
    """
    wav_path_filter = ExtensionPathFilter(
        root_directory=DATASET_DIRECTORY,
        extension='wav',
    )
    return VoicelabTelcoRecordProvider(
        wav_path_filter,
        relation_manager_root_path=RELATION_MANAGER_ROOT_PATH,
    )


def get_repository() -> MultiFilesExperimentRepository:
    """Build the experiment repository used by this experiment.

    Returns:
        A ``MultiFilesExperimentRepository`` constructed from
        ``PIPELINE_DATA_DIRECTORY`` and ``EXPERIMENT_NAME``.
    """
    return MultiFilesExperimentRepository(
        PIPELINE_DATA_DIRECTORY,
        EXPERIMENT_NAME,
    )