"""Configuration constants and factories for the ``pl_voicelab_cbiz`` experiment.

The module defines string identifiers (one group per ASR system), the
directories used by the experiment, and two factory functions that build the
record provider and the experiment repository from those settings.
"""

from experiment.dataset_specific.pl_voicelab_cbiz.voicelab_telco_record_provider import VoicelabTelcoRecordProvider
from sziszapangma.integration.path_filter import ExtensionPathFilter
from sziszapangma.integration.repository.multi_files_experiment_repository import MultiFilesExperimentRepository

# Root directory handed to the path filter in get_record_provider().
DATASET_DIRECTORY = 'experiment_data/dataset/pl_voicelab_cbiz'

# NOTE(review): the identifiers below look like property/step keys used by the
# processing pipeline and the experiment repository -- confirm against callers.
# Every value is the lowercase form of its constant name.

# --- Reference (gold) transcript ---
GOLD_TRANSCRIPT = 'gold_transcript'
GOLD_TRANSCRIPT_SPACY = 'gold_transcript_spacy'

# --- Techmo ASR ---
TECHMO_POLISH_ASR = 'techmo_polish_asr'
WORD_TECHMO_METRICS_WER = 'word_techmo_metrics_wer'
WORD_TECHMO_ALIGNMENT_WER = 'word_techmo_alignment_wer'
TECHMO_SPACY = 'techmo_spacy'
POS_TECHMO_ALIGNMENT_WER = 'pos_techmo_alignment_wer'
POS_TECHMO_METRICS_WER = 'pos_techmo_metrics_wer'
WORD_TECHMO_METRICS_WER_EMBEDDINGS = 'word_techmo_metrics_wer_embeddings'
WORD_TECHMO_ALIGNMENT_WER_EMBEDDINGS = 'word_techmo_alignment_wer_embeddings'
TAG_SPACY_TECHMO_METRICS_WER_EMBEDDINGS = 'tag_spacy_techmo_metrics_wer_embeddings'
TAG_SPACY_TECHMO_ALIGNMENT_WER_EMBEDDINGS = 'tag_spacy_techmo_alignment_wer_embeddings'
NER_SPACY_TECHMO_METRICS_WER_EMBEDDINGS = 'ner_spacy_techmo_metrics_wer_embeddings'
NER_SPACY_TECHMO_ALIGNMENT_WER_EMBEDDINGS = 'ner_spacy_techmo_alignment_wer_embeddings'

# --- AJN ASR ---
AJN_POLISH_ASR = 'ajn_polish_asr'
WORD_AJN_METRICS_WER = 'word_ajn_metrics_wer'
WORD_AJN_ALIGNMENT_WER = 'word_ajn_alignment_wer'
AJN_SPACY = 'ajn_spacy'
POS_AJN_ALIGNMENT_WER = 'pos_ajn_alignment_wer'
POS_AJN_METRICS_WER = 'pos_ajn_metrics_wer'
WORD_AJN_METRICS_WER_EMBEDDINGS = 'word_ajn_metrics_wer_embeddings'
WORD_AJN_ALIGNMENT_WER_EMBEDDINGS = 'word_ajn_alignment_wer_embeddings'
TAG_SPACY_AJN_METRICS_WER_EMBEDDINGS = 'tag_spacy_ajn_metrics_wer_embeddings'
TAG_SPACY_AJN_ALIGNMENT_WER_EMBEDDINGS = 'tag_spacy_ajn_alignment_wer_embeddings'
NER_SPACY_AJN_METRICS_WER_EMBEDDINGS = 'ner_spacy_ajn_metrics_wer_embeddings'
NER_SPACY_AJN_ALIGNMENT_WER_EMBEDDINGS = 'ner_spacy_ajn_alignment_wer_embeddings'

# --- Google ASR ---
# Written as plain lowercase literals (previously 'UPPER_NAME'.lower()) so the
# three ASR groups are defined the same way and the keys can be grepped for.
# The runtime values are unchanged.
GOOGLE_POLISH_ASR = 'google_polish_asr'
WORD_GOOGLE_METRICS_WER = 'word_google_metrics_wer'
WORD_GOOGLE_ALIGNMENT_WER = 'word_google_alignment_wer'
GOOGLE_SPACY = 'google_spacy'
POS_GOOGLE_ALIGNMENT_WER = 'pos_google_alignment_wer'
POS_GOOGLE_METRICS_WER = 'pos_google_metrics_wer'
WORD_GOOGLE_METRICS_WER_EMBEDDINGS = 'word_google_metrics_wer_embeddings'
WORD_GOOGLE_ALIGNMENT_WER_EMBEDDINGS = 'word_google_alignment_wer_embeddings'
NER_SPACY_GOOGLE_METRICS_WER_EMBEDDINGS = 'ner_spacy_google_metrics_wer_embeddings'
NER_SPACY_GOOGLE_ALIGNMENT_WER_EMBEDDINGS = 'ner_spacy_google_alignment_wer_embeddings'
TAG_SPACY_GOOGLE_ALIGNMENT_WER_EMBEDDINGS = 'tag_spacy_google_alignment_wer_embeddings'
TAG_SPACY_GOOGLE_METRICS_WER_EMBEDDINGS = 'tag_spacy_google_metrics_wer_embeddings'

# --- Storage locations and experiment identity ---
# The two values below are the arguments passed to the repository in
# get_repository().
PIPELINE_DATA_DIRECTORY = 'experiment_data/pipeline'
EXPERIMENT_NAME = 'pl_voicelab_cbiz'
# Passed to the record provider as ``relation_manager_root_path``.
RELATION_MANAGER_ROOT_PATH = 'experiment_data/dataset_relation_manager_data/pl_voicelab_cbiz'


def get_record_provider() -> VoicelabTelcoRecordProvider:
    """Build the record provider for this experiment.

    The provider is constructed with an ``ExtensionPathFilter`` configured
    with ``root_directory=DATASET_DIRECTORY`` and ``extension='wav'``, and
    with ``RELATION_MANAGER_ROOT_PATH`` as its relation-manager root path.

    Returns:
        A newly constructed ``VoicelabTelcoRecordProvider``.
    """
    # presumably selects the .wav recordings under DATASET_DIRECTORY --
    # the filter's exact matching rules live in sziszapangma; TODO confirm.
    wav_path_filter = ExtensionPathFilter(
        root_directory=DATASET_DIRECTORY,
        extension='wav',
    )
    return VoicelabTelcoRecordProvider(
        wav_path_filter,
        relation_manager_root_path=RELATION_MANAGER_ROOT_PATH,
    )


def get_repository() -> MultiFilesExperimentRepository:
    """Build the experiment repository for this experiment.

    Returns:
        A newly constructed ``MultiFilesExperimentRepository`` created from
        ``PIPELINE_DATA_DIRECTORY`` and ``EXPERIMENT_NAME`` (positional
        arguments, in that order).
    """
    return MultiFilesExperimentRepository(PIPELINE_DATA_DIRECTORY, EXPERIMENT_NAME)