"""Constants and factory helpers for the asr_benchmark_voicelab_cbiz_testset_20220322 experiment."""
from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider
from sziszapangma.integration.path_filter import ExtensionPathFilter
from sziszapangma.integration.repository.multi_files_experiment_repository import MultiFilesExperimentRepository
# Dataset root; handed to ExtensionPathFilter as ``root_directory`` in
# get_record_provider().
DATASET_DIRECTORY = "experiment_data/dataset/voicelab_cbiz_testset_20220322"

# Identifiers for the gold transcript and its spaCy counterpart.
# NOTE(review): presumably used as result keys by the pipeline — confirm.
GOLD_TRANSCRIPT = "gold_transcript"
GOLD_TRANSCRIPT_SPACY = "gold_transcript_spacy"
# --- Techmo identifiers -----------------------------------------------------
# NOTE(review): presumably names of pipeline steps/results for the Techmo ASR
# system — confirm against the code that consumes them.
TECHMO_POLISH_ASR = "techmo_polish_asr"
TECHMO_SPACY = "techmo_spacy"

# Word-level WER (metrics and alignment).
WORD_TECHMO_METRICS_WER = "word_techmo_metrics_wer"
WORD_TECHMO_ALIGNMENT_WER = "word_techmo_alignment_wer"

# POS-level WER (metrics and alignment).
POS_TECHMO_METRICS_WER = "pos_techmo_metrics_wer"
POS_TECHMO_ALIGNMENT_WER = "pos_techmo_alignment_wer"

# Embedding-based variants, word-level and spaCy-tag-level.
WORD_TECHMO_METRICS_WER_EMBEDDINGS = "word_techmo_metrics_wer_embeddings"
WORD_TECHMO_ALIGNMENT_WER_EMBEDDINGS = "word_techmo_alignment_wer_embeddings"
TAG_SPACY_TECHMO_METRICS_WER_EMBEDDINGS = "tag_spacy_techmo_metrics_wer_embeddings"
TAG_SPACY_TECHMO_ALIGNMENT_WER_EMBEDDINGS = "tag_spacy_techmo_alignment_wer_embeddings"
# --- AJN identifiers --------------------------------------------------------
# NOTE(review): presumably names of pipeline steps/results for the AJN ASR
# system — confirm against the code that consumes them.
# NOTE(review): no AJN ASR identifier and no WORD_AJN_METRICS_WER are visible
# in this part of the file — confirm whether that is intentional.
AJN_SPACY = "ajn_spacy"

# Word-level WER (alignment only).
WORD_AJN_ALIGNMENT_WER = "word_ajn_alignment_wer"

# POS-level WER (metrics and alignment).
POS_AJN_METRICS_WER = "pos_ajn_metrics_wer"
POS_AJN_ALIGNMENT_WER = "pos_ajn_alignment_wer"

# Embedding-based variants, word-level and spaCy-tag-level.
WORD_AJN_METRICS_WER_EMBEDDINGS = "word_ajn_metrics_wer_embeddings"
WORD_AJN_ALIGNMENT_WER_EMBEDDINGS = "word_ajn_alignment_wer_embeddings"
TAG_SPACY_AJN_METRICS_WER_EMBEDDINGS = "tag_spacy_ajn_metrics_wer_embeddings"
TAG_SPACY_AJN_ALIGNMENT_WER_EMBEDDINGS = "tag_spacy_ajn_alignment_wer_embeddings"
# Arguments passed, in this order, to MultiFilesExperimentRepository by
# get_repository().
PIPELINE_DATA_DIRECTORY = "experiment_data/pipeline"
EXPERIMENT_NAME = "asr_benchmark_voicelab_cbiz_testset_20220322"

# Passed as ``relation_manager_root_path`` to VoicelabTelcoRecordProvider by
# get_record_provider().
RELATION_MANAGER_ROOT_PATH = "experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322"
def get_record_provider() -> VoicelabTelcoRecordProvider:
    """Create the record provider for this experiment.

    Returns:
        A ``VoicelabTelcoRecordProvider`` built from an
        ``ExtensionPathFilter`` over ``DATASET_DIRECTORY`` with extension
        ``'wav'``, with ``RELATION_MANAGER_ROOT_PATH`` as its relation
        manager root path.
    """
    wav_path_filter = ExtensionPathFilter(
        root_directory=DATASET_DIRECTORY,
        extension="wav",
    )
    provider = VoicelabTelcoRecordProvider(
        wav_path_filter,
        relation_manager_root_path=RELATION_MANAGER_ROOT_PATH,
    )
    return provider
def get_repository() -> MultiFilesExperimentRepository:
    """Create the experiment repository for this experiment.

    Returns:
        A ``MultiFilesExperimentRepository`` constructed with
        ``PIPELINE_DATA_DIRECTORY`` and ``EXPERIMENT_NAME`` as its two
        positional arguments.
    """
    repository = MultiFilesExperimentRepository(
        PIPELINE_DATA_DIRECTORY,
        EXPERIMENT_NAME,
    )
    return repository