# Marcin Wątroba authored 4d5a8bb0
# voicelab_dependency.py 3.33 KiB
from experiment.dataset_specific.pl_voicelab_cbiz.voicelab_telco_record_provider import VoicelabTelcoRecordProvider
from sziszapangma.integration.path_filter import ExtensionPathFilter
from sziszapangma.integration.repository.multi_files_experiment_repository import MultiFilesExperimentRepository
# Shared string constants for the pl_voicelab_cbiz experiment.
# NOTE(review): the names below look like per-record property/task keys
# stored in the experiment repository -- confirm against the pipeline code.

# Root directory handed to the path filter in get_record_provider().
DATASET_DIRECTORY = 'experiment_data/dataset/pl_voicelab_cbiz'

# Reference ("gold") transcript keys.
GOLD_TRANSCRIPT = 'gold_transcript'
GOLD_TRANSCRIPT_SPACY = 'gold_transcript_spacy'

# Keys for the "techmo" variant.
TECHMO_POLISH_ASR = 'techmo_polish_asr'
WORD_TECHMO_METRICS_WER = 'word_techmo_metrics_wer'
WORD_TECHMO_ALIGNMENT_WER = 'word_techmo_alignment_wer'
TECHMO_SPACY = 'techmo_spacy'
POS_TECHMO_ALIGNMENT_WER = 'pos_techmo_alignment_wer'
POS_TECHMO_METRICS_WER = 'pos_techmo_metrics_wer'
WORD_TECHMO_METRICS_WER_EMBEDDINGS = 'word_techmo_metrics_wer_embeddings'
WORD_TECHMO_ALIGNMENT_WER_EMBEDDINGS = 'word_techmo_alignment_wer_embeddings'
TAG_SPACY_TECHMO_METRICS_WER_EMBEDDINGS = 'tag_spacy_techmo_metrics_wer_embeddings'
TAG_SPACY_TECHMO_ALIGNMENT_WER_EMBEDDINGS = 'tag_spacy_techmo_alignment_wer_embeddings'
NER_SPACY_TECHMO_METRICS_WER_EMBEDDINGS = 'ner_spacy_techmo_metrics_wer_embeddings'
NER_SPACY_TECHMO_ALIGNMENT_WER_EMBEDDINGS = 'ner_spacy_techmo_alignment_wer_embeddings'

# Keys for the "ajn" variant.
AJN_POLISH_ASR = 'ajn_polish_asr'
WORD_AJN_METRICS_WER = 'word_ajn_metrics_wer'
WORD_AJN_ALIGNMENT_WER = 'word_ajn_alignment_wer'
AJN_SPACY = 'ajn_spacy'
POS_AJN_ALIGNMENT_WER = 'pos_ajn_alignment_wer'
POS_AJN_METRICS_WER = 'pos_ajn_metrics_wer'
WORD_AJN_METRICS_WER_EMBEDDINGS = 'word_ajn_metrics_wer_embeddings'
WORD_AJN_ALIGNMENT_WER_EMBEDDINGS = 'word_ajn_alignment_wer_embeddings'
TAG_SPACY_AJN_METRICS_WER_EMBEDDINGS = 'tag_spacy_ajn_metrics_wer_embeddings'
TAG_SPACY_AJN_ALIGNMENT_WER_EMBEDDINGS = 'tag_spacy_ajn_alignment_wer_embeddings'
NER_SPACY_AJN_METRICS_WER_EMBEDDINGS = 'ner_spacy_ajn_metrics_wer_embeddings'
NER_SPACY_AJN_ALIGNMENT_WER_EMBEDDINGS = 'ner_spacy_ajn_alignment_wer_embeddings'

# Keys for the "google" variant. Written as plain lower-case literals (rather
# than upper-case literals passed through .lower()) so they match the groups
# above and can be found with a text search; the values are unchanged.
GOOGLE_POLISH_ASR = 'google_polish_asr'
WORD_GOOGLE_METRICS_WER = 'word_google_metrics_wer'
WORD_GOOGLE_ALIGNMENT_WER = 'word_google_alignment_wer'
GOOGLE_SPACY = 'google_spacy'
POS_GOOGLE_ALIGNMENT_WER = 'pos_google_alignment_wer'
POS_GOOGLE_METRICS_WER = 'pos_google_metrics_wer'
WORD_GOOGLE_METRICS_WER_EMBEDDINGS = 'word_google_metrics_wer_embeddings'
WORD_GOOGLE_ALIGNMENT_WER_EMBEDDINGS = 'word_google_alignment_wer_embeddings'
NER_SPACY_GOOGLE_METRICS_WER_EMBEDDINGS = 'ner_spacy_google_metrics_wer_embeddings'
NER_SPACY_GOOGLE_ALIGNMENT_WER_EMBEDDINGS = 'ner_spacy_google_alignment_wer_embeddings'
TAG_SPACY_GOOGLE_ALIGNMENT_WER_EMBEDDINGS = 'tag_spacy_google_alignment_wer_embeddings'
TAG_SPACY_GOOGLE_METRICS_WER_EMBEDDINGS = 'tag_spacy_google_metrics_wer_embeddings'

# Arguments passed to MultiFilesExperimentRepository in get_repository().
PIPELINE_DATA_DIRECTORY = 'experiment_data/pipeline'
EXPERIMENT_NAME = 'pl_voicelab_cbiz'

# Passed as relation_manager_root_path in get_record_provider().
RELATION_MANAGER_ROOT_PATH = 'experiment_data/dataset_relation_manager_data/pl_voicelab_cbiz'
def get_record_provider() -> VoicelabTelcoRecordProvider:
    """Build the record provider for the pl_voicelab_cbiz dataset.

    An ``ExtensionPathFilter`` rooted at ``DATASET_DIRECTORY`` with the
    ``'wav'`` extension is handed to ``VoicelabTelcoRecordProvider``
    together with ``RELATION_MANAGER_ROOT_PATH``.

    Returns:
        A newly constructed ``VoicelabTelcoRecordProvider``.
    """
    wav_path_filter = ExtensionPathFilter(
        root_directory=DATASET_DIRECTORY,
        extension='wav',
    )
    return VoicelabTelcoRecordProvider(
        wav_path_filter,
        relation_manager_root_path=RELATION_MANAGER_ROOT_PATH,
    )
def get_repository() -> MultiFilesExperimentRepository:
    """Create the experiment repository for the pl_voicelab_cbiz experiment.

    Returns:
        A new ``MultiFilesExperimentRepository`` constructed from
        ``PIPELINE_DATA_DIRECTORY`` and ``EXPERIMENT_NAME`` (in that order).
    """
    repository = MultiFilesExperimentRepository(
        PIPELINE_DATA_DIRECTORY,
        EXPERIMENT_NAME,
    )
    return repository