"""Entry point that runs the VoiceLab experiment over cached Google ASR output."""

from experiment.voicelab.voicelab_dependency import (
    get_record_provider,
    GOLD_TRANSCRIPT,
    GOLD_TRANSCRIPT_SPACY,
    get_repository,
    GOOGLE_POLISH_ASR,
    WORD_GOOGLE_METRICS_WER,
    WORD_GOOGLE_ALIGNMENT_WER,
    GOOGLE_SPACY,
    POS_GOOGLE_ALIGNMENT_WER,
    POS_GOOGLE_METRICS_WER,
    WORD_GOOGLE_METRICS_WER_EMBEDDINGS,
    WORD_GOOGLE_ALIGNMENT_WER_EMBEDDINGS,
    NER_SPACY_GOOGLE_METRICS_WER_EMBEDDINGS,
    NER_SPACY_GOOGLE_ALIGNMENT_WER_EMBEDDINGS,
    TAG_SPACY_GOOGLE_ALIGNMENT_WER_EMBEDDINGS,
    TAG_SPACY_GOOGLE_METRICS_WER_EMBEDDINGS,
)
from experiment.pos_processing.asr_spacy_token_pos_processing_task import (
    AsrSpacyTokenPosProcessingTask,
)
from experiment.pos_processing.ner_spacy_wer_processing_task import NerSpacyWerProcessingTask
from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask
from experiment.pos_processing.tag_spacy_wer_processing_task import TagSpacyWerProcessingTask
from sziszapangma.core.transformer.web_embedding_transformer import WebEmbeddingTransformer
from sziszapangma.integration.asr_processor import AsrPathCacheClient
from sziszapangma.integration.experiment_manager import ExperimentManager
from sziszapangma.integration.task.asr_task import AsrTask
from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask
from sziszapangma.integration.task.embedding_wer_metrics_task import EmbeddingWerMetricsTask


def run_voicelab_experiment():
    """Assemble the VoiceLab task pipeline and run it through an ExperimentManager.

    A single record provider (from ``get_record_provider()``) is reused as the
    record iterator, the record path provider, the relation manager provider and
    both provider arguments of the ASR cache client. Seven processing tasks are
    constructed in a fixed order, handed to ``ExperimentManager`` together with
    the repository from ``get_repository()``, and ``process()`` is invoked.

    NOTE(review): several task names carry ``techmo_`` / ``ajn_`` prefixes even
    though every task here reads or writes ``GOOGLE_*`` properties. They look
    copied from sibling experiments; they are kept verbatim because task names
    may be persisted or used as keys elsewhere -- confirm before renaming.
    """
    records = get_record_provider()

    # Step 1: ASR transcription, served through a path-based cache client.
    asr_client = AsrPathCacheClient(
        'experiment_data/cached_asr/voicelab_google_asr',
        records,
        records,
    )
    asr_step = AsrTask(
        task_name='ajn_polish_asr_task',
        asr_processor=asr_client,
        asr_property_name=GOOGLE_POLISH_ASR,
        require_update=False,
        record_path_provider=records,
    )

    # Step 2: classic word-level WER between the ASR output and the gold transcript.
    word_wer_step = ClassicWerMetricTask(
        task_name='techmo_word_wer_processing',
        asr_property_name=GOOGLE_POLISH_ASR,
        gold_transcript_property_name=GOLD_TRANSCRIPT,
        metrics_property_name=WORD_GOOGLE_METRICS_WER,
        require_update=False,
        alignment_property_name=WORD_GOOGLE_ALIGNMENT_WER,
    )

    # Step 3: spaCy processing of the ASR output.
    # NOTE(review): this is the only task created with require_update=True;
    # confirm that this is intentional.
    asr_spacy_step = AsrSpacyTokenPosProcessingTask(
        task_name='techmo_spacy_task',
        input_property_name=GOOGLE_POLISH_ASR,
        spacy_property_name=GOOGLE_SPACY,
        require_update=True,
    )

    # Step 4: POS-level WER from the gold and ASR spaCy properties.
    pos_wer_step = SpacyPosWerProcessingTask(
        task_name='techmo_pos_wer_processing',
        require_update=False,
        gold_transcript_pos_property_name=GOLD_TRANSCRIPT_SPACY,
        asr_pos_property_name=GOOGLE_SPACY,
        pos_alignment_wer=POS_GOOGLE_ALIGNMENT_WER,
        pos_metrics_wer=POS_GOOGLE_METRICS_WER,
    )

    # Step 5: embedding-based WER; the transformer points at a local web service.
    # NOTE(review): the third argument looks like an access token committed to
    # source -- confirm, and consider loading it from configuration instead.
    embedding_transformer = WebEmbeddingTransformer(
        'pl',
        'http://localhost:5003',
        'fjsd-mkwe-oius-m9h2',
    )
    embedding_wer_step = EmbeddingWerMetricsTask(
        task_name='EmbeddingWerMetricsTask',
        asr_property_name=GOOGLE_POLISH_ASR,
        gold_transcript_property_name=GOLD_TRANSCRIPT,
        metrics_property_name=WORD_GOOGLE_METRICS_WER_EMBEDDINGS,
        require_update=False,
        embedding_transformer=embedding_transformer,
        alignment_property_name=WORD_GOOGLE_ALIGNMENT_WER_EMBEDDINGS,
    )

    # Step 6: NER-level WER computed from the raw gold and ASR transcripts.
    ner_wer_step = NerSpacyWerProcessingTask(
        task_name='techmo_ner_wer_processing',
        require_update=False,
        gold_transcript_property_name=GOLD_TRANSCRIPT,
        asr_transcript_property_name=GOOGLE_POLISH_ASR,
        ner_alignment_wer=NER_SPACY_GOOGLE_ALIGNMENT_WER_EMBEDDINGS,
        ner_metrics_wer=NER_SPACY_GOOGLE_METRICS_WER_EMBEDDINGS,
    )

    # Step 7: tag-level WER from the gold and ASR spaCy properties.
    tag_wer_step = TagSpacyWerProcessingTask(
        task_name='ajn_pos_wer_processing',
        require_update=False,
        gold_transcript_pos_property_name=GOLD_TRANSCRIPT_SPACY,
        asr_pos_property_name=GOOGLE_SPACY,
        pos_alignment_wer=TAG_SPACY_GOOGLE_ALIGNMENT_WER_EMBEDDINGS,
        pos_metrics_wer=TAG_SPACY_GOOGLE_METRICS_WER_EMBEDDINGS,
    )

    # The list order below is the order in which the tasks were declared.
    pipeline = [
        asr_step,
        word_wer_step,
        asr_spacy_step,
        pos_wer_step,
        embedding_wer_step,
        ner_wer_step,
        tag_wer_step,
    ]

    manager = ExperimentManager(
        record_id_iterator=records,
        processing_tasks=pipeline,
        experiment_repository=get_repository(),
        relation_manager_provider=records,
    )
    manager.process()


def example_run():
    """Run the VoiceLab experiment; used as the script entry point."""
    run_voicelab_experiment()


if __name__ == '__main__':
    example_run()