# DVC pipeline for the ASR benchmark experiments.
#
# Flow, as declared by the deps/outs below:
#   dataset dir -> relation manager data -> gold transcript
#   dataset dir + cached ASR output      -> <asr>__result
#   gold transcript + <asr>__result      -> per-metric alignment / metrics dirs
#
# Every `foreach` below iterates over a LIST, so DVC derives the generated
# stage names from the list position (`<stage>@0`, `<stage>@1`, ...).
# Inserting or removing an entry in the middle of a list therefore renames
# all following stages; prefer appending at the end of a list.
stages:
  # Runs the pl_google_fleurs wav export script; declares the dataset
  # directory as its output.
  save_audio_files:
    cmd: PYTHONPATH=. python experiment/dataset_specific/pl_google_fleurs/pipeline_save_wav_files.py
    deps:
      - experiment/dataset_specific/pl_google_fleurs/pipeline_save_wav_files.py
    outs:
      - experiment_data/dataset/pl_google_fleurs

  # Runs each dataset's own import_relation_manager.py script.
  pipeline_prepare_relation_manager:
    foreach:
      - dataset: pl_common_voice
      - dataset: pl_google_fleurs
      - dataset: pl_luna
      - dataset: pl_minds14
      - dataset: pl_voicelab_cbiz
    do:
      cmd: PYTHONPATH=. python experiment/dataset_specific/${item.dataset}/import_relation_manager.py
      deps:
        - experiment/dataset_specific/${item.dataset}/import_relation_manager.py
        - experiment_data/dataset/${item.dataset}
      outs:
        - experiment_data/dataset_relation_manager_data/${item.dataset}

  # Produces the gold (reference) transcript directory for each dataset.
  pipeline_gold_transcript:
    foreach:
      - dataset: pl_common_voice
      - dataset: pl_google_fleurs
      - dataset: pl_luna
      - dataset: pl_minds14
      - dataset: pl_voicelab_cbiz
    do:
      cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=${item.dataset}
      deps:
        - experiment/pipeline_process_gold_transcript.py
        - experiment_data/dataset/${item.dataset}
        - experiment_data/dataset_relation_manager_data/${item.dataset}
      outs:
        - experiment_data/pipeline/${item.dataset}/gold_transcript

  # Produces <asr>__result for each (dataset, asr) pair from the cached ASR
  # output directory.
  # NOTE(review): pl_voicelab_cbiz has no wav2vec2 entry in any stage of this
  # file - presumably intentional; confirm.
  pipeline_asr_result:
    foreach:
      - dataset: pl_common_voice
        asr: google
      - dataset: pl_common_voice
        asr: wav2vec2
      - dataset: pl_common_voice
        asr: techmo
      - dataset: pl_common_voice
        asr: ajn
      - dataset: pl_common_voice
        asr: whisper_tiny
      - dataset: pl_voicelab_cbiz
        asr: google
      - dataset: pl_voicelab_cbiz
        asr: ajn
      - dataset: pl_voicelab_cbiz
        asr: techmo
      # NOTE(review): this pair is produced here, but no downstream metric
      # stage in this file consumes pl_voicelab_cbiz / whisper_tiny.
      - dataset: pl_voicelab_cbiz
        asr: whisper_tiny
      - dataset: pl_google_fleurs
        asr: google
      - dataset: pl_google_fleurs
        asr: ajn
      - dataset: pl_google_fleurs
        asr: techmo
      - dataset: pl_google_fleurs
        asr: wav2vec2
      - dataset: pl_google_fleurs
        asr: whisper_tiny
      - dataset: pl_luna
        asr: google
      - dataset: pl_luna
        asr: ajn
      - dataset: pl_luna
        asr: techmo
      - dataset: pl_luna
        asr: wav2vec2
      - dataset: pl_luna
        asr: whisper_tiny
      - dataset: pl_minds14
        asr: google
      - dataset: pl_minds14
        asr: ajn
      - dataset: pl_minds14
        asr: techmo
      - dataset: pl_minds14
        asr: wav2vec2
      - dataset: pl_minds14
        asr: whisper_tiny
    do:
      cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=${item.dataset} --asr=${item.asr}
      deps:
        - experiment/pipeline_process_asr.py
        - experiment_data/dataset/${item.dataset}
        - experiment_data/cached_asr/${item.dataset}/${item.dataset}__${item.asr}
      outs:
        - experiment_data/pipeline/${item.dataset}/${item.asr}__result

  # Word-level WER for each (dataset, asr) pair.
  pipeline_word_wer:
    foreach:
      - dataset: pl_common_voice
        asr: google
      - dataset: pl_common_voice
        asr: wav2vec2
      - dataset: pl_common_voice
        asr: techmo
      - dataset: pl_common_voice
        asr: ajn
      - dataset: pl_common_voice
        asr: whisper_tiny
      - dataset: pl_voicelab_cbiz
        asr: google
      - dataset: pl_voicelab_cbiz
        asr: ajn
      - dataset: pl_voicelab_cbiz
        asr: techmo
      - dataset: pl_google_fleurs
        asr: google
      - dataset: pl_google_fleurs
        asr: ajn
      - dataset: pl_google_fleurs
        asr: techmo
      - dataset: pl_google_fleurs
        asr: wav2vec2
      - dataset: pl_google_fleurs
        asr: whisper_tiny
      - dataset: pl_luna
        asr: google
      - dataset: pl_luna
        asr: ajn
      - dataset: pl_luna
        asr: techmo
      - dataset: pl_luna
        asr: wav2vec2
      - dataset: pl_luna
        asr: whisper_tiny
      - dataset: pl_minds14
        asr: google
      - dataset: pl_minds14
        asr: ajn
      - dataset: pl_minds14
        asr: techmo
      - dataset: pl_minds14
        asr: wav2vec2
      - dataset: pl_minds14
        asr: whisper_tiny
    do:
      cmd: PYTHONPATH=. python experiment/pipeline_process_word_classic_wer.py --dataset=${item.dataset} --asr=${item.asr}
      deps:
        - experiment/pipeline_process_word_classic_wer.py
        - experiment_data/dataset/${item.dataset}
        - experiment_data/pipeline/${item.dataset}/gold_transcript
        - experiment_data/pipeline/${item.dataset}/${item.asr}__result
      # NOTE(review): the "embeddings" outputs are declared for a command
      # named "classic" - confirm the script really writes all four dirs.
      outs:
        - experiment_data/pipeline/${item.dataset}/${item.asr}__word_wer_classic_metrics
        - experiment_data/pipeline/${item.dataset}/${item.asr}__word_wer_classic_alignment
        - experiment_data/pipeline/${item.dataset}/${item.asr}__word_wer_embeddings_metrics
        - experiment_data/pipeline/${item.dataset}/${item.asr}__word_wer_embeddings_alignment

  # spaCy POS-based WER for each (dataset, asr) pair.
  # NOTE(review): pl_luna / wav2vec2 is absent here although the sibling
  # metric stages include it - confirm whether that is deliberate.
  pipeline_spacy_pos_wer:
    foreach:
      - dataset: pl_common_voice
        asr: google
      - dataset: pl_common_voice
        asr: wav2vec2
      - dataset: pl_common_voice
        asr: techmo
      - dataset: pl_common_voice
        asr: ajn
      - dataset: pl_common_voice
        asr: whisper_tiny
      - dataset: pl_voicelab_cbiz
        asr: google
      - dataset: pl_voicelab_cbiz
        asr: ajn
      - dataset: pl_voicelab_cbiz
        asr: techmo
      - dataset: pl_google_fleurs
        asr: google
      - dataset: pl_google_fleurs
        asr: ajn
      - dataset: pl_google_fleurs
        asr: techmo
      - dataset: pl_google_fleurs
        asr: wav2vec2
      - dataset: pl_google_fleurs
        asr: whisper_tiny
      - dataset: pl_luna
        asr: google
      - dataset: pl_luna
        asr: ajn
      - dataset: pl_luna
        asr: techmo
      - dataset: pl_luna
        asr: whisper_tiny
      - dataset: pl_minds14
        asr: google
      - dataset: pl_minds14
        asr: ajn
      - dataset: pl_minds14
        asr: techmo
      - dataset: pl_minds14
        asr: wav2vec2
      - dataset: pl_minds14
        asr: whisper_tiny
    do:
      cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=${item.dataset} --asr=${item.asr}
      deps:
        - experiment/pipeline_process_spacy_pos_wer.py
        - experiment_data/dataset/${item.dataset}
        - experiment_data/pipeline/${item.dataset}/gold_transcript
        - experiment_data/pipeline/${item.dataset}/${item.asr}__result
      outs:
        - experiment_data/pipeline/${item.dataset}/${item.asr}__spacy_pos_alignment
        - experiment_data/pipeline/${item.dataset}/${item.asr}__spacy_pos_metrics

  # spaCy NER-based WER for each (dataset, asr) pair.
  pipeline_spacy_ner_wer:
    foreach:
      - dataset: pl_common_voice
        asr: google
      - dataset: pl_common_voice
        asr: wav2vec2
      - dataset: pl_common_voice
        asr: techmo
      - dataset: pl_common_voice
        asr: ajn
      - dataset: pl_common_voice
        asr: whisper_tiny
      - dataset: pl_voicelab_cbiz
        asr: google
      - dataset: pl_voicelab_cbiz
        asr: ajn
      - dataset: pl_voicelab_cbiz
        asr: techmo
      - dataset: pl_google_fleurs
        asr: google
      - dataset: pl_google_fleurs
        asr: ajn
      - dataset: pl_google_fleurs
        asr: techmo
      - dataset: pl_google_fleurs
        asr: wav2vec2
      - dataset: pl_google_fleurs
        asr: whisper_tiny
      - dataset: pl_luna
        asr: google
      - dataset: pl_luna
        asr: ajn
      - dataset: pl_luna
        asr: techmo
      - dataset: pl_luna
        asr: wav2vec2
      - dataset: pl_luna
        asr: whisper_tiny
      - dataset: pl_minds14
        asr: google
      - dataset: pl_minds14
        asr: ajn
      - dataset: pl_minds14
        asr: techmo
      - dataset: pl_minds14
        asr: wav2vec2
      - dataset: pl_minds14
        asr: whisper_tiny
    do:
      cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=${item.dataset} --asr=${item.asr}
      deps:
        # Track the script that `cmd` actually runs, so that editing the NER
        # script invalidates this stage.
        - experiment/pipeline_process_spacy_ner_wer.py
        - experiment_data/dataset/${item.dataset}
        - experiment_data/pipeline/${item.dataset}/gold_transcript
        - experiment_data/pipeline/${item.dataset}/${item.asr}__result
      outs:
        - experiment_data/pipeline/${item.dataset}/${item.asr}__spacy_ner_alignment
        - experiment_data/pipeline/${item.dataset}/${item.asr}__spacy_ner_metrics

  # WikiNEuRal NER metrics for each (dataset, asr) pair.
  # NOTE(review): the stage name is spelled "wikineiural" while the script and
  # outputs use "wikineural". The name is kept because it is a `dvc repro`
  # target and a dvc.lock key; rename only together with the lock file.
  pipeline_wikineiural_ner:
    foreach:
      - dataset: pl_common_voice
        asr: google
      - dataset: pl_common_voice
        asr: wav2vec2
      - dataset: pl_common_voice
        asr: techmo
      - dataset: pl_common_voice
        asr: ajn
      - dataset: pl_common_voice
        asr: whisper_tiny
      - dataset: pl_voicelab_cbiz
        asr: google
      - dataset: pl_voicelab_cbiz
        asr: ajn
      - dataset: pl_voicelab_cbiz
        asr: techmo
      - dataset: pl_google_fleurs
        asr: google
      - dataset: pl_google_fleurs
        asr: ajn
      - dataset: pl_google_fleurs
        asr: techmo
      - dataset: pl_google_fleurs
        asr: wav2vec2
      - dataset: pl_google_fleurs
        asr: whisper_tiny
      - dataset: pl_luna
        asr: google
      - dataset: pl_luna
        asr: ajn
      - dataset: pl_luna
        asr: techmo
      - dataset: pl_luna
        asr: wav2vec2
      - dataset: pl_luna
        asr: whisper_tiny
      - dataset: pl_minds14
        asr: google
      - dataset: pl_minds14
        asr: ajn
      - dataset: pl_minds14
        asr: techmo
      - dataset: pl_minds14
        asr: wav2vec2
      - dataset: pl_minds14
        asr: whisper_tiny
    do:
      cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=${item.dataset} --asr=${item.asr}
      deps:
        - experiment/pipeline_process_wikineural_ner.py
        - experiment_data/dataset/${item.dataset}
        - experiment_data/pipeline/${item.dataset}/gold_transcript
        - experiment_data/pipeline/${item.dataset}/${item.asr}__result
      outs:
        - experiment_data/pipeline/${item.dataset}/${item.asr}__wikineural_ner_alignment
        - experiment_data/pipeline/${item.dataset}/${item.asr}__wikineural_ner_metrics

  # Flair UPOS metrics for each (dataset, asr) pair.
  pipeline_flair_upos:
    foreach:
      - dataset: pl_common_voice
        asr: google
      - dataset: pl_common_voice
        asr: wav2vec2
      - dataset: pl_common_voice
        asr: techmo
      - dataset: pl_common_voice
        asr: ajn
      - dataset: pl_common_voice
        asr: whisper_tiny
      - dataset: pl_voicelab_cbiz
        asr: google
      - dataset: pl_voicelab_cbiz
        asr: ajn
      - dataset: pl_voicelab_cbiz
        asr: techmo
      - dataset: pl_google_fleurs
        asr: google
      - dataset: pl_google_fleurs
        asr: ajn
      - dataset: pl_google_fleurs
        asr: techmo
      - dataset: pl_google_fleurs
        asr: wav2vec2
      - dataset: pl_google_fleurs
        asr: whisper_tiny
      - dataset: pl_luna
        asr: google
      - dataset: pl_luna
        asr: ajn
      - dataset: pl_luna
        asr: techmo
      - dataset: pl_luna
        asr: wav2vec2
      - dataset: pl_luna
        asr: whisper_tiny
      - dataset: pl_minds14
        asr: google
      - dataset: pl_minds14
        asr: ajn
      - dataset: pl_minds14
        asr: techmo
      - dataset: pl_minds14
        asr: wav2vec2
      - dataset: pl_minds14
        asr: whisper_tiny
    do:
      cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=${item.dataset} --asr=${item.asr}
      deps:
        - experiment/pipeline_process_flair_upos.py
        - experiment_data/dataset/${item.dataset}
        - experiment_data/pipeline/${item.dataset}/gold_transcript
        - experiment_data/pipeline/${item.dataset}/${item.asr}__result
      outs:
        - experiment_data/pipeline/${item.dataset}/${item.asr}__flair_upos_alignment
        - experiment_data/pipeline/${item.dataset}/${item.asr}__flair_upos_metrics

  # spaCy dependency-tag WER for each (dataset, asr) pair.
  # The pl_voicelab_cbiz entries are disabled (commented out) for this stage.
  pipeline_spacy_tag_wer:
    foreach:
      - dataset: pl_common_voice
        asr: google
      - dataset: pl_common_voice
        asr: wav2vec2
      - dataset: pl_common_voice
        asr: techmo
      - dataset: pl_common_voice
        asr: ajn
      - dataset: pl_common_voice
        asr: whisper_tiny
      # - dataset: pl_voicelab_cbiz
      #   asr: google
      # - dataset: pl_voicelab_cbiz
      #   asr: ajn
      # - dataset: pl_voicelab_cbiz
      #   asr: techmo
      - dataset: pl_google_fleurs
        asr: google
      - dataset: pl_google_fleurs
        asr: ajn
      - dataset: pl_google_fleurs
        asr: techmo
      - dataset: pl_google_fleurs
        asr: wav2vec2
      - dataset: pl_google_fleurs
        asr: whisper_tiny
      - dataset: pl_luna
        asr: google
      - dataset: pl_luna
        asr: ajn
      - dataset: pl_luna
        asr: techmo
      - dataset: pl_luna
        asr: wav2vec2
      - dataset: pl_luna
        asr: whisper_tiny
      - dataset: pl_minds14
        asr: google
      - dataset: pl_minds14
        asr: ajn
      - dataset: pl_minds14
        asr: techmo
      - dataset: pl_minds14
        asr: wav2vec2
      - dataset: pl_minds14
        asr: whisper_tiny
    do:
      cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=${item.dataset} --asr=${item.asr}
      deps:
        - experiment/pipeline_process_spacy_dep_tag_wer.py
        - experiment_data/dataset/${item.dataset}
        - experiment_data/pipeline/${item.dataset}/gold_transcript
        - experiment_data/pipeline/${item.dataset}/${item.asr}__result
      outs:
        - experiment_data/pipeline/${item.dataset}/${item.asr}__spacy_dep_tag_alignment
        - experiment_data/pipeline/${item.dataset}/${item.asr}__spacy_dep_tag_metrics