# DVC pipeline stage definitions (stray "Newer"/"Older" page-navigation text removed).
# Runs the pl_google_fleurs pipeline_save_wav_files.py script.
# The script itself is tracked as a dependency so that editing it
# invalidates the stage.
# NOTE(review): no `outs` are visible for this stage; confirm whether the
# script's output location should be declared here.
save_audio_files:
  cmd: >-
    PYTHONPATH=. python
    experiment/dataset_specific/pl_google_fleurs/pipeline_save_wav_files.py
  deps:
    - experiment/dataset_specific/pl_google_fleurs/pipeline_save_wav_files.py
# Runs each dataset's own import_relation_manager.py script, reading the
# dataset directory and writing that dataset's relation-manager data.
# NOTE(review): DVC appears to name foreach instances of mapping items by
# list position, so keep the entry order stable - confirm before reordering.
pipeline_prepare_relation_manager:
  foreach:
    - {dataset: pl_common_voice}
    - {dataset: pl_google_fleurs}
    - {dataset: pl_luna}
    - {dataset: pl_minds14}
    - {dataset: pl_voicelab_cbiz}
  do:
    cmd: >-
      PYTHONPATH=. python
      experiment/dataset_specific/${item.dataset}/import_relation_manager.py
    deps:
      - experiment/dataset_specific/${item.dataset}/import_relation_manager.py
      - experiment_data/dataset/${item.dataset}
    outs:
      - experiment_data/dataset_relation_manager_data/${item.dataset}
# Runs pipeline_process_gold_transcript.py once per dataset. Each instance
# depends on the dataset directory and its relation-manager data, and writes
# the dataset's gold_transcript output under experiment_data/pipeline.
# NOTE(review): DVC appears to name foreach instances of mapping items by
# list position, so keep the entry order stable - confirm before reordering.
pipeline_gold_transcript:
  foreach:
    - {dataset: pl_common_voice}
    - {dataset: pl_google_fleurs}
    - {dataset: pl_luna}
    - {dataset: pl_minds14}
    - {dataset: pl_voicelab_cbiz}
  do:
    cmd: >-
      PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py
      --dataset=${item.dataset}
    deps:
      - experiment/pipeline_process_gold_transcript.py
      - experiment_data/dataset/${item.dataset}
      - experiment_data/dataset_relation_manager_data/${item.dataset}
    outs:
      - experiment_data/pipeline/${item.dataset}/gold_transcript
# Runs pipeline_process_asr.py for every listed (dataset, asr) pair. Each
# instance reads the dataset directory plus the cached ASR data for that
# pair and writes the pair's `<asr>__result` output.
# One entry per line; the set of pairs is intentionally not a full grid
# (e.g. there is no pl_voicelab_cbiz/wav2vec2 or pl_luna/whisper_tiny entry).
# NOTE(review): DVC appears to name foreach instances of mapping items by
# list position, so keep the entry order stable - confirm before reordering.
pipeline_asr_result:
  foreach:
    - {dataset: pl_common_voice, asr: google}
    - {dataset: pl_common_voice, asr: wav2vec2}
    - {dataset: pl_common_voice, asr: techmo}
    - {dataset: pl_common_voice, asr: ajn}
    - {dataset: pl_common_voice, asr: whisper_tiny}
    - {dataset: pl_voicelab_cbiz, asr: google}
    - {dataset: pl_voicelab_cbiz, asr: ajn}
    - {dataset: pl_voicelab_cbiz, asr: techmo}
    - {dataset: pl_voicelab_cbiz, asr: whisper_tiny}
    - {dataset: pl_google_fleurs, asr: google}
    - {dataset: pl_google_fleurs, asr: ajn}
    - {dataset: pl_google_fleurs, asr: techmo}
    - {dataset: pl_google_fleurs, asr: wav2vec2}
    - {dataset: pl_google_fleurs, asr: whisper_tiny}
    - {dataset: pl_luna, asr: google}
    - {dataset: pl_luna, asr: ajn}
    - {dataset: pl_luna, asr: techmo}
    - {dataset: pl_luna, asr: wav2vec2}
    - {dataset: pl_minds14, asr: google}
    - {dataset: pl_minds14, asr: ajn}
    - {dataset: pl_minds14, asr: techmo}
    - {dataset: pl_minds14, asr: wav2vec2}
    - {dataset: pl_minds14, asr: whisper_tiny}
  do:
    cmd: >-
      PYTHONPATH=. python experiment/pipeline_process_asr.py
      --dataset=${item.dataset} --asr=${item.asr}
    deps:
      - experiment/pipeline_process_asr.py
      - experiment_data/dataset/${item.dataset}
      - experiment_data/cached_asr/${item.dataset}/${item.dataset}__${item.asr}
    outs:
      - experiment_data/pipeline/${item.dataset}/${item.asr}__result
# Runs pipeline_process_word_classic_wer.py for every listed (dataset, asr)
# pair. Each instance reads the dataset directory, the dataset's
# gold_transcript and the pair's `<asr>__result`, and declares four outputs
# (classic and embeddings variants of the word WER metrics and alignment).
# The `do:` and `deps:` keys are required by the foreach stage layout, and
# the script is listed as a dependency so that editing it invalidates the
# stage, matching the other stages in this file.
# NOTE(review): the embeddings outputs are declared here although the command
# is the "classic" script - confirm that this script really writes them.
# NOTE(review): DVC appears to name foreach instances of mapping items by
# list position, so keep the entry order stable - confirm before reordering.
pipeline_word_wer:
  foreach:
    - {dataset: pl_common_voice, asr: google}
    - {dataset: pl_common_voice, asr: wav2vec2}
    - {dataset: pl_common_voice, asr: techmo}
    - {dataset: pl_common_voice, asr: whisper_tiny}
    - {dataset: pl_voicelab_cbiz, asr: google}
    - {dataset: pl_voicelab_cbiz, asr: ajn}
    - {dataset: pl_voicelab_cbiz, asr: techmo}
    - {dataset: pl_google_fleurs, asr: google}
    - {dataset: pl_google_fleurs, asr: ajn}
    - {dataset: pl_google_fleurs, asr: techmo}
    - {dataset: pl_google_fleurs, asr: wav2vec2}
    - {dataset: pl_google_fleurs, asr: whisper_tiny}
    - {dataset: pl_luna, asr: google}
    - {dataset: pl_luna, asr: ajn}
    - {dataset: pl_luna, asr: techmo}
    - {dataset: pl_luna, asr: wav2vec2}
    - {dataset: pl_minds14, asr: google}
    - {dataset: pl_minds14, asr: ajn}
    - {dataset: pl_minds14, asr: techmo}
    - {dataset: pl_minds14, asr: wav2vec2}
    - {dataset: pl_minds14, asr: whisper_tiny}
  do:
    cmd: >-
      PYTHONPATH=. python experiment/pipeline_process_word_classic_wer.py
      --dataset=${item.dataset} --asr=${item.asr}
    deps:
      - experiment/pipeline_process_word_classic_wer.py
      - experiment_data/dataset/${item.dataset}
      - experiment_data/pipeline/${item.dataset}/gold_transcript
      - experiment_data/pipeline/${item.dataset}/${item.asr}__result
    outs:
      - experiment_data/pipeline/${item.dataset}/${item.asr}__word_wer_classic_metrics
      - experiment_data/pipeline/${item.dataset}/${item.asr}__word_wer_classic_alignment
      - experiment_data/pipeline/${item.dataset}/${item.asr}__word_wer_embeddings_metrics
      - experiment_data/pipeline/${item.dataset}/${item.asr}__word_wer_embeddings_alignment
# Runs pipeline_process_spacy_pos_wer.py for every listed (dataset, asr)
# pair. Each instance reads the dataset directory, the dataset's
# gold_transcript and the pair's `<asr>__result`, and writes the pair's
# spacy_pos alignment and metrics outputs.
# Every item must define both `dataset` and `asr`, because the command and
# all paths interpolate `${item.asr}`; the fourth pl_luna entry therefore
# carries `asr: wav2vec2`, the same pair the other WER stages list there.
# NOTE(review): DVC appears to name foreach instances of mapping items by
# list position, so keep the entry order stable - confirm before reordering.
pipeline_spacy_pos_wer:
  foreach:
    - {dataset: pl_common_voice, asr: google}
    - {dataset: pl_common_voice, asr: wav2vec2}
    - {dataset: pl_common_voice, asr: techmo}
    - {dataset: pl_common_voice, asr: whisper_tiny}
    - {dataset: pl_voicelab_cbiz, asr: google}
    - {dataset: pl_voicelab_cbiz, asr: ajn}
    - {dataset: pl_voicelab_cbiz, asr: techmo}
    - {dataset: pl_google_fleurs, asr: google}
    - {dataset: pl_google_fleurs, asr: ajn}
    - {dataset: pl_google_fleurs, asr: techmo}
    - {dataset: pl_google_fleurs, asr: wav2vec2}
    - {dataset: pl_google_fleurs, asr: whisper_tiny}
    - {dataset: pl_luna, asr: google}
    - {dataset: pl_luna, asr: ajn}
    - {dataset: pl_luna, asr: techmo}
    - {dataset: pl_luna, asr: wav2vec2}
    - {dataset: pl_minds14, asr: google}
    - {dataset: pl_minds14, asr: ajn}
    - {dataset: pl_minds14, asr: techmo}
    - {dataset: pl_minds14, asr: wav2vec2}
    - {dataset: pl_minds14, asr: whisper_tiny}
  do:
    cmd: >-
      PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py
      --dataset=${item.dataset} --asr=${item.asr}
    deps:
      - experiment/pipeline_process_spacy_pos_wer.py
      - experiment_data/dataset/${item.dataset}
      - experiment_data/pipeline/${item.dataset}/gold_transcript
      - experiment_data/pipeline/${item.dataset}/${item.asr}__result
    outs:
      - experiment_data/pipeline/${item.dataset}/${item.asr}__spacy_pos_alignment
      - experiment_data/pipeline/${item.dataset}/${item.asr}__spacy_pos_metrics
# Runs pipeline_process_spacy_ner_wer.py for every listed (dataset, asr)
# pair. Each instance reads the dataset directory, the dataset's
# gold_transcript and the pair's `<asr>__result`, and writes the pair's
# spacy_ner alignment and metrics outputs.
# The tracked script dependency is the NER script that `cmd` actually runs,
# so that editing it invalidates this stage.
# NOTE(review): DVC appears to name foreach instances of mapping items by
# list position, so keep the entry order stable - confirm before reordering.
pipeline_spacy_ner_wer:
  foreach:
    - {dataset: pl_common_voice, asr: google}
    - {dataset: pl_common_voice, asr: wav2vec2}
    - {dataset: pl_common_voice, asr: techmo}
    - {dataset: pl_common_voice, asr: whisper_tiny}
    - {dataset: pl_voicelab_cbiz, asr: google}
    - {dataset: pl_voicelab_cbiz, asr: ajn}
    - {dataset: pl_voicelab_cbiz, asr: techmo}
    - {dataset: pl_google_fleurs, asr: google}
    - {dataset: pl_google_fleurs, asr: ajn}
    - {dataset: pl_google_fleurs, asr: techmo}
    - {dataset: pl_google_fleurs, asr: wav2vec2}
    - {dataset: pl_google_fleurs, asr: whisper_tiny}
    - {dataset: pl_luna, asr: google}
    - {dataset: pl_luna, asr: ajn}
    - {dataset: pl_luna, asr: techmo}
    - {dataset: pl_luna, asr: wav2vec2}
    - {dataset: pl_minds14, asr: google}
    - {dataset: pl_minds14, asr: ajn}
    - {dataset: pl_minds14, asr: techmo}
    - {dataset: pl_minds14, asr: wav2vec2}
    - {dataset: pl_minds14, asr: whisper_tiny}
  do:
    cmd: >-
      PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py
      --dataset=${item.dataset} --asr=${item.asr}
    deps:
      - experiment/pipeline_process_spacy_ner_wer.py
      - experiment_data/dataset/${item.dataset}
      - experiment_data/pipeline/${item.dataset}/gold_transcript
      - experiment_data/pipeline/${item.dataset}/${item.asr}__result
    outs:
      - experiment_data/pipeline/${item.dataset}/${item.asr}__spacy_ner_alignment
      - experiment_data/pipeline/${item.dataset}/${item.asr}__spacy_ner_metrics
# Runs pipeline_process_wikineural_ner.py for every listed (dataset, asr)
# pair. Each instance reads the dataset directory, the dataset's
# gold_transcript and the pair's `<asr>__result`, and writes the pair's
# wikineural_ner alignment and metrics outputs.
# NOTE(review): the stage key is spelled "wikineiural" while the script and
# outputs use "wikineural"; the key is left unchanged because it is the name
# the stage is addressed by.
# NOTE(review): DVC appears to name foreach instances of mapping items by
# list position, so keep the entry order stable - confirm before reordering.
pipeline_wikineiural_ner:
  foreach:
    - {dataset: pl_common_voice, asr: google}
    - {dataset: pl_common_voice, asr: wav2vec2}
    - {dataset: pl_common_voice, asr: techmo}
    - {dataset: pl_common_voice, asr: whisper_tiny}
    - {dataset: pl_voicelab_cbiz, asr: google}
    - {dataset: pl_voicelab_cbiz, asr: ajn}
    - {dataset: pl_voicelab_cbiz, asr: techmo}
    - {dataset: pl_google_fleurs, asr: google}
    - {dataset: pl_google_fleurs, asr: ajn}
    - {dataset: pl_google_fleurs, asr: techmo}
    - {dataset: pl_google_fleurs, asr: wav2vec2}
    - {dataset: pl_google_fleurs, asr: whisper_tiny}
    - {dataset: pl_luna, asr: google}
    - {dataset: pl_luna, asr: ajn}
    - {dataset: pl_luna, asr: techmo}
    - {dataset: pl_luna, asr: wav2vec2}
    - {dataset: pl_minds14, asr: google}
    - {dataset: pl_minds14, asr: ajn}
    - {dataset: pl_minds14, asr: techmo}
    - {dataset: pl_minds14, asr: wav2vec2}
    - {dataset: pl_minds14, asr: whisper_tiny}
  do:
    cmd: >-
      PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py
      --dataset=${item.dataset} --asr=${item.asr}
    deps:
      - experiment/pipeline_process_wikineural_ner.py
      - experiment_data/dataset/${item.dataset}
      - experiment_data/pipeline/${item.dataset}/gold_transcript
      - experiment_data/pipeline/${item.dataset}/${item.asr}__result
    outs:
      - experiment_data/pipeline/${item.dataset}/${item.asr}__wikineural_ner_alignment
      - experiment_data/pipeline/${item.dataset}/${item.asr}__wikineural_ner_metrics
# Runs pipeline_process_flair_upos.py for every listed (dataset, asr) pair.
# Each instance reads the dataset directory, the dataset's gold_transcript
# and the pair's `<asr>__result`, and writes the pair's flair_upos alignment
# and metrics outputs.
# NOTE(review): DVC appears to name foreach instances of mapping items by
# list position, so keep the entry order stable - confirm before reordering.
pipeline_flair_upos:
  foreach:
    - {dataset: pl_common_voice, asr: google}
    - {dataset: pl_common_voice, asr: wav2vec2}
    - {dataset: pl_common_voice, asr: techmo}
    - {dataset: pl_common_voice, asr: whisper_tiny}
    - {dataset: pl_voicelab_cbiz, asr: google}
    - {dataset: pl_voicelab_cbiz, asr: ajn}
    - {dataset: pl_voicelab_cbiz, asr: techmo}
    - {dataset: pl_google_fleurs, asr: google}
    - {dataset: pl_google_fleurs, asr: ajn}
    - {dataset: pl_google_fleurs, asr: techmo}
    - {dataset: pl_google_fleurs, asr: wav2vec2}
    - {dataset: pl_google_fleurs, asr: whisper_tiny}
    - {dataset: pl_luna, asr: google}
    - {dataset: pl_luna, asr: ajn}
    - {dataset: pl_luna, asr: techmo}
    - {dataset: pl_luna, asr: wav2vec2}
    - {dataset: pl_minds14, asr: google}
    - {dataset: pl_minds14, asr: ajn}
    - {dataset: pl_minds14, asr: techmo}
    - {dataset: pl_minds14, asr: wav2vec2}
    - {dataset: pl_minds14, asr: whisper_tiny}
  do:
    cmd: >-
      PYTHONPATH=. python experiment/pipeline_process_flair_upos.py
      --dataset=${item.dataset} --asr=${item.asr}
    deps:
      - experiment/pipeline_process_flair_upos.py
      - experiment_data/dataset/${item.dataset}
      - experiment_data/pipeline/${item.dataset}/gold_transcript
      - experiment_data/pipeline/${item.dataset}/${item.asr}__result
    outs:
      - experiment_data/pipeline/${item.dataset}/${item.asr}__flair_upos_alignment
      - experiment_data/pipeline/${item.dataset}/${item.asr}__flair_upos_metrics
# Runs pipeline_process_spacy_dep_tag_wer.py for every listed (dataset, asr)
# pair. Each instance reads the dataset directory, the dataset's
# gold_transcript and the pair's `<asr>__result`, and writes the pair's
# spacy_dep_tag alignment and metrics outputs.
# NOTE(review): the stage key says "spacy_tag" while the script and outputs
# say "spacy_dep_tag"; the key is left unchanged because it is the name the
# stage is addressed by.
# NOTE(review): DVC appears to name foreach instances of mapping items by
# list position, so keep the entry order stable - confirm before reordering.
pipeline_spacy_tag_wer:
  foreach:
    - {dataset: pl_common_voice, asr: google}
    - {dataset: pl_common_voice, asr: wav2vec2}
    - {dataset: pl_common_voice, asr: techmo}
    - {dataset: pl_common_voice, asr: ajn}
    - {dataset: pl_common_voice, asr: whisper_tiny}
    # pl_voicelab_cbiz pairs are disabled for this stage:
    # - {dataset: pl_voicelab_cbiz, asr: google}
    # - {dataset: pl_voicelab_cbiz, asr: ajn}
    # - {dataset: pl_voicelab_cbiz, asr: techmo}
    - {dataset: pl_google_fleurs, asr: google}
    - {dataset: pl_google_fleurs, asr: ajn}
    - {dataset: pl_google_fleurs, asr: techmo}
    - {dataset: pl_google_fleurs, asr: wav2vec2}
    - {dataset: pl_google_fleurs, asr: whisper_tiny}
    - {dataset: pl_luna, asr: google}
    - {dataset: pl_luna, asr: ajn}
    - {dataset: pl_luna, asr: techmo}
    - {dataset: pl_luna, asr: wav2vec2}
    - {dataset: pl_minds14, asr: google}
    - {dataset: pl_minds14, asr: ajn}
    - {dataset: pl_minds14, asr: techmo}
    - {dataset: pl_minds14, asr: wav2vec2}
    - {dataset: pl_minds14, asr: whisper_tiny}
  do:
    cmd: >-
      PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py
      --dataset=${item.dataset} --asr=${item.asr}
    deps:
      - experiment/pipeline_process_spacy_dep_tag_wer.py
      - experiment_data/dataset/${item.dataset}
      - experiment_data/pipeline/${item.dataset}/gold_transcript
      - experiment_data/pipeline/${item.dataset}/${item.asr}__result
    outs:
      - experiment_data/pipeline/${item.dataset}/${item.asr}__spacy_dep_tag_alignment
      - experiment_data/pipeline/${item.dataset}/${item.asr}__spacy_dep_tag_metrics