# dvc.yaml - DVC pipeline definition
stages:
# Stage: runs the pl_google_fleurs pipeline_save_wav_files.py script.
# The script itself is the only declared dependency; the declared output is
# the experiment_data/dataset/pl_google_fleurs directory.
save_audio_files:
  # Folded scalar: the two lines below are joined with a single space.
  cmd: >-
    PYTHONPATH=. python
    experiment/dataset_specific/pl_google_fleurs/pipeline_save_wav_files.py
  deps:
    - experiment/dataset_specific/pl_google_fleurs/pipeline_save_wav_files.py
  outs:
    - experiment_data/dataset/pl_google_fleurs
# Stage template: one stage instance is generated per `foreach` entry.
# Each instance runs the import_relation_manager.py script that lives in the
# dataset's own directory under experiment/dataset_specific/.
pipeline_prepare_relation_manager:
  foreach:
    - dataset: pl_common_voice
    - dataset: pl_google_fleurs
    - dataset: pl_luna
    - dataset: pl_minds14
    - dataset: pl_voicelab_cbiz
  do:
    # Folded scalar: the two lines below are joined with a single space.
    cmd: >-
      PYTHONPATH=. python
      experiment/dataset_specific/${item.dataset}/import_relation_manager.py
    deps:
      - experiment/dataset_specific/${item.dataset}/import_relation_manager.py
      # Dataset directory for this entry.
      - experiment_data/dataset/${item.dataset}
    outs:
      - experiment_data/dataset_relation_manager_data/${item.dataset}
# Stage template: one stage instance is generated per `foreach` entry.
# Each instance runs the shared pipeline_process_gold_transcript.py script,
# passing the entry's dataset name through the --dataset option.
pipeline_gold_transcript:
  foreach:
    - dataset: pl_common_voice
    - dataset: pl_google_fleurs
    - dataset: pl_luna
    - dataset: pl_minds14
    - dataset: pl_voicelab_cbiz
  do:
    # Folded scalar: the two lines below are joined with a single space.
    cmd: >-
      PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py
      --dataset=${item.dataset}
    deps:
      - experiment/pipeline_process_gold_transcript.py
      - experiment_data/dataset/${item.dataset}
      # Declared as the output of pipeline_prepare_relation_manager.
      - experiment_data/dataset_relation_manager_data/${item.dataset}
    outs:
      - experiment_data/pipeline/${item.dataset}/gold_transcript
pipeline_asr_result:
foreach:
- dataset: pl_common_voice
asr: google
- dataset: pl_common_voice
asr: wav2vec2
- dataset: pl_common_voice
asr: techmo
- dataset: pl_common_voice
asr: ajn
- dataset: pl_common_voice
asr: whisper_tiny
- dataset: pl_voicelab_cbiz
asr: google
- dataset: pl_voicelab_cbiz
asr: ajn
- dataset: pl_voicelab_cbiz
asr: techmo
- dataset: pl_voicelab_cbiz
asr: whisper_tiny
- dataset: pl_google_fleurs
asr: google
- dataset: pl_google_fleurs
asr: ajn
- dataset: pl_google_fleurs
asr: techmo
- dataset: pl_google_fleurs
asr: wav2vec2