Skip to content
Snippets Groups Projects
Select Git revision
  • 584babb2a0ee71fea69894eb4e68fdbc8685376a
  • main default protected
  • change_data_model
  • feature/add_auth_asr_service
  • fix/incorrect_import
  • feature/change_registry_clarin
  • feature/add_base_asr_service
  • feature/add_poetry
  • feature/add_word_ids
  • feature/add_sziszapangma
10 results

dvc.yaml

Blame
  • user avatar
    Marcin Wątroba authored
    e407a441
    History
    dvc.yaml 16.95 KiB
    stages:
    
        # Runs the pl_google_fleurs pipeline_save_wav_files.py script and declares
        # experiment_data/dataset/pl_google_fleurs as the stage output.
        # NOTE(review): presumably this materialises the dataset audio as WAV
        # files (inferred from the script name) - confirm against the script.
        save_audio_files:
            # Folded scalar: the two lines below are joined with a single space.
            cmd: >-
                PYTHONPATH=. python
                experiment/dataset_specific/pl_google_fleurs/pipeline_save_wav_files.py
            deps:
                - experiment/dataset_specific/pl_google_fleurs/pipeline_save_wav_files.py
            outs:
                - experiment_data/dataset/pl_google_fleurs
    
        # Templated stage: one instance is generated per entry under `foreach`,
        # each running that dataset's own import_relation_manager.py script.
        pipeline_prepare_relation_manager:
            # Entries are kept as mappings (not bare strings) because `do`
            # reads the value through ${item.dataset}.
            foreach:
                - dataset: pl_common_voice
                - dataset: pl_google_fleurs
                - dataset: pl_luna
                - dataset: pl_minds14
                - dataset: pl_voicelab_cbiz
            do:
                # Folded scalar: the two lines below are joined with a single space.
                cmd: >-
                    PYTHONPATH=. python
                    experiment/dataset_specific/${item.dataset}/import_relation_manager.py
                deps:
                    # The import script itself and the dataset directory.
                    - experiment/dataset_specific/${item.dataset}/import_relation_manager.py
                    - experiment_data/dataset/${item.dataset}
                outs:
                    - experiment_data/dataset_relation_manager_data/${item.dataset}
    
        # Templated stage: runs the shared pipeline_process_gold_transcript.py
        # script once per dataset, passing the dataset name via --dataset.
        pipeline_gold_transcript:
            # Entries are kept as mappings (not bare strings) because `do`
            # reads the value through ${item.dataset}.
            foreach:
                - dataset: pl_common_voice
                - dataset: pl_google_fleurs
                - dataset: pl_luna
                - dataset: pl_minds14
                - dataset: pl_voicelab_cbiz
            do:
                # Folded scalar: the three lines below are joined with single spaces.
                cmd: >-
                    PYTHONPATH=. python
                    experiment/pipeline_process_gold_transcript.py
                    --dataset=${item.dataset}
                deps:
                    # The script, the dataset directory, and the relation-manager
                    # data directory for the same dataset.
                    - experiment/pipeline_process_gold_transcript.py
                    - experiment_data/dataset/${item.dataset}
                    - experiment_data/dataset_relation_manager_data/${item.dataset}
                outs:
                    - experiment_data/pipeline/${item.dataset}/gold_transcript
    
        pipeline_asr_result:
            foreach:
                -   dataset: pl_common_voice
                    asr: google
                -   dataset: pl_common_voice
                    asr: wav2vec2
                -   dataset: pl_common_voice
                    asr: techmo
                -   dataset: pl_common_voice
                    asr: ajn
                -   dataset: pl_common_voice
                    asr: whisper_tiny
    
                -   dataset: pl_voicelab_cbiz
                    asr: google
                -   dataset: pl_voicelab_cbiz
                    asr: ajn
                -   dataset: pl_voicelab_cbiz
                    asr: techmo
                -   dataset: pl_voicelab_cbiz
                    asr: whisper_tiny
    
                -   dataset: pl_google_fleurs
                    asr: google
                -   dataset: pl_google_fleurs
                    asr: ajn
                -   dataset: pl_google_fleurs
                    asr: techmo
                -   dataset: pl_google_fleurs
                    asr: wav2vec2