# Pipeline stage definitions. Each entry under `stages` names one step and
# gives the command it runs (`cmd`), its declared inputs (`deps`) and its
# declared outputs (`outs`).
# NOTE(review): the layout matches the DVC `dvc.yaml` stage schema - confirm.
stages:

    # Runs import_luna.py with the repository root on PYTHONPATH.
    # Declared inputs: the script itself and the LUNA.PL dataset path.
    # Declared output: dataset_relation_manager_data/luna, which
    # luna_gold_transcript_processing lists as a dependency.
    luna_import_to_common_format:
        cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
        deps:
            - experiment/luna/import_dataset/import_luna.py
            - experiment_data/dataset/LUNA.PL
        outs:
            - experiment_data/dataset_relation_manager_data/luna

    # Runs luna_gold_transcript_processing.py with the repository root on
    # PYTHONPATH.
    # Declared inputs: the script, the output of luna_import_to_common_format
    # (dataset_relation_manager_data/luna) and the LUNA.PL dataset path.
    # Declared outputs: gold_transcript and gold_transcript_spacy; the other
    # luna_* processing stages list one or both of them as dependencies.
    luna_gold_transcript_processing:
        cmd: |
            PYTHONPATH=. python experiment/luna/pipeline/luna_gold_transcript_processing.py
        deps:
            - experiment/luna/pipeline/luna_gold_transcript_processing.py
            - experiment_data/dataset_relation_manager_data/luna
            - experiment_data/dataset/LUNA.PL
        outs:
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy

    # Runs luna_techmo_processing.py with the repository root on PYTHONPATH.
    # Declared inputs: the script, the LUNA.PL dataset path, the
    # cached_asr/luna_techmo path and both outputs of
    # luna_gold_transcript_processing.
    # NOTE(review): no stage visible in this file produces
    # cached_asr/luna_techmo, so it presumably has to exist beforehand - confirm.
    # Declared outputs: the techmo_* / *_techmo_* paths below; techmo_polish_asr
    # and techmo_spacy are listed as dependencies by later luna stages.
    luna_techmo_processing:
        cmd: |
            PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py
        deps:
            - experiment/luna/pipeline/luna_techmo_processing.py
            - experiment_data/dataset/LUNA.PL
            - experiment_data/cached_asr/luna_techmo
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
        outs:
            - experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
            - experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer
            - experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer
            - experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
            - experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer
            - experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer
            - experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer_embeddings

    # Runs luna_ajn_asr_processing.py with the repository root on PYTHONPATH.
    # Declared inputs: the script, the LUNA.PL dataset path, the
    # cached_asr/luna_ajn_polish_asr path and both outputs of
    # luna_gold_transcript_processing.
    # NOTE(review): no stage visible in this file produces
    # cached_asr/luna_ajn_polish_asr, so it presumably has to exist beforehand
    # - confirm.
    # Declared outputs: the ajn_* / *_ajn_* paths below; ajn_polish_asr and
    # ajn_spacy are listed as dependencies by later luna stages.
    luna_ajn_processing:
        cmd: |
            PYTHONPATH=. python experiment/luna/pipeline/luna_ajn_asr_processing.py
        deps:
            - experiment/luna/pipeline/luna_ajn_asr_processing.py
            - experiment_data/dataset/LUNA.PL
            - experiment_data/cached_asr/luna_ajn_polish_asr
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
        outs:
            - experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
            - experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer
            - experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer
            - experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
            - experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer
            - experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer
            - experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer_embeddings

    # Runs spacy_tag_processing.py (python -u: unbuffered output) with the
    # repository root on PYTHONPATH.
    # Declared inputs: the script, the LUNA.PL dataset path and the *_spacy
    # outputs of luna_ajn_processing, luna_gold_transcript_processing and
    # luna_techmo_processing.
    # Declared outputs: tag_spacy_* paths for techmo and ajn only; the
    # wav2vec2_spacy output of luna_wav2vec2_processing is not an input here.
    luna_tag_spacy_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/luna/pipeline/spacy_tag_processing.py
        deps:
            - experiment/luna/pipeline/spacy_tag_processing.py
            - experiment_data/dataset/LUNA.PL
            - experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
            - experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
        outs:
            - experiment_data/pipeline/asr_benchmark_luna/tag_spacy_techmo_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_luna/tag_spacy_techmo_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_luna/tag_spacy_ajn_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_luna/tag_spacy_ajn_alignment_wer_embeddings

    # Runs spacy_ner_processing.py (python -u: unbuffered output) with the
    # repository root on PYTHONPATH.
    # Declared inputs: the script, the LUNA.PL dataset path, gold_transcript
    # (from luna_gold_transcript_processing), techmo_polish_asr (from
    # luna_techmo_processing) and ajn_polish_asr (from luna_ajn_processing).
    # Declared outputs: ner_spacy_* paths for techmo and ajn.
    luna_ner_spacy_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/luna/pipeline/spacy_ner_processing.py
        deps:
            - experiment/luna/pipeline/spacy_ner_processing.py
            - experiment_data/dataset/LUNA.PL
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
            - experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
            - experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
        outs:
            - experiment_data/pipeline/asr_benchmark_luna/ner_spacy_techmo_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_luna/ner_spacy_techmo_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_luna/ner_spacy_ajn_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_luna/ner_spacy_ajn_alignment_wer_embeddings

    # Runs duckling_processing.py (python -u: unbuffered output) with the
    # repository root on PYTHONPATH.
    # Declared inputs: the script, the LUNA.PL dataset path, gold_transcript
    # (from luna_gold_transcript_processing), techmo_polish_asr (from
    # luna_techmo_processing) and ajn_polish_asr (from luna_ajn_processing).
    # Declared outputs: one *_duckling path per input transcript source.
    luna_duckling_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/luna/pipeline/duckling_processing.py
        deps:
            - experiment/luna/pipeline/duckling_processing.py
            - experiment_data/dataset/LUNA.PL
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
            - experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
            - experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
        outs:
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_duckling
            - experiment_data/pipeline/asr_benchmark_luna/techmo_duckling
            - experiment_data/pipeline/asr_benchmark_luna/ajn_duckling

    # Runs luna_wav2vec2_asr_processing.py with the repository root on
    # PYTHONPATH.
    # Declared inputs: the script, the LUNA.PL dataset path, the
    # cached_asr/luna_wav2vec2_polish_asr path and both outputs of
    # luna_gold_transcript_processing.
    # NOTE(review): no stage visible in this file produces
    # cached_asr/luna_wav2vec2_polish_asr, so it presumably has to exist
    # beforehand - confirm.
    # Declared outputs: the wav2vec2_* / *_wav2vec2_* paths below. None of them
    # is listed as a dependency by another stage visible in this file.
    luna_wav2vec2_processing:
        cmd: |
            PYTHONPATH=. python experiment/luna/pipeline/luna_wav2vec2_asr_processing.py
        deps:
            - experiment/luna/pipeline/luna_wav2vec2_asr_processing.py
            - experiment_data/dataset/LUNA.PL
            - experiment_data/cached_asr/luna_wav2vec2_polish_asr
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
        outs:
            - experiment_data/pipeline/asr_benchmark_luna/wav2vec2_polish_asr
            - experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_metrics_wer
            - experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_alignment_wer
            - experiment_data/pipeline/asr_benchmark_luna/wav2vec2_spacy
            - experiment_data/pipeline/asr_benchmark_luna/pos_wav2vec2_alignment_wer
            - experiment_data/pipeline/asr_benchmark_luna/pos_wav2vec2_metrics_wer
            - experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_alignment_wer_embeddings

    # Runs experiment/voicelab/import_data.py (python -u: unbuffered output)
    # with the repository root on PYTHONPATH.
    # Declared inputs: the script and the voicelab_cbiz_testset_20220322
    # dataset path.
    # Declared output: the matching dataset_relation_manager_data entry, which
    # voicelab_gold_transcript_processing lists as a dependency.
    voicelab_import_to_common_format:
        cmd: PYTHONPATH=. python -u experiment/voicelab/import_data.py
        deps:
            - experiment/voicelab/import_data.py
            - experiment_data/dataset/voicelab_cbiz_testset_20220322
        outs:
            - experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322

    # Runs voicelab_pipeline_gold_transcript.py (python -u: unbuffered output)
    # with the repository root on PYTHONPATH.
    # Declared inputs: the script, the output of
    # voicelab_import_to_common_format and the voicelab dataset path.
    # Declared outputs: gold_transcript and gold_transcript_spacy; the other
    # voicelab_* processing stages list one or both of them as dependencies.
    voicelab_gold_transcript_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_gold_transcript.py
        deps:
            - experiment/voicelab/voicelab_pipeline_gold_transcript.py
            - experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
            - experiment_data/dataset/voicelab_cbiz_testset_20220322
        outs:
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy

    # Runs voicelab_pipeline_techmo.py (python -u: unbuffered output) with the
    # repository root on PYTHONPATH.
    # Declared inputs: the script, the voicelab dataset path, the
    # cached_asr/voicelab_cbiz_testset_20220322_techmo path and both outputs of
    # voicelab_gold_transcript_processing.
    # NOTE(review): no stage visible in this file produces the cached_asr
    # path, so it presumably has to exist beforehand - confirm.
    # Declared outputs: the techmo_* / *_techmo_* paths below; techmo_polish_asr
    # and techmo_spacy are listed as dependencies by later voicelab stages.
    voicelab_techmo_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_techmo.py
        deps:
            - experiment/voicelab/voicelab_pipeline_techmo.py
            - experiment_data/dataset/voicelab_cbiz_testset_20220322
            - experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
        outs:
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer_embeddings

    # Runs voicelab_pipeline_ajn_asr.py (python -u: unbuffered output) with the
    # repository root on PYTHONPATH.
    # Declared inputs: the script, the
    # cached_asr/voicelab_cbiz_testset_20220322_ajn path, the voicelab dataset
    # path and both outputs of voicelab_gold_transcript_processing.
    # NOTE(review): no stage visible in this file produces the cached_asr
    # path, so it presumably has to exist beforehand - confirm.
    # Declared outputs: the ajn_* / *_ajn_* paths below; ajn_polish_asr and
    # ajn_spacy are listed as dependencies by later voicelab stages.
    voicelab_ajn_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_ajn_asr.py
        deps:
            - experiment/voicelab/voicelab_pipeline_ajn_asr.py
            - experiment_data/cached_asr/voicelab_cbiz_testset_20220322_ajn
            - experiment_data/dataset/voicelab_cbiz_testset_20220322
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
        outs:
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_alignment_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_metrics_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer_embeddings

    # Runs experiment/voicelab/spacy_tag_processing.py (python -u: unbuffered
    # output) with the repository root on PYTHONPATH.
    # Declared inputs: the script, the voicelab dataset path and the *_spacy
    # outputs of voicelab_ajn_processing, voicelab_gold_transcript_processing
    # and voicelab_techmo_processing.
    # Declared outputs: tag_spacy_* paths for techmo and ajn.
    voicelab_tag_spacy_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/voicelab/spacy_tag_processing.py
        deps:
            - experiment/voicelab/spacy_tag_processing.py
            - experiment_data/dataset/voicelab_cbiz_testset_20220322
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
        outs:
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_techmo_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_techmo_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_ajn_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_ajn_alignment_wer_embeddings

    # Runs experiment/voicelab/spacy_ner_processing.py (python -u: unbuffered
    # output) with the repository root on PYTHONPATH.
    # Declared inputs: the script, the voicelab dataset path, gold_transcript
    # (from voicelab_gold_transcript_processing), techmo_polish_asr (from
    # voicelab_techmo_processing) and ajn_polish_asr (from
    # voicelab_ajn_processing).
    # Declared outputs: ner_spacy_* paths for techmo and ajn.
    voicelab_ner_spacy_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/voicelab/spacy_ner_processing.py
        deps:
            - experiment/voicelab/spacy_ner_processing.py
            - experiment_data/dataset/voicelab_cbiz_testset_20220322
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
        outs:
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_techmo_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_techmo_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_ajn_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_ajn_alignment_wer_embeddings

    # Runs experiment/voicelab/duckling_processing.py (python -u: unbuffered
    # output) with the repository root on PYTHONPATH.
    # Declared inputs: the script, the voicelab dataset path, gold_transcript
    # (from voicelab_gold_transcript_processing), techmo_polish_asr (from
    # voicelab_techmo_processing) and ajn_polish_asr (from
    # voicelab_ajn_processing).
    # Declared outputs: one *_duckling path per input transcript source.
    voicelab_duckling_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/voicelab/duckling_processing.py
        deps:
            - experiment/voicelab/duckling_processing.py
            - experiment_data/dataset/voicelab_cbiz_testset_20220322
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
        outs:
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_duckling
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_duckling
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_duckling

    # Runs experiment/voicelab/voicelab_google.py (python -u: unbuffered
    # output) with the repository root on PYTHONPATH.
    # Declared inputs: the script, the cached_asr/voicelab_google path, the
    # voicelab dataset path and both outputs of
    # voicelab_gold_transcript_processing.
    # Declared outputs: the google_* / *_google_* paths below, including the
    # ner_spacy_* and tag_spacy_* paths for google.
    #
    # The script dependency must be the same file that `cmd` executes,
    # otherwise edits to the executed script do not invalidate this stage.
    # NOTE(review): the dependency previously pointed at
    # experiment/voicelab/pipeline/voicelab_google.py while `cmd` ran
    # experiment/voicelab/voicelab_google.py. It now follows `cmd`, which
    # matches the layout of the other voicelab stages - confirm the script's
    # real location and adjust both lines together if it lives in pipeline/.
    # NOTE(review): no stage visible in this file produces
    # cached_asr/voicelab_google, so it presumably has to exist beforehand
    # - confirm.
    voicelab_google_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/voicelab/voicelab_google.py
        deps:
            - experiment/voicelab/voicelab_google.py
            - experiment_data/cached_asr/voicelab_google
            - experiment_data/dataset/voicelab_cbiz_testset_20220322
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
        outs:
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/google_polish_asr
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_google_metrics_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_google_alignment_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/google_spacy
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_google_alignment_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_google_metrics_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_google_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_google_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_google_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_google_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_google_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_google_metrics_wer_embeddings

    # Runs experiment/common_voice/import_dataset.py (python -u: unbuffered
    # output) with the repository root on PYTHONPATH.
    # Declared input: only the script itself.
    # NOTE(review): no dataset path is declared as a dependency; presumably
    # the script obtains the data on its own - confirm.
    # Declared output: dataset_relation_manager_data/common_voice, which
    # common_voice_gold_transcript_processing lists as a dependency.
    common_voice_import_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/common_voice/import_dataset.py
        deps:
            - experiment/common_voice/import_dataset.py
        outs:
            - experiment_data/dataset_relation_manager_data/common_voice

    # Runs common_voice_gold_transcript.py (python -u: unbuffered output) with
    # the repository root on PYTHONPATH.
    # Declared inputs: the script and the output of
    # common_voice_import_processing.
    # Declared outputs: gold_transcript and gold_transcript_spacy, both listed
    # as dependencies by common_voice_google_processing.
    common_voice_gold_transcript_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/common_voice/pipeline/common_voice_gold_transcript.py
        deps:
            - experiment/common_voice/pipeline/common_voice_gold_transcript.py
            - experiment_data/dataset_relation_manager_data/common_voice
        outs:
            - experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript
            - experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript_spacy

    # Runs common_voice_google.py (python -u: unbuffered output) with the
    # repository root on PYTHONPATH.
    # Declared inputs: the script, the cached_asr/common_voice_google path and
    # both outputs of common_voice_gold_transcript_processing.
    # NOTE(review): no stage visible in this file produces
    # cached_asr/common_voice_google, so it presumably has to exist beforehand
    # - confirm.
    # Declared outputs: the google_* / *_google_* paths below, including the
    # ner_spacy_* and tag_spacy_* paths for google.
    common_voice_google_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/common_voice/pipeline/common_voice_google.py
        deps:
            - experiment/common_voice/pipeline/common_voice_google.py
            - experiment_data/cached_asr/common_voice_google
            - experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript
            - experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript_spacy
        outs:
            - experiment_data/pipeline/asr_benchmark_common_voice/google_polish_asr
            - experiment_data/pipeline/asr_benchmark_common_voice/word_google_metrics_wer
            - experiment_data/pipeline/asr_benchmark_common_voice/word_google_alignment_wer
            - experiment_data/pipeline/asr_benchmark_common_voice/google_spacy
            - experiment_data/pipeline/asr_benchmark_common_voice/pos_google_alignment_wer
            - experiment_data/pipeline/asr_benchmark_common_voice/pos_google_metrics_wer
            - experiment_data/pipeline/asr_benchmark_common_voice/word_google_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_common_voice/word_google_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_common_voice/ner_spacy_google_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_common_voice/ner_spacy_google_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_common_voice/tag_spacy_google_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_common_voice/tag_spacy_google_metrics_wer_embeddings

    # Runs experiment/minds14/import_dataset.py (python -u: unbuffered output)
    # with the repository root on PYTHONPATH.
    # Declared input: only the script itself.
    # NOTE(review): no dataset path is declared as a dependency; presumably
    # the script obtains the data on its own - confirm.
    # Declared output: dataset_relation_manager_data/minds14, which
    # minds14_gold_transcript_processing lists as a dependency.
    minds14_import_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/minds14/import_dataset.py
        deps:
            - experiment/minds14/import_dataset.py
        outs:
            - experiment_data/dataset_relation_manager_data/minds14

    # Runs minds14_gold_transcript.py (python -u: unbuffered output) with the
    # repository root on PYTHONPATH.
    # Declared inputs: the script and the output of minds14_import_processing.
    # Declared outputs: gold_transcript and gold_transcript_spacy, both listed
    # as dependencies by minds14_google_processing.
    minds14_gold_transcript_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/minds14/pipeline/minds14_gold_transcript.py
        deps:
            - experiment/minds14/pipeline/minds14_gold_transcript.py
            - experiment_data/dataset_relation_manager_data/minds14
        outs:
            - experiment_data/pipeline/asr_benchmark_minds14/gold_transcript
            - experiment_data/pipeline/asr_benchmark_minds14/gold_transcript_spacy

    # Runs minds14_google.py (python -u: unbuffered output) with the repository
    # root on PYTHONPATH.
    # Declared inputs: the script, the cached_asr/minds14_google path and both
    # outputs of minds14_gold_transcript_processing.
    # NOTE(review): no stage visible in this file produces
    # cached_asr/minds14_google, so it presumably has to exist beforehand
    # - confirm.
    # Declared outputs: the google_* / *_google_* paths below, including the
    # ner_spacy_* and tag_spacy_* paths for google.
    minds14_google_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/minds14/pipeline/minds14_google.py
        deps:
            - experiment/minds14/pipeline/minds14_google.py
            - experiment_data/cached_asr/minds14_google
            - experiment_data/pipeline/asr_benchmark_minds14/gold_transcript
            - experiment_data/pipeline/asr_benchmark_minds14/gold_transcript_spacy
        outs:
            - experiment_data/pipeline/asr_benchmark_minds14/google_polish_asr
            - experiment_data/pipeline/asr_benchmark_minds14/word_google_metrics_wer
            - experiment_data/pipeline/asr_benchmark_minds14/word_google_alignment_wer
            - experiment_data/pipeline/asr_benchmark_minds14/google_spacy
            - experiment_data/pipeline/asr_benchmark_minds14/pos_google_alignment_wer
            - experiment_data/pipeline/asr_benchmark_minds14/pos_google_metrics_wer
            - experiment_data/pipeline/asr_benchmark_minds14/word_google_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_minds14/word_google_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_minds14/ner_spacy_google_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_minds14/ner_spacy_google_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_minds14/tag_spacy_google_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_minds14/tag_spacy_google_metrics_wer_embeddings

    # Runs experiment/google_fleurs/save_wav_files.py (python -u: unbuffered
    # output) with the repository root on PYTHONPATH.
    # Declared input: only the script itself.
    # Declared output: experiment_data/audio/fleurs_audio.
    # NOTE(review): no stage visible in this file lists fleurs_audio as a
    # dependency; if google_fleurs_import_processing reads these files it
    # should declare them - confirm.
    google_fleurs_save_audio_files:
        cmd: |
            PYTHONPATH=. python -u experiment/google_fleurs/save_wav_files.py
        deps:
            - experiment/google_fleurs/save_wav_files.py
        outs:
            - experiment_data/audio/fleurs_audio

    # Runs experiment/google_fleurs/import_dataset.py (python -u: unbuffered
    # output) with the repository root on PYTHONPATH.
    # Declared input: only the script itself.
    # NOTE(review): neither a dataset path nor the fleurs_audio output of
    # google_fleurs_save_audio_files is declared as a dependency - confirm
    # that the script needs neither.
    # Declared output: dataset_relation_manager_data/google_fleurs, which
    # google_fleurs_gold_transcript_processing lists as a dependency.
    google_fleurs_import_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/google_fleurs/import_dataset.py
        deps:
            - experiment/google_fleurs/import_dataset.py
        outs:
            - experiment_data/dataset_relation_manager_data/google_fleurs

    # Runs google_fleurs_gold_transcript.py (python -u: unbuffered output) with
    # the repository root on PYTHONPATH.
    # Declared inputs: the script and the output of
    # google_fleurs_import_processing.
    # Declared outputs: gold_transcript and gold_transcript_spacy, both listed
    # as dependencies by google_fleurs_google_processing.
    google_fleurs_gold_transcript_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/google_fleurs/pipeline/google_fleurs_gold_transcript.py
        deps:
            - experiment/google_fleurs/pipeline/google_fleurs_gold_transcript.py
            - experiment_data/dataset_relation_manager_data/google_fleurs
        outs:
            - experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript
            - experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript_spacy

    # Runs google_fleurs_google.py (python -u: unbuffered output) with the
    # repository root on PYTHONPATH.
    # Declared inputs: the script, the cached_asr/google_fleurs_google path and
    # both outputs of google_fleurs_gold_transcript_processing.
    # NOTE(review): no stage visible in this file produces
    # cached_asr/google_fleurs_google, so it presumably has to exist beforehand
    # - confirm.
    # Declared outputs: the google_* / *_google_* paths below, including the
    # ner_spacy_* and tag_spacy_* paths for google.
    google_fleurs_google_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/google_fleurs/pipeline/google_fleurs_google.py
        deps:
            - experiment/google_fleurs/pipeline/google_fleurs_google.py
            - experiment_data/cached_asr/google_fleurs_google
            - experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript
            - experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript_spacy
        outs:
            - experiment_data/pipeline/asr_benchmark_google_fleurs/google_polish_asr
            - experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_metrics_wer
            - experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_alignment_wer
            - experiment_data/pipeline/asr_benchmark_google_fleurs/google_spacy
            - experiment_data/pipeline/asr_benchmark_google_fleurs/pos_google_alignment_wer
            - experiment_data/pipeline/asr_benchmark_google_fleurs/pos_google_metrics_wer
            - experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_google_fleurs/ner_spacy_google_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_google_fleurs/ner_spacy_google_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_google_fleurs/tag_spacy_google_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_google_fleurs/tag_spacy_google_metrics_wer_embeddings