# dvc.yaml - DVC pipeline definition for the ASR benchmark experiments.
stages:

    # ---- LUNA dataset stages ----
    # Runs the LUNA import script (per the stage name: import into the
    # common dataset format).
    # `PYTHONPATH=.` puts the working directory on Python's module search path
    # so that project packages can be imported by the script.
    luna_import_to_common_format:
        cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
        # Inputs tracked by DVC: the script itself and the raw LUNA.PL data.
        deps:
            - experiment/luna/import_dataset/import_luna.py
            - experiment_data/dataset/LUNA.PL
        # Declared output; listed as a dependency of
        # luna_gold_transcript_processing.
        outs:
            - experiment_data/dataset_relation_manager_data/luna

    # Runs the LUNA gold-transcript pipeline script.
    # Declares two outputs (gold_transcript, gold_transcript_spacy) that the
    # LUNA ASR stages list as dependencies.
    luna_gold_transcript_processing:
        cmd: |
            PYTHONPATH=. python experiment/luna/pipeline/luna_gold_transcript_processing.py
        # Inputs: the script, the imported relation-manager data, and the raw
        # LUNA.PL data.
        deps:
            - experiment/luna/pipeline/luna_gold_transcript_processing.py
            - experiment_data/dataset_relation_manager_data/luna
            - experiment_data/dataset/LUNA.PL
        outs:
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy

    # Runs the LUNA pipeline script for the Techmo ASR system.
    # Output names below indicate an ASR transcript, spaCy output, and
    # word-/POS-level WER metrics and alignments (incl. embedding variants).
    luna_techmo_processing:
        cmd: |
            PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py
        # Inputs: the script, the raw dataset, the cached_asr/luna_techmo path
        # (not produced by any stage shown in this file), and both
        # gold-transcript outputs.
        deps:
            - experiment/luna/pipeline/luna_techmo_processing.py
            - experiment_data/dataset/LUNA.PL
            - experiment_data/cached_asr/luna_techmo
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
        outs:
            - experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
            - experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer
            - experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer
            - experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
            - experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer
            - experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer
            - experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer_embeddings

    # Runs the LUNA pipeline script for the AJN ASR system.
    # Mirrors luna_techmo_processing: same dependency shape and the same set
    # of output kinds, with `ajn` in place of `techmo` in the paths.
    luna_ajn_processing:
        cmd: |
            PYTHONPATH=. python experiment/luna/pipeline/luna_ajn_asr_processing.py
        # Inputs: the script, the raw dataset, the cached_asr/luna_ajn_polish_asr
        # path (not produced by any stage shown in this file), and both
        # gold-transcript outputs.
        deps:
            - experiment/luna/pipeline/luna_ajn_asr_processing.py
            - experiment_data/dataset/LUNA.PL
            - experiment_data/cached_asr/luna_ajn_polish_asr
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
        outs:
            - experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
            - experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer
            - experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer
            - experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
            - experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer
            - experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer
            - experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer_embeddings

    # Runs the LUNA spaCy tag processing script.
    # Consumes the spaCy outputs of the gold-transcript, Techmo and AJN stages
    # and declares tag_spacy_* metrics/alignment outputs for both ASR systems.
    # `python -u` runs Python with unbuffered stdout/stderr.
    luna_tag_spacy_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/luna/pipeline/spacy_tag_processing.py
        # Inputs: the script, the raw dataset, and the three *_spacy outputs.
        deps:
            - experiment/luna/pipeline/spacy_tag_processing.py
            - experiment_data/dataset/LUNA.PL
            - experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
            - experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
        outs:
            - experiment_data/pipeline/asr_benchmark_luna/tag_spacy_techmo_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_luna/tag_spacy_techmo_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_luna/tag_spacy_ajn_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_luna/tag_spacy_ajn_alignment_wer_embeddings


    # ---- VoiceLab dataset stages (voicelab_cbiz_testset_20220322) ----
    # Runs the VoiceLab import script (per the stage name: import into the
    # common dataset format).
    # `python -u` runs Python with unbuffered stdout/stderr; `PYTHONPATH=.`
    # puts the working directory on Python's module search path.
    voicelab_import_to_common_format:
        cmd: PYTHONPATH=. python -u experiment/voicelab/import_data.py
        # Inputs tracked by DVC: the script itself and the raw test-set data.
        deps:
            - experiment/voicelab/import_data.py
            - experiment_data/dataset/voicelab_cbiz_testset_20220322
        # Declared output; listed as a dependency of
        # voicelab_gold_transcript_processing.
        outs:
            - experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322

    # Runs the VoiceLab gold-transcript pipeline script.
    # Declares two outputs (gold_transcript, gold_transcript_spacy) that the
    # VoiceLab ASR stages list as dependencies.
    voicelab_gold_transcript_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_gold_transcript.py
        # Inputs: the script, the imported relation-manager data, and the raw
        # test-set data.
        deps:
            - experiment/voicelab/voicelab_pipeline_gold_transcript.py
            - experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
            - experiment_data/dataset/voicelab_cbiz_testset_20220322
        outs:
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy

    # Runs the VoiceLab pipeline script for the Techmo ASR system.
    # Output names below indicate an ASR transcript, spaCy output, and
    # word-/POS-level WER metrics and alignments (incl. embedding variants).
    voicelab_techmo_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_techmo.py
        # Inputs: the script, the raw test-set data, the
        # cached_asr/voicelab_cbiz_testset_20220322_techmo path (not produced
        # by any stage shown in this file), and both gold-transcript outputs.
        deps:
            - experiment/voicelab/voicelab_pipeline_techmo.py
            - experiment_data/dataset/voicelab_cbiz_testset_20220322
            - experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
        outs:
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer_embeddings

    # Runs the VoiceLab pipeline script for the AJN ASR system.
    # Mirrors voicelab_techmo_processing: same dependency shape and the same
    # set of output kinds, with `ajn` in place of `techmo` in the paths.
    voicelab_ajn_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_ajn_asr.py
        # Inputs: the script, the cached_asr/voicelab_cbiz_testset_20220322_ajn
        # path (not produced by any stage shown in this file), the raw
        # test-set data, and both gold-transcript outputs.
        deps:
            - experiment/voicelab/voicelab_pipeline_ajn_asr.py
            - experiment_data/cached_asr/voicelab_cbiz_testset_20220322_ajn
            - experiment_data/dataset/voicelab_cbiz_testset_20220322
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
        outs:
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_alignment_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_metrics_wer
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer_embeddings

    # Runs the VoiceLab spaCy tag processing script.
    # Consumes the spaCy outputs of the gold-transcript, Techmo and AJN stages
    # and declares tag_spacy_* metrics/alignment outputs for both ASR systems.
    # `python -u` runs Python with unbuffered stdout/stderr.
    voicelab_tag_spacy_processing:
        cmd: |
            PYTHONPATH=. python -u experiment/voicelab/spacy_tag_processing.py
        # Inputs: the script, the raw test-set data, and the three *_spacy
        # outputs.
        deps:
            - experiment/voicelab/spacy_tag_processing.py
            - experiment_data/dataset/voicelab_cbiz_testset_20220322
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
        outs:
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_techmo_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_techmo_alignment_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_ajn_metrics_wer_embeddings
            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_ajn_alignment_wer_embeddings

# concurrent.futures, multiprocessing