# Pipeline definition in the DVC `dvc.yaml` stage schema.
#
# Every stage has the same shape:
#   cmd  - the Python script to run, with PYTHONPATH set to the repo root
#   deps - the script itself plus the data paths it is declared to read
#   outs - the paths the stage is declared to produce
#
# Stages are chained only through matching paths: an `outs` entry of one stage
# that appears under `deps` of another makes the latter run after the former.
stages:
  # --------------------------------------------------------------------------
  # LUNA
  # --------------------------------------------------------------------------

  # Produces dataset_relation_manager_data/luna from the raw LUNA.PL dataset.
  luna_import_to_common_format:
    cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
    deps:
      - experiment/luna/import_dataset/import_luna.py
      - experiment_data/dataset/LUNA.PL
    outs:
      - experiment_data/dataset_relation_manager_data/luna

  # Produces the gold transcript outputs that the LUNA ASR stages depend on.
  luna_gold_transcript_processing:
    cmd: |
      PYTHONPATH=. python experiment/luna/pipeline/luna_gold_transcript_processing.py
    deps:
      - experiment/luna/pipeline/luna_gold_transcript_processing.py
      - experiment_data/dataset_relation_manager_data/luna
      - experiment_data/dataset/LUNA.PL
    outs:
      - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy

  # Reads cached_asr/luna_techmo and the gold transcript outputs.
  luna_techmo_processing:
    cmd: |
      PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py
    deps:
      - experiment/luna/pipeline/luna_techmo_processing.py
      - experiment_data/dataset/LUNA.PL
      - experiment_data/cached_asr/luna_techmo
      - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
    outs:
      - experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
      - experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer
      - experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer
      - experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
      - experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer
      - experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer
      - experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer_embeddings

  # Reads cached_asr/luna_ajn_polish_asr and the gold transcript outputs.
  luna_ajn_processing:
    cmd: |
      PYTHONPATH=. python experiment/luna/pipeline/luna_ajn_asr_processing.py
    deps:
      - experiment/luna/pipeline/luna_ajn_asr_processing.py
      - experiment_data/dataset/LUNA.PL
      - experiment_data/cached_asr/luna_ajn_polish_asr
      - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
    outs:
      - experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
      - experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer
      - experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer
      - experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
      - experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer
      - experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer
      - experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer_embeddings

  # Runs after the techmo, ajn and gold transcript stages: it depends on the
  # *_spacy outputs of all three.
  luna_tag_spacy_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/luna/pipeline/spacy_tag_processing.py
    deps:
      - experiment/luna/pipeline/spacy_tag_processing.py
      - experiment_data/dataset/LUNA.PL
      - experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
      - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
      - experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
    outs:
      - experiment_data/pipeline/asr_benchmark_luna/tag_spacy_techmo_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_luna/tag_spacy_techmo_alignment_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_luna/tag_spacy_ajn_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_luna/tag_spacy_ajn_alignment_wer_embeddings

  # Depends on the gold transcript and on the techmo/ajn *_polish_asr outputs.
  luna_ner_spacy_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/luna/pipeline/spacy_ner_processing.py
    deps:
      - experiment/luna/pipeline/spacy_ner_processing.py
      - experiment_data/dataset/LUNA.PL
      - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      - experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
      - experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
    outs:
      - experiment_data/pipeline/asr_benchmark_luna/ner_spacy_techmo_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_luna/ner_spacy_techmo_alignment_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_luna/ner_spacy_ajn_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_luna/ner_spacy_ajn_alignment_wer_embeddings

  # Same inputs as the NER stage; emits one *_duckling output per transcript
  # source (gold, techmo, ajn).
  luna_duckling_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/luna/pipeline/duckling_processing.py
    deps:
      - experiment/luna/pipeline/duckling_processing.py
      - experiment_data/dataset/LUNA.PL
      - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      - experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
      - experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
    outs:
      - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_duckling
      - experiment_data/pipeline/asr_benchmark_luna/techmo_duckling
      - experiment_data/pipeline/asr_benchmark_luna/ajn_duckling

  # Reads cached_asr/luna_wav2vec2_polish_asr and the gold transcript outputs.
  luna_wav2vec2_processing:
    cmd: |
      PYTHONPATH=. python experiment/luna/pipeline/luna_wav2vec2_asr_processing.py
    deps:
      - experiment/luna/pipeline/luna_wav2vec2_asr_processing.py
      - experiment_data/dataset/LUNA.PL
      - experiment_data/cached_asr/luna_wav2vec2_polish_asr
      - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
    outs:
      - experiment_data/pipeline/asr_benchmark_luna/wav2vec2_polish_asr
      - experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_metrics_wer
      - experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_alignment_wer
      - experiment_data/pipeline/asr_benchmark_luna/wav2vec2_spacy
      - experiment_data/pipeline/asr_benchmark_luna/pos_wav2vec2_alignment_wer
      - experiment_data/pipeline/asr_benchmark_luna/pos_wav2vec2_metrics_wer
      - experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_alignment_wer_embeddings

  # --------------------------------------------------------------------------
  # Voicelab (voicelab_cbiz_testset_20220322)
  # --------------------------------------------------------------------------

  # Produces dataset_relation_manager_data/voicelab_cbiz_testset_20220322 from
  # the raw dataset directory.
  voicelab_import_to_common_format:
    cmd: PYTHONPATH=. python -u experiment/voicelab/import_data.py
    deps:
      - experiment/voicelab/import_data.py
      - experiment_data/dataset/voicelab_cbiz_testset_20220322
    outs:
      - experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322

  # Produces the gold transcript outputs that the Voicelab ASR stages depend on.
  voicelab_gold_transcript_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_gold_transcript.py
    deps:
      - experiment/voicelab/voicelab_pipeline_gold_transcript.py
      - experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
      - experiment_data/dataset/voicelab_cbiz_testset_20220322
    outs:
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy

  # Reads cached_asr/voicelab_cbiz_testset_20220322_techmo and the gold
  # transcript outputs.
  voicelab_techmo_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_techmo.py
    deps:
      - experiment/voicelab/voicelab_pipeline_techmo.py
      - experiment_data/dataset/voicelab_cbiz_testset_20220322
      - experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
    outs:
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer_embeddings

  # Reads cached_asr/voicelab_cbiz_testset_20220322_ajn and the gold
  # transcript outputs.
  voicelab_ajn_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_ajn_asr.py
    deps:
      - experiment/voicelab/voicelab_pipeline_ajn_asr.py
      - experiment_data/cached_asr/voicelab_cbiz_testset_20220322_ajn
      - experiment_data/dataset/voicelab_cbiz_testset_20220322
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
    outs:
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_alignment_wer
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_metrics_wer
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer_embeddings

  # Runs after the techmo, ajn and gold transcript stages: it depends on the
  # *_spacy outputs of all three.
  voicelab_tag_spacy_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/voicelab/spacy_tag_processing.py
    deps:
      - experiment/voicelab/spacy_tag_processing.py
      - experiment_data/dataset/voicelab_cbiz_testset_20220322
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
    outs:
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_techmo_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_techmo_alignment_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_ajn_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_ajn_alignment_wer_embeddings

  # Depends on the gold transcript and on the techmo/ajn *_polish_asr outputs.
  voicelab_ner_spacy_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/voicelab/spacy_ner_processing.py
    deps:
      - experiment/voicelab/spacy_ner_processing.py
      - experiment_data/dataset/voicelab_cbiz_testset_20220322
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
    outs:
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_techmo_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_techmo_alignment_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_ajn_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_ajn_alignment_wer_embeddings

  # Same inputs as the NER stage; emits one *_duckling output per transcript
  # source (gold, techmo, ajn).
  voicelab_duckling_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/voicelab/duckling_processing.py
    deps:
      - experiment/voicelab/duckling_processing.py
      - experiment_data/dataset/voicelab_cbiz_testset_20220322
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
    outs:
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_duckling
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_duckling
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_duckling

  # Reads cached_asr/voicelab_google and the gold transcript outputs. Unlike
  # the techmo/ajn stages, this single stage also declares the ner_spacy_* and
  # tag_spacy_* outputs.
  voicelab_google_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/voicelab/voicelab_google.py
    deps:
      # The tracked script must be the one `cmd` runs. This entry previously
      # pointed at experiment/voicelab/pipeline/voicelab_google.py, which `cmd`
      # does not execute.
      # NOTE(review): confirm where the script actually lives; if it is under
      # pipeline/, change `cmd` to match instead of this entry.
      - experiment/voicelab/voicelab_google.py
      - experiment_data/cached_asr/voicelab_google
      - experiment_data/dataset/voicelab_cbiz_testset_20220322
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
    outs:
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/google_polish_asr
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_google_metrics_wer
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_google_alignment_wer
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/google_spacy
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_google_alignment_wer
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_google_metrics_wer
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_google_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_google_alignment_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_google_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_google_alignment_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_google_alignment_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_google_metrics_wer_embeddings

  # --------------------------------------------------------------------------
  # Common Voice
  # --------------------------------------------------------------------------

  # Only the import script is tracked as a dependency; no raw dataset path is
  # listed for this stage.
  common_voice_import_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/common_voice/import_dataset.py
    deps:
      - experiment/common_voice/import_dataset.py
    outs:
      - experiment_data/dataset_relation_manager_data/common_voice

  # Runs after the import stage (depends on its output directory).
  common_voice_gold_transcript_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/common_voice/pipeline/common_voice_gold_transcript.py
    deps:
      - experiment/common_voice/pipeline/common_voice_gold_transcript.py
      - experiment_data/dataset_relation_manager_data/common_voice
    outs:
      - experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript
      - experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript_spacy

  # Reads cached_asr/common_voice_google and the gold transcript outputs.
  common_voice_google_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/common_voice/pipeline/common_voice_google.py
    deps:
      - experiment/common_voice/pipeline/common_voice_google.py
      - experiment_data/cached_asr/common_voice_google
      - experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript
      - experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript_spacy
    outs:
      - experiment_data/pipeline/asr_benchmark_common_voice/google_polish_asr
      - experiment_data/pipeline/asr_benchmark_common_voice/word_google_metrics_wer
      - experiment_data/pipeline/asr_benchmark_common_voice/word_google_alignment_wer
      - experiment_data/pipeline/asr_benchmark_common_voice/google_spacy
      - experiment_data/pipeline/asr_benchmark_common_voice/pos_google_alignment_wer
      - experiment_data/pipeline/asr_benchmark_common_voice/pos_google_metrics_wer
      - experiment_data/pipeline/asr_benchmark_common_voice/word_google_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_common_voice/word_google_alignment_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_common_voice/ner_spacy_google_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_common_voice/ner_spacy_google_alignment_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_common_voice/tag_spacy_google_alignment_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_common_voice/tag_spacy_google_metrics_wer_embeddings

  # --------------------------------------------------------------------------
  # MInDS-14
  # --------------------------------------------------------------------------

  # Only the import script is tracked as a dependency; no raw dataset path is
  # listed for this stage.
  minds14_import_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/minds14/import_dataset.py
    deps:
      - experiment/minds14/import_dataset.py
    outs:
      - experiment_data/dataset_relation_manager_data/minds14

  # Runs after the import stage (depends on its output directory).
  minds14_gold_transcript_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/minds14/pipeline/minds14_gold_transcript.py
    deps:
      - experiment/minds14/pipeline/minds14_gold_transcript.py
      - experiment_data/dataset_relation_manager_data/minds14
    outs:
      - experiment_data/pipeline/asr_benchmark_minds14/gold_transcript
      - experiment_data/pipeline/asr_benchmark_minds14/gold_transcript_spacy

  # Reads cached_asr/minds14_google and the gold transcript outputs.
  minds14_google_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/minds14/pipeline/minds14_google.py
    deps:
      - experiment/minds14/pipeline/minds14_google.py
      - experiment_data/cached_asr/minds14_google
      - experiment_data/pipeline/asr_benchmark_minds14/gold_transcript
      - experiment_data/pipeline/asr_benchmark_minds14/gold_transcript_spacy
    outs:
      - experiment_data/pipeline/asr_benchmark_minds14/google_polish_asr
      - experiment_data/pipeline/asr_benchmark_minds14/word_google_metrics_wer
      - experiment_data/pipeline/asr_benchmark_minds14/word_google_alignment_wer
      - experiment_data/pipeline/asr_benchmark_minds14/google_spacy
      - experiment_data/pipeline/asr_benchmark_minds14/pos_google_alignment_wer
      - experiment_data/pipeline/asr_benchmark_minds14/pos_google_metrics_wer
      - experiment_data/pipeline/asr_benchmark_minds14/word_google_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_minds14/word_google_alignment_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_minds14/ner_spacy_google_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_minds14/ner_spacy_google_alignment_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_minds14/tag_spacy_google_alignment_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_minds14/tag_spacy_google_metrics_wer_embeddings

  # --------------------------------------------------------------------------
  # Google FLEURS
  # --------------------------------------------------------------------------

  # Produces experiment_data/audio/fleurs_audio. No other stage in this file
  # lists that path as a dependency.
  google_fleurs_save_audio_files:
    cmd: |
      PYTHONPATH=. python -u experiment/google_fleurs/save_wav_files.py
    deps:
      - experiment/google_fleurs/save_wav_files.py
    outs:
      - experiment_data/audio/fleurs_audio

  # Only the import script is tracked as a dependency; no raw dataset path is
  # listed for this stage.
  google_fleurs_import_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/google_fleurs/import_dataset.py
    deps:
      - experiment/google_fleurs/import_dataset.py
    outs:
      - experiment_data/dataset_relation_manager_data/google_fleurs

  # Runs after the import stage (depends on its output directory).
  google_fleurs_gold_transcript_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/google_fleurs/pipeline/google_fleurs_gold_transcript.py
    deps:
      - experiment/google_fleurs/pipeline/google_fleurs_gold_transcript.py
      - experiment_data/dataset_relation_manager_data/google_fleurs
    outs:
      - experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript
      - experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript_spacy

  # Reads cached_asr/google_fleurs_google and the gold transcript outputs.
  google_fleurs_google_processing:
    cmd: |
      PYTHONPATH=. python -u experiment/google_fleurs/pipeline/google_fleurs_google.py
    deps:
      - experiment/google_fleurs/pipeline/google_fleurs_google.py
      - experiment_data/cached_asr/google_fleurs_google
      - experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript
      - experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript_spacy
    outs:
      - experiment_data/pipeline/asr_benchmark_google_fleurs/google_polish_asr
      - experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_metrics_wer
      - experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_alignment_wer
      - experiment_data/pipeline/asr_benchmark_google_fleurs/google_spacy
      - experiment_data/pipeline/asr_benchmark_google_fleurs/pos_google_alignment_wer
      - experiment_data/pipeline/asr_benchmark_google_fleurs/pos_google_metrics_wer
      - experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_alignment_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_google_fleurs/ner_spacy_google_metrics_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_google_fleurs/ner_spacy_google_alignment_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_google_fleurs/tag_spacy_google_alignment_wer_embeddings
      - experiment_data/pipeline/asr_benchmark_google_fleurs/tag_spacy_google_metrics_wer_embeddings