schema: '2.0' stages: import_luna_to_common_format: cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py deps: - path: experiment/luna/import_dataset/import_luna.py md5: f40adccbf0b51094a71b876c9ccad751 size: 8265 - path: experiment_data/dataset/LUNA.PL md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 outs: - path: experiment_data/dataset_relation_manager_data/luna md5: 773f92667e16efd915ec6384d06aa4fb.dir size: 229007155 nfiles: 1000 luna_main_pipeline: cmd: "PYTHONPATH=. python -m spacy download pl_core_news_lg\nPYTHONPATH=. python\ \ experiment/luna/pipeline/luna_main.py\n" deps: - path: experiment_data/cached_asr/luna_techmo md5: 033ea7b5434dded73bf869bfdd299462.dir size: 4256479 nfiles: 500 - path: experiment_data/dataset/LUNA.PL md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 - path: experiment_data/dataset_relation_manager_data/luna md5: 773f92667e16efd915ec6384d06aa4fb.dir size: 229007155 nfiles: 1000 outs: - path: experiment_data/pipeline/asr_benchmark_luna md5: 2e334734387ab4579b7b5269d5029e81.dir size: 71627685 nfiles: 4000 luna_import_to_common_format: cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py deps: - path: experiment/luna/import_dataset/import_luna.py md5: 44a1b914afda2ae74462e7afd83f658e size: 8278 - path: experiment_data/dataset/LUNA.PL md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 outs: - path: experiment_data/dataset_relation_manager_data/luna md5: ff680a49296818460a49bd0c70089a4a.dir size: 229007155 nfiles: 1000 voicelab_import_to_common_format: cmd: PYTHONPATH=. python -u experiment/voicelab/import_data.py deps: - path: experiment/voicelab/import_data.py md5: 41acb98a1517e66c052182fe0a1403ba size: 2108 - path: experiment_data/dataset/voicelab_cbiz_testset_20220322 md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir size: 4803739404 nfiles: 1600 outs: - path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322 md5: 4046ea5d80966f0c017b2c4bec0e7c9b.dir size: 110711470 nfiles: 1600 luna_gold_transcript_processing: cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_gold_transcript_processing.py\n" deps: - path: experiment/luna/pipeline/luna_gold_transcript_processing.py md5: 2bae24d511febebb26b3264b204784f5 size: 1466 - path: experiment_data/dataset/LUNA.PL md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 - path: experiment_data/dataset_relation_manager_data/luna md5: ff680a49296818460a49bd0c70089a4a.dir size: 229007155 nfiles: 1000 outs: - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir size: 6706925 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy md5: 24a399475b752737db0f2a8671507014.dir size: 6785648 nfiles: 500 luna_ajn_processing: cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_ajn_asr_processing.py\n" deps: - path: experiment/luna/pipeline/luna_ajn_asr_processing.py md5: 2d66cb8890c420b55e8b7eb33ac32ba2 size: 3558 - path: experiment_data/cached_asr/luna_ajn_polish_asr md5: 620e178854dbcb69f49a608f34573a88.dir size: 6159899 nfiles: 494 - path: experiment_data/dataset/LUNA.PL md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir size: 6706925 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy md5: 24a399475b752737db0f2a8671507014.dir size: 6785648 nfiles: 500 outs: - path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr md5: fa9d926ae8fd0268c71f19c1d5d39fcf.dir size: 11080541 nfiles: 499 - path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy md5: 417d8f07266eb5da9c4bfbf84f3b4eac.dir size: 6579351 nfiles: 499 - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer md5: 2bf746c412e6bff4071f689d853b106f.dir size: 22061350 nfiles: 499 - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer md5: 3147413bdfd36ad91c64303e8705951b.dir size: 17002 nfiles: 499 - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer md5: 2bb11f8a97cdeb18c557fadb49a6f015.dir size: 25669158 nfiles: 499 - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer_embeddings md5: c2824c0c5cf433dbf864ebbdc2fb3cfc.dir size: 44326962 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer md5: c48c74eccf1cfd0768900514d2fcfd1b.dir size: 10527 nfiles: 499 - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer_embeddings md5: 98a7edeee3b630e8e301acfc578a8393.dir size: 34869 nfiles: 500 luna_techmo_processing: cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py\n" deps: - path: experiment/luna/pipeline/luna_techmo_processing.py md5: 75069cd6e3a61dfaaf49c2bdb1e81976 size: 3416 - path: experiment_data/cached_asr/luna_techmo md5: 033ea7b5434dded73bf869bfdd299462.dir size: 4256479 nfiles: 500 - path: experiment_data/dataset/LUNA.PL md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir size: 6706925 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy md5: 24a399475b752737db0f2a8671507014.dir size: 6785648 nfiles: 500 outs: - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer md5: 94762d19a853810064afd38319d05a2c.dir size: 20897599 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer md5: 4efbe309674d9d494bae3dac057025ba.dir size: 17341 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr md5: 3787c6a4c7941787253165e2ba760e73.dir size: 9697519 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy md5: 337b6bf947ee47cda30b3cc75f954e8e.dir size: 6124559 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer md5: afc25d6ad22bed4ded5cb07028bff1cf.dir size: 21380796 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer_embeddings md5: c2824c0c5cf433dbf864ebbdc2fb3cfc.dir size: 44326962 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer md5: 4cfbb2830b280084ece14b1ef815b92a.dir size: 17298 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer_embeddings md5: 98a7edeee3b630e8e301acfc578a8393.dir size: 34869 nfiles: 500 voicelab_gold_transcript_processing: cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_gold_transcript.py\n" deps: - path: experiment/voicelab/voicelab_pipeline_gold_transcript.py md5: 4ba38fdfac616f8a0818cedabf66b94d size: 2312 - path: experiment_data/dataset/voicelab_cbiz_testset_20220322 md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir size: 4803739404 nfiles: 1600 - path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322 md5: 4046ea5d80966f0c017b2c4bec0e7c9b.dir size: 110711470 nfiles: 1600 outs: - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript md5: 9edf1e743faa9fc3515790acb6fd8cab.dir size: 21846798 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy md5: c166937f6e8ae9d28412ca1e3e43469e.dir size: 26643278 nfiles: 800 voicelab_techmo_processing: cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_techmo.py\n" deps: - path: experiment/voicelab/voicelab_pipeline_techmo.py md5: 3d6347486055a11e399beac71ce2f877 size: 3479 - path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo md5: 6c3b356723d562c978f84e733b91f5d0.dir size: 17539259 nfiles: 800 - path: experiment_data/dataset/voicelab_cbiz_testset_20220322 md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir size: 4803739404 nfiles: 1600 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript md5: 9edf1e743faa9fc3515790acb6fd8cab.dir size: 21846798 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy md5: c166937f6e8ae9d28412ca1e3e43469e.dir size: 26643278 nfiles: 800 outs: - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer md5: a15a7a19f46e329c8b77eeecdda9d7b4.dir size: 81650836 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer md5: b1a674826142a44095a4c6439ac49024.dir size: 27934 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr md5: da32e6fa9d986deddb594cb66e649864.dir size: 39158267 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy md5: cd89a91a33629088ba6fc30ef8427dee.dir size: 24482297 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer md5: 0b714391682432408d74beee1cd5a14a.dir size: 83756423 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer_embeddings md5: 93d34d82f8536014ddbe0cf0645dd837.dir size: 174322727 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer md5: 2fe3288abe85e4a385e2aefa0e8cad7e.dir size: 27780 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer_embeddings md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir size: 56182 nfiles: 800 voicelab_ajn_processing: cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_ajn_asr.py\n" deps: - path: experiment/voicelab/voicelab_pipeline_ajn_asr.py md5: 85e8d3d79379e6d5db751e03c5ebae75 size: 4161 - path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_ajn md5: 49a38b90f1265a61b90b54f820415011.dir size: 32601414 nfiles: 800 - path: experiment_data/dataset/voicelab_cbiz_testset_20220322 md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir size: 4803739404 nfiles: 1600 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript md5: 9edf1e743faa9fc3515790acb6fd8cab.dir size: 21846798 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy md5: c166937f6e8ae9d28412ca1e3e43469e.dir size: 26643278 nfiles: 800 outs: - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr md5: 94181d7a0731e8defbdcb4b477ad72a2.dir size: 48470646 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy md5: ef8be18b8acca299f9b9542ac8643a87.dir size: 20536889 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_alignment_wer md5: b2d3a9872e6016cfde8e6d025bef373b.dir size: 78539613 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_metrics_wer md5: d0e1ef5f57de27a2356d2f2050a93349.dir size: 27353 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer md5: acb5337346e70bed974dfe7ca7947d79.dir size: 104789466 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer_embeddings md5: 93d34d82f8536014ddbe0cf0645dd837.dir size: 174322727 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer md5: 903096554a3ea6896c4abaa5e2c71d4c.dir size: 16505 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer_embeddings md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir size: 56182 nfiles: 800