# Recorded state of the experiment pipeline: for every stage, the command that
# was run plus the md5 / size / file count of each dependency and output.
# NOTE(review): this looks like a tool-generated DVC lock file - prefer
# regenerating it with the tool over hand-editing any hash below.
schema: '2.0'
stages:
  # NOTE(review): same cmd and same output path as
  # `luna_import_to_common_format` further down, but with different script and
  # output md5 values - presumably a stale entry left over from a stage
  # rename; confirm against the pipeline definition before removing.
  import_luna_to_common_format:
    cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
    deps:
    - path: experiment/luna/import_dataset/import_luna.py
      md5: f40adccbf0b51094a71b876c9ccad751
      size: 8265
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    outs:
    - path: experiment_data/dataset_relation_manager_data/luna
      md5: 773f92667e16efd915ec6384d06aa4fb.dir
      size: 229007155
      nfiles: 1000

  # NOTE(review): depends on the older `luna` hash (773f...) and declares the
  # whole `asr_benchmark_luna` directory as its output, which is the parent of
  # the outputs recorded by the three processing stages below - verify whether
  # this stage still exists in the pipeline definition.
  luna_main_pipeline:
    # Two commands, one per line; the value ends with a trailing newline.
    cmd: |
      PYTHONPATH=. python -m spacy download pl_core_news_lg
      PYTHONPATH=. python experiment/luna/pipeline/luna_main.py
    deps:
    - path: experiment_data/cached_asr/luna_techmo
      md5: 033ea7b5434dded73bf869bfdd299462.dir
      size: 4256479
      nfiles: 500
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/dataset_relation_manager_data/luna
      md5: 773f92667e16efd915ec6384d06aa4fb.dir
      size: 229007155
      nfiles: 1000
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna
      md5: 2e334734387ab4579b7b5269d5029e81.dir
      size: 71627685
      nfiles: 4000

  # Its output hash (ff68...) is the one `luna_gold_transcript_processing`
  # lists as a dependency.
  luna_import_to_common_format:
    cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
    deps:
    - path: experiment/luna/import_dataset/import_luna.py
      md5: 44a1b914afda2ae74462e7afd83f658e
      size: 8278
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    outs:
    - path: experiment_data/dataset_relation_manager_data/luna
      md5: ff680a49296818460a49bd0c70089a4a.dir
      size: 229007155
      nfiles: 1000

  voicelab_import_to_common_format:
    cmd: PYTHONPATH=. python experiment/voicelab/import_data.py
    deps:
    - path: experiment/voicelab/import_data.py
      md5: 0cf7cf604b202489ce3b0cb51bb47fa2
      size: 2264
    - path: experiment_data/dataset/voicelab_cbiz_testset_20220322
      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
      size: 4803739404
      nfiles: 1600
    outs:
    - path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
      md5: 6d56f24b0ff78c0d44ade2114158150d.dir
      size: 110711470
      nfiles: 1600

  # Produces the gold transcript directories consumed by the two ASR
  # processing stages below.
  luna_gold_transcript_processing:
    cmd: |
      PYTHONPATH=. python experiment/luna/pipeline/luna_gold_transcript_processing.py
    deps:
    - path: experiment/luna/pipeline/luna_gold_transcript_processing.py
      md5: 2bae24d511febebb26b3264b204784f5
      size: 1466
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/dataset_relation_manager_data/luna
      md5: ff680a49296818460a49bd0c70089a4a.dir
      size: 229007155
      nfiles: 1000
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
      size: 6706925
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
      md5: 24a399475b752737db0f2a8671507014.dir
      size: 6785648
      nfiles: 500

  # Every output here records 494 files, while the gold transcript inputs
  # record 500.
  luna_ajn_processing:
    cmd: |
      PYTHONPATH=. python experiment/luna/pipeline/luna_ajn_asr_processing.py
    deps:
    - path: experiment/luna/pipeline/luna_ajn_asr_processing.py
      md5: ec7d7b5384f845173d9fb77e9cfa9907
      size: 2501
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
      size: 6706925
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
      md5: 24a399475b752737db0f2a8671507014.dir
      size: 6785648
      nfiles: 500
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
      md5: 620e178854dbcb69f49a608f34573a88.dir
      size: 6159899
      nfiles: 494
    - path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
      md5: 312be284d4ec9e38986048e785fcbbc1.dir
      size: 6535212
      nfiles: 494
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer
      md5: 8ad558edb6a8bd2508a7e25bcf53bf94.dir
      size: 21936929
      nfiles: 494
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer
      md5: 98c74c5bf87637749eac1ed5ff3393b4.dir
      size: 16842
      nfiles: 494
    - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer
      md5: 1741fff740259398b28bf2a6ba3aec41.dir
      size: 20671277
      nfiles: 494
    - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer
      md5: 18605657ff9c7ef3221e27b671a3b4d1.dir
      size: 16835
      nfiles: 494

  # Reads the cached ASR results under `cached_asr/luna_techmo` together with
  # the gold transcripts; every output records 500 files.
  luna_techmo_processing:
    cmd: |
      PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py
    deps:
    - path: experiment/luna/pipeline/luna_techmo_processing.py
      md5: b4d5ad7a0d7fb0714a2dc02cb457e8c9
      size: 2628
    - path: experiment_data/cached_asr/luna_techmo
      md5: 033ea7b5434dded73bf869bfdd299462.dir
      size: 4256479
      nfiles: 500
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
      size: 6706925
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
      md5: 24a399475b752737db0f2a8671507014.dir
      size: 6785648
      nfiles: 500
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer
      md5: c71539f3889c627a371957958bd0907d.dir
      size: 20897599
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer
      md5: 4efbe309674d9d494bae3dac057025ba.dir
      size: 17341
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
      md5: acfaec46b2415ed6a64e3a3464d164f8.dir
      size: 9697519
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
      md5: e869581816457d1585a7e42d0a18b8b2.dir
      size: 6124559
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer
      md5: 0dabd65b3981d588cd23d943abc6e231.dir
      size: 21380796
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer
      md5: 4cfbb2830b280084ece14b1ef815b92a.dir
      size: 17298
      nfiles: 500