schema: '2.0' stages: import_luna_to_common_format: cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py deps: - path: experiment/luna/import_dataset/import_luna.py md5: f40adccbf0b51094a71b876c9ccad751 size: 8265 - path: experiment_data/dataset/LUNA.PL md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 outs: - path: experiment_data/dataset_relation_manager_data/luna md5: 773f92667e16efd915ec6384d06aa4fb.dir size: 229007155 nfiles: 1000 luna_main_pipeline: cmd: "PYTHONPATH=. python -m spacy download pl_core_news_lg\nPYTHONPATH=. python\ \ experiment/luna/pipeline/luna_main.py\n" deps: - path: experiment_data/cached_asr/luna_techmo md5: 033ea7b5434dded73bf869bfdd299462.dir size: 4256479 nfiles: 500 - path: experiment_data/dataset/LUNA.PL md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 - path: experiment_data/dataset_relation_manager_data/luna md5: 773f92667e16efd915ec6384d06aa4fb.dir size: 229007155 nfiles: 1000 outs: - path: experiment_data/pipeline/asr_benchmark_luna md5: 2e334734387ab4579b7b5269d5029e81.dir size: 71627685 nfiles: 4000 luna_import_to_common_format: cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py deps: - path: experiment/luna/import_dataset/import_luna.py md5: 44a1b914afda2ae74462e7afd83f658e size: 8278 - path: experiment_data/dataset/LUNA.PL md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 outs: - path: experiment_data/dataset_relation_manager_data/luna md5: ff680a49296818460a49bd0c70089a4a.dir size: 229007155 nfiles: 1000 voicelab_import_to_common_format: cmd: PYTHONPATH=. python -u experiment/voicelab/import_data.py deps: - path: experiment/voicelab/import_data.py md5: 41acb98a1517e66c052182fe0a1403ba size: 2108 - path: experiment_data/dataset/voicelab_cbiz_testset_20220322 md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir size: 4803739404 nfiles: 1600 outs: - path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322 md5: 4046ea5d80966f0c017b2c4bec0e7c9b.dir size: 110711470 nfiles: 1600 luna_gold_transcript_processing: cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_gold_transcript_processing.py\n" deps: - path: experiment/luna/pipeline/luna_gold_transcript_processing.py md5: 2bae24d511febebb26b3264b204784f5 size: 1466 - path: experiment_data/dataset/LUNA.PL md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 - path: experiment_data/dataset_relation_manager_data/luna md5: ff680a49296818460a49bd0c70089a4a.dir size: 229007155 nfiles: 1000 outs: - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir size: 6706925 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy md5: 24a399475b752737db0f2a8671507014.dir size: 6785648 nfiles: 500 luna_ajn_processing: cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_ajn_asr_processing.py\n" deps: - path: experiment/luna/pipeline/luna_ajn_asr_processing.py md5: 2d66cb8890c420b55e8b7eb33ac32ba2 size: 3558 - path: experiment_data/cached_asr/luna_ajn_polish_asr md5: 10454ef4568c2023e9d51ad418db2854.dir size: 1276562 nfiles: 495 - path: experiment_data/dataset/LUNA.PL md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir size: 6706925 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy md5: 24a399475b752737db0f2a8671507014.dir size: 6785648 nfiles: 500 outs: - path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr md5: 8c080d8110e5860e78bfcb311fe2b90d.dir size: 6204883 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy md5: f06d2f1369b18e5fa126af5a00a8f0b8.dir size: 6590702 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer md5: 164f3b4796bcab894831da4f0a0fa0af.dir size: 22096130 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer md5: ee5ae7387429992fe04fcbde24e2bd24.dir size: 17037 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer md5: 00d84c15ae1c1a491625ee4dd8db6418.dir size: 20803179 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer_embeddings md5: c2824c0c5cf433dbf864ebbdc2fb3cfc.dir size: 44326962 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer md5: fdbccc71fa84d0a68f4cd6723399e5dd.dir size: 17045 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer_embeddings md5: 98a7edeee3b630e8e301acfc578a8393.dir size: 34869 nfiles: 500 luna_techmo_processing: cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py\n" deps: - path: experiment/luna/pipeline/luna_techmo_processing.py md5: 75069cd6e3a61dfaaf49c2bdb1e81976 size: 3416 - path: experiment_data/cached_asr/luna_techmo md5: 033ea7b5434dded73bf869bfdd299462.dir size: 4256479 nfiles: 500 - path: experiment_data/dataset/LUNA.PL md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir size: 6706925 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy md5: 24a399475b752737db0f2a8671507014.dir size: 6785648 nfiles: 500 outs: - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer md5: 94762d19a853810064afd38319d05a2c.dir size: 20897599 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer md5: 4efbe309674d9d494bae3dac057025ba.dir size: 17341 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr md5: 3787c6a4c7941787253165e2ba760e73.dir size: 9697519 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy md5: 337b6bf947ee47cda30b3cc75f954e8e.dir size: 6124559 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer md5: afc25d6ad22bed4ded5cb07028bff1cf.dir size: 21380796 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer_embeddings md5: c2824c0c5cf433dbf864ebbdc2fb3cfc.dir size: 44326962 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer md5: 4cfbb2830b280084ece14b1ef815b92a.dir size: 17298 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer_embeddings md5: 98a7edeee3b630e8e301acfc578a8393.dir size: 34869 nfiles: 500 voicelab_gold_transcript_processing: cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_gold_transcript.py\n" deps: - path: experiment/voicelab/voicelab_pipeline_gold_transcript.py md5: 4ba38fdfac616f8a0818cedabf66b94d size: 2312 - path: experiment_data/dataset/voicelab_cbiz_testset_20220322 md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir size: 4803739404 nfiles: 1600 - path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322 md5: 4046ea5d80966f0c017b2c4bec0e7c9b.dir size: 110711470 nfiles: 1600 outs: - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript md5: 9edf1e743faa9fc3515790acb6fd8cab.dir size: 21846798 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy md5: c166937f6e8ae9d28412ca1e3e43469e.dir size: 26643278 nfiles: 800 voicelab_techmo_processing: cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_techmo.py\n" deps: - path: experiment/voicelab/voicelab_pipeline_techmo.py md5: 3d6347486055a11e399beac71ce2f877 size: 3479 - path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo md5: 6c3b356723d562c978f84e733b91f5d0.dir size: 17539259 nfiles: 800 - path: experiment_data/dataset/voicelab_cbiz_testset_20220322 md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir size: 4803739404 nfiles: 1600 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript md5: 9edf1e743faa9fc3515790acb6fd8cab.dir size: 21846798 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy md5: c166937f6e8ae9d28412ca1e3e43469e.dir size: 26643278 nfiles: 800 outs: - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer md5: a15a7a19f46e329c8b77eeecdda9d7b4.dir size: 81650836 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer md5: b1a674826142a44095a4c6439ac49024.dir size: 27934 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr md5: da32e6fa9d986deddb594cb66e649864.dir size: 39158267 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy md5: cd89a91a33629088ba6fc30ef8427dee.dir size: 24482297 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer md5: 0b714391682432408d74beee1cd5a14a.dir size: 83756423 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer_embeddings md5: 93d34d82f8536014ddbe0cf0645dd837.dir size: 174322727 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer md5: 2fe3288abe85e4a385e2aefa0e8cad7e.dir size: 27780 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer_embeddings md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir size: 56182 nfiles: 800 voicelab_ajn_processing: cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_ajn_asr.py\n" deps: - path: experiment/voicelab/voicelab_pipeline_ajn_asr.py md5: 85e8d3d79379e6d5db751e03c5ebae75 size: 4161 - path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_ajn md5: 0705aafa0969142288cc9baa88d1ed57.dir size: 6896694 nfiles: 800 - path: experiment_data/dataset/voicelab_cbiz_testset_20220322 md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir size: 4803739404 nfiles: 1600 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript md5: 9edf1e743faa9fc3515790acb6fd8cab.dir size: 21846798 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy md5: c166937f6e8ae9d28412ca1e3e43469e.dir size: 26643278 nfiles: 800 outs: - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr md5: da10bb60107a86f98b2d07fef5616390.dir size: 22765926 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy md5: e8a48a0a63c1569ec734e1c8bb03c7db.dir size: 20536889 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_alignment_wer md5: 7806779e936ec6121b8d72e0d0e3ed59.dir size: 78539613 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_metrics_wer md5: d0e1ef5f57de27a2356d2f2050a93349.dir size: 27353 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer md5: d190f33e6643f62ecbeb9e5ae5fb5e02.dir size: 78992762 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer_embeddings md5: 93d34d82f8536014ddbe0cf0645dd837.dir size: 174322727 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer md5: 04f6ccbaf94cf08c34ac201ae079c21c.dir size: 25307 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer_embeddings md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir size: 56182 nfiles: 800 voicelab_tag_spacy_processing: cmd: "PYTHONPATH=. python -u experiment/voicelab/spacy_tag_processing.py\n" deps: - path: experiment/voicelab/spacy_tag_processing.py md5: b5f996e5be56cdf93eec23b9c0c066b9 size: 2580 - path: experiment_data/dataset/voicelab_cbiz_testset_20220322 md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir size: 4803739404 nfiles: 1600 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy md5: e8a48a0a63c1569ec734e1c8bb03c7db.dir size: 20536889 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy md5: c166937f6e8ae9d28412ca1e3e43469e.dir size: 26643278 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy md5: cd89a91a33629088ba6fc30ef8427dee.dir size: 24482297 nfiles: 800 outs: - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_ajn_alignment_wer_embeddings md5: c7914fa4d415a4815e371017ef505358.dir size: 80129550 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_ajn_metrics_wer_embeddings md5: ac726b3b371256176aee1364cb1fec88.dir size: 27258 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_techmo_alignment_wer_embeddings md5: 47d1b5a820806dbad941b19547eb1273.dir size: 83756819 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_techmo_metrics_wer_embeddings md5: ba59691f467f0486f832bf86326c5142.dir size: 27781 nfiles: 800 luna_tag_spacy_processing: cmd: "PYTHONPATH=. python -u experiment/luna/pipeline/spacy_tag_processing.py\n" deps: - path: experiment/luna/pipeline/spacy_tag_processing.py md5: bdaf1cae6863815ce59e022a493379da size: 2567 - path: experiment_data/dataset/LUNA.PL md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 - path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy md5: f06d2f1369b18e5fa126af5a00a8f0b8.dir size: 6590702 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy md5: 24a399475b752737db0f2a8671507014.dir size: 6785648 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy md5: 337b6bf947ee47cda30b3cc75f954e8e.dir size: 6124559 nfiles: 500 outs: - path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_ajn_alignment_wer_embeddings md5: bd0439d750f23b978adbfd4ef1151a9c.dir size: 22511019 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_ajn_metrics_wer_embeddings md5: 85a4a0b2d6b91d9745f55944fb3886df.dir size: 17014 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_techmo_alignment_wer_embeddings md5: a6f1ef0da2dc2f45f522e3b2e7dec0f9.dir size: 21411162 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_techmo_metrics_wer_embeddings md5: abb1f6102a1de3d63d0e9e2cb797d2b9.dir size: 17369 nfiles: 500 voicelab_ner_spacy_processing: cmd: "PYTHONPATH=. python -u experiment/voicelab/spacy_ner_processing.py\n" deps: - path: experiment/voicelab/spacy_ner_processing.py md5: f6a4f2e34a31d15345435e11b425b1dd size: 2868 - path: experiment_data/dataset/voicelab_cbiz_testset_20220322 md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir size: 4803739404 nfiles: 1600 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr md5: da10bb60107a86f98b2d07fef5616390.dir size: 22765926 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript md5: 9edf1e743faa9fc3515790acb6fd8cab.dir size: 21846798 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr md5: da32e6fa9d986deddb594cb66e649864.dir size: 39158267 nfiles: 800 outs: - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_ajn_alignment_wer_embeddings md5: 742f665235ea5911b79974a06de9551e.dir size: 76044699 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_ajn_metrics_wer_embeddings md5: 17c5ae5ffa5b380d9be5bcc5c1f71401.dir size: 27796 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_techmo_alignment_wer_embeddings md5: 4b161efe4a5dcedd0dfe481ff69a74a0.dir size: 28195 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_techmo_metrics_wer_embeddings md5: 9161b4f164b2e1a5e5e95d1069f71377.dir size: 79503990 nfiles: 800 luna_ner_spacy_processing: cmd: "PYTHONPATH=. python -u experiment/luna/pipeline/spacy_ner_processing.py\n" deps: - path: experiment/luna/pipeline/spacy_ner_processing.py md5: 95191ec772c0766911c8d297ad957778 size: 2855 - path: experiment_data/dataset/LUNA.PL md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 - path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr md5: 8c080d8110e5860e78bfcb311fe2b90d.dir size: 6204883 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir size: 6706925 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr md5: 3787c6a4c7941787253165e2ba760e73.dir size: 9697519 nfiles: 500 outs: - path: experiment_data/pipeline/asr_benchmark_luna/ner_spacy_ajn_alignment_wer_embeddings md5: 79aecfe01e3ce64fef8f83de75b08bfb.dir size: 21308988 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/ner_spacy_ajn_metrics_wer_embeddings md5: 2e136b8d41d48429267c966d5688d78a.dir size: 17297 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/ner_spacy_techmo_alignment_wer_embeddings md5: e4948a2a68306630df0459d09ccd5ee6.dir size: 17418 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/ner_spacy_techmo_metrics_wer_embeddings md5: ccbdbd523fb102a196abe8750cb78367.dir size: 20451389 nfiles: 500 luna_duckling_processing: cmd: "PYTHONPATH=. python -u experiment/luna/pipeline/duckling_processing.py\n" deps: - path: experiment/luna/pipeline/duckling_processing.py md5: deca760ecaaf1da2e1c3c9f05a8058d1 size: 3375 - path: experiment_data/dataset/LUNA.PL md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 - path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr md5: 8c080d8110e5860e78bfcb311fe2b90d.dir size: 6204883 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir size: 6706925 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr md5: 3787c6a4c7941787253165e2ba760e73.dir size: 9697519 nfiles: 500 outs: - path: experiment_data/pipeline/asr_benchmark_luna/ajn_duckling md5: 29d5e8e54dfa3479bca63b8d7e153618.dir size: 688654 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_duckling md5: 5b8c7cc874b2201c3b38d81cbd7f7d8e.dir size: 1169949 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/techmo_duckling md5: 4ebcdbb04173efe6ce11acd22641047f.dir size: 1058036 nfiles: 500 voicelab_duckling_processing: cmd: "PYTHONPATH=. python -u experiment/voicelab/duckling_processing.py\n" deps: - path: experiment/voicelab/duckling_processing.py md5: 78ddf7368a19eacf95fb6fa07e4cc6a1 size: 3388 - path: experiment_data/dataset/voicelab_cbiz_testset_20220322 md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir size: 4803739404 nfiles: 1600 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr md5: da10bb60107a86f98b2d07fef5616390.dir size: 22765926 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript md5: 9edf1e743faa9fc3515790acb6fd8cab.dir size: 21846798 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr md5: da32e6fa9d986deddb594cb66e649864.dir size: 39158267 nfiles: 800 outs: - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_duckling md5: d578db039b5e140b8ee93f1155989de0.dir size: 1275192 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_duckling md5: 7c7e509b454e1e5c230536772b6368a3.dir size: 2962070 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_duckling md5: 5079efe14a12c0d406c56c6423476f7e.dir size: 2554174 nfiles: 800 luna_wav2vec2_processing: cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_wav2vec2_asr_processing.py\n" deps: - path: experiment/luna/pipeline/luna_wav2vec2_asr_processing.py md5: 7326a6347c47bba7bd76419afa99a062 size: 3874 - path: experiment_data/cached_asr/luna_wav2vec2_polish_asr md5: 0555f6346ef6332d27fc0979e8ac16f7.dir size: 16356269 nfiles: 456 - path: experiment_data/dataset/LUNA.PL md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir size: 6706925 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy md5: 24a399475b752737db0f2a8671507014.dir size: 6785648 nfiles: 500 outs: - path: experiment_data/pipeline/asr_benchmark_luna/pos_wav2vec2_alignment_wer md5: f6cb75172894f7ddccbff1d78b889e2b.dir size: 18871817 nfiles: 476 - path: experiment_data/pipeline/asr_benchmark_luna/pos_wav2vec2_metrics_wer md5: 9961aa5fc267ac0d7773be2679054fe8.dir size: 16385 nfiles: 476 - path: experiment_data/pipeline/asr_benchmark_luna/wav2vec2_polish_asr md5: a3dd221c9dfe28c8988e4be1b42877ce.dir size: 21669041 nfiles: 476 - path: experiment_data/pipeline/asr_benchmark_luna/wav2vec2_spacy md5: 6ffddf1732e170f6d75ce19df6bf40b9.dir size: 5157578 nfiles: 476 - path: experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_alignment_wer md5: c8f5f1bdf4f061860445038ccb192f9d.dir size: 19391267 nfiles: 476 - path: experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_alignment_wer_embeddings md5: 04017200d3c60911f4cff0869e80a67a.dir size: 40334791 nfiles: 476 - path: experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_metrics_wer md5: fbee5d3ef2f839d4868781a34b179608.dir size: 16281 nfiles: 476 - path: experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_metrics_wer_embeddings md5: dece407fd19adb3472270d032bac5e47.dir size: 25672 nfiles: 476 common_voice_gold_transcript_processing: cmd: "PYTHONPATH=. python -u experiment/common_voice/pipeline/common_voice_gold_transcript.py\n" deps: - path: experiment/common_voice/pipeline/common_voice_gold_transcript.py md5: 8bb88130aa769dc21b77c753f95cdc1f size: 1520 - path: experiment_data/dataset_relation_manager_data/common_voice md5: 7a345b4c6c4ac9b784e21b5f16a53f6b.dir size: 21756677 nfiles: 16310 outs: - path: experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript md5: 2321e1afe83ed0dd53d52dbdc38e98ee.dir size: 4093545 nfiles: 8155 - path: experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript_spacy md5: e5a4b007ef80cd253223884b1c1d8f2c.dir size: 6457976 nfiles: 8155 common_voice_google_processing: cmd: "PYTHONPATH=. python -u experiment/common_voice/pipeline/common_voice_google.py\n" deps: - path: experiment/common_voice/pipeline/common_voice_google.py md5: 45b499a1897efaf2d7ee2ffec58984e1 size: 4983 - path: experiment_data/cached_asr/common_voice_google md5: e640b898f723eed2af42b93596eabecd.dir size: 1701910 nfiles: 8143 - path: experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript md5: 2321e1afe83ed0dd53d52dbdc38e98ee.dir size: 4093545 nfiles: 8155 - path: experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript_spacy md5: e5a4b007ef80cd253223884b1c1d8f2c.dir size: 6457976 nfiles: 8155 outs: - path: experiment_data/pipeline/asr_benchmark_common_voice/google_polish_asr md5: c36e767c4aa84fdee97916089d585eec.dir size: 5854366 nfiles: 8143 - path: experiment_data/pipeline/asr_benchmark_common_voice/google_spacy md5: 4c0baa2bf90c445c1bca7bdf937764ff.dir size: 4755832 nfiles: 8143 - path: experiment_data/pipeline/asr_benchmark_common_voice/ner_spacy_google_alignment_wer_embeddings md5: 5ecdc0210532a46688141730893dce28.dir size: 17894226 nfiles: 8143 - path: experiment_data/pipeline/asr_benchmark_common_voice/ner_spacy_google_metrics_wer_embeddings md5: 252cd908a3a2307e88c1cccc7a5ce390.dir size: 236348 nfiles: 8143 - path: experiment_data/pipeline/asr_benchmark_common_voice/pos_google_alignment_wer md5: 1e9986ecbba5a4e5e3f32e28a6848679.dir size: 18316770 nfiles: 8143 - path: experiment_data/pipeline/asr_benchmark_common_voice/pos_google_metrics_wer md5: 573354d0f3eb4476335d801732cc170c.dir size: 235666 nfiles: 8143 - path: experiment_data/pipeline/asr_benchmark_common_voice/tag_spacy_google_alignment_wer_embeddings md5: 54cef5652ecc7bd892a49b9a6352d03a.dir size: 18753379 nfiles: 8143 - path: experiment_data/pipeline/asr_benchmark_common_voice/tag_spacy_google_metrics_wer_embeddings md5: 30177297b5a54778ffb841639711303b.dir size: 234437 nfiles: 8143 - path: experiment_data/pipeline/asr_benchmark_common_voice/word_google_alignment_wer md5: abe0a93778bd5447a20698d5169082ad.dir size: 15821424 nfiles: 8143 - path: experiment_data/pipeline/asr_benchmark_common_voice/word_google_alignment_wer_embeddings md5: b21ec15cbc4f35c69292e52322b64f06.dir size: 33384670 nfiles: 8143 - path: experiment_data/pipeline/asr_benchmark_common_voice/word_google_metrics_wer md5: deafb9c70dd9bb48c57dd0802bd8c923.dir size: 225225 nfiles: 8143 - path: experiment_data/pipeline/asr_benchmark_common_voice/word_google_metrics_wer_embeddings md5: e2dcce4d029ffe904d363790da375fac.dir size: 572632 nfiles: 8143 common_voice_import_processing: cmd: "PYTHONPATH=. python -u experiment/common_voice/import_dataset.py\n" deps: - path: experiment/common_voice/import_dataset.py md5: 9c1f063dacb954bbd84b4174764e9123 size: 435 outs: - path: experiment_data/dataset_relation_manager_data/common_voice md5: 7a345b4c6c4ac9b784e21b5f16a53f6b.dir size: 21756677 nfiles: 16310 minds14_import_processing: cmd: "PYTHONPATH=. python -u experiment/minds14/import_dataset.py\n" deps: - path: experiment/minds14/import_dataset.py md5: 7455925ab4edb4648b051a05d27d7e79 size: 455 outs: - path: experiment_data/dataset_relation_manager_data/minds14 md5: 541b9d22e9fe502f4ba74f620bed2432.dir size: 3545062 nfiles: 1124 minds14_gold_transcript_processing: cmd: "PYTHONPATH=. python -u experiment/minds14/pipeline/minds14_gold_transcript.py\n" deps: - path: experiment/minds14/pipeline/minds14_gold_transcript.py md5: 84ce9195c9ae96e80d79184f353d429b size: 1515 - path: experiment_data/dataset_relation_manager_data/minds14 md5: 541b9d22e9fe502f4ba74f620bed2432.dir size: 3545062 nfiles: 1124 outs: - path: experiment_data/pipeline/asr_benchmark_minds14/gold_transcript md5: e7c4ad252834e701b9d26e7b2f17fa85.dir size: 689374 nfiles: 562 - path: experiment_data/pipeline/asr_benchmark_minds14/gold_transcript_spacy md5: 718c0de8e14d3dceec43f910ffb2ccdc.dir size: 843382 nfiles: 562 minds14_google_processing: cmd: "PYTHONPATH=. python -u experiment/minds14/pipeline/minds14_google.py\n" deps: - path: experiment/minds14/pipeline/minds14_google.py md5: e55897cae4ed3998cf814daf8b253c8a size: 4662 - path: experiment_data/cached_asr/minds14_google md5: bb1acae0c3070424d635ae9d2c6e14f0.dir size: 247940 nfiles: 562 - path: experiment_data/pipeline/asr_benchmark_minds14/gold_transcript md5: e7c4ad252834e701b9d26e7b2f17fa85.dir size: 689374 nfiles: 562 - path: experiment_data/pipeline/asr_benchmark_minds14/gold_transcript_spacy md5: 718c0de8e14d3dceec43f910ffb2ccdc.dir size: 843382 nfiles: 562 outs: - path: experiment_data/pipeline/asr_benchmark_minds14/google_polish_asr md5: cc5c2a7098575b46034689b8cf90d66b.dir size: 985004 nfiles: 562 - path: experiment_data/pipeline/asr_benchmark_minds14/google_spacy md5: 8b15b1e376d53a372256e4ff2284135e.dir size: 851587 nfiles: 562 - path: experiment_data/pipeline/asr_benchmark_minds14/ner_spacy_google_alignment_wer_embeddings md5: f6aadc7bc34cdc190725d92e2fe3a735.dir size: 2599569 nfiles: 562 - path: experiment_data/pipeline/asr_benchmark_minds14/ner_spacy_google_metrics_wer_embeddings md5: 52bf1a1c0dc0b1fc12d56616fe43d4f0.dir size: 14099 nfiles: 562 - path: experiment_data/pipeline/asr_benchmark_minds14/pos_google_alignment_wer md5: a91c7d9dc9dfef02409a9ccc9a8fa7d3.dir size: 2659852 nfiles: 562 - path: experiment_data/pipeline/asr_benchmark_minds14/pos_google_metrics_wer md5: 5826a2bcb07f2f4bcedea052dc83de4d.dir size: 14524 nfiles: 562 - path: experiment_data/pipeline/asr_benchmark_minds14/tag_spacy_google_alignment_wer_embeddings md5: ffb2453de98aaa86436210632a7e162d.dir size: 2744428 nfiles: 562 - path: experiment_data/pipeline/asr_benchmark_minds14/tag_spacy_google_metrics_wer_embeddings md5: c3e050294ca4fb0f8bfcf905ca73b840.dir size: 14984 nfiles: 562 - path: experiment_data/pipeline/asr_benchmark_minds14/word_google_alignment_wer md5: f9856e7d3316c38002ab7f84aea89e8e.dir size: 2725626 nfiles: 562 - path: experiment_data/pipeline/asr_benchmark_minds14/word_google_alignment_wer_embeddings md5: 20a251a5aaf260c36f5b31d304cd826e.dir size: 5686170 nfiles: 562 - path: experiment_data/pipeline/asr_benchmark_minds14/word_google_metrics_wer md5: 41621d3845a0035ee77352902c46a186.dir size: 15012 nfiles: 562 - path: experiment_data/pipeline/asr_benchmark_minds14/word_google_metrics_wer_embeddings md5: 673f6b46c7735163726f7d9e3b1f2dd4.dir size: 40260 nfiles: 562 google_fleurs_save_audio_files: cmd: "PYTHONPATH=. python -u experiment/google_fleurs/save_wav_files.py\n" deps: - path: experiment/google_fleurs/save_wav_files.py md5: e4b807332d972adb4c5a2d0b096b09e5 size: 493 outs: - path: experiment_data/audio/fleurs_audio md5: 6a4acda378431fb30839ab2c58668665.dir size: 236272072 nfiles: 758 google_fleurs_import_processing: cmd: "PYTHONPATH=. python -u experiment/google_fleurs/import_dataset.py\n" deps: - path: experiment/google_fleurs/import_dataset.py md5: 01daea17400b56d2736669aaf3e2af09 size: 441 outs: - path: experiment_data/dataset_relation_manager_data/google_fleurs md5: e9045f637ccec39d67d3ee5e9763cf13.dir size: 4991297 nfiles: 1516 google_fleurs_gold_transcript_processing: cmd: "PYTHONPATH=. python -u experiment/google_fleurs/pipeline/google_fleurs_gold_transcript.py\n" deps: - path: experiment/google_fleurs/pipeline/google_fleurs_gold_transcript.py md5: 981d7596a8276f7952fcee705d0a828c size: 1521 - path: experiment_data/dataset_relation_manager_data/google_fleurs md5: e9045f637ccec39d67d3ee5e9763cf13.dir size: 4991297 nfiles: 1516 outs: - path: experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript md5: 291a1976499bd24a878636e4c76fc57a.dir size: 975209 nfiles: 758 - path: experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript_spacy md5: 76b462a21926ab83f4e63e3046b92d08.dir size: 1196907 nfiles: 758 google_fleurs_google_processing: cmd: "PYTHONPATH=. python -u experiment/google_fleurs/pipeline/google_fleurs_google.py\n" deps: - path: experiment/google_fleurs/pipeline/google_fleurs_google.py md5: 433c7eb5b98e35cdd22dfca8a0d50f5e size: 4673 - path: experiment_data/cached_asr/google_fleurs_google md5: c740521abae49feebfaf20c2384c8de5.dir size: 350126 nfiles: 758 - path: experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript md5: 291a1976499bd24a878636e4c76fc57a.dir size: 975209 nfiles: 758 - path: experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript_spacy md5: 76b462a21926ab83f4e63e3046b92d08.dir size: 1196907 nfiles: 758 outs: - path: experiment_data/pipeline/asr_benchmark_google_fleurs/google_polish_asr md5: d6cc3d584bac32b78328f18f5f9172cf.dir size: 1377134 nfiles: 758 - path: experiment_data/pipeline/asr_benchmark_google_fleurs/google_spacy md5: 28d584893d4f2d6afe8c1226bb813a1e.dir size: 1183960 nfiles: 758 - path: experiment_data/pipeline/asr_benchmark_google_fleurs/ner_spacy_google_alignment_wer_embeddings md5: 560b3f62ad19f41037f29609477262aa.dir size: 3638477 nfiles: 758 - path: experiment_data/pipeline/asr_benchmark_google_fleurs/ner_spacy_google_metrics_wer_embeddings md5: f0755063eeeb38680dcf1b59e1b61632.dir size: 21386 nfiles: 758 - path: experiment_data/pipeline/asr_benchmark_google_fleurs/pos_google_alignment_wer md5: 6881fdb748c383e8f121beeb3f996c15.dir size: 3712618 nfiles: 758 - path: experiment_data/pipeline/asr_benchmark_google_fleurs/pos_google_metrics_wer md5: a93545ba089d087be33c0cd41ab88497.dir size: 22099 nfiles: 758 - path: experiment_data/pipeline/asr_benchmark_google_fleurs/tag_spacy_google_alignment_wer_embeddings md5: 3af296e52f5e74e5e7db843bd8b7f41b.dir size: 3844097 nfiles: 758 - path: experiment_data/pipeline/asr_benchmark_google_fleurs/tag_spacy_google_metrics_wer_embeddings md5: 47f14edff7589a680a130e55055852bd.dir size: 23377 nfiles: 758 - path: experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_alignment_wer md5: 34600cd8f1d8bf718f60472afb77bc73.dir size: 3818553 nfiles: 758 - path: experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_alignment_wer_embeddings md5: 288fb014884ce9f11caf5f8cd1098cfa.dir size: 7984964 nfiles: 758 - path: experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_metrics_wer md5: 984770b57ca48fd793099c9ed67791d0.dir size: 23344 nfiles: 758 - path: experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_metrics_wer_embeddings md5: b2626f92231431e163813ef7112c119d.dir size: 53916 nfiles: 758