schema: '2.0'
stages:
  import_luna_to_common_format:
    cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
    deps:
    - path: experiment/luna/import_dataset/import_luna.py
      md5: f40adccbf0b51094a71b876c9ccad751
      size: 8265
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    outs:
    - path: experiment_data/dataset_relation_manager_data/luna
      md5: 773f92667e16efd915ec6384d06aa4fb.dir
      size: 229007155
      nfiles: 1000
  luna_main_pipeline:
    cmd: "PYTHONPATH=. python -m spacy download pl_core_news_lg\nPYTHONPATH=. python\
      \ experiment/luna/pipeline/luna_main.py\n"
    deps:
    - path: experiment_data/cached_asr/luna_techmo
      md5: 033ea7b5434dded73bf869bfdd299462.dir
      size: 4256479
      nfiles: 500
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/dataset_relation_manager_data/luna
      md5: 773f92667e16efd915ec6384d06aa4fb.dir
      size: 229007155
      nfiles: 1000
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna
      md5: 2e334734387ab4579b7b5269d5029e81.dir
      size: 71627685
      nfiles: 4000
  luna_import_to_common_format:
    cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
    deps:
    - path: experiment/luna/import_dataset/import_luna.py
      md5: 44a1b914afda2ae74462e7afd83f658e
      size: 8278
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    outs:
    - path: experiment_data/dataset_relation_manager_data/luna
      md5: ff680a49296818460a49bd0c70089a4a.dir
      size: 229007155
      nfiles: 1000
  voicelab_import_to_common_format:
    cmd: PYTHONPATH=. python -u experiment/voicelab/import_data.py
    deps:
    - path: experiment/voicelab/import_data.py
      md5: 41acb98a1517e66c052182fe0a1403ba
      size: 2108
    - path: experiment_data/dataset/voicelab_cbiz_testset_20220322
      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
      size: 4803739404
      nfiles: 1600
    outs:
    - path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
      md5: 4046ea5d80966f0c017b2c4bec0e7c9b.dir
      size: 110711470
      nfiles: 1600
  luna_gold_transcript_processing:
    cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_gold_transcript_processing.py\n"
    deps:
    - path: experiment/luna/pipeline/luna_gold_transcript_processing.py
      md5: 2bae24d511febebb26b3264b204784f5
      size: 1466
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/dataset_relation_manager_data/luna
      md5: ff680a49296818460a49bd0c70089a4a.dir
      size: 229007155
      nfiles: 1000
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
      size: 6706925
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
      md5: 24a399475b752737db0f2a8671507014.dir
      size: 6785648
      nfiles: 500
  luna_ajn_processing:
    cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_ajn_asr_processing.py\n"
    deps:
    - path: experiment/luna/pipeline/luna_ajn_asr_processing.py
      md5: 2d66cb8890c420b55e8b7eb33ac32ba2
      size: 3558
    - path: experiment_data/cached_asr/luna_ajn_polish_asr
      md5: 10454ef4568c2023e9d51ad418db2854.dir
      size: 1276562
      nfiles: 495
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
      size: 6706925
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
      md5: 24a399475b752737db0f2a8671507014.dir
      size: 6785648
      nfiles: 500
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
      md5: 8c080d8110e5860e78bfcb311fe2b90d.dir
      size: 6204883
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
      md5: f06d2f1369b18e5fa126af5a00a8f0b8.dir
      size: 6590702
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer
      md5: 164f3b4796bcab894831da4f0a0fa0af.dir
      size: 22096130
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer
      md5: ee5ae7387429992fe04fcbde24e2bd24.dir
      size: 17037
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer
      md5: 00d84c15ae1c1a491625ee4dd8db6418.dir
      size: 20803179
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer_embeddings
      md5: c2824c0c5cf433dbf864ebbdc2fb3cfc.dir
      size: 44326962
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer
      md5: fdbccc71fa84d0a68f4cd6723399e5dd.dir
      size: 17045
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer_embeddings
      md5: 98a7edeee3b630e8e301acfc578a8393.dir
      size: 34869
      nfiles: 500
  luna_techmo_processing:
    cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py\n"
    deps:
    - path: experiment/luna/pipeline/luna_techmo_processing.py
      md5: 75069cd6e3a61dfaaf49c2bdb1e81976
      size: 3416
    - path: experiment_data/cached_asr/luna_techmo
      md5: 033ea7b5434dded73bf869bfdd299462.dir
      size: 4256479
      nfiles: 500
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
      size: 6706925
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
      md5: 24a399475b752737db0f2a8671507014.dir
      size: 6785648
      nfiles: 500
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer
      md5: 94762d19a853810064afd38319d05a2c.dir
      size: 20897599
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer
      md5: 4efbe309674d9d494bae3dac057025ba.dir
      size: 17341
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
      md5: 3787c6a4c7941787253165e2ba760e73.dir
      size: 9697519
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
      md5: 337b6bf947ee47cda30b3cc75f954e8e.dir
      size: 6124559
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer
      md5: afc25d6ad22bed4ded5cb07028bff1cf.dir
      size: 21380796
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer_embeddings
      md5: c2824c0c5cf433dbf864ebbdc2fb3cfc.dir
      size: 44326962
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer
      md5: 4cfbb2830b280084ece14b1ef815b92a.dir
      size: 17298
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer_embeddings
      md5: 98a7edeee3b630e8e301acfc578a8393.dir
      size: 34869
      nfiles: 500
  voicelab_gold_transcript_processing:
    cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_gold_transcript.py\n"
    deps:
    - path: experiment/voicelab/voicelab_pipeline_gold_transcript.py
      md5: 4ba38fdfac616f8a0818cedabf66b94d
      size: 2312
    - path: experiment_data/dataset/voicelab_cbiz_testset_20220322
      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
      size: 4803739404
      nfiles: 1600
    - path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
      md5: 4046ea5d80966f0c017b2c4bec0e7c9b.dir
      size: 110711470
      nfiles: 1600
    outs:
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
      md5: 9edf1e743faa9fc3515790acb6fd8cab.dir
      size: 21846798
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
      md5: c166937f6e8ae9d28412ca1e3e43469e.dir
      size: 26643278
      nfiles: 800
  voicelab_techmo_processing:
    cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_techmo.py\n"
    deps:
    - path: experiment/voicelab/voicelab_pipeline_techmo.py
      md5: 3d6347486055a11e399beac71ce2f877
      size: 3479
    - path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo
      md5: 6c3b356723d562c978f84e733b91f5d0.dir
      size: 17539259
      nfiles: 800
    - path: experiment_data/dataset/voicelab_cbiz_testset_20220322
      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
      size: 4803739404
      nfiles: 1600
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
      md5: 9edf1e743faa9fc3515790acb6fd8cab.dir
      size: 21846798
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
      md5: c166937f6e8ae9d28412ca1e3e43469e.dir
      size: 26643278
      nfiles: 800
    outs:
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer
      md5: a15a7a19f46e329c8b77eeecdda9d7b4.dir
      size: 81650836
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer
      md5: b1a674826142a44095a4c6439ac49024.dir
      size: 27934
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
      md5: da32e6fa9d986deddb594cb66e649864.dir
      size: 39158267
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
      md5: cd89a91a33629088ba6fc30ef8427dee.dir
      size: 24482297
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer
      md5: 0b714391682432408d74beee1cd5a14a.dir
      size: 83756423
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer_embeddings
      md5: 93d34d82f8536014ddbe0cf0645dd837.dir
      size: 174322727
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer
      md5: 2fe3288abe85e4a385e2aefa0e8cad7e.dir
      size: 27780
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer_embeddings
      md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir
      size: 56182
      nfiles: 800
  voicelab_ajn_processing:
    cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_ajn_asr.py\n"
    deps:
    - path: experiment/voicelab/voicelab_pipeline_ajn_asr.py
      md5: 85e8d3d79379e6d5db751e03c5ebae75
      size: 4161
    - path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_ajn
      md5: 0705aafa0969142288cc9baa88d1ed57.dir
      size: 6896694
      nfiles: 800
    - path: experiment_data/dataset/voicelab_cbiz_testset_20220322
      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
      size: 4803739404
      nfiles: 1600
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
      md5: 9edf1e743faa9fc3515790acb6fd8cab.dir
      size: 21846798
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
      md5: c166937f6e8ae9d28412ca1e3e43469e.dir
      size: 26643278
      nfiles: 800
    outs:
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
      md5: da10bb60107a86f98b2d07fef5616390.dir
      size: 22765926
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
      md5: e8a48a0a63c1569ec734e1c8bb03c7db.dir
      size: 20536889
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_alignment_wer
      md5: 7806779e936ec6121b8d72e0d0e3ed59.dir
      size: 78539613
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_metrics_wer
      md5: d0e1ef5f57de27a2356d2f2050a93349.dir
      size: 27353
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer
      md5: d190f33e6643f62ecbeb9e5ae5fb5e02.dir
      size: 78992762
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer_embeddings
      md5: 93d34d82f8536014ddbe0cf0645dd837.dir
      size: 174322727
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer
      md5: 04f6ccbaf94cf08c34ac201ae079c21c.dir
      size: 25307
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer_embeddings
      md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir
      size: 56182
      nfiles: 800
  voicelab_tag_spacy_processing:
    cmd: "PYTHONPATH=. python -u experiment/voicelab/spacy_tag_processing.py\n"
    deps:
    - path: experiment/voicelab/spacy_tag_processing.py
      md5: b5f996e5be56cdf93eec23b9c0c066b9
      size: 2580
    - path: experiment_data/dataset/voicelab_cbiz_testset_20220322
      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
      size: 4803739404
      nfiles: 1600
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
      md5: e8a48a0a63c1569ec734e1c8bb03c7db.dir
      size: 20536889
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
      md5: c166937f6e8ae9d28412ca1e3e43469e.dir
      size: 26643278
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
      md5: cd89a91a33629088ba6fc30ef8427dee.dir
      size: 24482297
      nfiles: 800
    outs:
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_ajn_alignment_wer_embeddings
      md5: c7914fa4d415a4815e371017ef505358.dir
      size: 80129550
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_ajn_metrics_wer_embeddings
      md5: ac726b3b371256176aee1364cb1fec88.dir
      size: 27258
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_techmo_alignment_wer_embeddings
      md5: 47d1b5a820806dbad941b19547eb1273.dir
      size: 83756819
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_techmo_metrics_wer_embeddings
      md5: ba59691f467f0486f832bf86326c5142.dir
      size: 27781
      nfiles: 800
  luna_tag_spacy_processing:
    cmd: "PYTHONPATH=. python -u experiment/luna/pipeline/spacy_tag_processing.py\n"
    deps:
    - path: experiment/luna/pipeline/spacy_tag_processing.py
      md5: bdaf1cae6863815ce59e022a493379da
      size: 2567
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
      md5: f06d2f1369b18e5fa126af5a00a8f0b8.dir
      size: 6590702
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
      md5: 24a399475b752737db0f2a8671507014.dir
      size: 6785648
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
      md5: 337b6bf947ee47cda30b3cc75f954e8e.dir
      size: 6124559
      nfiles: 500
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_ajn_alignment_wer_embeddings
      md5: bd0439d750f23b978adbfd4ef1151a9c.dir
      size: 22511019
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_ajn_metrics_wer_embeddings
      md5: 85a4a0b2d6b91d9745f55944fb3886df.dir
      size: 17014
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_techmo_alignment_wer_embeddings
      md5: a6f1ef0da2dc2f45f522e3b2e7dec0f9.dir
      size: 21411162
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_techmo_metrics_wer_embeddings
      md5: abb1f6102a1de3d63d0e9e2cb797d2b9.dir
      size: 17369
      nfiles: 500
  voicelab_ner_spacy_processing:
    cmd: "PYTHONPATH=. python -u experiment/voicelab/spacy_ner_processing.py\n"
    deps:
    - path: experiment/voicelab/spacy_ner_processing.py
      md5: f6a4f2e34a31d15345435e11b425b1dd
      size: 2868
    - path: experiment_data/dataset/voicelab_cbiz_testset_20220322
      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
      size: 4803739404
      nfiles: 1600
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
      md5: da10bb60107a86f98b2d07fef5616390.dir
      size: 22765926
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
      md5: 9edf1e743faa9fc3515790acb6fd8cab.dir
      size: 21846798
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
      md5: da32e6fa9d986deddb594cb66e649864.dir
      size: 39158267
      nfiles: 800
    outs:
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_ajn_alignment_wer_embeddings
      md5: 742f665235ea5911b79974a06de9551e.dir
      size: 76044699
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_ajn_metrics_wer_embeddings
      md5: 17c5ae5ffa5b380d9be5bcc5c1f71401.dir
      size: 27796
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_techmo_alignment_wer_embeddings
      md5: 4b161efe4a5dcedd0dfe481ff69a74a0.dir
      size: 28195
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_techmo_metrics_wer_embeddings
      md5: 9161b4f164b2e1a5e5e95d1069f71377.dir
      size: 79503990
      nfiles: 800
  luna_ner_spacy_processing:
    cmd: "PYTHONPATH=. python -u experiment/luna/pipeline/spacy_ner_processing.py\n"
    deps:
    - path: experiment/luna/pipeline/spacy_ner_processing.py
      md5: 95191ec772c0766911c8d297ad957778
      size: 2855
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
      md5: 8c080d8110e5860e78bfcb311fe2b90d.dir
      size: 6204883
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
      size: 6706925
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
      md5: 3787c6a4c7941787253165e2ba760e73.dir
      size: 9697519
      nfiles: 500
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna/ner_spacy_ajn_alignment_wer_embeddings
      md5: 79aecfe01e3ce64fef8f83de75b08bfb.dir
      size: 21308988
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/ner_spacy_ajn_metrics_wer_embeddings
      md5: 2e136b8d41d48429267c966d5688d78a.dir
      size: 17297
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/ner_spacy_techmo_alignment_wer_embeddings
      md5: e4948a2a68306630df0459d09ccd5ee6.dir
      size: 17418
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/ner_spacy_techmo_metrics_wer_embeddings
      md5: ccbdbd523fb102a196abe8750cb78367.dir
      size: 20451389
      nfiles: 500
  luna_duckling_processing:
    cmd: "PYTHONPATH=. python -u experiment/luna/pipeline/duckling_processing.py\n"
    deps:
    - path: experiment/luna/pipeline/duckling_processing.py
      md5: deca760ecaaf1da2e1c3c9f05a8058d1
      size: 3375
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
      md5: 8c080d8110e5860e78bfcb311fe2b90d.dir
      size: 6204883
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
      size: 6706925
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
      md5: 3787c6a4c7941787253165e2ba760e73.dir
      size: 9697519
      nfiles: 500
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna/ajn_duckling
      md5: 29d5e8e54dfa3479bca63b8d7e153618.dir
      size: 688654
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_duckling
      md5: 5b8c7cc874b2201c3b38d81cbd7f7d8e.dir
      size: 1169949
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/techmo_duckling
      md5: 4ebcdbb04173efe6ce11acd22641047f.dir
      size: 1058036
      nfiles: 500
  voicelab_duckling_processing:
    cmd: "PYTHONPATH=. python -u experiment/voicelab/duckling_processing.py\n"
    deps:
    - path: experiment/voicelab/duckling_processing.py
      md5: 78ddf7368a19eacf95fb6fa07e4cc6a1
      size: 3388
    - path: experiment_data/dataset/voicelab_cbiz_testset_20220322
      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
      size: 4803739404
      nfiles: 1600
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
      md5: da10bb60107a86f98b2d07fef5616390.dir
      size: 22765926
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
      md5: 9edf1e743faa9fc3515790acb6fd8cab.dir
      size: 21846798
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
      md5: da32e6fa9d986deddb594cb66e649864.dir
      size: 39158267
      nfiles: 800
    outs:
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_duckling
      md5: d578db039b5e140b8ee93f1155989de0.dir
      size: 1275192
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_duckling
      md5: 7c7e509b454e1e5c230536772b6368a3.dir
      size: 2962070
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_duckling
      md5: 5079efe14a12c0d406c56c6423476f7e.dir
      size: 2554174
      nfiles: 800
  luna_wav2vec2_processing:
    cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_wav2vec2_asr_processing.py\n"
    deps:
    - path: experiment/luna/pipeline/luna_wav2vec2_asr_processing.py
      md5: 7326a6347c47bba7bd76419afa99a062
      size: 3874
    - path: experiment_data/cached_asr/luna_wav2vec2_polish_asr
      md5: 0555f6346ef6332d27fc0979e8ac16f7.dir
      size: 16356269
      nfiles: 456
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
      size: 6706925
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
      md5: 24a399475b752737db0f2a8671507014.dir
      size: 6785648
      nfiles: 500
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_wav2vec2_alignment_wer
      md5: f6cb75172894f7ddccbff1d78b889e2b.dir
      size: 18871817
      nfiles: 476
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_wav2vec2_metrics_wer
      md5: 9961aa5fc267ac0d7773be2679054fe8.dir
      size: 16385
      nfiles: 476
    - path: experiment_data/pipeline/asr_benchmark_luna/wav2vec2_polish_asr
      md5: a3dd221c9dfe28c8988e4be1b42877ce.dir
      size: 21669041
      nfiles: 476
    - path: experiment_data/pipeline/asr_benchmark_luna/wav2vec2_spacy
      md5: 6ffddf1732e170f6d75ce19df6bf40b9.dir
      size: 5157578
      nfiles: 476
    - path: experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_alignment_wer
      md5: c8f5f1bdf4f061860445038ccb192f9d.dir
      size: 19391267
      nfiles: 476
    - path: experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_alignment_wer_embeddings
      md5: 04017200d3c60911f4cff0869e80a67a.dir
      size: 40334791
      nfiles: 476
    - path: experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_metrics_wer
      md5: fbee5d3ef2f839d4868781a34b179608.dir
      size: 16281
      nfiles: 476
    - path: experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_metrics_wer_embeddings
      md5: dece407fd19adb3472270d032bac5e47.dir
      size: 25672
      nfiles: 476
  common_voice_gold_transcript_processing:
    cmd: "PYTHONPATH=. python -u experiment/common_voice/pipeline/common_voice_gold_transcript.py\n"
    deps:
    - path: experiment/common_voice/pipeline/common_voice_gold_transcript.py
      md5: 8bb88130aa769dc21b77c753f95cdc1f
      size: 1520
    - path: experiment_data/dataset_relation_manager_data/common_voice
      md5: 7a345b4c6c4ac9b784e21b5f16a53f6b.dir
      size: 21756677
      nfiles: 16310
    outs:
    - path: experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript
      md5: 2321e1afe83ed0dd53d52dbdc38e98ee.dir
      size: 4093545
      nfiles: 8155
    - path: experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript_spacy
      md5: e5a4b007ef80cd253223884b1c1d8f2c.dir
      size: 6457976
      nfiles: 8155
  common_voice_google_processing:
    cmd: "PYTHONPATH=. python -u experiment/common_voice/pipeline/common_voice_google.py\n"
    deps:
    - path: experiment/common_voice/pipeline/common_voice_google.py
      md5: 45b499a1897efaf2d7ee2ffec58984e1
      size: 4983
    - path: experiment_data/cached_asr/common_voice_google
      md5: e640b898f723eed2af42b93596eabecd.dir
      size: 1701910
      nfiles: 8143
    - path: experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript
      md5: 2321e1afe83ed0dd53d52dbdc38e98ee.dir
      size: 4093545
      nfiles: 8155
    - path: experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript_spacy
      md5: e5a4b007ef80cd253223884b1c1d8f2c.dir
      size: 6457976
      nfiles: 8155
    outs:
    - path: experiment_data/pipeline/asr_benchmark_common_voice/google_polish_asr
      md5: c36e767c4aa84fdee97916089d585eec.dir
      size: 5854366
      nfiles: 8143
    - path: experiment_data/pipeline/asr_benchmark_common_voice/google_spacy
      md5: 4c0baa2bf90c445c1bca7bdf937764ff.dir
      size: 4755832
      nfiles: 8143
    - path: experiment_data/pipeline/asr_benchmark_common_voice/ner_spacy_google_alignment_wer_embeddings
      md5: 5ecdc0210532a46688141730893dce28.dir
      size: 17894226
      nfiles: 8143
    - path: experiment_data/pipeline/asr_benchmark_common_voice/ner_spacy_google_metrics_wer_embeddings
      md5: 252cd908a3a2307e88c1cccc7a5ce390.dir
      size: 236348
      nfiles: 8143
    - path: experiment_data/pipeline/asr_benchmark_common_voice/pos_google_alignment_wer
      md5: 1e9986ecbba5a4e5e3f32e28a6848679.dir
      size: 18316770
      nfiles: 8143
    - path: experiment_data/pipeline/asr_benchmark_common_voice/pos_google_metrics_wer
      md5: 573354d0f3eb4476335d801732cc170c.dir
      size: 235666
      nfiles: 8143
    - path: experiment_data/pipeline/asr_benchmark_common_voice/tag_spacy_google_alignment_wer_embeddings
      md5: 54cef5652ecc7bd892a49b9a6352d03a.dir
      size: 18753379
      nfiles: 8143
    - path: experiment_data/pipeline/asr_benchmark_common_voice/tag_spacy_google_metrics_wer_embeddings
      md5: 30177297b5a54778ffb841639711303b.dir
      size: 234437
      nfiles: 8143
    - path: experiment_data/pipeline/asr_benchmark_common_voice/word_google_alignment_wer
      md5: abe0a93778bd5447a20698d5169082ad.dir
      size: 15821424
      nfiles: 8143
    - path: experiment_data/pipeline/asr_benchmark_common_voice/word_google_alignment_wer_embeddings
      md5: b21ec15cbc4f35c69292e52322b64f06.dir
      size: 33384670
      nfiles: 8143
    - path: experiment_data/pipeline/asr_benchmark_common_voice/word_google_metrics_wer
      md5: deafb9c70dd9bb48c57dd0802bd8c923.dir
      size: 225225
      nfiles: 8143
    - path: experiment_data/pipeline/asr_benchmark_common_voice/word_google_metrics_wer_embeddings
      md5: e2dcce4d029ffe904d363790da375fac.dir
      size: 572632
      nfiles: 8143
  common_voice_import_processing:
    cmd: "PYTHONPATH=. python -u experiment/common_voice/import_dataset.py\n"
    deps:
    - path: experiment/common_voice/import_dataset.py
      md5: 9c1f063dacb954bbd84b4174764e9123
      size: 435
    outs:
    - path: experiment_data/dataset_relation_manager_data/common_voice
      md5: 7a345b4c6c4ac9b784e21b5f16a53f6b.dir
      size: 21756677
      nfiles: 16310
  minds14_import_processing:
    cmd: "PYTHONPATH=. python -u experiment/minds14/import_dataset.py\n"
    deps:
    - path: experiment/minds14/import_dataset.py
      md5: 7455925ab4edb4648b051a05d27d7e79
      size: 455
    outs:
    - path: experiment_data/dataset_relation_manager_data/minds14
      md5: 541b9d22e9fe502f4ba74f620bed2432.dir
      size: 3545062
      nfiles: 1124
  minds14_gold_transcript_processing:
    cmd: "PYTHONPATH=. python -u experiment/minds14/pipeline/minds14_gold_transcript.py\n"
    deps:
    - path: experiment/minds14/pipeline/minds14_gold_transcript.py
      md5: 84ce9195c9ae96e80d79184f353d429b
      size: 1515
    - path: experiment_data/dataset_relation_manager_data/minds14
      md5: 541b9d22e9fe502f4ba74f620bed2432.dir
      size: 3545062
      nfiles: 1124
    outs:
    - path: experiment_data/pipeline/asr_benchmark_minds14/gold_transcript
      md5: e7c4ad252834e701b9d26e7b2f17fa85.dir
      size: 689374
      nfiles: 562
    - path: experiment_data/pipeline/asr_benchmark_minds14/gold_transcript_spacy
      md5: 718c0de8e14d3dceec43f910ffb2ccdc.dir
      size: 843382
      nfiles: 562
  minds14_google_processing:
    cmd: "PYTHONPATH=. python -u experiment/minds14/pipeline/minds14_google.py\n"
    deps:
    - path: experiment/minds14/pipeline/minds14_google.py
      md5: e55897cae4ed3998cf814daf8b253c8a
      size: 4662
    - path: experiment_data/cached_asr/minds14_google
      md5: bb1acae0c3070424d635ae9d2c6e14f0.dir
      size: 247940
      nfiles: 562
    - path: experiment_data/pipeline/asr_benchmark_minds14/gold_transcript
      md5: e7c4ad252834e701b9d26e7b2f17fa85.dir
      size: 689374
      nfiles: 562
    - path: experiment_data/pipeline/asr_benchmark_minds14/gold_transcript_spacy
      md5: 718c0de8e14d3dceec43f910ffb2ccdc.dir
      size: 843382
      nfiles: 562
    outs:
    - path: experiment_data/pipeline/asr_benchmark_minds14/google_polish_asr
      md5: cc5c2a7098575b46034689b8cf90d66b.dir
      size: 985004
      nfiles: 562
    - path: experiment_data/pipeline/asr_benchmark_minds14/google_spacy
      md5: 8b15b1e376d53a372256e4ff2284135e.dir
      size: 851587
      nfiles: 562
    - path: experiment_data/pipeline/asr_benchmark_minds14/ner_spacy_google_alignment_wer_embeddings
      md5: f6aadc7bc34cdc190725d92e2fe3a735.dir
      size: 2599569
      nfiles: 562
    - path: experiment_data/pipeline/asr_benchmark_minds14/ner_spacy_google_metrics_wer_embeddings
      md5: 52bf1a1c0dc0b1fc12d56616fe43d4f0.dir
      size: 14099
      nfiles: 562
    - path: experiment_data/pipeline/asr_benchmark_minds14/pos_google_alignment_wer
      md5: a91c7d9dc9dfef02409a9ccc9a8fa7d3.dir
      size: 2659852
      nfiles: 562
    - path: experiment_data/pipeline/asr_benchmark_minds14/pos_google_metrics_wer
      md5: 5826a2bcb07f2f4bcedea052dc83de4d.dir
      size: 14524
      nfiles: 562
    - path: experiment_data/pipeline/asr_benchmark_minds14/tag_spacy_google_alignment_wer_embeddings
      md5: ffb2453de98aaa86436210632a7e162d.dir
      size: 2744428
      nfiles: 562
    - path: experiment_data/pipeline/asr_benchmark_minds14/tag_spacy_google_metrics_wer_embeddings
      md5: c3e050294ca4fb0f8bfcf905ca73b840.dir
      size: 14984
      nfiles: 562
    - path: experiment_data/pipeline/asr_benchmark_minds14/word_google_alignment_wer
      md5: f9856e7d3316c38002ab7f84aea89e8e.dir
      size: 2725626
      nfiles: 562
    - path: experiment_data/pipeline/asr_benchmark_minds14/word_google_alignment_wer_embeddings
      md5: 20a251a5aaf260c36f5b31d304cd826e.dir
      size: 5686170
      nfiles: 562
    - path: experiment_data/pipeline/asr_benchmark_minds14/word_google_metrics_wer
      md5: 41621d3845a0035ee77352902c46a186.dir
      size: 15012
      nfiles: 562
    - path: experiment_data/pipeline/asr_benchmark_minds14/word_google_metrics_wer_embeddings
      md5: 673f6b46c7735163726f7d9e3b1f2dd4.dir
      size: 40260
      nfiles: 562
  google_fleurs_save_audio_files:
    cmd: "PYTHONPATH=. python -u experiment/google_fleurs/save_wav_files.py\n"
    deps:
    - path: experiment/google_fleurs/save_wav_files.py
      md5: e4b807332d972adb4c5a2d0b096b09e5
      size: 493
    outs:
    - path: experiment_data/audio/fleurs_audio
      md5: 6a4acda378431fb30839ab2c58668665.dir
      size: 236272072
      nfiles: 758
  google_fleurs_import_processing:
    cmd: "PYTHONPATH=. python -u experiment/google_fleurs/import_dataset.py\n"
    deps:
    - path: experiment/google_fleurs/import_dataset.py
      md5: 01daea17400b56d2736669aaf3e2af09
      size: 441
    outs:
    - path: experiment_data/dataset_relation_manager_data/google_fleurs
      md5: e9045f637ccec39d67d3ee5e9763cf13.dir
      size: 4991297
      nfiles: 1516
  google_fleurs_gold_transcript_processing:
    cmd: "PYTHONPATH=. python -u experiment/google_fleurs/pipeline/google_fleurs_gold_transcript.py\n"
    deps:
    - path: experiment/google_fleurs/pipeline/google_fleurs_gold_transcript.py
      md5: 981d7596a8276f7952fcee705d0a828c
      size: 1521
    - path: experiment_data/dataset_relation_manager_data/google_fleurs
      md5: e9045f637ccec39d67d3ee5e9763cf13.dir
      size: 4991297
      nfiles: 1516
    outs:
    - path: experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript
      md5: 291a1976499bd24a878636e4c76fc57a.dir
      size: 975209
      nfiles: 758
    - path: experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript_spacy
      md5: 76b462a21926ab83f4e63e3046b92d08.dir
      size: 1196907
      nfiles: 758
  google_fleurs_google_processing:
    cmd: "PYTHONPATH=. python -u experiment/google_fleurs/pipeline/google_fleurs_google.py\n"
    deps:
    - path: experiment/google_fleurs/pipeline/google_fleurs_google.py
      md5: 433c7eb5b98e35cdd22dfca8a0d50f5e
      size: 4673
    - path: experiment_data/cached_asr/google_fleurs_google
      md5: c740521abae49feebfaf20c2384c8de5.dir
      size: 350126
      nfiles: 758
    - path: experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript
      md5: 291a1976499bd24a878636e4c76fc57a.dir
      size: 975209
      nfiles: 758
    - path: experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript_spacy
      md5: 76b462a21926ab83f4e63e3046b92d08.dir
      size: 1196907
      nfiles: 758
    outs:
    - path: experiment_data/pipeline/asr_benchmark_google_fleurs/google_polish_asr
      md5: d6cc3d584bac32b78328f18f5f9172cf.dir
      size: 1377134
      nfiles: 758
    - path: experiment_data/pipeline/asr_benchmark_google_fleurs/google_spacy
      md5: 28d584893d4f2d6afe8c1226bb813a1e.dir
      size: 1183960
      nfiles: 758
    - path: experiment_data/pipeline/asr_benchmark_google_fleurs/ner_spacy_google_alignment_wer_embeddings
      md5: 560b3f62ad19f41037f29609477262aa.dir
      size: 3638477
      nfiles: 758
    - path: experiment_data/pipeline/asr_benchmark_google_fleurs/ner_spacy_google_metrics_wer_embeddings
      md5: f0755063eeeb38680dcf1b59e1b61632.dir
      size: 21386
      nfiles: 758
    - path: experiment_data/pipeline/asr_benchmark_google_fleurs/pos_google_alignment_wer
      md5: 6881fdb748c383e8f121beeb3f996c15.dir
      size: 3712618
      nfiles: 758
    - path: experiment_data/pipeline/asr_benchmark_google_fleurs/pos_google_metrics_wer
      md5: a93545ba089d087be33c0cd41ab88497.dir
      size: 22099
      nfiles: 758
    - path: experiment_data/pipeline/asr_benchmark_google_fleurs/tag_spacy_google_alignment_wer_embeddings
      md5: 3af296e52f5e74e5e7db843bd8b7f41b.dir
      size: 3844097
      nfiles: 758
    - path: experiment_data/pipeline/asr_benchmark_google_fleurs/tag_spacy_google_metrics_wer_embeddings
      md5: 47f14edff7589a680a130e55055852bd.dir
      size: 23377
      nfiles: 758
    - path: experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_alignment_wer
      md5: 34600cd8f1d8bf718f60472afb77bc73.dir
      size: 3818553
      nfiles: 758
    - path: experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_alignment_wer_embeddings
      md5: 288fb014884ce9f11caf5f8cd1098cfa.dir
      size: 7984964
      nfiles: 758
    - path: experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_metrics_wer
      md5: 984770b57ca48fd793099c9ed67791d0.dir
      size: 23344
      nfiles: 758
    - path: experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_metrics_wer_embeddings
      md5: b2626f92231431e163813ef7112c119d.dir
      size: 53916
      nfiles: 758