Skip to content
Snippets Groups Projects
dvc.lock 9.63 KiB
Newer Older
Marcin Wątroba's avatar
Marcin Wątroba committed
schema: '2.0'
stages:
  import_luna_to_common_format:
    cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
    deps:
    - path: experiment/luna/import_dataset/import_luna.py
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: f40adccbf0b51094a71b876c9ccad751
      size: 8265
Marcin Wątroba's avatar
Marcin Wątroba committed
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    outs:
    - path: experiment_data/dataset_relation_manager_data/luna
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 773f92667e16efd915ec6384d06aa4fb.dir
      size: 229007155
      nfiles: 1000
  luna_main_pipeline:
    cmd: "PYTHONPATH=. python -m spacy download pl_core_news_lg\nPYTHONPATH=. python\
      \ experiment/luna/pipeline/luna_main.py\n"
    deps:
    - path: experiment_data/cached_asr/luna_techmo
      md5: 033ea7b5434dded73bf869bfdd299462.dir
      size: 4256479
      nfiles: 500
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/dataset_relation_manager_data/luna
      md5: 773f92667e16efd915ec6384d06aa4fb.dir
      size: 229007155
      nfiles: 1000
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna
      md5: 2e334734387ab4579b7b5269d5029e81.dir
      size: 71627685
      nfiles: 4000
Marcin Wątroba's avatar
Marcin Wątroba committed
  luna_import_to_common_format:
    cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
    deps:
    - path: experiment/luna/import_dataset/import_luna.py
      md5: 44a1b914afda2ae74462e7afd83f658e
      size: 8278
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    outs:
    - path: experiment_data/dataset_relation_manager_data/luna
      md5: ff680a49296818460a49bd0c70089a4a.dir
      size: 229007155
      nfiles: 1000
  voicelab_import_to_common_format:
    cmd: PYTHONPATH=. python experiment/voicelab/import_data.py
    deps:
    - path: experiment/voicelab/import_data.py
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 41acb98a1517e66c052182fe0a1403ba
      size: 2108
Marcin Wątroba's avatar
Marcin Wątroba committed
    - path: experiment_data/dataset/voicelab_cbiz_testset_20220322
      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
      size: 4803739404
      nfiles: 1600
    outs:
    - path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 926ef9bab4ce41b9de95f2f3d5ab67a0.dir
Marcin Wątroba's avatar
Marcin Wątroba committed
      size: 110711470
      nfiles: 1600
  luna_gold_transcript_processing:
    cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_gold_transcript_processing.py\n"
    deps:
    - path: experiment/luna/pipeline/luna_gold_transcript_processing.py
      md5: 2bae24d511febebb26b3264b204784f5
      size: 1466
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/dataset_relation_manager_data/luna
      md5: ff680a49296818460a49bd0c70089a4a.dir
      size: 229007155
      nfiles: 1000
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
      size: 6706925
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
      md5: 24a399475b752737db0f2a8671507014.dir
      size: 6785648
      nfiles: 500
  luna_ajn_processing:
    cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_ajn_asr_processing.py\n"
    deps:
    - path: experiment/luna/pipeline/luna_ajn_asr_processing.py
      md5: ec7d7b5384f845173d9fb77e9cfa9907
      size: 2501
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
      size: 6706925
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
      md5: 24a399475b752737db0f2a8671507014.dir
      size: 6785648
      nfiles: 500
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
      md5: 620e178854dbcb69f49a608f34573a88.dir
      size: 6159899
      nfiles: 494
    - path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
      md5: 312be284d4ec9e38986048e785fcbbc1.dir
      size: 6535212
      nfiles: 494
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer
      md5: 8ad558edb6a8bd2508a7e25bcf53bf94.dir
      size: 21936929
      nfiles: 494
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer
      md5: 98c74c5bf87637749eac1ed5ff3393b4.dir
      size: 16842
      nfiles: 494
    - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer
      md5: 1741fff740259398b28bf2a6ba3aec41.dir
      size: 20671277
      nfiles: 494
    - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer
      md5: 18605657ff9c7ef3221e27b671a3b4d1.dir
      size: 16835
      nfiles: 494
  luna_techmo_processing:
    cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py\n"
    deps:
    - path: experiment/luna/pipeline/luna_techmo_processing.py
      md5: b4d5ad7a0d7fb0714a2dc02cb457e8c9
      size: 2628
    - path: experiment_data/cached_asr/luna_techmo
      md5: 033ea7b5434dded73bf869bfdd299462.dir
      size: 4256479
      nfiles: 500
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
      size: 6706925
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
      md5: 24a399475b752737db0f2a8671507014.dir
      size: 6785648
      nfiles: 500
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer
      md5: c71539f3889c627a371957958bd0907d.dir
      size: 20897599
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer
      md5: 4efbe309674d9d494bae3dac057025ba.dir
      size: 17341
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
      md5: acfaec46b2415ed6a64e3a3464d164f8.dir
      size: 9697519
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
      md5: e869581816457d1585a7e42d0a18b8b2.dir
      size: 6124559
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer
      md5: 0dabd65b3981d588cd23d943abc6e231.dir
      size: 21380796
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer
      md5: 4cfbb2830b280084ece14b1ef815b92a.dir
      size: 17298
      nfiles: 500
Marcin Wątroba's avatar
Marcin Wątroba committed
  voicelab_gold_transcript_processing:
    cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_gold_transcript.py\n"
    deps:
    - path: experiment/voicelab/voicelab_pipeline_gold_transcript.py
      md5: 4ba38fdfac616f8a0818cedabf66b94d
      size: 2312
    - path: experiment_data/dataset/voicelab_cbiz_testset_20220322
      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
      size: 4803739404
      nfiles: 1600
    - path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
      md5: 926ef9bab4ce41b9de95f2f3d5ab67a0.dir
      size: 110711470
      nfiles: 1600
    outs:
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
      md5: fb6812b2f3044c0285ee6ee2b21d0523.dir
      size: 21846798
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
      md5: f2e68dcc8842a15e417ae6f5221a802a.dir
      size: 26643278
      nfiles: 800
  voicelab_techmo_processing:
    cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_techmo.py\n"
    deps:
    - path: experiment/voicelab/voicelab_pipeline_techmo.py
      md5: 23c0869d7cc9f0088870362d669ab82e
      size: 2685
    - path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo
      md5: 6c3b356723d562c978f84e733b91f5d0.dir
      size: 17539259
      nfiles: 800
    - path: experiment_data/dataset/voicelab_cbiz_testset_20220322
      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
      size: 4803739404
      nfiles: 1600
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
      md5: fb6812b2f3044c0285ee6ee2b21d0523.dir
      size: 21846798
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
      md5: f2e68dcc8842a15e417ae6f5221a802a.dir
      size: 26643278
      nfiles: 800
    outs:
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer
      md5: 8c5f0380ba2891b3e726d647c2863c60.dir
      size: 81650836
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer
      md5: b1a674826142a44095a4c6439ac49024.dir
      size: 27934
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
      md5: c45e29b08af7bb13cdf54da9655bd96c.dir
      size: 39158267
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
      md5: a39c82666419c2b7791952a1fa116d61.dir
      size: 24482297
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer
      md5: 72ff86c7cb2e89ac7e04677f532255b2.dir
      size: 83756423
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer
      md5: 2fe3288abe85e4a385e2aefa0e8cad7e.dir
      size: 27780
      nfiles: 800