Newer
Older
schema: '2.0'
stages:
import_luna_to_common_format:
cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
deps:
- path: experiment/luna/import_dataset/import_luna.py
md5: f40adccbf0b51094a71b876c9ccad751
size: 8265
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
outs:
- path: experiment_data/dataset_relation_manager_data/luna
md5: 773f92667e16efd915ec6384d06aa4fb.dir
size: 229007155
nfiles: 1000
luna_main_pipeline:
cmd: "PYTHONPATH=. python -m spacy download pl_core_news_lg\nPYTHONPATH=. python\
\ experiment/luna/pipeline/luna_main.py\n"
deps:
- path: experiment_data/cached_asr/luna_techmo
md5: 033ea7b5434dded73bf869bfdd299462.dir
size: 4256479
nfiles: 500
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/dataset_relation_manager_data/luna
md5: 773f92667e16efd915ec6384d06aa4fb.dir
size: 229007155
nfiles: 1000
outs:
- path: experiment_data/pipeline/asr_benchmark_luna
md5: 2e334734387ab4579b7b5269d5029e81.dir
size: 71627685
nfiles: 4000
luna_import_to_common_format:
cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
deps:
- path: experiment/luna/import_dataset/import_luna.py
md5: 44a1b914afda2ae74462e7afd83f658e
size: 8278
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
outs:
- path: experiment_data/dataset_relation_manager_data/luna
md5: ff680a49296818460a49bd0c70089a4a.dir
size: 229007155
nfiles: 1000
voicelab_import_to_common_format:
cmd: PYTHONPATH=. python -u experiment/voicelab/import_data.py
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
outs:
- path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
luna_gold_transcript_processing:
cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_gold_transcript_processing.py\n"
deps:
- path: experiment/luna/pipeline/luna_gold_transcript_processing.py
md5: 2bae24d511febebb26b3264b204784f5
size: 1466
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/dataset_relation_manager_data/luna
md5: ff680a49296818460a49bd0c70089a4a.dir
size: 229007155
nfiles: 1000
outs:
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
size: 6706925
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
md5: 24a399475b752737db0f2a8671507014.dir
size: 6785648
nfiles: 500
luna_ajn_processing:
cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_ajn_asr_processing.py\n"
deps:
- path: experiment/luna/pipeline/luna_ajn_asr_processing.py
md5: 2d66cb8890c420b55e8b7eb33ac32ba2
size: 3558
- path: experiment_data/cached_asr/luna_ajn_polish_asr
md5: 620e178854dbcb69f49a608f34573a88.dir
size: 6159899
nfiles: 494
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
size: 6706925
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
md5: 24a399475b752737db0f2a8671507014.dir
size: 6785648
nfiles: 500
outs:
- path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
md5: fa9d926ae8fd0268c71f19c1d5d39fcf.dir
size: 11080541
nfiles: 499
- path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
md5: 417d8f07266eb5da9c4bfbf84f3b4eac.dir
size: 6579351
nfiles: 499
- path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer
md5: 2bf746c412e6bff4071f689d853b106f.dir
size: 22061350
nfiles: 499
- path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer
md5: 3147413bdfd36ad91c64303e8705951b.dir
size: 17002
nfiles: 499
- path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer
md5: 2bb11f8a97cdeb18c557fadb49a6f015.dir
size: 25669158
nfiles: 499
- path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer_embeddings
md5: c2824c0c5cf433dbf864ebbdc2fb3cfc.dir
size: 44326962
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer
md5: c48c74eccf1cfd0768900514d2fcfd1b.dir
size: 10527
nfiles: 499
- path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer_embeddings
md5: 98a7edeee3b630e8e301acfc578a8393.dir
size: 34869
nfiles: 500
luna_techmo_processing:
cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py\n"
deps:
- path: experiment/luna/pipeline/luna_techmo_processing.py
md5: 75069cd6e3a61dfaaf49c2bdb1e81976
size: 3416
- path: experiment_data/cached_asr/luna_techmo
md5: 033ea7b5434dded73bf869bfdd299462.dir
size: 4256479
nfiles: 500
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
size: 6706925
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
md5: 24a399475b752737db0f2a8671507014.dir
size: 6785648
nfiles: 500
outs:
- path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer
size: 20897599
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer
md5: 4efbe309674d9d494bae3dac057025ba.dir
size: 17341
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
size: 9697519
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
size: 6124559
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer
- path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer_embeddings
md5: c2824c0c5cf433dbf864ebbdc2fb3cfc.dir
size: 44326962
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer
md5: 4cfbb2830b280084ece14b1ef815b92a.dir
size: 17298
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer_embeddings
md5: 98a7edeee3b630e8e301acfc578a8393.dir
size: 34869
nfiles: 500
voicelab_gold_transcript_processing:
cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_gold_transcript.py\n"
deps:
- path: experiment/voicelab/voicelab_pipeline_gold_transcript.py
md5: 4ba38fdfac616f8a0818cedabf66b94d
size: 2312
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
- path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
size: 110711470
nfiles: 1600
outs:
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
size: 21846798
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
size: 26643278
nfiles: 800
voicelab_techmo_processing:
cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_techmo.py\n"
deps:
- path: experiment/voicelab/voicelab_pipeline_techmo.py
md5: 3d6347486055a11e399beac71ce2f877
size: 3479
- path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo
md5: 6c3b356723d562c978f84e733b91f5d0.dir
size: 17539259
nfiles: 800
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
size: 21846798
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
size: 26643278
nfiles: 800
outs:
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer
size: 81650836
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer
md5: b1a674826142a44095a4c6439ac49024.dir
size: 27934
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
size: 39158267
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
size: 24482297
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer_embeddings
md5: 93d34d82f8536014ddbe0cf0645dd837.dir
size: 174322727
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer
md5: 2fe3288abe85e4a385e2aefa0e8cad7e.dir
size: 27780
nfiles: 800
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer_embeddings
md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir
size: 56182
nfiles: 800
voicelab_ajn_processing:
cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_ajn_asr.py\n"
deps:
- path: experiment/voicelab/voicelab_pipeline_ajn_asr.py
md5: 85e8d3d79379e6d5db751e03c5ebae75
size: 4161
- path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_ajn
md5: 49a38b90f1265a61b90b54f820415011.dir
size: 32601414
nfiles: 800
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
md5: 9edf1e743faa9fc3515790acb6fd8cab.dir
size: 21846798
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
md5: c166937f6e8ae9d28412ca1e3e43469e.dir
size: 26643278
nfiles: 800
outs:
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
md5: 94181d7a0731e8defbdcb4b477ad72a2.dir
size: 48470646
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
md5: ef8be18b8acca299f9b9542ac8643a87.dir
size: 20536889
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_alignment_wer
md5: b2d3a9872e6016cfde8e6d025bef373b.dir
size: 78539613
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_metrics_wer
md5: d0e1ef5f57de27a2356d2f2050a93349.dir
size: 27353
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer
md5: acb5337346e70bed974dfe7ca7947d79.dir
size: 104789466
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer_embeddings
md5: 93d34d82f8536014ddbe0cf0645dd837.dir
size: 174322727
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer
md5: 903096554a3ea6896c4abaa5e2c71d4c.dir
size: 16505
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer_embeddings
md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir
size: 56182
nfiles: 800