Newer
Older
schema: '2.0'
stages:
import_luna_to_common_format:
cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
deps:
- path: experiment/luna/import_dataset/import_luna.py
md5: f40adccbf0b51094a71b876c9ccad751
size: 8265
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
outs:
- path: experiment_data/dataset_relation_manager_data/luna
md5: 773f92667e16efd915ec6384d06aa4fb.dir
size: 229007155
nfiles: 1000
luna_main_pipeline:
cmd: "PYTHONPATH=. python -m spacy download pl_core_news_lg\nPYTHONPATH=. python\
\ experiment/luna/pipeline/luna_main.py\n"
deps:
- path: experiment_data/cached_asr/luna_techmo
md5: 033ea7b5434dded73bf869bfdd299462.dir
size: 4256479
nfiles: 500
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/dataset_relation_manager_data/luna
md5: 773f92667e16efd915ec6384d06aa4fb.dir
size: 229007155
nfiles: 1000
outs:
- path: experiment_data/pipeline/asr_benchmark_luna
md5: 2e334734387ab4579b7b5269d5029e81.dir
size: 71627685
nfiles: 4000
luna_import_to_common_format:
cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
deps:
- path: experiment/luna/import_dataset/import_luna.py
md5: 44a1b914afda2ae74462e7afd83f658e
size: 8278
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
outs:
- path: experiment_data/dataset_relation_manager_data/luna
md5: ff680a49296818460a49bd0c70089a4a.dir
size: 229007155
nfiles: 1000
voicelab_import_to_common_format:
cmd: PYTHONPATH=. python -u experiment/voicelab/import_data.py
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
outs:
- path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
luna_gold_transcript_processing:
cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_gold_transcript_processing.py\n"
deps:
- path: experiment/luna/pipeline/luna_gold_transcript_processing.py
md5: 2bae24d511febebb26b3264b204784f5
size: 1466
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/dataset_relation_manager_data/luna
md5: ff680a49296818460a49bd0c70089a4a.dir
size: 229007155
nfiles: 1000
outs:
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
size: 6706925
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
md5: 24a399475b752737db0f2a8671507014.dir
size: 6785648
nfiles: 500
luna_ajn_processing:
cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_ajn_asr_processing.py\n"
deps:
- path: experiment/luna/pipeline/luna_ajn_asr_processing.py
md5: 2d66cb8890c420b55e8b7eb33ac32ba2
size: 3558
- path: experiment_data/cached_asr/luna_ajn_polish_asr
md5: 10454ef4568c2023e9d51ad418db2854.dir
size: 1276562
nfiles: 495
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
size: 6706925
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
md5: 24a399475b752737db0f2a8671507014.dir
size: 6785648
nfiles: 500
outs:
- path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
md5: 8c080d8110e5860e78bfcb311fe2b90d.dir
size: 6204883
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
md5: f06d2f1369b18e5fa126af5a00a8f0b8.dir
size: 6590702
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer
md5: 164f3b4796bcab894831da4f0a0fa0af.dir
size: 22096130
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer
md5: ee5ae7387429992fe04fcbde24e2bd24.dir
size: 17037
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer
md5: 00d84c15ae1c1a491625ee4dd8db6418.dir
size: 20803179
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer_embeddings
md5: c2824c0c5cf433dbf864ebbdc2fb3cfc.dir
size: 44326962
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer
md5: fdbccc71fa84d0a68f4cd6723399e5dd.dir
size: 17045
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer_embeddings
md5: 98a7edeee3b630e8e301acfc578a8393.dir
size: 34869
nfiles: 500
luna_techmo_processing:
cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py\n"
deps:
- path: experiment/luna/pipeline/luna_techmo_processing.py
md5: 75069cd6e3a61dfaaf49c2bdb1e81976
size: 3416
- path: experiment_data/cached_asr/luna_techmo
md5: 033ea7b5434dded73bf869bfdd299462.dir
size: 4256479
nfiles: 500
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
size: 6706925
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
md5: 24a399475b752737db0f2a8671507014.dir
size: 6785648
nfiles: 500
outs:
- path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer
size: 20897599
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer
md5: 4efbe309674d9d494bae3dac057025ba.dir
size: 17341
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
size: 9697519
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
size: 6124559
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer
- path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer_embeddings
md5: c2824c0c5cf433dbf864ebbdc2fb3cfc.dir
size: 44326962
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer
md5: 4cfbb2830b280084ece14b1ef815b92a.dir
size: 17298
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer_embeddings
md5: 98a7edeee3b630e8e301acfc578a8393.dir
size: 34869
nfiles: 500
voicelab_gold_transcript_processing:
cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_gold_transcript.py\n"
deps:
- path: experiment/voicelab/voicelab_pipeline_gold_transcript.py
md5: 4ba38fdfac616f8a0818cedabf66b94d
size: 2312
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
- path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
size: 110711470
nfiles: 1600
outs:
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
size: 21846798
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
size: 26643278
nfiles: 800
voicelab_techmo_processing:
cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_techmo.py\n"
deps:
- path: experiment/voicelab/voicelab_pipeline_techmo.py
md5: 3d6347486055a11e399beac71ce2f877
size: 3479
- path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo
md5: 6c3b356723d562c978f84e733b91f5d0.dir
size: 17539259
nfiles: 800
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
size: 21846798
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
size: 26643278
nfiles: 800
outs:
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer
size: 81650836
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer
md5: b1a674826142a44095a4c6439ac49024.dir
size: 27934
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
size: 39158267
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
size: 24482297
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer_embeddings
md5: 93d34d82f8536014ddbe0cf0645dd837.dir
size: 174322727
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer
md5: 2fe3288abe85e4a385e2aefa0e8cad7e.dir
size: 27780
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer_embeddings
md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir
size: 56182
nfiles: 800
voicelab_ajn_processing:
cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_ajn_asr.py\n"
deps:
- path: experiment/voicelab/voicelab_pipeline_ajn_asr.py
md5: 85e8d3d79379e6d5db751e03c5ebae75
size: 4161
- path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_ajn
md5: 0705aafa0969142288cc9baa88d1ed57.dir
size: 6896694
nfiles: 800
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
md5: 9edf1e743faa9fc3515790acb6fd8cab.dir
size: 21846798
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
md5: c166937f6e8ae9d28412ca1e3e43469e.dir
size: 26643278
nfiles: 800
outs:
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
md5: da10bb60107a86f98b2d07fef5616390.dir
size: 22765926
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
size: 20536889
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_alignment_wer
size: 78539613
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_metrics_wer
md5: d0e1ef5f57de27a2356d2f2050a93349.dir
size: 27353
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer
md5: d190f33e6643f62ecbeb9e5ae5fb5e02.dir
size: 78992762
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer_embeddings
md5: 93d34d82f8536014ddbe0cf0645dd837.dir
size: 174322727
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer_embeddings
md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir
size: 56182
nfiles: 800
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
voicelab_tag_spacy_processing:
cmd: "PYTHONPATH=. python -u experiment/voicelab/spacy_tag_processing.py\n"
deps:
- path: experiment/voicelab/spacy_tag_processing.py
md5: b5f996e5be56cdf93eec23b9c0c066b9
size: 2580
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
md5: e8a48a0a63c1569ec734e1c8bb03c7db.dir
size: 20536889
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
md5: c166937f6e8ae9d28412ca1e3e43469e.dir
size: 26643278
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
md5: cd89a91a33629088ba6fc30ef8427dee.dir
size: 24482297
nfiles: 800
outs:
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_ajn_alignment_wer_embeddings
md5: c7914fa4d415a4815e371017ef505358.dir
size: 80129550
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_ajn_metrics_wer_embeddings
md5: ac726b3b371256176aee1364cb1fec88.dir
size: 27258
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_techmo_alignment_wer_embeddings
md5: 47d1b5a820806dbad941b19547eb1273.dir
size: 83756819
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_techmo_metrics_wer_embeddings
md5: ba59691f467f0486f832bf86326c5142.dir
size: 27781
nfiles: 800
luna_tag_spacy_processing:
cmd: "PYTHONPATH=. python -u experiment/luna/pipeline/spacy_tag_processing.py\n"
deps:
- path: experiment/luna/pipeline/spacy_tag_processing.py
md5: bdaf1cae6863815ce59e022a493379da
size: 2567
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
md5: f06d2f1369b18e5fa126af5a00a8f0b8.dir
size: 6590702
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
md5: 24a399475b752737db0f2a8671507014.dir
size: 6785648
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
md5: 337b6bf947ee47cda30b3cc75f954e8e.dir
size: 6124559
nfiles: 500
outs:
- path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_ajn_alignment_wer_embeddings
md5: bd0439d750f23b978adbfd4ef1151a9c.dir
size: 22511019
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_ajn_metrics_wer_embeddings
md5: 85a4a0b2d6b91d9745f55944fb3886df.dir
size: 17014
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_techmo_alignment_wer_embeddings
md5: a6f1ef0da2dc2f45f522e3b2e7dec0f9.dir
size: 21411162
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_techmo_metrics_wer_embeddings
md5: abb1f6102a1de3d63d0e9e2cb797d2b9.dir
size: 17369
nfiles: 500