Newer
Older
schema: '2.0'
stages:
import_luna_to_common_format:
cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
deps:
- path: experiment/luna/import_dataset/import_luna.py
md5: f40adccbf0b51094a71b876c9ccad751
size: 8265
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
outs:
- path: experiment_data/dataset_relation_manager_data/luna
md5: 773f92667e16efd915ec6384d06aa4fb.dir
size: 229007155
nfiles: 1000
luna_main_pipeline:
cmd: "PYTHONPATH=. python -m spacy download pl_core_news_lg\nPYTHONPATH=. python\
\ experiment/luna/pipeline/luna_main.py\n"
deps:
- path: experiment_data/cached_asr/luna_techmo
md5: 033ea7b5434dded73bf869bfdd299462.dir
size: 4256479
nfiles: 500
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/dataset_relation_manager_data/luna
md5: 773f92667e16efd915ec6384d06aa4fb.dir
size: 229007155
nfiles: 1000
outs:
- path: experiment_data/pipeline/asr_benchmark_luna
md5: 2e334734387ab4579b7b5269d5029e81.dir
size: 71627685
nfiles: 4000
luna_import_to_common_format:
cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
deps:
- path: experiment/luna/import_dataset/import_luna.py
md5: 44a1b914afda2ae74462e7afd83f658e
size: 8278
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
outs:
- path: experiment_data/dataset_relation_manager_data/luna
md5: ff680a49296818460a49bd0c70089a4a.dir
size: 229007155
nfiles: 1000
voicelab_import_to_common_format:
cmd: PYTHONPATH=. python -u experiment/voicelab/import_data.py
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
outs:
- path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
luna_gold_transcript_processing:
cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_gold_transcript_processing.py\n"
deps:
- path: experiment/luna/pipeline/luna_gold_transcript_processing.py
md5: 2bae24d511febebb26b3264b204784f5
size: 1466
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/dataset_relation_manager_data/luna
md5: ff680a49296818460a49bd0c70089a4a.dir
size: 229007155
nfiles: 1000
outs:
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
size: 6706925
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
md5: 24a399475b752737db0f2a8671507014.dir
size: 6785648
nfiles: 500
luna_ajn_processing:
cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_ajn_asr_processing.py\n"
deps:
- path: experiment/luna/pipeline/luna_ajn_asr_processing.py
md5: 2d66cb8890c420b55e8b7eb33ac32ba2
size: 3558
- path: experiment_data/cached_asr/luna_ajn_polish_asr
md5: 10454ef4568c2023e9d51ad418db2854.dir
size: 1276562
nfiles: 495
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
size: 6706925
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
md5: 24a399475b752737db0f2a8671507014.dir
size: 6785648
nfiles: 500
outs:
- path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
md5: 8c080d8110e5860e78bfcb311fe2b90d.dir
size: 6204883
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
md5: f06d2f1369b18e5fa126af5a00a8f0b8.dir
size: 6590702
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer
md5: 164f3b4796bcab894831da4f0a0fa0af.dir
size: 22096130
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer
md5: ee5ae7387429992fe04fcbde24e2bd24.dir
size: 17037
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer
md5: 00d84c15ae1c1a491625ee4dd8db6418.dir
size: 20803179
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer_embeddings
md5: c2824c0c5cf433dbf864ebbdc2fb3cfc.dir
size: 44326962
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer
md5: fdbccc71fa84d0a68f4cd6723399e5dd.dir
size: 17045
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer_embeddings
md5: 98a7edeee3b630e8e301acfc578a8393.dir
size: 34869
nfiles: 500
luna_techmo_processing:
cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py\n"
deps:
- path: experiment/luna/pipeline/luna_techmo_processing.py
md5: 75069cd6e3a61dfaaf49c2bdb1e81976
size: 3416
- path: experiment_data/cached_asr/luna_techmo
md5: 033ea7b5434dded73bf869bfdd299462.dir
size: 4256479
nfiles: 500
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
size: 6706925
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
md5: 24a399475b752737db0f2a8671507014.dir
size: 6785648
nfiles: 500
outs:
- path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer
size: 20897599
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer
md5: 4efbe309674d9d494bae3dac057025ba.dir
size: 17341
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
size: 9697519
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
size: 6124559
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer
- path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer_embeddings
md5: c2824c0c5cf433dbf864ebbdc2fb3cfc.dir
size: 44326962
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer
md5: 4cfbb2830b280084ece14b1ef815b92a.dir
size: 17298
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer_embeddings
md5: 98a7edeee3b630e8e301acfc578a8393.dir
size: 34869
nfiles: 500
voicelab_gold_transcript_processing:
cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_gold_transcript.py\n"
deps:
- path: experiment/voicelab/voicelab_pipeline_gold_transcript.py
md5: 4ba38fdfac616f8a0818cedabf66b94d
size: 2312
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
- path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
size: 110711470
nfiles: 1600
outs:
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
size: 21846798
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
size: 26643278
nfiles: 800
voicelab_techmo_processing:
cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_techmo.py\n"
deps:
- path: experiment/voicelab/voicelab_pipeline_techmo.py
md5: 3d6347486055a11e399beac71ce2f877
size: 3479
- path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo
md5: 6c3b356723d562c978f84e733b91f5d0.dir
size: 17539259
nfiles: 800
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
size: 21846798
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
size: 26643278
nfiles: 800
outs:
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer
size: 81650836
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer
md5: b1a674826142a44095a4c6439ac49024.dir
size: 27934
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
size: 39158267
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
size: 24482297
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer_embeddings
md5: 93d34d82f8536014ddbe0cf0645dd837.dir
size: 174322727
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer
md5: 2fe3288abe85e4a385e2aefa0e8cad7e.dir
size: 27780
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer_embeddings
md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir
size: 56182
nfiles: 800
voicelab_ajn_processing:
cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_ajn_asr.py\n"
deps:
- path: experiment/voicelab/voicelab_pipeline_ajn_asr.py
md5: 85e8d3d79379e6d5db751e03c5ebae75
size: 4161
- path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_ajn
md5: 0705aafa0969142288cc9baa88d1ed57.dir
size: 6896694
nfiles: 800
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
md5: 9edf1e743faa9fc3515790acb6fd8cab.dir
size: 21846798
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
md5: c166937f6e8ae9d28412ca1e3e43469e.dir
size: 26643278
nfiles: 800
outs:
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
md5: da10bb60107a86f98b2d07fef5616390.dir
size: 22765926
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
size: 20536889
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_alignment_wer
size: 78539613
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_metrics_wer
md5: d0e1ef5f57de27a2356d2f2050a93349.dir
size: 27353
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer
md5: d190f33e6643f62ecbeb9e5ae5fb5e02.dir
size: 78992762
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer_embeddings
md5: 93d34d82f8536014ddbe0cf0645dd837.dir
size: 174322727
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer_embeddings
md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir
size: 56182
nfiles: 800
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
voicelab_tag_spacy_processing:
cmd: "PYTHONPATH=. python -u experiment/voicelab/spacy_tag_processing.py\n"
deps:
- path: experiment/voicelab/spacy_tag_processing.py
md5: b5f996e5be56cdf93eec23b9c0c066b9
size: 2580
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
md5: e8a48a0a63c1569ec734e1c8bb03c7db.dir
size: 20536889
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
md5: c166937f6e8ae9d28412ca1e3e43469e.dir
size: 26643278
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
md5: cd89a91a33629088ba6fc30ef8427dee.dir
size: 24482297
nfiles: 800
outs:
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_ajn_alignment_wer_embeddings
md5: c7914fa4d415a4815e371017ef505358.dir
size: 80129550
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_ajn_metrics_wer_embeddings
md5: ac726b3b371256176aee1364cb1fec88.dir
size: 27258
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_techmo_alignment_wer_embeddings
md5: 47d1b5a820806dbad941b19547eb1273.dir
size: 83756819
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_techmo_metrics_wer_embeddings
md5: ba59691f467f0486f832bf86326c5142.dir
size: 27781
nfiles: 800
luna_tag_spacy_processing:
cmd: "PYTHONPATH=. python -u experiment/luna/pipeline/spacy_tag_processing.py\n"
deps:
- path: experiment/luna/pipeline/spacy_tag_processing.py
md5: bdaf1cae6863815ce59e022a493379da
size: 2567
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
md5: f06d2f1369b18e5fa126af5a00a8f0b8.dir
size: 6590702
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
md5: 24a399475b752737db0f2a8671507014.dir
size: 6785648
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
md5: 337b6bf947ee47cda30b3cc75f954e8e.dir
size: 6124559
nfiles: 500
outs:
- path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_ajn_alignment_wer_embeddings
md5: bd0439d750f23b978adbfd4ef1151a9c.dir
size: 22511019
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_ajn_metrics_wer_embeddings
md5: 85a4a0b2d6b91d9745f55944fb3886df.dir
size: 17014
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_techmo_alignment_wer_embeddings
md5: a6f1ef0da2dc2f45f522e3b2e7dec0f9.dir
size: 21411162
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_techmo_metrics_wer_embeddings
md5: abb1f6102a1de3d63d0e9e2cb797d2b9.dir
size: 17369
nfiles: 500
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
voicelab_ner_spacy_processing:
cmd: "PYTHONPATH=. python -u experiment/voicelab/spacy_ner_processing.py\n"
deps:
- path: experiment/voicelab/spacy_ner_processing.py
md5: f6a4f2e34a31d15345435e11b425b1dd
size: 2868
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
md5: da10bb60107a86f98b2d07fef5616390.dir
size: 22765926
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
md5: 9edf1e743faa9fc3515790acb6fd8cab.dir
size: 21846798
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
md5: da32e6fa9d986deddb594cb66e649864.dir
size: 39158267
nfiles: 800
outs:
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_ajn_alignment_wer_embeddings
md5: 742f665235ea5911b79974a06de9551e.dir
size: 76044699
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_ajn_metrics_wer_embeddings
md5: 17c5ae5ffa5b380d9be5bcc5c1f71401.dir
size: 27796
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_techmo_alignment_wer_embeddings
md5: 4b161efe4a5dcedd0dfe481ff69a74a0.dir
size: 28195
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ner_spacy_techmo_metrics_wer_embeddings
md5: 9161b4f164b2e1a5e5e95d1069f71377.dir
size: 79503990
nfiles: 800
luna_ner_spacy_processing:
cmd: "PYTHONPATH=. python -u experiment/luna/pipeline/spacy_ner_processing.py\n"
deps:
- path: experiment/luna/pipeline/spacy_ner_processing.py
md5: 95191ec772c0766911c8d297ad957778
size: 2855
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
md5: 8c080d8110e5860e78bfcb311fe2b90d.dir
size: 6204883
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
size: 6706925
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
md5: 3787c6a4c7941787253165e2ba760e73.dir
size: 9697519
nfiles: 500
outs:
- path: experiment_data/pipeline/asr_benchmark_luna/ner_spacy_ajn_alignment_wer_embeddings
md5: 79aecfe01e3ce64fef8f83de75b08bfb.dir
size: 21308988
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/ner_spacy_ajn_metrics_wer_embeddings
md5: 2e136b8d41d48429267c966d5688d78a.dir
size: 17297
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/ner_spacy_techmo_alignment_wer_embeddings
md5: e4948a2a68306630df0459d09ccd5ee6.dir
size: 17418
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/ner_spacy_techmo_metrics_wer_embeddings
md5: ccbdbd523fb102a196abe8750cb78367.dir
size: 20451389
nfiles: 500
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
luna_duckling_processing:
cmd: "PYTHONPATH=. python -u experiment/luna/pipeline/duckling_processing.py\n"
deps:
- path: experiment/luna/pipeline/duckling_processing.py
md5: deca760ecaaf1da2e1c3c9f05a8058d1
size: 3375
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
md5: 8c080d8110e5860e78bfcb311fe2b90d.dir
size: 6204883
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
size: 6706925
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
md5: 3787c6a4c7941787253165e2ba760e73.dir
size: 9697519
nfiles: 500
outs:
- path: experiment_data/pipeline/asr_benchmark_luna/ajn_duckling
md5: 29d5e8e54dfa3479bca63b8d7e153618.dir
size: 688654
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_duckling
md5: 5b8c7cc874b2201c3b38d81cbd7f7d8e.dir
size: 1169949
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/techmo_duckling
md5: 4ebcdbb04173efe6ce11acd22641047f.dir
size: 1058036
nfiles: 500
voicelab_duckling_processing:
cmd: "PYTHONPATH=. python -u experiment/voicelab/duckling_processing.py\n"
deps:
- path: experiment/voicelab/duckling_processing.py
md5: 78ddf7368a19eacf95fb6fa07e4cc6a1
size: 3388
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
md5: da10bb60107a86f98b2d07fef5616390.dir
size: 22765926
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
md5: 9edf1e743faa9fc3515790acb6fd8cab.dir
size: 21846798
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
md5: da32e6fa9d986deddb594cb66e649864.dir
size: 39158267
nfiles: 800
outs:
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_duckling
md5: d578db039b5e140b8ee93f1155989de0.dir
size: 1275192
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_duckling
md5: 7c7e509b454e1e5c230536772b6368a3.dir
size: 2962070
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_duckling
md5: 5079efe14a12c0d406c56c6423476f7e.dir
size: 2554174
nfiles: 800
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
luna_wav2vec2_processing:
cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_wav2vec2_asr_processing.py\n"
deps:
- path: experiment/luna/pipeline/luna_wav2vec2_asr_processing.py
md5: 7326a6347c47bba7bd76419afa99a062
size: 3874
- path: experiment_data/cached_asr/luna_wav2vec2_polish_asr
md5: 0555f6346ef6332d27fc0979e8ac16f7.dir
size: 16356269
nfiles: 456
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
size: 6706925
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
md5: 24a399475b752737db0f2a8671507014.dir
size: 6785648
nfiles: 500
outs:
- path: experiment_data/pipeline/asr_benchmark_luna/pos_wav2vec2_alignment_wer
md5: f6cb75172894f7ddccbff1d78b889e2b.dir
size: 18871817
nfiles: 476
- path: experiment_data/pipeline/asr_benchmark_luna/pos_wav2vec2_metrics_wer
md5: 9961aa5fc267ac0d7773be2679054fe8.dir
size: 16385
nfiles: 476
- path: experiment_data/pipeline/asr_benchmark_luna/wav2vec2_polish_asr
md5: a3dd221c9dfe28c8988e4be1b42877ce.dir
size: 21669041
nfiles: 476
- path: experiment_data/pipeline/asr_benchmark_luna/wav2vec2_spacy
md5: 6ffddf1732e170f6d75ce19df6bf40b9.dir
size: 5157578
nfiles: 476
- path: experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_alignment_wer
md5: c8f5f1bdf4f061860445038ccb192f9d.dir
size: 19391267
nfiles: 476
- path: experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_alignment_wer_embeddings
md5: 04017200d3c60911f4cff0869e80a67a.dir
size: 40334791
nfiles: 476
- path: experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_metrics_wer
md5: fbee5d3ef2f839d4868781a34b179608.dir
size: 16281
nfiles: 476
- path: experiment_data/pipeline/asr_benchmark_luna/word_wav2vec2_metrics_wer_embeddings
md5: dece407fd19adb3472270d032bac5e47.dir
size: 25672
nfiles: 476
common_voice_gold_transcript_processing:
cmd: "PYTHONPATH=. python -u experiment/common_voice/pipeline/common_voice_gold_transcript.py\n"
deps:
- path: experiment/common_voice/pipeline/common_voice_gold_transcript.py
md5: 8bb88130aa769dc21b77c753f95cdc1f
size: 1520
- path: experiment_data/dataset_relation_manager_data/common_voice
md5: 7a345b4c6c4ac9b784e21b5f16a53f6b.dir
size: 21756677
nfiles: 16310
outs:
- path: experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript
md5: 2321e1afe83ed0dd53d52dbdc38e98ee.dir
size: 4093545
nfiles: 8155
- path: experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript_spacy
md5: e5a4b007ef80cd253223884b1c1d8f2c.dir
size: 6457976
nfiles: 8155
common_voice_google_processing:
cmd: "PYTHONPATH=. python -u experiment/common_voice/pipeline/common_voice_google.py\n"
deps:
- path: experiment/common_voice/pipeline/common_voice_google.py
md5: 45b499a1897efaf2d7ee2ffec58984e1
size: 4983
- path: experiment_data/cached_asr/common_voice_google
md5: e640b898f723eed2af42b93596eabecd.dir
size: 1701910
nfiles: 8143
- path: experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript
md5: 2321e1afe83ed0dd53d52dbdc38e98ee.dir
size: 4093545
nfiles: 8155
- path: experiment_data/pipeline/asr_benchmark_common_voice/gold_transcript_spacy
md5: e5a4b007ef80cd253223884b1c1d8f2c.dir
size: 6457976
nfiles: 8155
outs:
- path: experiment_data/pipeline/asr_benchmark_common_voice/google_polish_asr
md5: c36e767c4aa84fdee97916089d585eec.dir
size: 5854366
nfiles: 8143
- path: experiment_data/pipeline/asr_benchmark_common_voice/google_spacy
md5: 4c0baa2bf90c445c1bca7bdf937764ff.dir
size: 4755832
nfiles: 8143
- path: experiment_data/pipeline/asr_benchmark_common_voice/ner_spacy_google_alignment_wer_embeddings
md5: 5ecdc0210532a46688141730893dce28.dir
size: 17894226
nfiles: 8143
- path: experiment_data/pipeline/asr_benchmark_common_voice/ner_spacy_google_metrics_wer_embeddings
md5: 252cd908a3a2307e88c1cccc7a5ce390.dir
size: 236348
nfiles: 8143
- path: experiment_data/pipeline/asr_benchmark_common_voice/pos_google_alignment_wer
md5: 1e9986ecbba5a4e5e3f32e28a6848679.dir
size: 18316770
nfiles: 8143
- path: experiment_data/pipeline/asr_benchmark_common_voice/pos_google_metrics_wer
md5: 573354d0f3eb4476335d801732cc170c.dir
size: 235666
nfiles: 8143
- path: experiment_data/pipeline/asr_benchmark_common_voice/tag_spacy_google_alignment_wer_embeddings
md5: 54cef5652ecc7bd892a49b9a6352d03a.dir
size: 18753379
nfiles: 8143
- path: experiment_data/pipeline/asr_benchmark_common_voice/tag_spacy_google_metrics_wer_embeddings
md5: 30177297b5a54778ffb841639711303b.dir
size: 234437
nfiles: 8143
- path: experiment_data/pipeline/asr_benchmark_common_voice/word_google_alignment_wer
md5: abe0a93778bd5447a20698d5169082ad.dir
size: 15821424
nfiles: 8143
- path: experiment_data/pipeline/asr_benchmark_common_voice/word_google_alignment_wer_embeddings
md5: b21ec15cbc4f35c69292e52322b64f06.dir
size: 33384670
nfiles: 8143
- path: experiment_data/pipeline/asr_benchmark_common_voice/word_google_metrics_wer
md5: deafb9c70dd9bb48c57dd0802bd8c923.dir
size: 225225
nfiles: 8143
- path: experiment_data/pipeline/asr_benchmark_common_voice/word_google_metrics_wer_embeddings
md5: e2dcce4d029ffe904d363790da375fac.dir
size: 572632
nfiles: 8143
common_voice_import_processing:
cmd: "PYTHONPATH=. python -u experiment/common_voice/import_dataset.py\n"
deps:
- path: experiment/common_voice/import_dataset.py
md5: 9c1f063dacb954bbd84b4174764e9123
size: 435
outs:
- path: experiment_data/dataset_relation_manager_data/common_voice
md5: 7a345b4c6c4ac9b784e21b5f16a53f6b.dir
size: 21756677
nfiles: 16310
minds14_import_processing:
cmd: "PYTHONPATH=. python -u experiment/minds14/import_dataset.py\n"
deps:
- path: experiment/minds14/import_dataset.py
md5: 7455925ab4edb4648b051a05d27d7e79
size: 455
outs:
- path: experiment_data/dataset_relation_manager_data/minds14
md5: 541b9d22e9fe502f4ba74f620bed2432.dir
size: 3545062
nfiles: 1124
minds14_gold_transcript_processing:
cmd: "PYTHONPATH=. python -u experiment/minds14/pipeline/minds14_gold_transcript.py\n"
deps:
- path: experiment/minds14/pipeline/minds14_gold_transcript.py
md5: 84ce9195c9ae96e80d79184f353d429b
size: 1515
- path: experiment_data/dataset_relation_manager_data/minds14
md5: 541b9d22e9fe502f4ba74f620bed2432.dir
size: 3545062
nfiles: 1124
outs:
- path: experiment_data/pipeline/asr_benchmark_minds14/gold_transcript
md5: e7c4ad252834e701b9d26e7b2f17fa85.dir
size: 689374
nfiles: 562
- path: experiment_data/pipeline/asr_benchmark_minds14/gold_transcript_spacy
md5: 718c0de8e14d3dceec43f910ffb2ccdc.dir
size: 843382
nfiles: 562
minds14_google_processing:
cmd: "PYTHONPATH=. python -u experiment/minds14/pipeline/minds14_google.py\n"
deps:
- path: experiment/minds14/pipeline/minds14_google.py
md5: e55897cae4ed3998cf814daf8b253c8a
size: 4662
- path: experiment_data/cached_asr/minds14_google
md5: bb1acae0c3070424d635ae9d2c6e14f0.dir
size: 247940
nfiles: 562
- path: experiment_data/pipeline/asr_benchmark_minds14/gold_transcript
md5: e7c4ad252834e701b9d26e7b2f17fa85.dir
size: 689374
nfiles: 562
- path: experiment_data/pipeline/asr_benchmark_minds14/gold_transcript_spacy
md5: 718c0de8e14d3dceec43f910ffb2ccdc.dir
size: 843382
nfiles: 562
outs:
- path: experiment_data/pipeline/asr_benchmark_minds14/google_polish_asr
md5: cc5c2a7098575b46034689b8cf90d66b.dir
size: 985004
nfiles: 562
- path: experiment_data/pipeline/asr_benchmark_minds14/google_spacy
md5: 8b15b1e376d53a372256e4ff2284135e.dir
size: 851587
nfiles: 562
- path: experiment_data/pipeline/asr_benchmark_minds14/ner_spacy_google_alignment_wer_embeddings
md5: f6aadc7bc34cdc190725d92e2fe3a735.dir
size: 2599569
nfiles: 562
- path: experiment_data/pipeline/asr_benchmark_minds14/ner_spacy_google_metrics_wer_embeddings
md5: 52bf1a1c0dc0b1fc12d56616fe43d4f0.dir
size: 14099
nfiles: 562
- path: experiment_data/pipeline/asr_benchmark_minds14/pos_google_alignment_wer
md5: a91c7d9dc9dfef02409a9ccc9a8fa7d3.dir
size: 2659852
nfiles: 562
- path: experiment_data/pipeline/asr_benchmark_minds14/pos_google_metrics_wer
md5: 5826a2bcb07f2f4bcedea052dc83de4d.dir
size: 14524
nfiles: 562
- path: experiment_data/pipeline/asr_benchmark_minds14/tag_spacy_google_alignment_wer_embeddings
md5: ffb2453de98aaa86436210632a7e162d.dir
size: 2744428
nfiles: 562
- path: experiment_data/pipeline/asr_benchmark_minds14/tag_spacy_google_metrics_wer_embeddings
md5: c3e050294ca4fb0f8bfcf905ca73b840.dir
size: 14984
nfiles: 562
- path: experiment_data/pipeline/asr_benchmark_minds14/word_google_alignment_wer
md5: f9856e7d3316c38002ab7f84aea89e8e.dir
size: 2725626
nfiles: 562
- path: experiment_data/pipeline/asr_benchmark_minds14/word_google_alignment_wer_embeddings
md5: 20a251a5aaf260c36f5b31d304cd826e.dir
size: 5686170
nfiles: 562
- path: experiment_data/pipeline/asr_benchmark_minds14/word_google_metrics_wer
md5: 41621d3845a0035ee77352902c46a186.dir
size: 15012
nfiles: 562
- path: experiment_data/pipeline/asr_benchmark_minds14/word_google_metrics_wer_embeddings
md5: 673f6b46c7735163726f7d9e3b1f2dd4.dir
size: 40260
nfiles: 562
google_fleurs_save_audio_files:
cmd: "PYTHONPATH=. python -u experiment/google_fleurs/save_wav_files.py\n"
deps:
- path: experiment/google_fleurs/save_wav_files.py
md5: e4b807332d972adb4c5a2d0b096b09e5
size: 493
outs:
- path: experiment_data/audio/fleurs_audio
md5: 6a4acda378431fb30839ab2c58668665.dir
size: 236272072
nfiles: 758
google_fleurs_import_processing:
cmd: "PYTHONPATH=. python -u experiment/google_fleurs/import_dataset.py\n"
deps:
- path: experiment/google_fleurs/import_dataset.py
md5: 01daea17400b56d2736669aaf3e2af09
size: 441
outs:
- path: experiment_data/dataset_relation_manager_data/google_fleurs
md5: e9045f637ccec39d67d3ee5e9763cf13.dir
size: 4991297
nfiles: 1516
google_fleurs_gold_transcript_processing:
cmd: "PYTHONPATH=. python -u experiment/google_fleurs/pipeline/google_fleurs_gold_transcript.py\n"
deps:
- path: experiment/google_fleurs/pipeline/google_fleurs_gold_transcript.py
md5: 981d7596a8276f7952fcee705d0a828c
size: 1521
- path: experiment_data/dataset_relation_manager_data/google_fleurs
md5: e9045f637ccec39d67d3ee5e9763cf13.dir
size: 4991297
nfiles: 1516
outs:
- path: experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript
md5: 291a1976499bd24a878636e4c76fc57a.dir
size: 975209
nfiles: 758
- path: experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript_spacy
md5: 76b462a21926ab83f4e63e3046b92d08.dir
size: 1196907
nfiles: 758
google_fleurs_google_processing:
cmd: "PYTHONPATH=. python -u experiment/google_fleurs/pipeline/google_fleurs_google.py\n"
deps:
- path: experiment/google_fleurs/pipeline/google_fleurs_google.py
md5: 433c7eb5b98e35cdd22dfca8a0d50f5e
size: 4673
- path: experiment_data/cached_asr/google_fleurs_google
md5: c740521abae49feebfaf20c2384c8de5.dir
size: 350126
nfiles: 758
- path: experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript
md5: 291a1976499bd24a878636e4c76fc57a.dir
size: 975209
nfiles: 758
- path: experiment_data/pipeline/asr_benchmark_google_fleurs/gold_transcript_spacy
md5: 76b462a21926ab83f4e63e3046b92d08.dir
size: 1196907
nfiles: 758
outs:
- path: experiment_data/pipeline/asr_benchmark_google_fleurs/google_polish_asr
md5: d6cc3d584bac32b78328f18f5f9172cf.dir
size: 1377134
nfiles: 758
- path: experiment_data/pipeline/asr_benchmark_google_fleurs/google_spacy
md5: 28d584893d4f2d6afe8c1226bb813a1e.dir
size: 1183960
nfiles: 758
- path: experiment_data/pipeline/asr_benchmark_google_fleurs/ner_spacy_google_alignment_wer_embeddings
md5: 560b3f62ad19f41037f29609477262aa.dir
size: 3638477
nfiles: 758
- path: experiment_data/pipeline/asr_benchmark_google_fleurs/ner_spacy_google_metrics_wer_embeddings
md5: f0755063eeeb38680dcf1b59e1b61632.dir
size: 21386
nfiles: 758
- path: experiment_data/pipeline/asr_benchmark_google_fleurs/pos_google_alignment_wer
md5: 6881fdb748c383e8f121beeb3f996c15.dir
size: 3712618
nfiles: 758
- path: experiment_data/pipeline/asr_benchmark_google_fleurs/pos_google_metrics_wer
md5: a93545ba089d087be33c0cd41ab88497.dir
size: 22099
nfiles: 758
- path: experiment_data/pipeline/asr_benchmark_google_fleurs/tag_spacy_google_alignment_wer_embeddings
md5: 3af296e52f5e74e5e7db843bd8b7f41b.dir
size: 3844097
nfiles: 758
- path: experiment_data/pipeline/asr_benchmark_google_fleurs/tag_spacy_google_metrics_wer_embeddings
md5: 47f14edff7589a680a130e55055852bd.dir
size: 23377
nfiles: 758
- path: experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_alignment_wer
md5: 34600cd8f1d8bf718f60472afb77bc73.dir
size: 3818553
nfiles: 758
- path: experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_alignment_wer_embeddings
md5: 288fb014884ce9f11caf5f8cd1098cfa.dir
size: 7984964
nfiles: 758
- path: experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_metrics_wer
md5: 984770b57ca48fd793099c9ed67791d0.dir
size: 23344
nfiles: 758
- path: experiment_data/pipeline/asr_benchmark_google_fleurs/word_google_metrics_wer_embeddings
md5: b2626f92231431e163813ef7112c119d.dir
size: 53916
nfiles: 758