Skip to content
Snippets Groups Projects
dvc.lock 16.6 KiB
Newer Older
Marcin Wątroba's avatar
Marcin Wątroba committed
schema: '2.0'
stages:
  import_luna_to_common_format:
    cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
    deps:
    - path: experiment/luna/import_dataset/import_luna.py
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: f40adccbf0b51094a71b876c9ccad751
      size: 8265
Marcin Wątroba's avatar
Marcin Wątroba committed
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    outs:
    - path: experiment_data/dataset_relation_manager_data/luna
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 773f92667e16efd915ec6384d06aa4fb.dir
      size: 229007155
      nfiles: 1000
  luna_main_pipeline:
    cmd: "PYTHONPATH=. python -m spacy download pl_core_news_lg\nPYTHONPATH=. python\
      \ experiment/luna/pipeline/luna_main.py\n"
    deps:
    - path: experiment_data/cached_asr/luna_techmo
      md5: 033ea7b5434dded73bf869bfdd299462.dir
      size: 4256479
      nfiles: 500
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/dataset_relation_manager_data/luna
      md5: 773f92667e16efd915ec6384d06aa4fb.dir
      size: 229007155
      nfiles: 1000
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna
      md5: 2e334734387ab4579b7b5269d5029e81.dir
      size: 71627685
      nfiles: 4000
Marcin Wątroba's avatar
Marcin Wątroba committed
  luna_import_to_common_format:
    cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
    deps:
    - path: experiment/luna/import_dataset/import_luna.py
      md5: 44a1b914afda2ae74462e7afd83f658e
      size: 8278
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    outs:
    - path: experiment_data/dataset_relation_manager_data/luna
      md5: ff680a49296818460a49bd0c70089a4a.dir
      size: 229007155
      nfiles: 1000
  voicelab_import_to_common_format:
Marcin Wątroba's avatar
Marcin Wątroba committed
    cmd: PYTHONPATH=. python -u experiment/voicelab/import_data.py
Marcin Wątroba's avatar
Marcin Wątroba committed
    deps:
    - path: experiment/voicelab/import_data.py
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 41acb98a1517e66c052182fe0a1403ba
      size: 2108
Marcin Wątroba's avatar
Marcin Wątroba committed
    - path: experiment_data/dataset/voicelab_cbiz_testset_20220322
      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
      size: 4803739404
      nfiles: 1600
    outs:
    - path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 4046ea5d80966f0c017b2c4bec0e7c9b.dir
Marcin Wątroba's avatar
Marcin Wątroba committed
      size: 110711470
      nfiles: 1600
  luna_gold_transcript_processing:
    cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_gold_transcript_processing.py\n"
    deps:
    - path: experiment/luna/pipeline/luna_gold_transcript_processing.py
      md5: 2bae24d511febebb26b3264b204784f5
      size: 1466
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/dataset_relation_manager_data/luna
      md5: ff680a49296818460a49bd0c70089a4a.dir
      size: 229007155
      nfiles: 1000
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
      size: 6706925
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
      md5: 24a399475b752737db0f2a8671507014.dir
      size: 6785648
      nfiles: 500
  luna_ajn_processing:
    cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_ajn_asr_processing.py\n"
    deps:
    - path: experiment/luna/pipeline/luna_ajn_asr_processing.py
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 2d66cb8890c420b55e8b7eb33ac32ba2
      size: 3558
    - path: experiment_data/cached_asr/luna_ajn_polish_asr
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 10454ef4568c2023e9d51ad418db2854.dir
      size: 1276562
      nfiles: 495
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
      size: 6706925
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
      md5: 24a399475b752737db0f2a8671507014.dir
      size: 6785648
      nfiles: 500
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 8c080d8110e5860e78bfcb311fe2b90d.dir
      size: 6204883
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: f06d2f1369b18e5fa126af5a00a8f0b8.dir
      size: 6590702
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 164f3b4796bcab894831da4f0a0fa0af.dir
      size: 22096130
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: ee5ae7387429992fe04fcbde24e2bd24.dir
      size: 17037
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 00d84c15ae1c1a491625ee4dd8db6418.dir
      size: 20803179
      nfiles: 500
Marcin Wątroba's avatar
Marcin Wątroba committed
    - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer_embeddings
      md5: c2824c0c5cf433dbf864ebbdc2fb3cfc.dir
      size: 44326962
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: fdbccc71fa84d0a68f4cd6723399e5dd.dir
      size: 17045
      nfiles: 500
Marcin Wątroba's avatar
Marcin Wątroba committed
    - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer_embeddings
      md5: 98a7edeee3b630e8e301acfc578a8393.dir
      size: 34869
      nfiles: 500
  luna_techmo_processing:
    cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py\n"
    deps:
    - path: experiment/luna/pipeline/luna_techmo_processing.py
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 75069cd6e3a61dfaaf49c2bdb1e81976
      size: 3416
    - path: experiment_data/cached_asr/luna_techmo
      md5: 033ea7b5434dded73bf869bfdd299462.dir
      size: 4256479
      nfiles: 500
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
      size: 6706925
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
      md5: 24a399475b752737db0f2a8671507014.dir
      size: 6785648
      nfiles: 500
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 94762d19a853810064afd38319d05a2c.dir
      size: 20897599
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer
      md5: 4efbe309674d9d494bae3dac057025ba.dir
      size: 17341
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 3787c6a4c7941787253165e2ba760e73.dir
      size: 9697519
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 337b6bf947ee47cda30b3cc75f954e8e.dir
      size: 6124559
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: afc25d6ad22bed4ded5cb07028bff1cf.dir
      size: 21380796
      nfiles: 500
Marcin Wątroba's avatar
Marcin Wątroba committed
    - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer_embeddings
      md5: c2824c0c5cf433dbf864ebbdc2fb3cfc.dir
      size: 44326962
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer
      md5: 4cfbb2830b280084ece14b1ef815b92a.dir
      size: 17298
      nfiles: 500
Marcin Wątroba's avatar
Marcin Wątroba committed
    - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer_embeddings
      md5: 98a7edeee3b630e8e301acfc578a8393.dir
      size: 34869
      nfiles: 500
Marcin Wątroba's avatar
Marcin Wątroba committed
  voicelab_gold_transcript_processing:
    cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_gold_transcript.py\n"
    deps:
    - path: experiment/voicelab/voicelab_pipeline_gold_transcript.py
      md5: 4ba38fdfac616f8a0818cedabf66b94d
      size: 2312
    - path: experiment_data/dataset/voicelab_cbiz_testset_20220322
      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
      size: 4803739404
      nfiles: 1600
    - path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 4046ea5d80966f0c017b2c4bec0e7c9b.dir
Marcin Wątroba's avatar
Marcin Wątroba committed
      size: 110711470
      nfiles: 1600
    outs:
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 9edf1e743faa9fc3515790acb6fd8cab.dir
Marcin Wątroba's avatar
Marcin Wątroba committed
      size: 21846798
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: c166937f6e8ae9d28412ca1e3e43469e.dir
Marcin Wątroba's avatar
Marcin Wątroba committed
      size: 26643278
      nfiles: 800
  voicelab_techmo_processing:
    cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_techmo.py\n"
    deps:
    - path: experiment/voicelab/voicelab_pipeline_techmo.py
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 3d6347486055a11e399beac71ce2f877
      size: 3479
Marcin Wątroba's avatar
Marcin Wątroba committed
    - path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo
      md5: 6c3b356723d562c978f84e733b91f5d0.dir
      size: 17539259
      nfiles: 800
    - path: experiment_data/dataset/voicelab_cbiz_testset_20220322
      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
      size: 4803739404
      nfiles: 1600
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 9edf1e743faa9fc3515790acb6fd8cab.dir
Marcin Wątroba's avatar
Marcin Wątroba committed
      size: 21846798
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: c166937f6e8ae9d28412ca1e3e43469e.dir
Marcin Wątroba's avatar
Marcin Wątroba committed
      size: 26643278
      nfiles: 800
    outs:
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: a15a7a19f46e329c8b77eeecdda9d7b4.dir
Marcin Wątroba's avatar
Marcin Wątroba committed
      size: 81650836
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer
      md5: b1a674826142a44095a4c6439ac49024.dir
      size: 27934
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: da32e6fa9d986deddb594cb66e649864.dir
Marcin Wątroba's avatar
Marcin Wątroba committed
      size: 39158267
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: cd89a91a33629088ba6fc30ef8427dee.dir
Marcin Wątroba's avatar
Marcin Wątroba committed
      size: 24482297
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 0b714391682432408d74beee1cd5a14a.dir
Marcin Wątroba's avatar
Marcin Wątroba committed
      size: 83756423
      nfiles: 800
Marcin Wątroba's avatar
Marcin Wątroba committed
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer_embeddings
      md5: 93d34d82f8536014ddbe0cf0645dd837.dir
      size: 174322727
      nfiles: 800
Marcin Wątroba's avatar
Marcin Wątroba committed
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer
      md5: 2fe3288abe85e4a385e2aefa0e8cad7e.dir
      size: 27780
      nfiles: 800
Marcin Wątroba's avatar
Marcin Wątroba committed
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer_embeddings
      md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir
      size: 56182
      nfiles: 800
  voicelab_ajn_processing:
    cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_ajn_asr.py\n"
    deps:
    - path: experiment/voicelab/voicelab_pipeline_ajn_asr.py
      md5: 85e8d3d79379e6d5db751e03c5ebae75
      size: 4161
    - path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_ajn
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 0705aafa0969142288cc9baa88d1ed57.dir
      size: 6896694
Marcin Wątroba's avatar
Marcin Wątroba committed
      nfiles: 800
    - path: experiment_data/dataset/voicelab_cbiz_testset_20220322
      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
      size: 4803739404
      nfiles: 1600
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
      md5: 9edf1e743faa9fc3515790acb6fd8cab.dir
      size: 21846798
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
      md5: c166937f6e8ae9d28412ca1e3e43469e.dir
      size: 26643278
      nfiles: 800
    outs:
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: da10bb60107a86f98b2d07fef5616390.dir
      size: 22765926
Marcin Wątroba's avatar
Marcin Wątroba committed
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: e8a48a0a63c1569ec734e1c8bb03c7db.dir
Marcin Wątroba's avatar
Marcin Wątroba committed
      size: 20536889
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_alignment_wer
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 7806779e936ec6121b8d72e0d0e3ed59.dir
Marcin Wątroba's avatar
Marcin Wątroba committed
      size: 78539613
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_metrics_wer
      md5: d0e1ef5f57de27a2356d2f2050a93349.dir
      size: 27353
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: d190f33e6643f62ecbeb9e5ae5fb5e02.dir
      size: 78992762
Marcin Wątroba's avatar
Marcin Wątroba committed
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer_embeddings
      md5: 93d34d82f8536014ddbe0cf0645dd837.dir
      size: 174322727
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer
Marcin Wątroba's avatar
Marcin Wątroba committed
      md5: 04f6ccbaf94cf08c34ac201ae079c21c.dir
      size: 25307
Marcin Wątroba's avatar
Marcin Wątroba committed
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer_embeddings
      md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir
      size: 56182
      nfiles: 800
Marcin Wątroba's avatar
Marcin Wątroba committed
  voicelab_tag_spacy_processing:
    cmd: "PYTHONPATH=. python -u experiment/voicelab/spacy_tag_processing.py\n"
    deps:
    - path: experiment/voicelab/spacy_tag_processing.py
      md5: b5f996e5be56cdf93eec23b9c0c066b9
      size: 2580
    - path: experiment_data/dataset/voicelab_cbiz_testset_20220322
      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
      size: 4803739404
      nfiles: 1600
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
      md5: e8a48a0a63c1569ec734e1c8bb03c7db.dir
      size: 20536889
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
      md5: c166937f6e8ae9d28412ca1e3e43469e.dir
      size: 26643278
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
      md5: cd89a91a33629088ba6fc30ef8427dee.dir
      size: 24482297
      nfiles: 800
    outs:
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_ajn_alignment_wer_embeddings
      md5: c7914fa4d415a4815e371017ef505358.dir
      size: 80129550
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_ajn_metrics_wer_embeddings
      md5: ac726b3b371256176aee1364cb1fec88.dir
      size: 27258
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_techmo_alignment_wer_embeddings
      md5: 47d1b5a820806dbad941b19547eb1273.dir
      size: 83756819
      nfiles: 800
    - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/tag_spacy_techmo_metrics_wer_embeddings
      md5: ba59691f467f0486f832bf86326c5142.dir
      size: 27781
      nfiles: 800
  luna_tag_spacy_processing:
    cmd: "PYTHONPATH=. python -u experiment/luna/pipeline/spacy_tag_processing.py\n"
    deps:
    - path: experiment/luna/pipeline/spacy_tag_processing.py
      md5: bdaf1cae6863815ce59e022a493379da
      size: 2567
    - path: experiment_data/dataset/LUNA.PL
      md5: d342155b1871e881797cf7da09d5dc3c.dir
      size: 1578358645
      nfiles: 4500
    - path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
      md5: f06d2f1369b18e5fa126af5a00a8f0b8.dir
      size: 6590702
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
      md5: 24a399475b752737db0f2a8671507014.dir
      size: 6785648
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
      md5: 337b6bf947ee47cda30b3cc75f954e8e.dir
      size: 6124559
      nfiles: 500
    outs:
    - path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_ajn_alignment_wer_embeddings
      md5: bd0439d750f23b978adbfd4ef1151a9c.dir
      size: 22511019
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_ajn_metrics_wer_embeddings
      md5: 85a4a0b2d6b91d9745f55944fb3886df.dir
      size: 17014
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_techmo_alignment_wer_embeddings
      md5: a6f1ef0da2dc2f45f522e3b2e7dec0f9.dir
      size: 21411162
      nfiles: 500
    - path: experiment_data/pipeline/asr_benchmark_luna/tag_spacy_techmo_metrics_wer_embeddings
      md5: abb1f6102a1de3d63d0e9e2cb797d2b9.dir
      size: 17369
      nfiles: 500