From 46860cf8ca64f03076c51e5222cabc718d80911e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20W=C4=85troba?= <markowanga@gmail.com>
Date: Wed, 11 Jan 2023 23:38:44 +0100
Subject: [PATCH] Add new experiment processing

---
 dvc.lock                                      | 5044 -----------------
 experiment/dataset_helper.py                  |    2 +-
 new_datasets/import_datasets/upload_audio.py  |    1 +
 new_experiment/new_dependency_provider.py     |   22 +
 .../pipeline/dataset_importer/__init__.py     |    0
 .../fleurs_dataset_importer.py                |   30 +
 .../minds14_dataset_importer.py               |   30 +
 .../voxpopuli_dataset_importer.py             |   30 +
 new_experiment/pipeline/import_datasets.py    |   35 +
 new_experiment/pipeline/import_fleurs.py      |   10 +
 new_experiment/utils/hf_dataset_importer.py   |   63 +
 new_experiment/utils/id_calculator.py         |    7 +
 .../utils/loaded_remote_dataset_helper.py     |   32 +
 .../utils/minio_audio_record_repository.py    |   28 +
 new_experiment/utils/property_helper.py       |    4 +-
 poetry.lock                                   |   76 +-
 pyproject.toml                                |    2 +-
 .../repository/experiment_repository.py       |    4 +
 .../repository/mongo_experiment_repository.py |    7 +-
 .../integration/task/processing_task.py       |    9 +-
 20 files changed, 353 insertions(+), 5083 deletions(-)
 create mode 100644 new_experiment/new_dependency_provider.py
 create mode 100644 new_experiment/pipeline/dataset_importer/__init__.py
 create mode 100644 new_experiment/pipeline/dataset_importer/fleurs_dataset_importer.py
 create mode 100644 new_experiment/pipeline/dataset_importer/minds14_dataset_importer.py
 create mode 100644 new_experiment/pipeline/dataset_importer/voxpopuli_dataset_importer.py
 create mode 100644 new_experiment/pipeline/import_datasets.py
 create mode 100644 new_experiment/pipeline/import_fleurs.py
 create mode 100644 new_experiment/utils/hf_dataset_importer.py
 create mode 100644 new_experiment/utils/id_calculator.py
 create mode 100644 new_experiment/utils/loaded_remote_dataset_helper.py
 create mode 100644 new_experiment/utils/minio_audio_record_repository.py

diff --git a/dvc.lock b/dvc.lock
index 601d374..e69de29 100644
--- a/dvc.lock
+++ b/dvc.lock
@@ -1,5044 +0,0 @@
-schema: '2.0'
-stages:
-  import_dataset_to_relation_manager@0:
-    cmd: PYTHONPATH=. python experiment/pl_luna/import_relation_manager.py
-    deps:
-    - path: experiment/pl_luna/import_relation_manager.py
-      md5: 0b72ccf1aa1107d85def09140e76bdf9
-      size: 8284
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    outs:
-    - path: experiment_data/dataset_relation_manager_data/pl_luna
-      md5: 7c6720b017c9fa82bb1c2264239b3b03.dir
-      size: 229007155
-      nfiles: 1000
-  import_dataset_to_relation_manager@1:
-    cmd: PYTHONPATH=. python experiment/pl_minds14/import_relation_manager.py
-    deps:
-    - path: experiment/pl_minds14/import_relation_manager.py
-      md5: d83011855f3f1b2ddec96713c3c41177
-      size: 438
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    outs:
-    - path: experiment_data/dataset_relation_manager_data/pl_minds14
-      md5: b6838bb00cb83d69ae818d8de96a2f18.dir
-      size: 3545062
-      nfiles: 1124
-  import_dataset_to_relation_manager@2:
-    cmd: PYTHONPATH=. python experiment/pl_common_voice/import_relation_manager.py
-    deps:
-    - path: experiment/pl_common_voice/import_relation_manager.py
-      md5: 2d0b47d14088c2c5b61a78ddeeff5293
-      size: 547
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    outs:
-    - path: experiment_data/dataset_relation_manager_data/pl_common_voice
-      md5: fe73df318524dc33331c62b22a7f5237.dir
-      size: 21756677
-      nfiles: 16310
-  process_gold_transcript@1:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_minds14
-    deps:
-    - path: experiment/pipeline_process_gold_transcript.py
-      md5: baac4f80dcdfcc5a49806eea6eec832a
-      size: 1195
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/dataset_relation_manager_data/pl_minds14
-      md5: b6838bb00cb83d69ae818d8de96a2f18.dir
-      size: 3545062
-      nfiles: 1124
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: 35661dfd20421a6a6068b7194e483eba.dir
-      size: 689374
-      nfiles: 562
-  process_gold_transcript@0:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_luna
-    deps:
-    - path: experiment/pipeline_process_gold_transcript.py
-      md5: baac4f80dcdfcc5a49806eea6eec832a
-      size: 1195
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/dataset_relation_manager_data/pl_luna
-      md5: 7c6720b017c9fa82bb1c2264239b3b03.dir
-      size: 229007155
-      nfiles: 1000
-    outs:
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 7e71aa1b181ad75dd08dd7932bc0fc65.dir
-      size: 6706925
-      nfiles: 500
-  process_asr@6:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_minds14
-      md5: 8dbd7480f408f284f4b349c02c991afc.dir
-      size: 3821899
-      nfiles: 2248
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/dataset_relation_manager_data/pl_minds14
-      md5: b6838bb00cb83d69ae818d8de96a2f18.dir
-      size: 3545062
-      nfiles: 1124
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
-      md5: 004f25308b0f5577af589bede338ae9f.dir
-      size: 3523907
-      nfiles: 562
-  process_asr@2:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_luna
-      md5: 8dd7fbde8090469f4a0e41ea1c873cda.dir
-      size: 22986007
-      nfiles: 1951
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/dataset_relation_manager_data/pl_luna
-      md5: 7c6720b017c9fa82bb1c2264239b3b03.dir
-      size: 229007155
-      nfiles: 1000
-    outs:
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__result
-      md5: 80994fc18d2cc2a1fa56fd4436c5378b.dir
-      size: 20658485
-      nfiles: 456
-  process_asr@5:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_minds14
-      md5: 8dbd7480f408f284f4b349c02c991afc.dir
-      size: 3821899
-      nfiles: 2248
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/dataset_relation_manager_data/pl_minds14
-      md5: b6838bb00cb83d69ae818d8de96a2f18.dir
-      size: 3545062
-      nfiles: 1124
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/ajn__result
-      md5: b09eb8176b26586b06093384bbaea770.dir
-      size: 974727
-      nfiles: 559
-  process_asr@4:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_minds14
-      md5: 8dbd7480f408f284f4b349c02c991afc.dir
-      size: 3821899
-      nfiles: 2248
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/dataset_relation_manager_data/pl_minds14
-      md5: b6838bb00cb83d69ae818d8de96a2f18.dir
-      size: 3545062
-      nfiles: 1124
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/google__result
-      md5: 3505364058d488815dd90699e71fd00b.dir
-      size: 985004
-      nfiles: 562
-  process_asr@0:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_luna
-      md5: 8dd7fbde8090469f4a0e41ea1c873cda.dir
-      size: 22986007
-      nfiles: 1951
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/dataset_relation_manager_data/pl_luna
-      md5: 7c6720b017c9fa82bb1c2264239b3b03.dir
-      size: 229007155
-      nfiles: 1000
-    outs:
-    - path: experiment_data/pipeline/pl_luna/google__result
-      md5: 254e060731d669a14f3a5525ccd1fe28.dir
-      size: 5346497
-      nfiles: 500
-  process_asr@1:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_luna
-      md5: 8dd7fbde8090469f4a0e41ea1c873cda.dir
-      size: 22986007
-      nfiles: 1951
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/dataset_relation_manager_data/pl_luna
-      md5: 7c6720b017c9fa82bb1c2264239b3b03.dir
-      size: 229007155
-      nfiles: 1000
-    outs:
-    - path: experiment_data/pipeline/pl_luna/ajn__result
-      md5: bca7f2eae18f606867c9880bd0fe1c8d.dir
-      size: 6159899
-      nfiles: 494
-  process_asr@3:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_luna
-      md5: 8dd7fbde8090469f4a0e41ea1c873cda.dir
-      size: 22986007
-      nfiles: 1951
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/dataset_relation_manager_data/pl_luna
-      md5: 7c6720b017c9fa82bb1c2264239b3b03.dir
-      size: 229007155
-      nfiles: 1000
-    outs:
-    - path: experiment_data/pipeline/pl_luna/techmo__result
-      md5: 4c6609ab50706ad45f3919eaf902a75e.dir
-      size: 9697519
-      nfiles: 500
-  process_asr@7:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_minds14
-      md5: 8dbd7480f408f284f4b349c02c991afc.dir
-      size: 3821899
-      nfiles: 2248
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/dataset_relation_manager_data/pl_minds14
-      md5: b6838bb00cb83d69ae818d8de96a2f18.dir
-      size: 3545062
-      nfiles: 1124
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/techmo__result
-      md5: 13b6f9b23182b9255a802134c47ab4c4.dir
-      size: 1336305
-      nfiles: 562
-  save_wave_files__pl_google_fleurs:
-    cmd: PYTHONPATH=. python experiment/pl_google_fleurs/save_wav_files.py
-    deps:
-    - path: experiment/pl_google_fleurs/save_wav_files.py
-      md5: 5fd1d6210a9766c35cf11892a9951266
-      size: 610
-    outs:
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-  import_dataset_to_relation_manager@3:
-    cmd: PYTHONPATH=. python experiment/pl_vpl_voicelab_cbizoicelab/import_relation_manager.py
-    deps:
-    - path: experiment/pl_voicelab_cbiz/import_relation_manager.py
-      md5: 8b100b393470511b789ee4bc722f12ea
-      size: 2026
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    outs:
-    - path: experiment_data/dataset_relation_manager_data/pl_voicelab_cbiz
-      md5: 7ef4be9a4eeef095baf7a4cd765610d0.dir
-      size: 110711470
-      nfiles: 1600
-  import_dataset_to_relation_manager@4:
-    cmd: PYTHONPATH=. python experiment/pl_google_fleurs/import_relation_manager.py
-    deps:
-    - path: experiment/pl_google_fleurs/import_relation_manager.py
-      md5: f5547b678bb8fe0542eaaf047d1447aa
-      size: 458
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    outs:
-    - path: experiment_data/dataset_relation_manager_data/pl_google_fleurs
-      md5: 5a5609426d0a70990792c8a8b1854a48.dir
-      size: 4991297
-      nfiles: 1516
-  process_gold_transcript@2:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_common_voice
-    deps:
-    - path: experiment/pipeline_process_gold_transcript.py
-      md5: baac4f80dcdfcc5a49806eea6eec832a
-      size: 1195
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/dataset_relation_manager_data/pl_common_voice
-      md5: fe73df318524dc33331c62b22a7f5237.dir
-      size: 21756677
-      nfiles: 16310
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: 4903cf286f3c1ae49876c4ba52713d3c.dir
-      size: 4093545
-      nfiles: 8155
-  process_gold_transcript@3:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_voicelab_cbiz
-    deps:
-    - path: experiment/pipeline_process_gold_transcript.py
-      md5: baac4f80dcdfcc5a49806eea6eec832a
-      size: 1195
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/dataset_relation_manager_data/pl_voicelab_cbiz
-      md5: 7ef4be9a4eeef095baf7a4cd765610d0.dir
-      size: 110711470
-      nfiles: 1600
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: 3462fc9dea267f7db0e133ced6b5f4e9.dir
-      size: 21846798
-      nfiles: 800
-  process_gold_transcript@4:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_google_fleurs
-    deps:
-    - path: experiment/pipeline_process_gold_transcript.py
-      md5: baac4f80dcdfcc5a49806eea6eec832a
-      size: 1195
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/dataset_relation_manager_data/pl_google_fleurs
-      md5: 5a5609426d0a70990792c8a8b1854a48.dir
-      size: 4991297
-      nfiles: 1516
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 7ea17627b30e788f34c81ca9322f2879.dir
-      size: 975209
-      nfiles: 758
-  process_asr@17:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_common_voice
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_common_voice
-      md5: 43714b76048ec7311d08a7c48c282146.dir
-      size: 20219523
-      nfiles: 24453
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/dataset_relation_manager_data/pl_common_voice
-      md5: fe73df318524dc33331c62b22a7f5237.dir
-      size: 21756677
-      nfiles: 16310
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result
-      md5: 402f9a70d682d030b8f7b6a36044c651.dir
-      size: 19014997
-      nfiles: 8154
-  process_asr@10:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_google_fleurs
-      md5: dfa15d901991d0cc9634192e21a9cc23.dir
-      size: 5645552
-      nfiles: 3032
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/dataset_relation_manager_data/pl_google_fleurs
-      md5: 5a5609426d0a70990792c8a8b1854a48.dir
-      size: 4991297
-      nfiles: 1516
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
-      md5: 88a7ae4e0399446f996c11d6ffef701a.dir
-      size: 5137721
-      nfiles: 758
-  process_asr@8:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_google_fleurs
-      md5: dfa15d901991d0cc9634192e21a9cc23.dir
-      size: 5645552
-      nfiles: 3032
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/dataset_relation_manager_data/pl_google_fleurs
-      md5: 5a5609426d0a70990792c8a8b1854a48.dir
-      size: 4991297
-      nfiles: 1516
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/google__result
-      md5: 9cb6cac07efe8e5d0b33ae7c818c5766.dir
-      size: 1377134
-      nfiles: 758
-  save_audio_files:
-    cmd: PYTHONPATH=. python experiment/dataset_specific/pl_google_fleurs/pipeline_save_wav_files.py
-    deps:
-    - path: experiment/dataset_specific/pl_google_fleurs/pipeline_save_wav_files.py
-      md5: 0a386b55f0ad26109f110a4622690066
-      size: 735
-    outs:
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-  pipeline_prepare_relation_manager@4:
-    cmd: PYTHONPATH=. python experiment/dataset_specific/pl_voicelab_cbiz/import_relation_manager.py
-    deps:
-    - path: experiment/dataset_specific/pl_voicelab_cbiz/import_relation_manager.py
-      md5: ca3760c3f051a00575bf015322575d1a
-      size: 2075
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    outs:
-    - path: experiment_data/dataset_relation_manager_data/pl_voicelab_cbiz
-      md5: d23bd3824dc9e68a649edc94fbec50fb.dir
-      size: 110711470
-      nfiles: 1600
-  pipeline_prepare_relation_manager@3:
-    cmd: PYTHONPATH=. python experiment/dataset_specific/pl_minds14/import_relation_manager.py
-    deps:
-    - path: experiment/dataset_specific/pl_minds14/import_relation_manager.py
-      md5: 2deae168760fa019e6d19d0111baca41
-      size: 455
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    outs:
-    - path: experiment_data/dataset_relation_manager_data/pl_minds14
-      md5: 40bb7d02cc76d5b1093955b5046cc3c4.dir
-      size: 3545062
-      nfiles: 1124
-  pipeline_prepare_relation_manager@0:
-    cmd: PYTHONPATH=. python experiment/dataset_specific/pl_common_voice/import_relation_manager.py
-    deps:
-    - path: experiment/dataset_specific/pl_common_voice/import_relation_manager.py
-      md5: 2d0b47d14088c2c5b61a78ddeeff5293
-      size: 547
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    outs:
-    - path: experiment_data/dataset_relation_manager_data/pl_common_voice
-      md5: da29f883159b95496c488a3c9c35e632.dir
-      size: 21756677
-      nfiles: 16310
-  pipeline_prepare_relation_manager@1:
-    cmd: PYTHONPATH=. python experiment/dataset_specific/pl_google_fleurs/import_relation_manager.py
-    deps:
-    - path: experiment/dataset_specific/pl_google_fleurs/import_relation_manager.py
-      md5: f5547b678bb8fe0542eaaf047d1447aa
-      size: 458
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    outs:
-    - path: experiment_data/dataset_relation_manager_data/pl_google_fleurs
-      md5: ce176e60d7e55945e402d711b22b0c6a.dir
-      size: 4991297
-      nfiles: 1516
-  pipeline_gold_transcript@0:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_common_voice
-    deps:
-    - path: experiment/pipeline_process_gold_transcript.py
-      md5: baac4f80dcdfcc5a49806eea6eec832a
-      size: 1195
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/dataset_relation_manager_data/pl_common_voice
-      md5: da29f883159b95496c488a3c9c35e632.dir
-      size: 21756677
-      nfiles: 16310
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-  pipeline_prepare_relation_manager@2:
-    cmd: PYTHONPATH=. python experiment/dataset_specific/pl_luna/import_relation_manager.py
-    deps:
-    - path: experiment/dataset_specific/pl_luna/import_relation_manager.py
-      md5: 100bd47d6984532b076776702d93fb9f
-      size: 8301
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    outs:
-    - path: experiment_data/dataset_relation_manager_data/pl_luna
-      md5: 95ca6826129400092cdc41365a4f6dcb.dir
-      size: 229007155
-      nfiles: 1000
-  pipeline_gold_transcript@2:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_luna
-    deps:
-    - path: experiment/pipeline_process_gold_transcript.py
-      md5: baac4f80dcdfcc5a49806eea6eec832a
-      size: 1195
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/dataset_relation_manager_data/pl_luna
-      md5: 95ca6826129400092cdc41365a4f6dcb.dir
-      size: 229007155
-      nfiles: 1000
-    outs:
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-  pipeline_gold_transcript@3:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_minds14
-    deps:
-    - path: experiment/pipeline_process_gold_transcript.py
-      md5: baac4f80dcdfcc5a49806eea6eec832a
-      size: 1195
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/dataset_relation_manager_data/pl_minds14
-      md5: 40bb7d02cc76d5b1093955b5046cc3c4.dir
-      size: 3545062
-      nfiles: 1124
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-  pipeline_gold_transcript@4:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_voicelab_cbiz
-    deps:
-    - path: experiment/pipeline_process_gold_transcript.py
-      md5: baac4f80dcdfcc5a49806eea6eec832a
-      size: 1195
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/dataset_relation_manager_data/pl_voicelab_cbiz
-      md5: d23bd3824dc9e68a649edc94fbec50fb.dir
-      size: 110711470
-      nfiles: 1600
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-  pipeline_gold_transcript@1:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_google_fleurs
-    deps:
-    - path: experiment/pipeline_process_gold_transcript.py
-      md5: baac4f80dcdfcc5a49806eea6eec832a
-      size: 1195
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/dataset_relation_manager_data/pl_google_fleurs
-      md5: ce176e60d7e55945e402d711b22b0c6a.dir
-      size: 4991297
-      nfiles: 1516
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-  pipeline_asr_result@7:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_voicelab_cbiz
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_voicelab_cbiz/pl_voicelab_cbiz__techmo
-      md5: 6c3b356723d562c978f84e733b91f5d0.dir
-      size: 17539259
-      nfiles: 800
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result
-      md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir
-      size: 39158267
-      nfiles: 800
-  pipeline_asr_result@3:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_common_voice
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_common_voice/pl_common_voice__ajn
-      md5: 873867be79ac82f04d28bd3419fa95f3.dir
-      size: 1677089
-      nfiles: 8155
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/ajn__result
-      md5: 77d873041fe2952e3c45ee4ac6458061.dir
-      size: 6667841
-      nfiles: 8155
-  pipeline_asr_result@17:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_luna/pl_luna__wav2vec2
-      md5: 0555f6346ef6332d27fc0979e8ac16f7.dir
-      size: 16356269
-      nfiles: 456
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__result
-      md5: 9c63b061ac7763144bca121e163ee7aa.dir
-      size: 20658485
-      nfiles: 456
-  pipeline_asr_result@13:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__whisper_tiny
-      md5: f7ba36aee7148e39c4dccd325ad68228.dir
-      size: 1037576
-      nfiles: 758
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__result
-      md5: 7220584482e69b3a9f4b43fba97e97cf.dir
-      size: 2135360
-      nfiles: 758
-  pipeline_asr_result@15:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_luna/pl_luna__ajn
-      md5: 10454ef4568c2023e9d51ad418db2854.dir
-      size: 1276562
-      nfiles: 495
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/ajn__result
-      md5: 653d65e186a7d05958ce3cbef219038c.dir
-      size: 6159899
-      nfiles: 494
-  pipeline_asr_result@2:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_common_voice
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_common_voice/pl_common_voice__techmo
-      md5: 47a9d744ff9f02d0a82a4a30664270b5.dir
-      size: 3686716
-      nfiles: 8155
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/techmo__result
-      md5: 9030cf3640f2749d9c1b4439687bdc2f.dir
-      size: 7761880
-      nfiles: 8136
-  pipeline_asr_result@6:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_voicelab_cbiz
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_voicelab_cbiz/pl_voicelab_cbiz__ajn
-      md5: 0705aafa0969142288cc9baa88d1ed57.dir
-      size: 6896694
-      nfiles: 800
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result
-      md5: 7de1137f44fad26766da0fc309720160.dir
-      size: 22765926
-      nfiles: 800
-  pipeline_asr_result@11:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__techmo
-      md5: 876e8cff4ebd191617fcd3844e411475.dir
-      size: 859227
-      nfiles: 758
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
-      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
-      size: 1880403
-      nfiles: 758
-  pipeline_asr_result@1:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_common_voice
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_common_voice/pl_common_voice__wav2vec2
-      md5: 8f84125abe861abbcd39499079aa1777.dir
-      size: 14830897
-      nfiles: 8155
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result
-      md5: b0d0042d77d7adce37890ca63ad40091.dir
-      size: 19014997
-      nfiles: 8154
-  pipeline_asr_result@8:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_voicelab_cbiz
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_voicelab_cbiz/pl_voicelab_cbiz__whisper_tiny
-      md5: 92d2e61aed2ae428dba95fc3356a37d4.dir
-      size: 19120968
-      nfiles: 800
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/whisper_tiny__result
-      md5: 5d8c8339129de7a9340bb64e3fed22ff.dir
-      size: 38889432
-      nfiles: 800
-  pipeline_asr_result@5:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_voicelab_cbiz
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_voicelab_cbiz/pl_voicelab_cbiz__google
-      md5: 35a79b96887fbf705327937fdd57c0c6.dir
-      size: 5651375
-      nfiles: 799
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result
-      md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir
-      size: 27432599
-      nfiles: 799
-  pipeline_asr_result@12:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__wav2vec2
-      md5: 9638746467b0dea972a0be101a7d5c58.dir
-      size: 4093001
-      nfiles: 758
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
-      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
-      size: 5137721
-      nfiles: 758
-  pipeline_asr_result@16:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_luna/pl_luna__techmo
-      md5: 033ea7b5434dded73bf869bfdd299462.dir
-      size: 4256479
-      nfiles: 500
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/techmo__result
-      md5: 0e596570e1502b38588427bc72dcc006.dir
-      size: 9697519
-      nfiles: 500
-  pipeline_asr_result@10:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__ajn
-      md5: 2218d65c2c73761ce8825015cf79bacd.dir
-      size: 343198
-      nfiles: 758
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__result
-      md5: 545e63a6daf9c46387c1d7d40b85499f.dir
-      size: 1413262
-      nfiles: 758
-  pipeline_asr_result@4:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_common_voice
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_common_voice/pl_common_voice__whisper_tiny
-      md5: b89a21b6106f8bd93068b0e9159c90db.dir
-      size: 5700329
-      nfiles: 8155
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__result
-      md5: 6310f1b24332777817738abe33676505.dir
-      size: 10121705
-      nfiles: 8155
-  pipeline_asr_result@14:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_luna/pl_luna__google
-      md5: ac191369daf21c45c48da1bcd3e3a540.dir
-      size: 1096697
-      nfiles: 500
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/google__result
-      md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir
-      size: 5346497
-      nfiles: 500
-  pipeline_asr_result@9:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__google
-      md5: c740521abae49feebfaf20c2384c8de5.dir
-      size: 350126
-      nfiles: 758
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/google__result
-      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
-      size: 1377134
-      nfiles: 758
-  pipeline_asr_result@0:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_common_voice
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_common_voice/pl_common_voice__google
-      md5: e640b898f723eed2af42b93596eabecd.dir
-      size: 1701910
-      nfiles: 8143
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/google__result
-      md5: afb53476cc93ef4de3591908df41fd2a.dir
-      size: 5854366
-      nfiles: 8143
-  pipeline_word_wer@8:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__result
-      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
-      size: 1377134
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_classic_alignment
-      md5: 9beb57e9be598275ac9d449618da440e.dir
-      size: 3818553
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_classic_metrics
-      md5: 984770b57ca48fd793099c9ed67791d0.dir
-      size: 23344
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_embeddings_alignment
-      md5: aeb20f1662b696a6fc11bcd39a02a0de.dir
-      size: 7984964
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_embeddings_metrics
-      md5: b2626f92231431e163813ef7112c119d.dir
-      size: 53916
-      nfiles: 758
-  pipeline_word_wer@6:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_voicelab_cbiz
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result
-      md5: 7de1137f44fad26766da0fc309720160.dir
-      size: 22765926
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_classic_alignment
-      md5: 1ed03de918f5373afdbdbb020c6161b5.dir
-      size: 78992762
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_classic_metrics
-      md5: 04f6ccbaf94cf08c34ac201ae079c21c.dir
-      size: 25307
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_embeddings_alignment
-      md5: a67e7a6e8a6e0755ea35a519d9decf86.dir
-      size: 128258410
-      nfiles: 646
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_embeddings_metrics
-      md5: c83561c448950860f36037c2287a25f5.dir
-      size: 25194
-      nfiles: 646
-  pipeline_word_wer@17:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__result
-      md5: 0237d69dfa56d1a29a8bc43f464c4f1e.dir
-      size: 11789990
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__word_wer_classic_alignment
-      md5: 41df69d888fbcd0e92d58deb676d5101.dir
-      size: 21939973
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__word_wer_classic_metrics
-      md5: c026954d3d4b240fa177a269530de31b.dir
-      size: 16939
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__word_wer_embeddings_alignment
-      md5: 28999392709820c671406c03b73b4f38.dir
-      size: 45305707
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__word_wer_embeddings_metrics
-      md5: c178c61c6e37b6ed729d2300aae65192.dir
-      size: 32880
-      nfiles: 500
-  pipeline_word_wer@15:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__result
-      md5: 0e596570e1502b38588427bc72dcc006.dir
-      size: 9697519
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/techmo__word_wer_classic_alignment
-      md5: 5e90722635a811db67a1f0d917707b0a.dir
-      size: 21380796
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__word_wer_classic_metrics
-      md5: 4cfbb2830b280084ece14b1ef815b92a.dir
-      size: 17298
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__word_wer_embeddings_alignment
-      md5: 74f8be84e1e913050483713fbc945d80.dir
-      size: 44326962
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__word_wer_embeddings_metrics
-      md5: 98a7edeee3b630e8e301acfc578a8393.dir
-      size: 34869
-      nfiles: 500
-  pipeline_word_wer@12:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__result
-      md5: 7220584482e69b3a9f4b43fba97e97cf.dir
-      size: 2135360
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__word_wer_classic_alignment
-      md5: 54e9e094016886220c23e7bb9808ca7a.dir
-      size: 4024460
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__word_wer_classic_metrics
-      md5: 4dfcbf74f27bd94910ab72561428a771.dir
-      size: 22975
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__word_wer_embeddings_alignment
-      md5: 8d7759d25cd8f31fb37a726e5c5f010f.dir
-      size: 8296122
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__word_wer_embeddings_metrics
-      md5: 4b043e5c91dd095be62726a33b2400df.dir
-      size: 52238
-      nfiles: 758
-  pipeline_word_wer@10:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
-      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
-      size: 1880403
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_classic_alignment
-      md5: 689776c77c4ecaa11578b53480100ecc.dir
-      size: 3821036
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_classic_metrics
-      md5: 8d0d99fd7d965d4070c0b391cd6fa2b0.dir
-      size: 23030
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_embeddings_alignment
-      md5: 0ec5fe337bde2254c91146fd16b9c6af.dir
-      size: 7995553
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_embeddings_metrics
-      md5: 55eb6eb0aacc12e3e2caf224b4b89df1.dir
-      size: 53591
-      nfiles: 758
-  pipeline_word_wer@13:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__result
-      md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir
-      size: 5346497
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/google__word_wer_classic_alignment
-      md5: 5bdee43e646a4e9470310073365ebc37.dir
-      size: 19650202
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__word_wer_classic_metrics
-      md5: 9951fb70382cc8bed9aa48d1185c1c7c.dir
-      size: 16989
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__word_wer_embeddings_alignment
-      md5: a06bd5f0b4c52c679315b6c6d7478084.dir
-      size: 40586004
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__word_wer_embeddings_metrics
-      md5: 351543fb54e2098ac07999482d2280a8.dir
-      size: 34530
-      nfiles: 500
-  pipeline_word_wer@3:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/ajn__result
-      md5: 77d873041fe2952e3c45ee4ac6458061.dir
-      size: 6667841
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_classic_alignment
-      md5: 7b7c5de97970c320cb8cf549839d16fd.dir
-      size: 17724868
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_classic_metrics
-      md5: b1bdb80faa8c728ea072632121a2f2d0.dir
-      size: 209927
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_embeddings_alignment
-      md5: 51798a8e9b7239c8833274dbf6644d1e.dir
-      size: 32293142
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_embeddings_metrics
-      md5: 63233cd4c3a18f791f228e0e385cdde1.dir
-      size: 561188
-      nfiles: 8155
-  pipeline_word_wer@16:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__result
-      md5: 9c63b061ac7763144bca121e163ee7aa.dir
-      size: 20658485
-      nfiles: 456
-    outs:
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_classic_alignment
-      md5: 4d4ae25d4fac2a52893e60389fcd2f9e.dir
-      size: 18462856
-      nfiles: 456
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_classic_metrics
-      md5: f209479afd2482800f1e75bdab9f83b2.dir
-      size: 15604
-      nfiles: 456
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_embeddings_alignment
-      md5: 41ad4c0923986a90fc9ed12aa6e30f73.dir
-      size: 38402586
-      nfiles: 456
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_embeddings_metrics
-      md5: df02c1f9d7e26ed10815175de086e0b6.dir
-      size: 24633
-      nfiles: 456
-  pipeline_word_wer@11:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
-      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
-      size: 5137721
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_classic_alignment
-      md5: 052fa8f97b2f9d5b8ca4175b4e92dfd1.dir
-      size: 3854678
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_classic_metrics
-      md5: d9dec56ef7ddb6a3bc03638e37d1c04c.dir
-      size: 23168
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_embeddings_alignment
-      md5: 069191de9f84f37303bc202968f26766.dir
-      size: 8069804
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_embeddings_metrics
-      md5: b45390641698e71fdf420fbb2afec7e0.dir
-      size: 53351
-      nfiles: 758
-  pipeline_word_wer@7:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_voicelab_cbiz
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result
-      md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir
-      size: 39158267
-      nfiles: 800
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_classic_alignment
-      md5: 37f0c27869efd72bf3eaedae70e426d5.dir
-      size: 83756423
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_classic_metrics
-      md5: 2fe3288abe85e4a385e2aefa0e8cad7e.dir
-      size: 27780
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_embeddings_alignment
-      md5: 01badff31f4d2f6583481c332fae8abc.dir
-      size: 174322727
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_embeddings_metrics
-      md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir
-      size: 56182
-      nfiles: 800
-  pipeline_word_wer@2:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/techmo__result
-      md5: 9030cf3640f2749d9c1b4439687bdc2f.dir
-      size: 7761880
-      nfiles: 8136
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/techmo__word_wer_classic_alignment
-      md5: 478503dcf96c6162f15466d5455b6271.dir
-      size: 15696317
-      nfiles: 8136
-    - path: experiment_data/pipeline/pl_common_voice/techmo__word_wer_classic_metrics
-      md5: 7b60102e7896228777eb92fdc2aaebc2.dir
-      size: 223467
-      nfiles: 8136
-    - path: experiment_data/pipeline/pl_common_voice/techmo__word_wer_embeddings_alignment
-      md5: 09126e1594234e97cd10858cbc06f4be.dir
-      size: 33111258
-      nfiles: 8136
-    - path: experiment_data/pipeline/pl_common_voice/techmo__word_wer_embeddings_metrics
-      md5: 4fdb7cea64f99b62b20a725929093d4d.dir
-      size: 571563
-      nfiles: 8136
-  pipeline_word_wer@14:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/ajn__result
-      md5: 653d65e186a7d05958ce3cbef219038c.dir
-      size: 6159899
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/ajn__word_wer_classic_alignment
-      md5: e25ae51f8dc1afe55d5c0b44a67ab8ee.dir
-      size: 20671277
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/ajn__word_wer_classic_metrics
-      md5: 18605657ff9c7ef3221e27b671a3b4d1.dir
-      size: 16835
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/ajn__word_wer_embeddings_alignment
-      md5: 6be0a1c035f4a84a9035bfba1458cdac.dir
-      size: 43099546
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/ajn__word_wer_embeddings_metrics
-      md5: 4f368d2ba1c5a54d5e3ab69a7581549e.dir
-      size: 19326
-      nfiles: 494
-  pipeline_word_wer@0:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/google__result
-      md5: afb53476cc93ef4de3591908df41fd2a.dir
-      size: 5854366
-      nfiles: 8143
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/google__word_wer_classic_alignment
-      md5: 0562ca30a9415b6a568430f34650f063.dir
-      size: 15821424
-      nfiles: 8143
-    - path: experiment_data/pipeline/pl_common_voice/google__word_wer_classic_metrics
-      md5: deafb9c70dd9bb48c57dd0802bd8c923.dir
-      size: 225225
-      nfiles: 8143
-    - path: experiment_data/pipeline/pl_common_voice/google__word_wer_embeddings_alignment
-      md5: 4b1952cbde7ce121c616f686459ef924.dir
-      size: 33384670
-      nfiles: 8143
-    - path: experiment_data/pipeline/pl_common_voice/google__word_wer_embeddings_metrics
-      md5: e2dcce4d029ffe904d363790da375fac.dir
-      size: 572632
-      nfiles: 8143
-  pipeline_word_wer@5:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_voicelab_cbiz
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result
-      md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir
-      size: 27432599
-      nfiles: 799
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_classic_alignment
-      md5: 184cdfdd465db609f514891b0330ef67.dir
-      size: 83997172
-      nfiles: 799
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_classic_metrics
-      md5: c12eab8acb3cee0219fc8046691b24ab.dir
-      size: 27841
-      nfiles: 799
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_embeddings_alignment
-      md5: 1e7ef6690d2e36f368eb02a8dcba9ab0.dir
-      size: 174732507
-      nfiles: 799
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_embeddings_metrics
-      md5: c15414f2ec9c0a1ab78199861b0b4ce0.dir
-      size: 55765
-      nfiles: 799
-  pipeline_word_wer@1:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result
-      md5: b0d0042d77d7adce37890ca63ad40091.dir
-      size: 19014997
-      nfiles: 8154
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__word_wer_classic_alignment
-      md5: 59beaa6beb3dc834c555376a5aeca7b4.dir
-      size: 15851643
-      nfiles: 8154
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__word_wer_classic_metrics
-      md5: ccb35fbf94c551765dca2f72babcb7cb.dir
-      size: 225545
-      nfiles: 8154
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__word_wer_embeddings_alignment
-      md5: a49f354af5d3f345fe2f85557f47931b.dir
-      size: 33444109
-      nfiles: 8154
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__word_wer_embeddings_metrics
-      md5: c829268cd43ce5af3f57be7db6d058ce.dir
-      size: 573538
-      nfiles: 8154
-  pipeline_word_wer@9:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__result
-      md5: 545e63a6daf9c46387c1d7d40b85499f.dir
-      size: 1413262
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_classic_alignment
-      md5: 6437952d0ab383e44acca5fa70d02e54.dir
-      size: 3981739
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_classic_metrics
-      md5: 5b84bc58a25d7d0a4d04e6d5e1abc966.dir
-      size: 22391
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_embeddings_alignment
-      md5: f7494807662a0e632c7ae790d85e6c19.dir
-      size: 8197217
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_embeddings_metrics
-      md5: 3ca4ecc9fec05a9bc00aad4f898751ae.dir
-      size: 43801
-      nfiles: 758
-  pipeline_word_wer@4:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__result
-      md5: 6310f1b24332777817738abe33676505.dir
-      size: 10121705
-      nfiles: 8155
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__word_wer_classic_alignment
-      md5: d21d7cdfe4d13ec9f6c3ae105528531a.dir
-      size: 16691829
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__word_wer_classic_metrics
-      md5: 278bdbc2fa64f8b9b8d59e0cac978d0e.dir
-      size: 213692
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__word_wer_embeddings_alignment
-      md5: e19eed8fb2f2cdad4a3f0ed4ec857fb2.dir
-      size: 34644290
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__word_wer_embeddings_metrics
-      md5: b169e4d35af45cb82fb4cf86d8d2aa79.dir
-      size: 553962
-      nfiles: 8155
-  pipeline_spacy_pos_wer@14:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/ajn__result
-      md5: 653d65e186a7d05958ce3cbef219038c.dir
-      size: 6159899
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/ajn__spacy_pos_alignment
-      md5: 0f310b8840f2d09ac558b9bac2a0d3b0.dir
-      size: 21936929
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/ajn__spacy_pos_metrics
-      md5: 6c6bb673ea5f64c9d851878c9d8a7c09.dir
-      size: 8444
-      nfiles: 494
-  pipeline_spacy_pos_wer@2:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/techmo__result
-      md5: 9030cf3640f2749d9c1b4439687bdc2f.dir
-      size: 7761880
-      nfiles: 8136
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_pos_alignment
-      md5: c0c77bf3030799c32241555bf4d71e6b.dir
-      size: 18192387
-      nfiles: 8136
-    - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_pos_metrics
-      md5: 61c69fcd287051f4ab7d1ffcc68a9aca.dir
-      size: 96845
-      nfiles: 8136
-  pipeline_spacy_pos_wer@16:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__result
-      md5: 0237d69dfa56d1a29a8bc43f464c4f1e.dir
-      size: 11789990
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__spacy_pos_alignment
-      md5: 95446f56d8b5c32ce540d04452b810f6.dir
-      size: 25245301
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__spacy_pos_metrics
-      md5: b7cd2c7b8a385a5c97af1efee94c970b.dir
-      size: 8562
-      nfiles: 500
-  pipeline_spacy_pos_wer@6:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_voicelab_cbiz
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result
-      md5: 7de1137f44fad26766da0fc309720160.dir
-      size: 22765926
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_pos_alignment
-      md5: 4f07fe60bf29e2148879db0c903c98f6.dir
-      size: 78539613
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_pos_metrics
-      md5: 7ec3bb7c838e7f06b8a1dbe7a68faac2.dir
-      size: 13753
-      nfiles: 800
-  pipeline_spacy_pos_wer@0:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/google__result
-      md5: afb53476cc93ef4de3591908df41fd2a.dir
-      size: 5854366
-      nfiles: 8143
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/google__spacy_pos_alignment
-      md5: d7f32c8741318563bdec3f35679cdc9a.dir
-      size: 18316770
-      nfiles: 8143
-    - path: experiment_data/pipeline/pl_common_voice/google__spacy_pos_metrics
-      md5: 117611317774e81fb482ba9c71ec806b.dir
-      size: 97235
-      nfiles: 8143
-  pipeline_spacy_pos_wer@11:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
-      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
-      size: 5137721
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_pos_alignment
-      md5: 3b2e3e75c644805681905c97ec37cf28.dir
-      size: 3737151
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_pos_metrics
-      md5: 782cc84e9116281dfc28734b2ae4a5ea.dir
-      size: 9004
-      nfiles: 758
-  pipeline_spacy_pos_wer@15:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__result
-      md5: 0e596570e1502b38588427bc72dcc006.dir
-      size: 9697519
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/techmo__spacy_pos_alignment
-      md5: a5cb7dd59d83b2ec678c6ba5177bc38f.dir
-      size: 20897599
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__spacy_pos_metrics
-      md5: 1478707020a96496b50eb732207c290e.dir
-      size: 8841
-      nfiles: 500
-  pipeline_spacy_pos_wer@12:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__result
-      md5: 7220584482e69b3a9f4b43fba97e97cf.dir
-      size: 2135360
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__spacy_pos_alignment
-      md5: 2f9237dcfa69ed025f290ca40681e28f.dir
-      size: 4382663
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__spacy_pos_metrics
-      md5: 15b44c2e516183028c14ee7cc9bf47e1.dir
-      size: 10407
-      nfiles: 758
-  pipeline_spacy_pos_wer@17:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__result
-      md5: 3f7a79298a5156fd2b023e673326e72f.dir
-      size: 985004
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/google__spacy_pos_alignment
-      md5: 26f0d02f96d867426ff26a358b596a4c.dir
-      size: 2660000
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__spacy_pos_metrics
-      md5: 44391a580b4a84f33a2fa0cbace4c5d6.dir
-      size: 5109
-      nfiles: 562
-  pipeline_spacy_pos_wer@1:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result
-      md5: b0d0042d77d7adce37890ca63ad40091.dir
-      size: 19014997
-      nfiles: 8154
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_pos_alignment
-      md5: caf518456afa1864c286cd5e15b9e563.dir
-      size: 18349138
-      nfiles: 8154
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_pos_metrics
-      md5: 71381fa7fd6c0cdba00f25c17fd6be5e.dir
-      size: 97426
-      nfiles: 8154
-  pipeline_spacy_pos_wer@7:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_voicelab_cbiz
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result
-      md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir
-      size: 39158267
-      nfiles: 800
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_pos_alignment
-      md5: 678171dbd7c038cd6eaadc5eb331b8b5.dir
-      size: 81650836
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_pos_metrics
-      md5: 11320499f29d2d7bfce68d35fb352b83.dir
-      size: 14334
-      nfiles: 800
-  pipeline_spacy_pos_wer@10:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
-      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
-      size: 1880403
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_pos_alignment
-      md5: d76701778be18566a4d3a45bc325196e.dir
-      size: 3707699
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_pos_metrics
-      md5: 9cae08bbb8a6331d06a33dbbb4a16301.dir
-      size: 9662
-      nfiles: 758
-  pipeline_spacy_pos_wer@13:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__result
-      md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir
-      size: 5346497
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/google__spacy_pos_alignment
-      md5: 460edffd716b1f46fb863e5b2b0d949e.dir
-      size: 19342263
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__spacy_pos_metrics
-      md5: a65dd7d74319da06f4ebaca08cde30ce.dir
-      size: 8659
-      nfiles: 500
-  pipeline_spacy_pos_wer@5:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_voicelab_cbiz
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result
-      md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir
-      size: 27432599
-      nfiles: 799
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_pos_alignment
-      md5: 22ccae20301046da3e40e93daa680d53.dir
-      size: 83052124
-      nfiles: 799
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_pos_metrics
-      md5: 43d79d47ba1e91e86daf1f66aa18c941.dir
-      size: 14239
-      nfiles: 799
-  pipeline_spacy_ner_wer@4:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__result
-      md5: 6310f1b24332777817738abe33676505.dir
-      size: 10121705
-      nfiles: 8155
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__spacy_ner_alignment
-      md5: b0057c661dc1197217535fc74c04bb37.dir
-      size: 20900270
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__spacy_ner_metrics
-      md5: 1c0279e4b2fc0e61314e7031b8d00dc1.dir
-      size: 93934
-      nfiles: 8155
-  pipeline_spacy_ner_wer@16:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__result
-      md5: 9c63b061ac7763144bca121e163ee7aa.dir
-      size: 20658485
-      nfiles: 456
-    outs:
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_ner_alignment
-      md5: 4530ad915e82cc9668e775d5de219b13.dir
-      size: 17577804
-      nfiles: 456
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_ner_metrics
-      md5: 0ad2dc0beeebd336771228f8751fe028.dir
-      size: 8014
-      nfiles: 456
-  pipeline_spacy_ner_wer@9:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__result
-      md5: 545e63a6daf9c46387c1d7d40b85499f.dir
-      size: 1413262
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_ner_alignment
-      md5: 34ddaf22d6ba136b8257344899902015.dir
-      size: 3675109
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_ner_metrics
-      md5: 9379479c875f0fe49d6da70c9b189d67.dir
-      size: 10663
-      nfiles: 758
-  pipeline_spacy_ner_wer@15:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__result
-      md5: 0e596570e1502b38588427bc72dcc006.dir
-      size: 9697519
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/techmo__spacy_ner_alignment
-      md5: c301698fa01d07cfb3efb16ffbb06b69.dir
-      size: 20451389
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__spacy_ner_metrics
-      md5: 06cea0926a325c92a1ff79457db655a8.dir
-      size: 8918
-      nfiles: 500
-  pipeline_spacy_ner_wer@1:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result
-      md5: b0d0042d77d7adce37890ca63ad40091.dir
-      size: 19014997
-      nfiles: 8154
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_ner_alignment
-      md5: f67a9bd9c25520328ed3c64f149aea17.dir
-      size: 17938087
-      nfiles: 8154
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_ner_metrics
-      md5: badc978fc37c2cc9e1c3cafd77dbd064.dir
-      size: 98038
-      nfiles: 8154
-  pipeline_asr_result@18:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_luna/pl_luna__whisper_tiny
-      md5: 1214905c22db4c9bd7e439e68b7551cc.dir
-      size: 6202142
-      nfiles: 500
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__result
-      md5: 0237d69dfa56d1a29a8bc43f464c4f1e.dir
-      size: 11789990
-      nfiles: 500
-  pipeline_asr_result@19:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_minds14/pl_minds14__google
-      md5: bb1acae0c3070424d635ae9d2c6e14f0.dir
-      size: 247940
-      nfiles: 562
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/google__result
-      md5: 3f7a79298a5156fd2b023e673326e72f.dir
-      size: 985004
-      nfiles: 562
-  pipeline_spacy_ner_wer@12:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__result
-      md5: 7220584482e69b3a9f4b43fba97e97cf.dir
-      size: 2135360
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__spacy_ner_alignment
-      md5: 194c42115d93fddc20442ef0dc592513.dir
-      size: 4268672
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__spacy_ner_metrics
-      md5: 7884b951c828e6874b72a6d83cfc5a31.dir
-      size: 10855
-      nfiles: 758
-  pipeline_spacy_ner_wer@13:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__result
-      md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir
-      size: 5346497
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/google__spacy_ner_alignment
-      md5: 84a872f90985c615648ce02ea8a6951c.dir
-      size: 18946509
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__spacy_ner_metrics
-      md5: 7df5fdeb083c8b1e953e1ff097c69f11.dir
-      size: 8752
-      nfiles: 500
-  pipeline_spacy_ner_wer@11:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
-      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
-      size: 5137721
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_ner_alignment
-      md5: d04f82c751e6ef28414b35ba4d844470.dir
-      size: 3660762
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_ner_metrics
-      md5: 308e3f0f67d855482d7d3634954f4b57.dir
-      size: 7553
-      nfiles: 758
-  pipeline_spacy_ner_wer@6:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_voicelab_cbiz
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result
-      md5: 7de1137f44fad26766da0fc309720160.dir
-      size: 22765926
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_ner_alignment
-      md5: 48a512d6811d00ec9dbe60e3131834ac.dir
-      size: 76044699
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_ner_metrics
-      md5: 61799b677b4d6af5e41c947c1d3b02cd.dir
-      size: 14196
-      nfiles: 800
-  pipeline_spacy_ner_wer@5:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_voicelab_cbiz
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result
-      md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir
-      size: 27432599
-      nfiles: 799
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_ner_alignment
-      md5: a83b6e086aa207da109fc06293501f52.dir
-      size: 80594165
-      nfiles: 799
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_ner_metrics
-      md5: 85388729ffbed2a328f94758c682c809.dir
-      size: 14708
-      nfiles: 799
-  pipeline_spacy_pos_wer@3:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/ajn__result
-      md5: 77d873041fe2952e3c45ee4ac6458061.dir
-      size: 6667841
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_pos_alignment
-      md5: f3b7dd0987fc7be5171478910c950657.dir
-      size: 19159060
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_pos_metrics
-      md5: 6094fb960e2eab979ecb33d40a253531.dir
-      size: 95146
-      nfiles: 8155
-  pipeline_spacy_ner_wer@0:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/google__result
-      md5: afb53476cc93ef4de3591908df41fd2a.dir
-      size: 5854366
-      nfiles: 8143
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/google__spacy_ner_alignment
-      md5: 0534b5b887fc62c82cb98fbac73f0e45.dir
-      size: 17894226
-      nfiles: 8143
-    - path: experiment_data/pipeline/pl_common_voice/google__spacy_ner_metrics
-      md5: da5556dd7ef27958f9ebabf8b71ab477.dir
-      size: 97917
-      nfiles: 8143
-  pipeline_spacy_pos_wer@9:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__result
-      md5: 545e63a6daf9c46387c1d7d40b85499f.dir
-      size: 1413262
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_pos_alignment
-      md5: 210ec7ffdde2c076fe2ab72873f240c7.dir
-      size: 3799802
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_pos_metrics
-      md5: 0d95a4abb4a33aef3e242a304fd58698.dir
-      size: 10569
-      nfiles: 758
-  pipeline_wikineiural_ner@11:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
-      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
-      size: 5137721
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__wikineural_ner_alignment
-      md5: 320e1321cb3db403a9ec0bb662abe52c.dir
-      size: 7828240
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__wikineural_ner_metrics
-      md5: 0738885f0f798e8bc0fbb5c9824d9db1.dir
-      size: 9704
-      nfiles: 758
-  pipeline_wikineiural_ner@8:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__result
-      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
-      size: 1377134
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/google__wikineural_ner_alignment
-      md5: 39e078edbc3f65934358787ddbe40eec.dir
-      size: 7572934
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__wikineural_ner_metrics
-      md5: f8b3c4183e31fc9e612b189579644f74.dir
-      size: 10796
-      nfiles: 758
-  pipeline_wikineiural_ner@14:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/ajn__result
-      md5: 653d65e186a7d05958ce3cbef219038c.dir
-      size: 6159899
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/ajn__wikineural_ner_alignment
-      md5: 52c8c06ce74db727c4aa5f55a2a865cd.dir
-      size: 37478125
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/ajn__wikineural_ner_metrics
-      md5: 602eaef0046a2a8b551bbbced7123886.dir
-      size: 8816
-      nfiles: 494
-  pipeline_spacy_ner_wer@10:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
-      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
-      size: 1880403
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_ner_alignment
-      md5: 628a77d036410a40bd358db2fc8ff739.dir
-      size: 3626842
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_ner_metrics
-      md5: cda4c4246e8b3b6d57aed57c4fe5594a.dir
-      size: 8311
-      nfiles: 758
-  pipeline_spacy_ner_wer@3:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/ajn__result
-      md5: 77d873041fe2952e3c45ee4ac6458061.dir
-      size: 6667841
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_ner_alignment
-      md5: 4b5e01b54c2ce8118580ad02d6dffca1.dir
-      size: 18576865
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_ner_metrics
-      md5: 15078a7515bc939d08d29d079311b37d.dir
-      size: 91570
-      nfiles: 8155
-  pipeline_spacy_ner_wer@2:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/techmo__result
-      md5: 9030cf3640f2749d9c1b4439687bdc2f.dir
-      size: 7761880
-      nfiles: 8136
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_ner_alignment
-      md5: fb116356575fb0589d58449fbf1377f7.dir
-      size: 17774086
-      nfiles: 8136
-    - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_ner_metrics
-      md5: 45eaaef226809a58ecf9180a47dec8aa.dir
-      size: 97994
-      nfiles: 8136
-  pipeline_wikineiural_ner@3:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/ajn__result
-      md5: 77d873041fe2952e3c45ee4ac6458061.dir
-      size: 6667841
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/ajn__wikineural_ner_alignment
-      md5: 493bf2c6d82f18a117f2ae6d438d5c30.dir
-      size: 36352970
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/ajn__wikineural_ner_metrics
-      md5: 2275d851a39aafa755474fe79b16b337.dir
-      size: 117588
-      nfiles: 8155
-  pipeline_spacy_pos_wer@8:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__result
-      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
-      size: 1377134
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_pos_alignment
-      md5: 3429e870a0541d01eb85b0a34fd16021.dir
-      size: 3712618
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_pos_metrics
-      md5: 56dddb48cea2022b91fd4323efd43a8b.dir
-      size: 9213
-      nfiles: 758
-  pipeline_wikineiural_ner@1:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result
-      md5: b0d0042d77d7adce37890ca63ad40091.dir
-      size: 19014997
-      nfiles: 8154
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__wikineural_ner_alignment
-      md5: 9613a3fcb28ead3d3d39a16180ccfab8.dir
-      size: 37574594
-      nfiles: 8154
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__wikineural_ner_metrics
-      md5: 59ff9a09d1e232c66bd41afa757f9130.dir
-      size: 118556
-      nfiles: 8154
-  pipeline_flair_upos@13:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__result
-      md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir
-      size: 5346497
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/google__flair_upos_alignment
-      md5: 8a23016ee1f269beec30232dff1f751b.dir
-      size: 19210117
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__flair_upos_metrics
-      md5: 891ec9646dea43249aa9cb4eb562b2de.dir
-      size: 8654
-      nfiles: 500
-  pipeline_flair_upos@0:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/google__result
-      md5: afb53476cc93ef4de3591908df41fd2a.dir
-      size: 5854366
-      nfiles: 8143
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/google__flair_upos_alignment
-      md5: 169eb8cd967ea75404bd3bd7f5a41b5e.dir
-      size: 17519056
-      nfiles: 8143
-    - path: experiment_data/pipeline/pl_common_voice/google__flair_upos_metrics
-      md5: 768158db51050c79dd588c8b32e5b89e.dir
-      size: 95486
-      nfiles: 8143
-  pipeline_wikineiural_ner@13:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__result
-      md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir
-      size: 5346497
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/google__wikineural_ner_alignment
-      md5: dbfd406215b6d3b8ed5afcceb92034cb.dir
-      size: 34436914
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__wikineural_ner_metrics
-      md5: d4ca8091dac498b609dca9fda6d22f07.dir
-      size: 8807
-      nfiles: 500
-  pipeline_flair_upos@12:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__result
-      md5: 7220584482e69b3a9f4b43fba97e97cf.dir
-      size: 2135360
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__flair_upos_alignment
-      md5: fc0957cb3c17b47cea93b135220ac9ba.dir
-      size: 4233187
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__flair_upos_metrics
-      md5: 76e4bd62eb3a4eb3a34638683f06ba84.dir
-      size: 10449
-      nfiles: 758
-  pipeline_wikineiural_ner@10:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
-      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
-      size: 1880403
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__wikineural_ner_alignment
-      md5: 2fdab231aaf8c96a97268d6447f3ca1e.dir
-      size: 7640523
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__wikineural_ner_metrics
-      md5: 14d453272229bc7b8d7c4eb2790e6b32.dir
-      size: 10705
-      nfiles: 758
-  pipeline_wikineiural_ner@2:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/techmo__result
-      md5: 9030cf3640f2749d9c1b4439687bdc2f.dir
-      size: 7761880
-      nfiles: 8136
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/techmo__wikineural_ner_alignment
-      md5: 40094adf33d391cd6abb6199c326dfd7.dir
-      size: 37097148
-      nfiles: 8136
-    - path: experiment_data/pipeline/pl_common_voice/techmo__wikineural_ner_metrics
-      md5: 4570a295cf89a779de66565b0b9a77ad.dir
-      size: 118544
-      nfiles: 8136
-  pipeline_flair_upos@11:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
-      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
-      size: 5137721
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__flair_upos_alignment
-      md5: 07ec971011a67b5e3ad7822fc717612c.dir
-      size: 3720804
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__flair_upos_metrics
-      md5: 642857d546ab39ca2d680fda663eab38.dir
-      size: 7889
-      nfiles: 758
-  pipeline_wikineiural_ner@4:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__result
-      md5: 6310f1b24332777817738abe33676505.dir
-      size: 10121705
-      nfiles: 8155
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__wikineural_ner_alignment
-      md5: da4492daece0b76c95ea363246ff5414.dir
-      size: 39870020
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__wikineural_ner_metrics
-      md5: b7b319a512c38ab0ee0ef5c347b7b80e.dir
-      size: 113084
-      nfiles: 8155
-  pipeline_flair_upos@9:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__result
-      md5: 545e63a6daf9c46387c1d7d40b85499f.dir
-      size: 1413262
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__flair_upos_alignment
-      md5: be13715f9aed5232f08da3be9095a797.dir
-      size: 3675596
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__flair_upos_metrics
-      md5: 27bb131921ad52d2235aeb2b7befe4d1.dir
-      size: 10590
-      nfiles: 758
-  pipeline_wikineiural_ner@9:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__result
-      md5: 545e63a6daf9c46387c1d7d40b85499f.dir
-      size: 1413262
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__wikineural_ner_alignment
-      md5: 83f073c2166a9a2fcfc6dc4d2268efb4.dir
-      size: 7530856
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__wikineural_ner_metrics
-      md5: 7def95d54e9428f71fa43a5b9a95b54b.dir
-      size: 12379
-      nfiles: 758
-  pipeline_flair_upos@14:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/ajn__result
-      md5: 653d65e186a7d05958ce3cbef219038c.dir
-      size: 6159899
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/ajn__flair_upos_alignment
-      md5: 8aa7d2eeb9deb1bde3394ab5df9c15a0.dir
-      size: 21838304
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/ajn__flair_upos_metrics
-      md5: 4ccf8f54da1b47d0a9e0e7c95020135e.dir
-      size: 8450
-      nfiles: 494
-  pipeline_wikineiural_ner@0:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/google__result
-      md5: afb53476cc93ef4de3591908df41fd2a.dir
-      size: 5854366
-      nfiles: 8143
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/google__wikineural_ner_alignment
-      md5: 1adc1718f35e14b2aae8de6c6a49de1b.dir
-      size: 37254218
-      nfiles: 8143
-    - path: experiment_data/pipeline/pl_common_voice/google__wikineural_ner_metrics
-      md5: a8b91d2203368be020389a8a02ecbb7b.dir
-      size: 117584
-      nfiles: 8143
-  pipeline_wikineiural_ner@12:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__result
-      md5: 7220584482e69b3a9f4b43fba97e97cf.dir
-      size: 2135360
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__wikineural_ner_alignment
-      md5: 1e3b4afa928b74c1d58b0445bdc15a68.dir
-      size: 8311448
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__wikineural_ner_metrics
-      md5: 255cb7f51e1075cac6a2fbc65cbeb5fc.dir
-      size: 12402
-      nfiles: 758
-  pipeline_wikineiural_ner@5:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_voicelab_cbiz
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result
-      md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir
-      size: 27432599
-      nfiles: 799
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__wikineural_ner_alignment
-      md5: f977050b1f0445e9997f902f91634976.dir
-      size: 144157380
-      nfiles: 799
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__wikineural_ner_metrics
-      md5: 9a9056d774f8e901fa2beaa9a874b0a2.dir
-      size: 14800
-      nfiles: 799
-  pipeline_flair_upos@3:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/ajn__result
-      md5: 77d873041fe2952e3c45ee4ac6458061.dir
-      size: 6667841
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/ajn__flair_upos_alignment
-      md5: d8fd6968dfa66671d8fb8cc609782287.dir
-      size: 17420454
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/ajn__flair_upos_metrics
-      md5: 619e9209626ba47702db03fa56670078.dir
-      size: 93408
-      nfiles: 8155
-  pipeline_wikineiural_ner@7:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_voicelab_cbiz
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result
-      md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir
-      size: 39158267
-      nfiles: 800
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__wikineural_ner_alignment
-      md5: fc4d6251b6c8d8253b99aa87d4c7a189.dir
-      size: 144096029
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__wikineural_ner_metrics
-      md5: dca61a52df606b9c3b510007a48c53ee.dir
-      size: 14810
-      nfiles: 800
-  pipeline_spacy_ner_wer@8:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__result
-      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
-      size: 1377134
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_ner_alignment
-      md5: 67ea10ecbb9f5c3bfa14ffb85c843ba3.dir
-      size: 3638477
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_ner_metrics
-      md5: 0532a9596f26fd52037b6ecaa838ab9e.dir
-      size: 8500
-      nfiles: 758
-  pipeline_flair_upos@7:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_voicelab_cbiz
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result
-      md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir
-      size: 39158267
-      nfiles: 800
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__flair_upos_alignment
-      md5: b9435b169d923e56d45be4e3a489358d.dir
-      size: 81635416
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__flair_upos_metrics
-      md5: 00f24bd9e609cc89f29cb6c3cc8dba48.dir
-      size: 14348
-      nfiles: 800
-  pipeline_flair_upos@4:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__result
-      md5: 6310f1b24332777817738abe33676505.dir
-      size: 10121705
-      nfiles: 8155
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__flair_upos_alignment
-      md5: 147573fff8b9edf9df71f16aedd2646a.dir
-      size: 19643758
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__flair_upos_metrics
-      md5: 2f2b9f5e1e48ad1268d75099c357b26c.dir
-      size: 90154
-      nfiles: 8155
-  pipeline_spacy_pos_wer@4:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__result
-      md5: 6310f1b24332777817738abe33676505.dir
-      size: 10121705
-      nfiles: 8155
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__spacy_pos_alignment
-      md5: ccb3410d235e8cf849e09a9ebef528bf.dir
-      size: 21551242
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__spacy_pos_metrics
-      md5: f43c1cd06a76f9fa8f684a3d22b2ff48.dir
-      size: 94586
-      nfiles: 8155
-  pipeline_flair_upos@1:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result
-      md5: b0d0042d77d7adce37890ca63ad40091.dir
-      size: 19014997
-      nfiles: 8154
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__flair_upos_alignment
-      md5: c2de0794eab6b838cb889cd218c65651.dir
-      size: 17543992
-      nfiles: 8154
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__flair_upos_metrics
-      md5: 5ec6a253f5152fb5215f5d4a1243ca4e.dir
-      size: 95798
-      nfiles: 8154
-  pipeline_flair_upos@5:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_voicelab_cbiz
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result
-      md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir
-      size: 27432599
-      nfiles: 799
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__flair_upos_alignment
-      md5: 3446f07e21780b9319c2c22d88767dfc.dir
-      size: 81897055
-      nfiles: 799
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__flair_upos_metrics
-      md5: 06fc109e28016f32d68edf992b4c3072.dir
-      size: 14350
-      nfiles: 799
-  pipeline_wikineiural_ner@6:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_voicelab_cbiz
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result
-      md5: 7de1137f44fad26766da0fc309720160.dir
-      size: 22765926
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__wikineural_ner_alignment
-      md5: 3363be14202fdb2577c8ddfce6549751.dir
-      size: 132627376
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__wikineural_ner_metrics
-      md5: b83cd24583e6fa648f4e106239e1ffcd.dir
-      size: 14486
-      nfiles: 800
-  pipeline_flair_upos@6:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_voicelab_cbiz
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result
-      md5: 7de1137f44fad26766da0fc309720160.dir
-      size: 22765926
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__flair_upos_alignment
-      md5: d9c2a02375a3f9974ddf0ec4d64297b7.dir
-      size: 78332367
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__flair_upos_metrics
-      md5: 42f292cac09f5efc71215ac0f9bbf760.dir
-      size: 13841
-      nfiles: 800
-  pipeline_flair_upos@8:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__result
-      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
-      size: 1377134
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/google__flair_upos_alignment
-      md5: 7a58a3caced592a9f00af9515eb92413.dir
-      size: 3696853
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__flair_upos_metrics
-      md5: 680491a1374081f637ade245e79ca627.dir
-      size: 9562
-      nfiles: 758
-  pipeline_spacy_ner_wer@7:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_voicelab_cbiz
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result
-      md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir
-      size: 39158267
-      nfiles: 800
-    outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_ner_alignment
-      md5: 3c7bf6f59153f0eef4685795dc8c1246.dir
-      size: 79503990
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_ner_metrics
-      md5: f0dd0a8adb67c53d83ad32d152f90365.dir
-      size: 14595
-      nfiles: 800
-  pipeline_spacy_ner_wer@14:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/ajn__result
-      md5: 653d65e186a7d05958ce3cbef219038c.dir
-      size: 6159899
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/ajn__spacy_ner_alignment
-      md5: 68434e48e2e6652ae6c9b17f9eb6fa05.dir
-      size: 21154450
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/ajn__spacy_ner_metrics
-      md5: 288d81ffe41a25d5ab85bbb3ef908979.dir
-      size: 8703
-      nfiles: 494
-  pipeline_flair_upos@10:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
-      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
-      size: 1880403
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__flair_upos_alignment
-      md5: 527561b0790917cc03d782d5bf074294.dir
-      size: 3695215
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__flair_upos_metrics
-      md5: 06a349eee2ca119d3e79bbdbe5c95ef1.dir
-      size: 9384
-      nfiles: 758
-  pipeline_flair_upos@2:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/techmo__result
-      md5: 9030cf3640f2749d9c1b4439687bdc2f.dir
-      size: 7761880
-      nfiles: 8136
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/techmo__flair_upos_alignment
-      md5: b2f57705be67ced0652ce670e1dde2cd.dir
-      size: 17397042
-      nfiles: 8136
-    - path: experiment_data/pipeline/pl_common_voice/techmo__flair_upos_metrics
-      md5: 09659b49a5c99187bf64cf922138a7c1.dir
-      size: 95391
-      nfiles: 8136
-  pipeline_spacy_ner_wer@17:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__result
-      md5: 0237d69dfa56d1a29a8bc43f464c4f1e.dir
-      size: 11789990
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__spacy_ner_alignment
-      md5: 9c5a0efcc57695b5bdfb4f2ed6d1ce82.dir
-      size: 24437639
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__spacy_ner_metrics
-      md5: c0b89a3966675c348e9601bc68bfda11.dir
-      size: 8763
-      nfiles: 500
-  pipeline_wikineiural_ner@16:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__result
-      md5: 9c63b061ac7763144bca121e163ee7aa.dir
-      size: 20658485
-      nfiles: 456
-    outs:
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__wikineural_ner_alignment
-      md5: 4a1ad7d9221851e3735f1c3c2238862b.dir
-      size: 33934003
-      nfiles: 456
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__wikineural_ner_metrics
-      md5: 0368dfd54954d3aa317e359f32efe59e.dir
-      size: 8244
-      nfiles: 456
-  pipeline_flair_upos@17:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__result
-      md5: 0237d69dfa56d1a29a8bc43f464c4f1e.dir
-      size: 11789990
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__flair_upos_alignment
-      md5: b30b207caed04dd1be8c79313124a6ac.dir
-      size: 24799648
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__flair_upos_metrics
-      md5: ed1ff9fa572dd43e0229726b3be66c2d.dir
-      size: 8494
-      nfiles: 500
-  pipeline_wikineiural_ner@17:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__result
-      md5: 0237d69dfa56d1a29a8bc43f464c4f1e.dir
-      size: 11789990
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__wikineural_ner_alignment
-      md5: 2ce8893b2a29cb7c4907636e55a962ba.dir
-      size: 40512588
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__wikineural_ner_metrics
-      md5: 0dc6ffc41d0711297133dd93f4937b6f.dir
-      size: 8949
-      nfiles: 500
-  pipeline_flair_upos@15:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__result
-      md5: 0e596570e1502b38588427bc72dcc006.dir
-      size: 9697519
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/techmo__flair_upos_alignment
-      md5: 4319f089e3987d68b4622f864c17ad6c.dir
-      size: 20898162
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__flair_upos_metrics
-      md5: 0063866f11c1d82a4ba430f14c81ed8f.dir
-      size: 8844
-      nfiles: 500
-  pipeline_wikineiural_ner@15:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__result
-      md5: 0e596570e1502b38588427bc72dcc006.dir
-      size: 9697519
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/techmo__wikineural_ner_alignment
-      md5: 6819012ed6e05015753687b12eea426c.dir
-      size: 37042138
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__wikineural_ner_metrics
-      md5: 47cd0bc5270bf7e3300c91f211b7a6f8.dir
-      size: 9011
-      nfiles: 500
-  pipeline_flair_upos@16:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__result
-      md5: 9c63b061ac7763144bca121e163ee7aa.dir
-      size: 20658485
-      nfiles: 456
-    outs:
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__flair_upos_alignment
-      md5: ac11e3b8f28fd268a8bb90f851bc7b5b.dir
-      size: 17899335
-      nfiles: 456
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__flair_upos_metrics
-      md5: 56807b890f4abcb9c41f60ff72a2e6aa.dir
-      size: 8051
-      nfiles: 456
-  pipeline_spacy_tag_wer@4:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__result
-      md5: 6310f1b24332777817738abe33676505.dir
-      size: 10121705
-      nfiles: 8155
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__spacy_dep_tag_alignment
-      md5: a0ad9d2e145bdd55da93137bcbd89ca0.dir
-      size: 21688710
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__spacy_dep_tag_metrics
-      md5: efb4cd381d95cfdbe40b09cdc251e3bc.dir
-      size: 95585
-      nfiles: 8155
-  pipeline_spacy_tag_wer@17:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__result
-      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
-      size: 1336305
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_dep_tag_alignment
-      md5: 66efb4e5647eda2e2ab3116445bdf9b5.dir
-      size: 2666883
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_dep_tag_metrics
-      md5: 15c9ef006daec951119079da2794dcad.dir
-      size: 6736
-      nfiles: 562
-  pipeline_spacy_tag_wer@10:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__result
-      md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir
-      size: 5346497
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/google__spacy_dep_tag_alignment
-      md5: 5fc24c54101bce2e858b08f4c47e0667.dir
-      size: 19568605
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__spacy_dep_tag_metrics
-      md5: f64735e07b7c460895d1ccf8e4d0884c.dir
-      size: 8466
-      nfiles: 500
-  pipeline_spacy_tag_wer@0:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/google__result
-      md5: afb53476cc93ef4de3591908df41fd2a.dir
-      size: 5854366
-      nfiles: 8143
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/google__spacy_dep_tag_alignment
-      md5: f043c22d203a7efd123232f1a2a6b4ad.dir
-      size: 18474982
-      nfiles: 8143
-    - path: experiment_data/pipeline/pl_common_voice/google__spacy_dep_tag_metrics
-      md5: f12ee96b46679884f65c59fef8ce74ea.dir
-      size: 96519
-      nfiles: 8143
-  pipeline_spacy_tag_wer@18:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
-      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
-      size: 3523907
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_dep_tag_alignment
-      md5: fd108bf3d67c339ebc12a0965a6e4c18.dir
-      size: 2752423
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_dep_tag_metrics
-      md5: 88f7aef65d580d59cdc78610dd98e616.dir
-      size: 7100
-      nfiles: 562
-  pipeline_spacy_tag_wer@15:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__result
-      md5: 3f7a79298a5156fd2b023e673326e72f.dir
-      size: 985004
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/google__spacy_dep_tag_alignment
-      md5: 76be498f539e45e5650695e29f22b436.dir
-      size: 2685018
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__spacy_dep_tag_metrics
-      md5: 96aea021765fd076fc534e9f09b29037.dir
-      size: 5117
-      nfiles: 562
-  pipeline_spacy_tag_wer@6:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__result
-      md5: 545e63a6daf9c46387c1d7d40b85499f.dir
-      size: 1413262
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_dep_tag_alignment
-      md5: 8ec2e9fc88d4b8ce5032bf809c1c025f.dir
-      size: 3799802
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_dep_tag_metrics
-      md5: 0d95a4abb4a33aef3e242a304fd58698.dir
-      size: 10569
-      nfiles: 758
-  pipeline_spacy_tag_wer@14:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__result
-      md5: 0237d69dfa56d1a29a8bc43f464c4f1e.dir
-      size: 11789990
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__spacy_dep_tag_alignment
-      md5: 639560135c84aab006b3c08f9d3bd77e.dir
-      size: 25464153
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/whisper_tiny__spacy_dep_tag_metrics
-      md5: f28d26d045db695e6b236abe5a90ed4f.dir
-      size: 8555
-      nfiles: 500
-  pipeline_spacy_tag_wer@16:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/ajn__result
-      md5: 4094dd4b22895a0a4ce82793cd6a8b0f.dir
-      size: 974727
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_dep_tag_alignment
-      md5: 7635cf48e907c8d2939fa9ef6870cd6a.dir
-      size: 3200084
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_dep_tag_metrics
-      md5: 54a8912bab4cd197f01719d5340ef7e9.dir
-      size: 6867
-      nfiles: 559
-  pipeline_spacy_tag_wer@5:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__result
-      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
-      size: 1377134
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_dep_tag_alignment
-      md5: c15aa30b165152fac6813cd092763242.dir
-      size: 3712618
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_dep_tag_metrics
-      md5: 56dddb48cea2022b91fd4323efd43a8b.dir
-      size: 9213
-      nfiles: 758
-  pipeline_spacy_tag_wer@2:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/techmo__result
-      md5: 9030cf3640f2749d9c1b4439687bdc2f.dir
-      size: 7761880
-      nfiles: 8136
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_dep_tag_alignment
-      md5: 7f52c28043e0a7b311bd39e877998834.dir
-      size: 18352367
-      nfiles: 8136
-    - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_dep_tag_metrics
-      md5: 3d8555026f88adec199bcb380e30a0f0.dir
-      size: 96139
-      nfiles: 8136
-  pipeline_wikineiural_ner@18:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__result
-      md5: 3f7a79298a5156fd2b023e673326e72f.dir
-      size: 985004
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/google__wikineural_ner_alignment
-      md5: 539cc438ec67099bb95f58ccdde9d42f.dir
-      size: 5692456
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__wikineural_ner_metrics
-      md5: 0385f2138fd318df8852a7e38c1770ac.dir
-      size: 5322
-      nfiles: 562
-  pipeline_spacy_tag_wer@8:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
-      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
-      size: 5137721
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_dep_tag_alignment
-      md5: 8503661e0ee89ff96690e245f3144807.dir
-      size: 3772188
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_dep_tag_metrics
-      md5: be82f4400be9dd5f01a7a4f73c357b1c.dir
-      size: 9083
-      nfiles: 758
-  pipeline_word_wer@18:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__result
-      md5: 3f7a79298a5156fd2b023e673326e72f.dir
-      size: 985004
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/google__word_wer_classic_alignment
-      md5: 9259147092c1555cd087a4fc4f428b2b.dir
-      size: 2725626
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__word_wer_classic_metrics
-      md5: 41621d3845a0035ee77352902c46a186.dir
-      size: 15012
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__word_wer_embeddings_alignment
-      md5: 07eff4b993990ceb973120d4515e3b9e.dir
-      size: 5493309
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__word_wer_embeddings_metrics
-      md5: e9ff0ac8eac5389bfc6ea7b13cd94a9c.dir
-      size: 31258
-      nfiles: 562
-  pipeline_spacy_tag_wer@12:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__result
-      md5: 0e596570e1502b38588427bc72dcc006.dir
-      size: 9697519
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/techmo__spacy_dep_tag_alignment
-      md5: baefcd5dfadd9c62d6fc71ba0ac31fa9.dir
-      size: 20897599
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__spacy_dep_tag_metrics
-      md5: 1478707020a96496b50eb732207c290e.dir
-      size: 8841
-      nfiles: 500
-  pipeline_spacy_tag_wer@3:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/ajn__result
-      md5: 77d873041fe2952e3c45ee4ac6458061.dir
-      size: 6667841
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_dep_tag_alignment
-      md5: a735ec3c634bbe034cb67f7a54fb0d2f.dir
-      size: 19294281
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_dep_tag_metrics
-      md5: 516d26ee39867a1166c51edb014ad897.dir
-      size: 94253
-      nfiles: 8155
-  pipeline_spacy_tag_wer@11:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/ajn__result
-      md5: 653d65e186a7d05958ce3cbef219038c.dir
-      size: 6159899
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/ajn__spacy_dep_tag_alignment
-      md5: 034d072825c711a824f1280f4a390f74.dir
-      size: 21936929
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/ajn__spacy_dep_tag_metrics
-      md5: 6c6bb673ea5f64c9d851878c9d8a7c09.dir
-      size: 8444
-      nfiles: 494
-  pipeline_spacy_tag_wer@9:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__result
-      md5: 7220584482e69b3a9f4b43fba97e97cf.dir
-      size: 2135360
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__spacy_dep_tag_alignment
-      md5: ce259eeea86c4eb2188170f3afc3d20b.dir
-      size: 4422013
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__spacy_dep_tag_metrics
-      md5: ae86609116018717054c6eac6e17a668.dir
-      size: 10339
-      nfiles: 758
-  pipeline_spacy_tag_wer@1:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_common_voice
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
-      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
-      size: 4093545
-      nfiles: 8155
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result
-      md5: b0d0042d77d7adce37890ca63ad40091.dir
-      size: 19014997
-      nfiles: 8154
-    outs:
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_dep_tag_alignment
-      md5: 09fbe03eafa4948e0d3009ef392e9c40.dir
-      size: 18505763
-      nfiles: 8154
-    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_dep_tag_metrics
-      md5: d1bc1925fe39ccb98e8bb085a1b1b24f.dir
-      size: 96041
-      nfiles: 8154
-  pipeline_flair_upos@18:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__result
-      md5: 3f7a79298a5156fd2b023e673326e72f.dir
-      size: 985004
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/google__flair_upos_alignment
-      md5: 531ca099d7ca7667993016c9237fb875.dir
-      size: 2641461
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__flair_upos_metrics
-      md5: 9f97126702b6268ce448649d4d7a3666.dir
-      size: 5000
-      nfiles: 562
-  pipeline_spacy_pos_wer@18:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/ajn__result
-      md5: 4094dd4b22895a0a4ce82793cd6a8b0f.dir
-      size: 974727
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_pos_alignment
-      md5: e9d82d54f5f44e6bfe9da4af3954f20d.dir
-      size: 3180963
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_pos_metrics
-      md5: a596f37b4eb1b7f3d04269fc1adb8a26.dir
-      size: 7003
-      nfiles: 559
-  pipeline_spacy_tag_wer@13:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__result
-      md5: 9c63b061ac7763144bca121e163ee7aa.dir
-      size: 20658485
-      nfiles: 456
-    outs:
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_dep_tag_alignment
-      md5: 6716464936f4f35ba81a43eb2c2f37b0.dir
-      size: 17967467
-      nfiles: 456
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_dep_tag_metrics
-      md5: 7848ddff997fd231f3857ff30dfd7154.dir
-      size: 7940
-      nfiles: 456
-  pipeline_spacy_tag_wer@7:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
-      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
-      size: 1880403
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_dep_tag_alignment
-      md5: 997d8e36cd023245065af9c1c3db1d72.dir
-      size: 3743812
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_dep_tag_metrics
-      md5: 386a8988937349d2ab69a4a335d0d270.dir
-      size: 9672
-      nfiles: 758
-  pipeline_spacy_ner_wer@18:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__result
-      md5: 3f7a79298a5156fd2b023e673326e72f.dir
-      size: 985004
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/google__spacy_ner_alignment
-      md5: 42116658821e33fc3360d8957bf68354.dir
-      size: 2599522
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__spacy_ner_metrics
-      md5: 4e363aa38b90467de244a923ddc14d98.dir
-      size: 4544
-      nfiles: 562
-  pipeline_asr_result@22:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_minds14/pl_minds14__wav2vec2
-      md5: 6174de2a0578f456aae4e4e796dec805.dir
-      size: 2755451
-      nfiles: 562
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
-      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
-      size: 3523907
-      nfiles: 562
-  pipeline_asr_result@21:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_minds14/pl_minds14__techmo
-      md5: 5fd62d52b1a39db98d86ff0a11dcfd52.dir
-      size: 612777
-      nfiles: 562
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/techmo__result
-      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
-      size: 1336305
-      nfiles: 562
-  pipeline_asr_result@20:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_minds14/pl_minds14__ajn
-      md5: 41b35b832b72e132575e1c8bc777682f.dir
-      size: 205731
-      nfiles: 562
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/ajn__result
-      md5: 4094dd4b22895a0a4ce82793cd6a8b0f.dir
-      size: 974727
-      nfiles: 559
-  pipeline_asr_result@23:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_asr.py
-      md5: efe378505897550fe1a8d423c321ef53
-      size: 1403
-    - path: experiment_data/cached_asr/pl_minds14/pl_minds14__whisper_tiny
-      md5: 602bbd1622c4d9c1f147bafa16cd5811.dir
-      size: 845870
-      nfiles: 562
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__result
-      md5: 2b54257ba731e5aba5c9264293e6c70d.dir
-      size: 1631174
-      nfiles: 562
-  pipeline_wikineiural_ner@21:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
-      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
-      size: 3523907
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__wikineural_ner_alignment
-      md5: 926f05dfd25a3c0f1a35da955261f73b.dir
-      size: 5895431
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__wikineural_ner_metrics
-      md5: 6535a4779ab3be38804639e16cbbe70f.dir
-      size: 8602
-      nfiles: 562
-  pipeline_flair_upos@19:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/ajn__result
-      md5: 4094dd4b22895a0a4ce82793cd6a8b0f.dir
-      size: 974727
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/ajn__flair_upos_alignment
-      md5: 482f5c68c013bf1547b58dc470ba4cc5.dir
-      size: 3084431
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/ajn__flair_upos_metrics
-      md5: a7791cb5b7b405e98b12834e1146d1bb.dir
-      size: 7147
-      nfiles: 559
-  pipeline_spacy_ner_wer@21:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
-      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
-      size: 3523907
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_ner_alignment
-      md5: c29a8e30d0dfbf6591d62ea3884f0dc1.dir
-      size: 2653254
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_ner_metrics
-      md5: 353ba9400e092ad54b9ee26a39e44df3.dir
-      size: 6036
-      nfiles: 562
-  pipeline_spacy_pos_wer@20:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
-      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
-      size: 3523907
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_pos_alignment
-      md5: f594e5f289f7916ad6a70f81ff4677f8.dir
-      size: 2723830
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_pos_metrics
-      md5: 9cb542718053e6ff28d9e79b813c664e.dir
-      size: 7001
-      nfiles: 562
-  pipeline_spacy_tag_wer@19:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
-      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
-      size: 1489
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__result
-      md5: 2b54257ba731e5aba5c9264293e6c70d.dir
-      size: 1631174
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__spacy_dep_tag_alignment
-      md5: 9e828d1150d2351dc0b84a5294849da3.dir
-      size: 3281307
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__spacy_dep_tag_metrics
-      md5: 7ba8d5c7cd580cd8ddaba742ddd1b60a.dir
-      size: 6749
-      nfiles: 562
-  pipeline_spacy_ner_wer@19:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/ajn__result
-      md5: 4094dd4b22895a0a4ce82793cd6a8b0f.dir
-      size: 974727
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_ner_alignment
-      md5: 57c9eb8cfc9ce3e89e169248a8d46a78.dir
-      size: 3070194
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_ner_metrics
-      md5: 36cca512956bd2224ef86d126148272d.dir
-      size: 7332
-      nfiles: 559
-  pipeline_wikineiural_ner@20:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__result
-      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
-      size: 1336305
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/techmo__wikineural_ner_alignment
-      md5: ff67cff35bc6b045895ad3e14613ab39.dir
-      size: 5641778
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__wikineural_ner_metrics
-      md5: 1025b67d8420594920a3cef4ed5c1a6a.dir
-      size: 8339
-      nfiles: 562
-  pipeline_flair_upos@20:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__result
-      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
-      size: 1336305
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/techmo__flair_upos_alignment
-      md5: 348e7c7e61df45225b6c28f835a1f1ec.dir
-      size: 2632853
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__flair_upos_metrics
-      md5: eda987cf12cfa026c8906eaa434e6090.dir
-      size: 6285
-      nfiles: 562
-  pipeline_wikineiural_ner@19:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/ajn__result
-      md5: 4094dd4b22895a0a4ce82793cd6a8b0f.dir
-      size: 974727
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/ajn__wikineural_ner_alignment
-      md5: f9d5e3fce4551db9fa1d3ba32df06ee0.dir
-      size: 5689217
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/ajn__wikineural_ner_metrics
-      md5: 6e1b50f31e4bd4f264e6a6a91da6cb05.dir
-      size: 8593
-      nfiles: 559
-  pipeline_flair_upos@22:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__result
-      md5: 2b54257ba731e5aba5c9264293e6c70d.dir
-      size: 1631174
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__flair_upos_alignment
-      md5: 9eb00b7f2b7ad96bd458d2b10db04397.dir
-      size: 3174118
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__flair_upos_metrics
-      md5: b2667614ba07f82acb821c6a303380c8.dir
-      size: 7013
-      nfiles: 562
-  pipeline_word_wer@19:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14
-      --asr=ajn
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/ajn__result
-      md5: 4094dd4b22895a0a4ce82793cd6a8b0f.dir
-      size: 974727
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_classic_alignment
-      md5: f5fd8a87dfcbf4e998b9a1d215186921.dir
-      size: 2851613
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_classic_metrics
-      md5: 6d6d430e7f3672c67b9c78eb7d0ea296.dir
-      size: 15213
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_embeddings_alignment
-      md5: 57207c7e05493bf4862dda0394d4ccde.dir
-      size: 5945687
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_embeddings_metrics
-      md5: aca6839fbd1332d3bb26833c95e8e6b0.dir
-      size: 22441
-      nfiles: 559
-  pipeline_wikineiural_ner@22:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__result
-      md5: 2b54257ba731e5aba5c9264293e6c70d.dir
-      size: 1631174
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__wikineural_ner_alignment
-      md5: 83fb11da981b9d06686117efe97869e0.dir
-      size: 6168851
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__wikineural_ner_metrics
-      md5: bd9f3729a2a206312f7863c24353e755.dir
-      size: 8613
-      nfiles: 562
-  pipeline_flair_upos@21:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_flair_upos.py
-      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
-      size: 1916
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
-      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
-      size: 3523907
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__flair_upos_alignment
-      md5: 2055e7e07fd4979aa284d48b6ea7a713.dir
-      size: 2702561
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__flair_upos_metrics
-      md5: 103738afc0ef4f24adf3b855181b1acb.dir
-      size: 6757
-      nfiles: 562
-  pipeline_spacy_ner_wer@20:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__result
-      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
-      size: 1336305
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_ner_alignment
-      md5: d957f3f6216d2e749226df128b611e26.dir
-      size: 2573280
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_ner_metrics
-      md5: a9bb85e539da902ed87b5c78ab0d3133.dir
-      size: 5500
-      nfiles: 562
-  pipeline_word_wer@22:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__result
-      md5: 2b54257ba731e5aba5c9264293e6c70d.dir
-      size: 1631174
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__word_wer_classic_alignment
-      md5: cf3157d59e7af77248c524e504dc04f4.dir
-      size: 2916121
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__word_wer_classic_metrics
-      md5: 51e17aba487d6e86ae41b0a3379bb788.dir
-      size: 15967
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__word_wer_embeddings_alignment
-      md5: 10a6352d89a16e482f841d4766058794.dir
-      size: 6018264
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__word_wer_embeddings_metrics
-      md5: 365866e7c9c0db5b4041f39d0d9ed8f7.dir
-      size: 37953
-      nfiles: 562
-  pipeline_spacy_ner_wer@22:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__result
-      md5: 2b54257ba731e5aba5c9264293e6c70d.dir
-      size: 1631174
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__spacy_ner_alignment
-      md5: 3c4161f8db8a5f218ead787f77d2ff74.dir
-      size: 3161511
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__spacy_ner_metrics
-      md5: c55b2189a6033dd7bb183a479c73875d.dir
-      size: 7425
-      nfiles: 562
-  pipeline_spacy_pos_wer@21:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14
-      --asr=whisper_tiny
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__result
-      md5: 2b54257ba731e5aba5c9264293e6c70d.dir
-      size: 1631174
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__spacy_pos_alignment
-      md5: 4727dbf6ca0c0867479b67f29041018d.dir
-      size: 3252193
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/whisper_tiny__spacy_pos_metrics
-      md5: 5c87afa3d3f1cdeb843416eec888446d.dir
-      size: 7030
-      nfiles: 562
-  pipeline_word_wer@20:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__result
-      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
-      size: 1336305
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_classic_alignment
-      md5: 8cc588761bc5bd56fb867953da04ac35.dir
-      size: 2723010
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_classic_metrics
-      md5: b6a4f05d71bd3fbed796d54b1ac2b29d.dir
-      size: 17198
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_embeddings_alignment
-      md5: c2b20050da963980f2ca5bbed0b1457d.dir
-      size: 5505715
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_embeddings_metrics
-      md5: e2448cce1cb93fa872b3898c00af62ba.dir
-      size: 34993
-      nfiles: 562
-  pipeline_spacy_pos_wer@19:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__result
-      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
-      size: 1336305
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_pos_alignment
-      md5: c1b9ede77bb9061f1c0507147029efee.dir
-      size: 2640298
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_pos_metrics
-      md5: 3e56923bc3b678ee5263e5c8436da9ca.dir
-      size: 6118
-      nfiles: 562
-  pipeline_word_wer@21:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_word_wer.py
-      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
-      size: 2125
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
-      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
-      size: 3523907
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_classic_alignment
-      md5: 171d4dddb9fd5b96479c62fa86ccfe57.dir
-      size: 2811807
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_classic_metrics
-      md5: a3477ae81fcba4a8e21b044f279425bb.dir
-      size: 17073
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_embeddings_alignment
-      md5: d5b7c18b6537344d7e0113ac8a8d9c28.dir
-      size: 5684553
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_embeddings_metrics
-      md5: 64afcfdbe04279ed8dd3cc2b25e77093.dir
-      size: 37859
-      nfiles: 562
diff --git a/experiment/dataset_helper.py b/experiment/dataset_helper.py
index 964c4b8..c245b76 100644
--- a/experiment/dataset_helper.py
+++ b/experiment/dataset_helper.py
@@ -5,5 +5,5 @@ from sziszapangma.integration.record_path_provider import RecordPathProvider
 from sziszapangma.integration.relation_manager_provider import RelationManagerProvider
 
 
-class DatasetHelper(RecordIdIterator, RecordPathProvider, RelationManagerProvider, ABC):
+class DatasetHelper(RecordIdIterator, RecordPathProvider, ABC):
     pass
diff --git a/new_datasets/import_datasets/upload_audio.py b/new_datasets/import_datasets/upload_audio.py
index 3eb99bc..51db020 100644
--- a/new_datasets/import_datasets/upload_audio.py
+++ b/new_datasets/import_datasets/upload_audio.py
@@ -8,6 +8,7 @@ from hashlib import sha1
 
 _TEMP_WAV_FILE = '_temp_file.wav'
 
+
 def process_numpy_array_to_md5_hash(array: np.ndarray) -> str:
     return sha1(array).hexdigest()
 
diff --git a/new_experiment/new_dependency_provider.py b/new_experiment/new_dependency_provider.py
new file mode 100644
index 0000000..3ec1412
--- /dev/null
+++ b/new_experiment/new_dependency_provider.py
@@ -0,0 +1,35 @@
+import os
+
+from minio import Minio
+from pymongo import MongoClient
+
+from new_experiment.utils.minio_audio_record_repository import MinioAudioRecordRepository
+from sziszapangma.integration.repository.experiment_repository import ExperimentRepository
+from sziszapangma.integration.repository.mongo_experiment_repository import MongoExperimentRepository
+
+
+# NOTE(review): the fallback values are the settings this module used to hard-code. They keep existing
+# runs working, but the secrets among them should be rotated and supplied via the environment only.
+def get_minio_client() -> Minio:
+    """Create a MinIO client from MINIO_ENDPOINT / MINIO_ACCESS_KEY / MINIO_SECRET_KEY (with fallbacks)."""
+    return Minio(
+        os.environ.get('MINIO_ENDPOINT', 'minio-asr-benchmarks.theliver.pl'),
+        os.environ.get('MINIO_ACCESS_KEY', 'minio_user'),
+        os.environ.get('MINIO_SECRET_KEY', 'eUxzEQbyYPdzrLxuvvethSbk18kB2s7G'),
+    )
+
+
+def get_mongo_client() -> MongoClient:
+    """Create a MongoDB client (SSL enabled) from MONGO_URI, falling back to the previous URI."""
+    mongo_uri = os.environ.get('MONGO_URI', 'mongodb://root:example@mongo-asr-benchmarks.theliver.pl:27021/')
+    return MongoClient(mongo_uri, ssl=True)
+
+
+def get_experiment_repository(dataset_name: str) -> ExperimentRepository:
+    """Create a Mongo-backed experiment repository whose database name is ``dataset_name``."""
+    return MongoExperimentRepository(mongo_client=get_mongo_client(), database_name=dataset_name)
+
+
+def get_minio_audio_record_repository() -> MinioAudioRecordRepository:
+    """Create the audio record repository backed by the 'dataset-audio' bucket."""
+    return MinioAudioRecordRepository(get_minio_client(), 'dataset-audio')
diff --git a/new_experiment/pipeline/dataset_importer/__init__.py b/new_experiment/pipeline/dataset_importer/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/new_experiment/pipeline/dataset_importer/fleurs_dataset_importer.py b/new_experiment/pipeline/dataset_importer/fleurs_dataset_importer.py
new file mode 100644
index 0000000..9b88c89
--- /dev/null
+++ b/new_experiment/pipeline/dataset_importer/fleurs_dataset_importer.py
@@ -0,0 +1,37 @@
+from pathlib import Path
+from typing import Dict, Any, List
+
+from minio import Minio
+from nltk import RegexpTokenizer
+
+from new_datasets.import_datasets.upload_audio import process_numpy_array_to_md5_hash
+from new_experiment.utils.hf_dataset_importer import HfDatasetImporter
+from new_experiment.utils.minio_audio_record_repository import MinioAudioRecordRepository
+from sziszapangma.integration.repository.experiment_repository import ExperimentRepository
+
+
+class FleursDatasetImporter(HfDatasetImporter):
+    """Importer that reads transcript, audio path and id from the 'transcription', 'path' and 'id' keys."""
+
+    # One shared tokenizer: the pattern is constant, so there is no need to rebuild it per record.
+    _WORD_TOKENIZER = RegexpTokenizer(r'\w+')
+
+    def __init__(self, experiment_repository: ExperimentRepository,
+                 minio_audio_record_repository: MinioAudioRecordRepository, dataset_name: str):
+        super().__init__(experiment_repository, minio_audio_record_repository, dataset_name)
+
+    def get_words(self, record: Dict[str, Any]) -> List[str]:
+        """Split the record's 'transcription' into runs of word characters."""
+        return self._WORD_TOKENIZER.tokenize(record['transcription'])
+
+    def get_raw_transcription(self, record: Dict[str, Any]) -> str:
+        """Return the record's 'transcription' value unchanged."""
+        return record['transcription']
+
+    def get_audio_file(self, record: Dict[str, Any]) -> Path:
+        """Return the record's 'path' value as-is (NOTE(review): likely a str, not a Path - confirm)."""
+        return record['path']
+
+    def get_record_id(self, record: Dict[str, Any]) -> str:
+        """Return the record's 'id' value as-is (NOTE(review): confirm it is a str as annotated)."""
+        return record["id"]
diff --git a/new_experiment/pipeline/dataset_importer/minds14_dataset_importer.py b/new_experiment/pipeline/dataset_importer/minds14_dataset_importer.py
new file mode 100644
index 0000000..b8e8e5e
--- /dev/null
+++ b/new_experiment/pipeline/dataset_importer/minds14_dataset_importer.py
@@ -0,0 +1,37 @@
+from pathlib import Path
+from typing import Dict, Any, List
+
+from minio import Minio
+from nltk import RegexpTokenizer
+
+from new_datasets.import_datasets.upload_audio import process_numpy_array_to_md5_hash
+from new_experiment.utils.hf_dataset_importer import HfDatasetImporter
+from new_experiment.utils.minio_audio_record_repository import MinioAudioRecordRepository
+from sziszapangma.integration.repository.experiment_repository import ExperimentRepository
+
+
+class Minds14DatasetImporter(HfDatasetImporter):
+    """Importer that reads the transcript from 'transcription' and derives the id from the audio array."""
+
+    # One shared tokenizer: the pattern is constant, so there is no need to rebuild it per record.
+    _WORD_TOKENIZER = RegexpTokenizer(r'\w+')
+
+    def __init__(self, experiment_repository: ExperimentRepository,
+                 minio_audio_record_repository: MinioAudioRecordRepository, dataset_name: str):
+        super().__init__(experiment_repository, minio_audio_record_repository, dataset_name)
+
+    def get_words(self, record: Dict[str, Any]) -> List[str]:
+        """Split the record's 'transcription' into runs of word characters."""
+        return self._WORD_TOKENIZER.tokenize(record['transcription'])
+
+    def get_raw_transcription(self, record: Dict[str, Any]) -> str:
+        """Return the record's 'transcription' value unchanged."""
+        return record['transcription']
+
+    def get_audio_file(self, record: Dict[str, Any]) -> Path:
+        """Return the record's 'path' value as-is (NOTE(review): likely a str, not a Path - confirm)."""
+        return record['path']
+
+    def get_record_id(self, record: Dict[str, Any]) -> str:
+        """Return the hash that process_numpy_array_to_md5_hash computes for the record's audio array."""
+        return process_numpy_array_to_md5_hash(record['audio']['array'])
diff --git a/new_experiment/pipeline/dataset_importer/voxpopuli_dataset_importer.py b/new_experiment/pipeline/dataset_importer/voxpopuli_dataset_importer.py
new file mode 100644
index 0000000..d89ec95
--- /dev/null
+++ b/new_experiment/pipeline/dataset_importer/voxpopuli_dataset_importer.py
@@ -0,0 +1,37 @@
+from pathlib import Path
+from typing import Dict, Any, List
+
+from minio import Minio
+from nltk import RegexpTokenizer
+
+from new_datasets.import_datasets.upload_audio import process_numpy_array_to_md5_hash
+from new_experiment.utils.hf_dataset_importer import HfDatasetImporter
+from new_experiment.utils.minio_audio_record_repository import MinioAudioRecordRepository
+from sziszapangma.integration.repository.experiment_repository import ExperimentRepository
+
+
+class VoxpopuliDatasetImporter(HfDatasetImporter):
+    """Importer that reads words from 'normalized_text', raw text from 'raw_text' and audio from 'audio'."""
+
+    # One shared tokenizer: the pattern is constant, so there is no need to rebuild it per record.
+    _WORD_TOKENIZER = RegexpTokenizer(r'\w+')
+
+    def __init__(self, experiment_repository: ExperimentRepository,
+                 minio_audio_record_repository: MinioAudioRecordRepository, dataset_name: str):
+        super().__init__(experiment_repository, minio_audio_record_repository, dataset_name)
+
+    def get_words(self, record: Dict[str, Any]) -> List[str]:
+        """Split the record's 'normalized_text' into runs of word characters."""
+        return self._WORD_TOKENIZER.tokenize(record['normalized_text'])
+
+    def get_raw_transcription(self, record: Dict[str, Any]) -> str:
+        """Return the record's 'raw_text' value unchanged."""
+        return record['raw_text']
+
+    def get_audio_file(self, record: Dict[str, Any]) -> Path:
+        """Return record['audio']['path'] as-is (NOTE(review): likely a str, not a Path - confirm)."""
+        return record['audio']['path']
+
+    def get_record_id(self, record: Dict[str, Any]) -> str:
+        """Return the hash that process_numpy_array_to_md5_hash computes for the record's audio array."""
+        return process_numpy_array_to_md5_hash(record['audio']['array'])
diff --git a/new_experiment/pipeline/import_datasets.py b/new_experiment/pipeline/import_datasets.py
new file mode 100644
index 0000000..1633088
--- /dev/null
+++ b/new_experiment/pipeline/import_datasets.py
@@ -0,0 +1,42 @@
+from datasets import Dataset, load_dataset
+
+from new_experiment.new_dependency_provider import get_experiment_repository, get_minio_client, \
+    get_minio_audio_record_repository
+from new_experiment.pipeline.dataset_importer.fleurs_dataset_importer import FleursDatasetImporter
+from new_experiment.pipeline.dataset_importer.minds14_dataset_importer import Minds14DatasetImporter
+from new_experiment.pipeline.dataset_importer.voxpopuli_dataset_importer import VoxpopuliDatasetImporter
+from new_experiment.utils.hf_dataset_importer import HfDatasetImporter
+
+
+# NOTE(review): per the Hugging Face docs, load_dataset() without `split` returns a DatasetDict, and
+# iterating that yields split names rather than records - confirm which split should be imported.
+def import_single_dataset(importer: HfDatasetImporter, dataset: Dataset):
+    """Feed every item of ``dataset`` to ``importer``."""
+    importer.process_dataset(dataset)
+
+
+def import_fleurs_dataset(dataset_lang: str, experiment_dataset_name: str):
+    """Import the 'google/fleurs' config ``dataset_lang`` under ``experiment_dataset_name``."""
+    import_single_dataset(FleursDatasetImporter(
+        get_experiment_repository(experiment_dataset_name),
+        get_minio_audio_record_repository(),
+        experiment_dataset_name,
+    ), load_dataset('google/fleurs', dataset_lang))
+
+
+def import_minds14_dataset(dataset_lang: str, experiment_dataset_name: str):
+    """Import the 'PolyAI/minds14' config ``dataset_lang`` under ``experiment_dataset_name``."""
+    import_single_dataset(Minds14DatasetImporter(
+        get_experiment_repository(experiment_dataset_name),
+        get_minio_audio_record_repository(),
+        experiment_dataset_name,
+    ), load_dataset('PolyAI/minds14', dataset_lang))
+
+
+def import_voxpopuli_dataset(dataset_lang: str, experiment_dataset_name: str):
+    """Import the 'facebook/voxpopuli' config ``dataset_lang`` with the VoxPopuli-specific importer."""
+    import_single_dataset(VoxpopuliDatasetImporter(
+        get_experiment_repository(experiment_dataset_name),
+        get_minio_audio_record_repository(),
+        experiment_dataset_name,
+    ), load_dataset('facebook/voxpopuli', dataset_lang))
diff --git a/new_experiment/pipeline/import_fleurs.py b/new_experiment/pipeline/import_fleurs.py
new file mode 100644
index 0000000..f08197d
--- /dev/null
+++ b/new_experiment/pipeline/import_fleurs.py
@@ -0,0 +1,16 @@
+from new_experiment.pipeline.import_datasets import import_fleurs_dataset
+
+# (FLEURS config name, experiment dataset name) pairs, imported in this order.
+_FLEURS_IMPORTS = (
+    ('nl_nl', 'nl_google_fleurs'),
+    ('fr_fr', 'fr_google_fleurs'),
+    ('de_de', 'de_google_fleurs'),
+    ('it_it', 'it_google_fleurs'),
+    ('pl_pl', 'pl_google_fleurs'),
+    ('es_419', 'es_google_fleurs'),
+    ('en_us', 'en_google_fleurs'),
+)
+
+if __name__ == '__main__':
+    for fleurs_config, experiment_name in _FLEURS_IMPORTS:
+        import_fleurs_dataset(fleurs_config, experiment_name)
diff --git a/new_experiment/utils/hf_dataset_importer.py b/new_experiment/utils/hf_dataset_importer.py
new file mode 100644
index 0000000..94db715
--- /dev/null
+++ b/new_experiment/utils/hf_dataset_importer.py
@@ -0,0 +1,70 @@
+from abc import ABC, abstractmethod
+from hashlib import sha1
+from pathlib import Path
+from typing import List, Any, Dict
+
+import numpy as np
+from datasets import Dataset
+from minio import Minio
+
+from new_experiment.utils.minio_audio_record_repository import MinioAudioRecordRepository
+from new_experiment.utils.property_helper import PropertyHelper
+from sziszapangma.integration.repository.experiment_repository import ExperimentRepository
+from sziszapangma.model.model_creators import create_new_word
+
+
+class HfDatasetImporter(ABC):
+    """Base class that stores the gold transcript of each dataset record in the experiment repository.
+
+    Subclasses define how the words, raw transcript, audio file and id are read from one record.
+    """
+    _experiment_repository: ExperimentRepository
+    _minio_audio_record_repository: MinioAudioRecordRepository
+    _experiment_dataset_name: str
+
+    def __init__(self, experiment_repository: ExperimentRepository,
+                 minio_audio_record_repository: MinioAudioRecordRepository, experiment_dataset_name: str):
+        self._experiment_repository = experiment_repository
+        self._minio_audio_record_repository = minio_audio_record_repository
+        self._experiment_dataset_name = experiment_dataset_name
+
+    @abstractmethod
+    def get_words(self, record: Dict[str, Any]) -> List[str]:
+        """Return the gold transcript of ``record`` as a list of words."""
+
+    @abstractmethod
+    def get_raw_transcription(self, record: Dict[str, Any]) -> str:
+        """Return the raw transcript text of ``record``."""
+
+    @abstractmethod
+    def get_audio_file(self, record: Dict[str, Any]) -> Path:
+        """Return the location of the audio file of ``record``."""
+
+    @abstractmethod
+    def get_record_id(self, record: Dict[str, Any]) -> str:
+        """Return the identifier under which ``record`` is stored."""
+
+    def process_dataset(self, dataset: Dataset):
+        """Process every item yielded by iterating ``dataset``, in order."""
+        for record in dataset:
+            self.process_record(record)
+
+    def process_record(self, record: Dict[str, Any]):
+        """Store the tokenized and the raw gold transcript of ``record`` under its record id."""
+        record_id = self.get_record_id(record)
+        gold_words = [create_new_word(word) for word in self.get_words(record)]
+        raw_transcription = self.get_raw_transcription(record)
+        audio_file_path = self.get_audio_file(record)
+        repository = self._experiment_repository
+        repository.update_property_for_key(
+            record_id=record_id,
+            property_name=PropertyHelper.get_gold_transcript_words(),
+            property_value=gold_words,
+        )
+        repository.update_property_for_key(
+            record_id=record_id,
+            property_name=PropertyHelper.get_gold_transcript_raw(),
+            property_value={'gold_transcript_raw': raw_transcription},
+        )
+        # TODO uncomment
+        # self._minio_audio_record_repository.save_file(audio_file_path, self._experiment_dataset_name, record_id)
diff --git a/new_experiment/utils/id_calculator.py b/new_experiment/utils/id_calculator.py
new file mode 100644
index 0000000..15419d1
--- /dev/null
+++ b/new_experiment/utils/id_calculator.py
@@ -0,0 +1,12 @@
+from hashlib import sha1
+
+import numpy as np
+
+
+def process_numpy_array_to_md5_hash(array: np.ndarray) -> str:
+    """Return the hex SHA-1 digest of the array's raw bytes (the name says md5, the hash is SHA-1).
+
+    The array is made C-contiguous first because ``hashlib`` only accepts C-contiguous buffers; for
+    arrays that already are C-contiguous the digest is unchanged.
+    """
+    return sha1(np.ascontiguousarray(array)).hexdigest()
diff --git a/new_experiment/utils/loaded_remote_dataset_helper.py b/new_experiment/utils/loaded_remote_dataset_helper.py
new file mode 100644
index 0000000..78dc8bf
--- /dev/null
+++ b/new_experiment/utils/loaded_remote_dataset_helper.py
@@ -0,0 +1,35 @@
+from pathlib import Path
+from typing import Set
+
+from minio import Minio
+from urllib3 import HTTPResponse
+
+from experiment.dataset_helper import DatasetHelper
+from new_experiment.utils.minio_audio_record_repository import MinioAudioRecordRepository
+from new_experiment.utils.property_helper import PropertyHelper
+from sziszapangma.integration.repository.experiment_repository import ExperimentRepository
+
+
+class LoadedRemoteDatasetHelper(DatasetHelper):
+    """Dataset helper that lists records from the experiment repository and caches their audio locally."""
+    _experiment_repository: ExperimentRepository
+    _minio_record_repository: MinioAudioRecordRepository
+    _dataset_name: str
+
+    def __init__(self, experiment_repository: ExperimentRepository,
+                 minio_record_repository: MinioAudioRecordRepository, dataset_name: str):
+        self._experiment_repository = experiment_repository
+        self._minio_record_repository = minio_record_repository
+        self._dataset_name = dataset_name
+
+    def get_all_records(self) -> Set[str]:
+        """Return the ids of all records that have the gold transcript words property stored."""
+        return self._experiment_repository.get_all_record_ids_for_property(PropertyHelper.get_gold_transcript_words())
+
+    def get_path(self, record_id: str) -> str:
+        """Return the POSIX path of the locally cached WAV file, downloading it on a cache miss."""
+        record_path = Path.home() / f'.cache/asr_benchmark/{self._dataset_name}/{record_id}.wav'
+        if not record_path.exists():
+            # The file is not cached yet, so it has to be fetched (load_file), not uploaded (save_file).
+            self._minio_record_repository.load_file(record_path, self._dataset_name, record_id)
+        return record_path.as_posix()
diff --git a/new_experiment/utils/minio_audio_record_repository.py b/new_experiment/utils/minio_audio_record_repository.py
new file mode 100644
index 0000000..0c699c8
--- /dev/null
+++ b/new_experiment/utils/minio_audio_record_repository.py
@@ -0,0 +1,40 @@
+from pathlib import Path
+
+from minio import Minio
+from urllib3 import HTTPResponse
+
+
+class MinioAudioRecordRepository:
+    """Stores and fetches WAV recordings in one MinIO bucket under '<dataset_name>/<record_id>.wav'."""
+    _minio: Minio
+    _bucket: str
+
+    def __init__(self, minio: Minio, bucket: str):
+        self._minio = minio
+        self._bucket = bucket
+
+    @staticmethod
+    def _get_record_path(dataset_name: str, record_id: str) -> str:
+        """Return the object name used for the given record."""
+        return f'{dataset_name}/{record_id}.wav'
+
+    def save_file(self, local_path: Path, dataset_name: str, record_id: str):
+        """Upload the file at ``local_path`` as the audio object of the given record."""
+        # Path() also accepts a plain string path; the size comes from the file system, so the file is
+        # opened once, streamed, and closed instead of being read fully into memory just to measure it.
+        file_size = Path(local_path).stat().st_size
+        with open(local_path, 'rb') as reader:
+            self._minio.put_object(self._bucket, self._get_record_path(dataset_name, record_id), reader, file_size)
+
+    def load_file(self, local_path: Path, dataset_name: str, record_id: str):
+        """Download the audio object of the given record into ``local_path``, creating parent directories."""
+        record_response: HTTPResponse = self._minio.get_object(self._bucket,
+                                                               self._get_record_path(dataset_name, record_id))
+        try:
+            local_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(local_path, 'wb') as writer:
+                writer.write(record_response.data)
+        finally:
+            # Always hand the connection back to the pool, as the MinIO client documentation requires.
+            record_response.close()
+            record_response.release_conn()
diff --git a/new_experiment/utils/property_helper.py b/new_experiment/utils/property_helper.py
index 7cff60d..f00ebe8 100644
--- a/new_experiment/utils/property_helper.py
+++ b/new_experiment/utils/property_helper.py
@@ -1,8 +1,8 @@
 class PropertyHelper:
 
     @staticmethod
-    def get_gold_transcript() -> str:
-        return 'gold_transcript'
+    def get_gold_transcript_words() -> str:
+        return 'gold_transcript_words'
 
     @staticmethod
     def get_gold_transcript_raw() -> str:
diff --git a/poetry.lock b/poetry.lock
index bd253e1..236cdab 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -738,14 +738,14 @@ test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"]
 
 [[package]]
 name = "confection"
-version = "0.0.3"
+version = "0.0.4"
 description = "The sweetest config system for Python"
 category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "confection-0.0.3-py3-none-any.whl", hash = "sha256:51af839c1240430421da2b248541ebc95f9d0ee385bcafa768b8acdbd2b0111d"},
-    {file = "confection-0.0.3.tar.gz", hash = "sha256:4fec47190057c43c9acbecb8b1b87a9bf31c469caa0d6888a5b9384432fdba5a"},
+    {file = "confection-0.0.4-py3-none-any.whl", hash = "sha256:aeac5919ba770c7b281aa5863bb6b0efed42568a7ad8ea26b6cb632154503fb2"},
+    {file = "confection-0.0.4.tar.gz", hash = "sha256:b1ddf5885da635f0e260a40b339730806dfb1bd17d30e08764f35af841b04ecf"},
 ]
 
 [package.dependencies]
@@ -1113,14 +1113,14 @@ wmi = ["wmi (>=1.5.1,<2.0.0)"]
 
 [[package]]
 name = "dpath"
-version = "2.1.3"
+version = "2.1.4"
 description = "Filesystem-like pathing and searching for dictionaries"
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "dpath-2.1.3-py3-none-any.whl", hash = "sha256:d9560e03ccd83b3c6f29988b0162ce9b34fd28b9d8dbda46663b20c68d9cdae3"},
-    {file = "dpath-2.1.3.tar.gz", hash = "sha256:d1a7a0e6427d0a4156c792c82caf1f0109603f68ace792e36ca4596fd2cb8d9d"},
+    {file = "dpath-2.1.4-py3-none-any.whl", hash = "sha256:3a4f6cc07e3a1b34bc73baa3a6854ee0a48fb2cf18a8c9b1911b66fd72afaa85"},
+    {file = "dpath-2.1.4.tar.gz", hash = "sha256:3380a77d0db4abf104125860ff6eb4bd07c97c65b81aad42a609717089a1bed0"},
 ]
 
 [[package]]
@@ -1207,14 +1207,14 @@ pgp = ["gpg"]
 
 [[package]]
 name = "dvc"
-version = "2.39.0"
+version = "2.41.1"
 description = "Git for data scientists - manage your code and data together"
 category = "main"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "dvc-2.39.0-py3-none-any.whl", hash = "sha256:e0765516c85efac36148804c01fadaf19f4b3a862675499814a96a127a2fa9c1"},
-    {file = "dvc-2.39.0.tar.gz", hash = "sha256:efb00f96b296446c951107d4d82a2fb0b5be4c457d3c9f2d285fbcbf1f7be266"},
+    {file = "dvc-2.41.1-py3-none-any.whl", hash = "sha256:a3fafd63c3e9bedd278add480d62be38b7f51db891e14e37d754c986c6bc284d"},
+    {file = "dvc-2.41.1.tar.gz", hash = "sha256:d875358ce2463b0b93de1e4a91ee8ca960793b96462048e72f40d3168ae5e97c"},
 ]
 
 [package.dependencies]
@@ -1223,7 +1223,7 @@ colorama = ">=0.3.9"
 configobj = ">=5.0.6"
 distro = ">=1.3.0"
 dpath = ">=2.0.2,<3"
-dvc-data = "0.28.5"
+dvc-data = "0.29.0"
 dvc-http = "*"
 dvc-render = "0.0.17"
 dvc-s3 = {version = "2.21.0", optional = true, markers = "extra == \"s3\""}
@@ -1270,28 +1270,28 @@ ssh = ["dvc-ssh (==2.20.0)"]
 ssh-gssapi = ["dvc-ssh[gssapi] (==2.20.0)"]
 terraform = ["tpi[ssh] (>=2.1.0)"]
 testing = ["pytest-test-utils (==0.0.8)"]
-tests = ["beautifulsoup4 (==4.11.1)", "dvc-ssh (==2.20.0)", "filelock (==3.9.0)", "flaky (==3.7.0)", "mypy (==0.991)", "pylint (==2.15.9)", "pylint-plugin-utils (==0.7)", "pylint-pytest (==1.1.2)", "pytest (==7.2.0)", "pytest-cov (==4.0.0)", "pytest-docker (==0.11.0)", "pytest-lazy-fixture (==0.6.3)", "pytest-mock (==3.10.0)", "pytest-test-utils (==0.0.8)", "pytest-timeout (==2.1.0)", "pytest-xdist (==3.1.0)", "pywin32 (>=225)", "tpi[ssh] (>=2.1.0)", "types-requests", "types-tabulate", "types-toml"]
+tests = ["beautifulsoup4 (==4.11.1)", "dvc-ssh (==2.20.0)", "filelock (==3.9.0)", "flaky (==3.7.0)", "mypy (==0.991)", "pylint (==2.15.10)", "pylint-plugin-utils (==0.7)", "pylint-pytest (==1.1.2)", "pytest (==7.2.0)", "pytest-cov (==4.0.0)", "pytest-docker (==0.11.0)", "pytest-lazy-fixture (==0.6.3)", "pytest-mock (==3.10.0)", "pytest-test-utils (==0.0.8)", "pytest-timeout (==2.1.0)", "pytest-xdist (==3.1.0)", "pywin32 (>=225)", "tpi[ssh] (>=2.1.0)", "types-requests", "types-tabulate", "types-toml"]
 webdav = ["dvc-webdav (==2.19.0)"]
 webhdfs = ["dvc-webhdfs (==2.19.0)"]
 webhdfs-kerberos = ["dvc-webhdfs[kerberos] (==2.19.0)"]
 
 [[package]]
 name = "dvc-data"
-version = "0.28.5"
+version = "0.29.0"
 description = "dvc data"
 category = "main"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "dvc-data-0.28.5.tar.gz", hash = "sha256:cf6ae7a7d9220fcd223504427829ef7e9b6088e94b83e7d2882a9846eafb85cb"},
-    {file = "dvc_data-0.28.5-py3-none-any.whl", hash = "sha256:dc5fc5e4d20e20f595a5b84bf1b623652e3d65804f37cb6c937a75e4fc1c0bad"},
+    {file = "dvc-data-0.29.0.tar.gz", hash = "sha256:3385054422b66738948690d96944bc547f2f26edfa5776bc0c0a4f9f64a8cc42"},
+    {file = "dvc_data-0.29.0-py3-none-any.whl", hash = "sha256:004ee9c20971e17c26dbf0ef59ce38dde1c03612e55d81a91b1331ff71774bb5"},
 ]
 
 [package.dependencies]
 attrs = ">=21.3.0"
 dictdiffer = ">=0.8.1"
 diskcache = ">=5.2.1"
-dvc-objects = "0.14.0"
+dvc-objects = "0.14.1"
 funcy = ">=1.14"
 nanotime = ">=0.5.2"
 pygtrie = ">=2.3.2"
@@ -1325,14 +1325,14 @@ tests = ["dvc[testing]", "flaky (==3.7.0)", "mypy (==0.910)", "pylint (==2.15.9)
 
 [[package]]
 name = "dvc-objects"
-version = "0.14.0"
+version = "0.14.1"
 description = "dvc objects"
 category = "main"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "dvc-objects-0.14.0.tar.gz", hash = "sha256:039b7be98f4c8325bd90e2869072e3a776bf8e27419723b459704d7cd984e11f"},
-    {file = "dvc_objects-0.14.0-py3-none-any.whl", hash = "sha256:8767ab7df049dbd41ed318bdc402976073c51b0d0c95aaa48dd13d337572fcc2"},
+    {file = "dvc-objects-0.14.1.tar.gz", hash = "sha256:181ad4eadaee592cd37a5027b2c8b4727ca00aba7ae60e8417075dd4e5ef0d42"},
+    {file = "dvc_objects-0.14.1-py3-none-any.whl", hash = "sha256:36587b4a0e1eec1778998070fd804cd5feac33b03fe7f8bbcb191110631977ec"},
 ]
 
 [package.dependencies]
@@ -2757,6 +2757,7 @@ packaging = ">=20.0"
 pillow = ">=6.2.0"
 pyparsing = ">=2.2.1"
 python-dateutil = ">=2.7"
+setuptools_scm = ">=7"
 
 [[package]]
 name = "miniful"
@@ -3256,14 +3257,14 @@ files = [
 
 [[package]]
 name = "packaging"
-version = "22.0"
+version = "23.0"
 description = "Core utilities for Python packages"
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "packaging-22.0-py3-none-any.whl", hash = "sha256:957e2148ba0e1a3b282772e791ef1d8083648bc131c8ab0c1feba110ce1146c3"},
-    {file = "packaging-22.0.tar.gz", hash = "sha256:2198ec20bd4c017b8f9717e00f0c8714076fc2fd93816750ab48e2c41de2cfd3"},
+    {file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"},
+    {file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"},
 ]
 
 [[package]]
@@ -4393,7 +4394,6 @@ files = [
     {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-win_amd64.whl", hash = "sha256:d000f258cf42fec2b1bbf2863c61d7b8918d31ffee905da62dede869254d3b8a"},
     {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:045e0626baf1c52e5527bd5db361bc83180faaba2ff586e763d3d5982a876a9e"},
     {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-macosx_12_6_arm64.whl", hash = "sha256:721bc4ba4525f53f6a611ec0967bdcee61b31df5a56801281027a3a6d1c2daf5"},
-    {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:41d0f1fa4c6830176eef5b276af04c89320ea616655d01327d5ce65e50575c94"},
     {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:4b3a93bb9bc662fc1f99c5c3ea8e623d8b23ad22f861eb6fce9377ac07ad6072"},
     {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-macosx_12_0_arm64.whl", hash = "sha256:a234a20ae07e8469da311e182e70ef6b199d0fbeb6c6cc2901204dd87fb867e8"},
     {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:15910ef4f3e537eea7fe45f8a5d19997479940d9196f357152a09031c5be59f3"},
@@ -4649,6 +4649,28 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-g
 testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
 testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
 
+[[package]]
+name = "setuptools-scm"
+version = "7.1.0"
+description = "the blessed package to manage your versions by scm tags"
+category = "main"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "setuptools_scm-7.1.0-py3-none-any.whl", hash = "sha256:73988b6d848709e2af142aa48c986ea29592bbcfca5375678064708205253d8e"},
+    {file = "setuptools_scm-7.1.0.tar.gz", hash = "sha256:6c508345a771aad7d56ebff0e70628bf2b0ec7573762be9960214730de278f27"},
+]
+
+[package.dependencies]
+packaging = ">=20.0"
+setuptools = "*"
+tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
+typing-extensions = "*"
+
+[package.extras]
+test = ["pytest (>=6.2)", "virtualenv (>20)"]
+toml = ["setuptools (>=42)"]
+
 [[package]]
 name = "shortuuid"
 version = "1.0.11"
@@ -5091,7 +5113,7 @@ testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"]
 name = "tomli"
 version = "2.0.1"
 description = "A lil' TOML parser"
-category = "dev"
+category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -5306,14 +5328,14 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "1.26.13"
+version = "1.26.14"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 category = "main"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
 files = [
-    {file = "urllib3-1.26.13-py2.py3-none-any.whl", hash = "sha256:47cc05d99aaa09c9e72ed5809b60e7ba354e64b59c9c173ac3018642d8bb41fc"},
-    {file = "urllib3-1.26.13.tar.gz", hash = "sha256:c083dd0dce68dbfbe1129d5271cb90f9447dea7d52097c6e0126120c521ddea8"},
+    {file = "urllib3-1.26.14-py2.py3-none-any.whl", hash = "sha256:75edcdc2f7d85b137124a6c3c9fc3933cdeaa12ecb9a6a959f22797a0feca7e1"},
+    {file = "urllib3-1.26.14.tar.gz", hash = "sha256:076907bf8fd355cde77728471316625a4d2f7e713c125f51953bb5b3eecf4f72"},
 ]
 
 [package.extras]
@@ -5419,7 +5441,7 @@ dev = ["pytest"]
 type = "git"
 url = "https://github.com/openai/whisper.git"
 reference = "HEAD"
-resolved_reference = "28769fcfe50755a817ab922a7bc83483159600a9"
+resolved_reference = "f82bc59f5ea234d4b97fb2860842ed38519f7e65"
 
 [[package]]
 name = "wikipedia-api"
diff --git a/pyproject.toml b/pyproject.toml
index e7b17bc..69c3064 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,7 @@ spacy = "^3.4.4"
 pysoundfile = "^0.9.0.post1"
 datasets = "^2.8.0"
 torchaudio = "^0.13.1"
-flair = "^0.11.3"
+#flair = "^0.11.3"
 whisper = {git = "https://github.com/openai/whisper.git"}
 ffmpeg-python = "^0.2.0"
 pydub = "^0.25.1"
diff --git a/sziszapangma/integration/repository/experiment_repository.py b/sziszapangma/integration/repository/experiment_repository.py
index 4e01d55..e50666d 100644
--- a/sziszapangma/integration/repository/experiment_repository.py
+++ b/sziszapangma/integration/repository/experiment_repository.py
@@ -30,6 +30,10 @@ class ExperimentRepository(ABC):
     def get_all_record_ids(self) -> Set[str]:
         """Methods returns all possible record ids."""
 
+    @abstractmethod
+    def get_all_record_ids_for_property(self, property_name: str) -> Set[str]:
+        """Method returns all record ids available for the given property."""
+
     @abstractmethod
     def get_all_properties(self) -> Set[str]:
         """Methods returns all possible properties."""
diff --git a/sziszapangma/integration/repository/mongo_experiment_repository.py b/sziszapangma/integration/repository/mongo_experiment_repository.py
index b169735..6c87a1d 100644
--- a/sziszapangma/integration/repository/mongo_experiment_repository.py
+++ b/sziszapangma/integration/repository/mongo_experiment_repository.py
@@ -49,9 +49,12 @@ class MongoExperimentRepository(ExperimentRepository):
     def get_all_record_ids(self) -> Set[str]:
         records = set()
         for collection_name in self.get_all_properties():
-            for record in self._get_database()[collection_name].find():
-                records.add(record[ID])
+            for record_id in self.get_all_record_ids_for_property(collection_name):
+                records.add(record_id)
         return records
 
+    def get_all_record_ids_for_property(self, property_name: str) -> Set[str]:
+        return set(record[ID] for record in self._get_database()[property_name].find())
+
     def get_all_properties(self) -> Set[str]:
         return set(self._get_database().list_collection_names())
diff --git a/sziszapangma/integration/task/processing_task.py b/sziszapangma/integration/task/processing_task.py
index 5c03afb..be9e523 100644
--- a/sziszapangma/integration/task/processing_task.py
+++ b/sziszapangma/integration/task/processing_task.py
@@ -20,8 +20,7 @@ class ProcessingTask(ABC):
     def run_single_process(
         self,
         record_id: str,
-        experiment_repository: ExperimentRepository,
-        relation_manager: RelationManager,
+        experiment_repository: ExperimentRepository
     ):
         pass
 
@@ -32,8 +31,7 @@ class ProcessingTask(ABC):
     def process(
         self,
         record_id_iterator: RecordIdIterator,
-        experiment_repository: ExperimentRepository,
-        relation_manager_provider: RelationManagerProvider,
+        experiment_repository: ExperimentRepository
     ):
         records_ids = list(record_id_iterator.get_all_records())
         for record_index in range(len(records_ids)):
@@ -46,8 +44,7 @@ class ProcessingTask(ABC):
             if not skip or self._require_update:
                 print(base_log)
                 try:
-                    relation_manager = relation_manager_provider.get_relation_manager(record_id)
-                    self.run_single_process(record_id, experiment_repository, relation_manager)
+                    self.run_single_process(record_id, experiment_repository)
                 except Exception as err:
                     print("Handling run-time error:", err)
                     traceback.print_exc()
-- 
GitLab