From f4762cc2ef07f03d3c2bbfca8ba9aae48252df8d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20W=C4=85troba?= <markowanga@gmail.com>
Date: Sat, 16 Jul 2022 12:32:01 +0200
Subject: [PATCH] Finish experiments for Polish

---
 docker/docker-compose.yml                     |   40 +-
 dvc.lock                                      | 3444 ++++++++++++-----
 dvc.yaml                                      |  113 +-
 .../pipeline_process_spacy_dep_tag_wer.py     |   35 +
 .../spacy_pos_sentence_dep_tag_processor.py   |   28 +
 .../pipeline/pl_common_voice/.gitignore       |   24 +
 .../pipeline/pl_google_fleurs/.gitignore      |   40 +
 experiment_data/pipeline/pl_luna/.gitignore   |   38 +
 .../pipeline/pl_minds14/.gitignore            |   34 +
 .../pipeline/pl_voicelab_cbiz/.gitignore      |   22 +
 10 files changed, 2910 insertions(+), 908 deletions(-)
 create mode 100644 experiment/pipeline_process_spacy_dep_tag_wer.py
 create mode 100644 experiment/sentence_wer_processor/spacy_pos_sentence_dep_tag_processor.py

diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index 2486d9e..ec1cfde 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -44,27 +44,27 @@ services:
             - /etc/localtime:/etc/localtime:ro
             - ./embedding_models:/models
 
-    ajn_asr:
-        image: gitlab.clarin-pl.eu:5050/clarin-dialog/clarin-dialog/asr-clarin-pl-service:1.5
-        container_name: ajn_asr
-        restart: always
-        ports:
-            - "5431:5000"
-        environment:
-            - AUTH_TOKEN=__example_token__
-        volumes:
-            - /etc/localtime:/etc/localtime:ro
+#    ajn_asr:
+#        image: gitlab.clarin-pl.eu:5050/clarin-dialog/clarin-dialog/asr-clarin-pl-service:1.5
+#        container_name: ajn_asr
+#        restart: always
+#        ports:
+#            - "5431:5000"
+#        environment:
+#            - AUTH_TOKEN=__example_token__
+#        volumes:
+#            - /etc/localtime:/etc/localtime:ro
 
-    wav2vec2-xls-r-1b-polish:
-        image: gitlab.clarin-pl.eu:5050/clarin-dialog/clarin-dialog/wav2vec2-xls-r-1b-polish:1.5
-        container_name: wav2vec2-xls-r-1b-polish
-        restart: always
-        ports:
-            - "5437:5000"
-        environment:
-            - AUTH_TOKEN=__example_token__
-        volumes:
-            - /etc/localtime:/etc/localtime:ro
+#    wav2vec2-xls-r-1b-polish:
+#        image: gitlab.clarin-pl.eu:5050/clarin-dialog/clarin-dialog/wav2vec2-xls-r-1b-polish:1.5
+#        container_name: wav2vec2-xls-r-1b-polish
+#        restart: always
+#        ports:
+#            - "5437:5000"
+#        environment:
+#            - AUTH_TOKEN=__example_token__
+#        volumes:
+#            - /etc/localtime:/etc/localtime:ro
 
 #    speechbrain_asr:
 #        image: gitlab.clarin-pl.eu:5050/clarin-dialog/clarin-dialog/speechbrain-asr:1.5
diff --git a/dvc.lock b/dvc.lock
index 8a56c42..0155b5e 100644
--- a/dvc.lock
+++ b/dvc.lock
@@ -888,23 +888,23 @@ stages:
       nfiles: 559
   pipeline_asr_result@10:
     cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs
-      --asr=techmo
+      --asr=wav2vec2
     deps:
     - path: experiment/pipeline_process_asr.py
       md5: efe378505897550fe1a8d423c321ef53
       size: 1403
-    - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__techmo
-      md5: 876e8cff4ebd191617fcd3844e411475.dir
-      size: 859227
+    - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__wav2vec2
+      md5: 9638746467b0dea972a0be101a7d5c58.dir
+      size: 4093001
       nfiles: 758
     - path: experiment_data/dataset/pl_google_fleurs
       md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
       size: 236272072
       nfiles: 758
     outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
-      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
-      size: 1880403
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
+      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
+      size: 5137721
       nfiles: 758
   pipeline_asr_result@4:
     cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_voicelab_cbiz
@@ -948,23 +948,23 @@ stages:
       nfiles: 456
   pipeline_asr_result@9:
     cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs
-      --asr=wav2vec2
+      --asr=techmo
     deps:
     - path: experiment/pipeline_process_asr.py
       md5: efe378505897550fe1a8d423c321ef53
       size: 1403
-    - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__wav2vec2
-      md5: 9638746467b0dea972a0be101a7d5c58.dir
-      size: 4093001
+    - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__techmo
+      md5: 876e8cff4ebd191617fcd3844e411475.dir
+      size: 859227
       nfiles: 758
     - path: experiment_data/dataset/pl_google_fleurs
       md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
       size: 236272072
       nfiles: 758
     outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
-      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
-      size: 5137721
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
+      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
+      size: 1880403
       nfiles: 758
   pipeline_asr_result@0:
     cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_common_voice
@@ -988,7 +988,7 @@ stages:
       nfiles: 8143
   pipeline_word_wer@8:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs
-      --asr=techmo
+      --asr=ajn
     deps:
     - path: experiment/pipeline_process_word_wer.py
       md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
@@ -997,70 +997,70 @@ stages:
       md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
       size: 236272072
       nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__result
+      md5: 545e63a6daf9c46387c1d7d40b85499f.dir
+      size: 1413262
+      nfiles: 758
     - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
       md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
       size: 975209
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
-      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
-      size: 1880403
-      nfiles: 758
     outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_classic_alignment
-      md5: 689776c77c4ecaa11578b53480100ecc.dir
-      size: 3821036
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_classic_alignment
+      md5: 6437952d0ab383e44acca5fa70d02e54.dir
+      size: 3981739
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_classic_metrics
-      md5: 8d0d99fd7d965d4070c0b391cd6fa2b0.dir
-      size: 23030
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_classic_metrics
+      md5: a40ae0901f53a33231a4c3dc18708d2c.dir
+      size: 22935
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_embeddings_alignment
-      md5: 0ec5fe337bde2254c91146fd16b9c6af.dir
-      size: 7995553
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_embeddings_alignment
+      md5: 06e64fca6fc73e1cc9e7e86b21eb11f3.dir
+      size: 8318047
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_embeddings_metrics
-      md5: 55eb6eb0aacc12e3e2caf224b4b89df1.dir
-      size: 53591
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_embeddings_metrics
+      md5: 68da1ded61389ae5d866bc762c61363c.dir
+      size: 33029
       nfiles: 758
   pipeline_word_wer@6:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs
-      --asr=google
+    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_voicelab_cbiz
+      --asr=techmo
     deps:
     - path: experiment/pipeline_process_word_wer.py
       md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
       size: 2125
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__result
-      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
-      size: 1377134
-      nfiles: 758
+    - path: experiment_data/dataset/pl_voicelab_cbiz
+      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
+      size: 4803739404
+      nfiles: 1600
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
+      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
+      size: 21846798
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result
+      md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir
+      size: 39158267
+      nfiles: 800
     outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_classic_alignment
-      md5: 9beb57e9be598275ac9d449618da440e.dir
-      size: 3818553
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_classic_metrics
-      md5: 984770b57ca48fd793099c9ed67791d0.dir
-      size: 23344
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_embeddings_alignment
-      md5: aeb20f1662b696a6fc11bcd39a02a0de.dir
-      size: 7984964
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_embeddings_metrics
-      md5: b2626f92231431e163813ef7112c119d.dir
-      size: 53916
-      nfiles: 758
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_classic_alignment
+      md5: 37f0c27869efd72bf3eaedae70e426d5.dir
+      size: 83756423
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_classic_metrics
+      md5: 2fe3288abe85e4a385e2aefa0e8cad7e.dir
+      size: 27780
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_embeddings_alignment
+      md5: 01badff31f4d2f6583481c332fae8abc.dir
+      size: 174322727
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_embeddings_metrics
+      md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir
+      size: 56182
+      nfiles: 800
   pipeline_word_wer@17:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14
-      --asr=wav2vec2
+      --asr=techmo
     deps:
     - path: experiment/pipeline_process_word_wer.py
       md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
@@ -1073,30 +1073,30 @@ stages:
       md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
       size: 689374
       nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
-      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
-      size: 3523907
+    - path: experiment_data/pipeline/pl_minds14/techmo__result
+      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
+      size: 1336305
       nfiles: 562
     outs:
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_classic_alignment
-      md5: c1d947bbd7bb1dff8d39e03ad4a1d11e.dir
-      size: 2811807
+    - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_classic_alignment
+      md5: bb8b304a25b60f779c27ca4d6c9183ba.dir
+      size: 2723010
       nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_classic_metrics
-      md5: a3477ae81fcba4a8e21b044f279425bb.dir
-      size: 17073
+    - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_classic_metrics
+      md5: b6a4f05d71bd3fbed796d54b1ac2b29d.dir
+      size: 17198
       nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_embeddings_alignment
-      md5: cbdcbfe7157f28d70ca55b787483e26b.dir
-      size: 5870633
+    - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_embeddings_alignment
+      md5: 9956c918227d6b76f26ac6540a34e931.dir
+      size: 5673712
       nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_embeddings_metrics
-      md5: c3395e582b4562a33e97419134363d30.dir
-      size: 37892
+    - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_embeddings_metrics
+      md5: de8e393488ff05187c7866272675fd8e.dir
+      size: 39775
       nfiles: 562
   pipeline_word_wer@15:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14
-      --asr=ajn
+      --asr=google
     deps:
     - path: experiment/pipeline_process_word_wer.py
       md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
@@ -1105,34 +1105,34 @@ stages:
       md5: d751713988987e9331980363e24189ce.dir
       size: 0
       nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/ajn__result
-      md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir
-      size: 974727
-      nfiles: 559
     - path: experiment_data/pipeline/pl_minds14/gold_transcript
       md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
       size: 689374
       nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/google__result
+      md5: 3f7a79298a5156fd2b023e673326e72f.dir
+      size: 985004
+      nfiles: 562
     outs:
-    - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_classic_alignment
-      md5: c03a478840305afb1eadf4752b3a5678.dir
-      size: 2851613
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_classic_metrics
-      md5: 6d6d430e7f3672c67b9c78eb7d0ea296.dir
-      size: 15213
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_embeddings_alignment
-      md5: ae577cd5886aced83d7de4ba47bb4457.dir
-      size: 5960904
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_embeddings_metrics
-      md5: c599599e5935075cd26ac89e0d3b5f1b.dir
-      size: 22438
-      nfiles: 559
+    - path: experiment_data/pipeline/pl_minds14/google__word_wer_classic_alignment
+      md5: 8469a41c345e0c21accb960782a49e75.dir
+      size: 2725626
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/google__word_wer_classic_metrics
+      md5: 41621d3845a0035ee77352902c46a186.dir
+      size: 15012
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/google__word_wer_embeddings_alignment
+      md5: a0e3d4ddda612480de739c898e79bf73.dir
+      size: 5686170
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/google__word_wer_embeddings_metrics
+      md5: 673f6b46c7735163726f7d9e3b1f2dd4.dir
+      size: 40260
+      nfiles: 562
   pipeline_word_wer@12:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna
-      --asr=techmo
+      --asr=ajn
     deps:
     - path: experiment/pipeline_process_word_wer.py
       md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
@@ -1141,70 +1141,70 @@ stages:
       md5: d342155b1871e881797cf7da09d5dc3c.dir
       size: 1578358645
       nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/ajn__result
+      md5: 653d65e186a7d05958ce3cbef219038c.dir
+      size: 6159899
+      nfiles: 494
     - path: experiment_data/pipeline/pl_luna/gold_transcript
       md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
       size: 6706925
       nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__result
-      md5: 0e596570e1502b38588427bc72dcc006.dir
-      size: 9697519
-      nfiles: 500
     outs:
-    - path: experiment_data/pipeline/pl_luna/techmo__word_wer_classic_alignment
-      md5: 5e90722635a811db67a1f0d917707b0a.dir
-      size: 21380796
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__word_wer_classic_metrics
-      md5: 4cfbb2830b280084ece14b1ef815b92a.dir
-      size: 17298
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__word_wer_embeddings_alignment
-      md5: 74f8be84e1e913050483713fbc945d80.dir
-      size: 44326962
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__word_wer_embeddings_metrics
-      md5: 98a7edeee3b630e8e301acfc578a8393.dir
-      size: 34869
-      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/ajn__word_wer_classic_alignment
+      md5: e25ae51f8dc1afe55d5c0b44a67ab8ee.dir
+      size: 20671277
+      nfiles: 494
+    - path: experiment_data/pipeline/pl_luna/ajn__word_wer_classic_metrics
+      md5: 18605657ff9c7ef3221e27b671a3b4d1.dir
+      size: 16835
+      nfiles: 494
+    - path: experiment_data/pipeline/pl_luna/ajn__word_wer_embeddings_alignment
+      md5: 6be0a1c035f4a84a9035bfba1458cdac.dir
+      size: 43099546
+      nfiles: 494
+    - path: experiment_data/pipeline/pl_luna/ajn__word_wer_embeddings_metrics
+      md5: 4f368d2ba1c5a54d5e3ab69a7581549e.dir
+      size: 19326
+      nfiles: 494
   pipeline_word_wer@10:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna
-      --asr=google
+    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs
+      --asr=wav2vec2
     deps:
     - path: experiment/pipeline_process_word_wer.py
       md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
       size: 2125
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__result
-      md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir
-      size: 5346497
-      nfiles: 500
-    outs:
-    - path: experiment_data/pipeline/pl_luna/google__word_wer_classic_alignment
-      md5: 5bdee43e646a4e9470310073365ebc37.dir
-      size: 19650202
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__word_wer_classic_metrics
-      md5: 9951fb70382cc8bed9aa48d1185c1c7c.dir
-      size: 16989
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__word_wer_embeddings_alignment
-      md5: a06bd5f0b4c52c679315b6c6d7478084.dir
-      size: 40586004
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__word_wer_embeddings_metrics
-      md5: 351543fb54e2098ac07999482d2280a8.dir
-      size: 34530
-      nfiles: 500
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
+      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
+      size: 5137721
+      nfiles: 758
+    outs:
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_classic_alignment
+      md5: 052fa8f97b2f9d5b8ca4175b4e92dfd1.dir
+      size: 3854678
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_classic_metrics
+      md5: d9dec56ef7ddb6a3bc03638e37d1c04c.dir
+      size: 23168
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_embeddings_alignment
+      md5: 069191de9f84f37303bc202968f26766.dir
+      size: 8069804
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_embeddings_metrics
+      md5: b45390641698e71fdf420fbb2afec7e0.dir
+      size: 53351
+      nfiles: 758
   pipeline_word_wer@13:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna
-      --asr=wav2vec2
+      --asr=techmo
     deps:
     - path: experiment/pipeline_process_word_wer.py
       md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
@@ -1217,66 +1217,66 @@ stages:
       md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
       size: 6706925
       nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__result
-      md5: 9c63b061ac7763144bca121e163ee7aa.dir
-      size: 20658485
-      nfiles: 456
+    - path: experiment_data/pipeline/pl_luna/techmo__result
+      md5: 0e596570e1502b38588427bc72dcc006.dir
+      size: 9697519
+      nfiles: 500
     outs:
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_classic_alignment
-      md5: 4d4ae25d4fac2a52893e60389fcd2f9e.dir
-      size: 18462856
-      nfiles: 456
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_classic_metrics
-      md5: f209479afd2482800f1e75bdab9f83b2.dir
-      size: 15604
-      nfiles: 456
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_embeddings_alignment
-      md5: 41ad4c0923986a90fc9ed12aa6e30f73.dir
-      size: 38402586
-      nfiles: 456
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_embeddings_metrics
-      md5: df02c1f9d7e26ed10815175de086e0b6.dir
-      size: 24633
-      nfiles: 456
+    - path: experiment_data/pipeline/pl_luna/techmo__word_wer_classic_alignment
+      md5: 5e90722635a811db67a1f0d917707b0a.dir
+      size: 21380796
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/techmo__word_wer_classic_metrics
+      md5: 4cfbb2830b280084ece14b1ef815b92a.dir
+      size: 17298
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/techmo__word_wer_embeddings_alignment
+      md5: 74f8be84e1e913050483713fbc945d80.dir
+      size: 44326962
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/techmo__word_wer_embeddings_metrics
+      md5: 98a7edeee3b630e8e301acfc578a8393.dir
+      size: 34869
+      nfiles: 500
   pipeline_word_wer@3:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_voicelab_cbiz
-      --asr=google
+    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice
+      --asr=ajn
     deps:
     - path: experiment/pipeline_process_word_wer.py
       md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
       size: 2125
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result
-      md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir
-      size: 27432599
-      nfiles: 799
+    - path: experiment_data/dataset/pl_common_voice
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_common_voice/ajn__result
+      md5: 77d873041fe2952e3c45ee4ac6458061.dir
+      size: 6667841
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
+      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
+      size: 4093545
+      nfiles: 8155
     outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_classic_alignment
-      md5: 184cdfdd465db609f514891b0330ef67.dir
-      size: 83997172
-      nfiles: 799
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_classic_metrics
-      md5: c12eab8acb3cee0219fc8046691b24ab.dir
-      size: 27841
-      nfiles: 799
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_embeddings_alignment
-      md5: 1e7ef6690d2e36f368eb02a8dcba9ab0.dir
-      size: 174732507
-      nfiles: 799
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_embeddings_metrics
-      md5: c15414f2ec9c0a1ab78199861b0b4ce0.dir
-      size: 55765
-      nfiles: 799
+    - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_classic_alignment
+      md5: 7b7c5de97970c320cb8cf549839d16fd.dir
+      size: 17724868
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_classic_metrics
+      md5: b1bdb80faa8c728ea072632121a2f2d0.dir
+      size: 209927
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_embeddings_alignment
+      md5: efb06f9897e62077366362b2aab25d8c.dir
+      size: 36932578
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_embeddings_metrics
+      md5: a9132386ed7ccffcba68dfa0a1dca7ee.dir
+      size: 324358
+      nfiles: 8155
   pipeline_word_wer@16:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14
-      --asr=techmo
+      --asr=ajn
     deps:
     - path: experiment/pipeline_process_word_wer.py
       md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
@@ -1285,34 +1285,34 @@ stages:
       md5: d751713988987e9331980363e24189ce.dir
       size: 0
       nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/ajn__result
+      md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir
+      size: 974727
+      nfiles: 559
     - path: experiment_data/pipeline/pl_minds14/gold_transcript
       md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
       size: 689374
       nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__result
-      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
-      size: 1336305
-      nfiles: 562
     outs:
-    - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_classic_alignment
-      md5: bb8b304a25b60f779c27ca4d6c9183ba.dir
-      size: 2723010
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_classic_metrics
-      md5: b6a4f05d71bd3fbed796d54b1ac2b29d.dir
-      size: 17198
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_embeddings_alignment
-      md5: 9956c918227d6b76f26ac6540a34e931.dir
-      size: 5673712
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_embeddings_metrics
-      md5: de8e393488ff05187c7866272675fd8e.dir
-      size: 39775
-      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_classic_alignment
+      md5: c03a478840305afb1eadf4752b3a5678.dir
+      size: 2851613
+      nfiles: 559
+    - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_classic_metrics
+      md5: 6d6d430e7f3672c67b9c78eb7d0ea296.dir
+      size: 15213
+      nfiles: 559
+    - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_embeddings_alignment
+      md5: ae577cd5886aced83d7de4ba47bb4457.dir
+      size: 5960904
+      nfiles: 559
+    - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_embeddings_metrics
+      md5: c599599e5935075cd26ac89e0d3b5f1b.dir
+      size: 22438
+      nfiles: 559
   pipeline_word_wer@11:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna
-      --asr=ajn
+      --asr=google
     deps:
     - path: experiment/pipeline_process_word_wer.py
       md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
@@ -1321,34 +1321,34 @@ stages:
       md5: d342155b1871e881797cf7da09d5dc3c.dir
       size: 1578358645
       nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/ajn__result
-      md5: 653d65e186a7d05958ce3cbef219038c.dir
-      size: 6159899
-      nfiles: 494
     - path: experiment_data/pipeline/pl_luna/gold_transcript
       md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
       size: 6706925
       nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/google__result
+      md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir
+      size: 5346497
+      nfiles: 500
     outs:
-    - path: experiment_data/pipeline/pl_luna/ajn__word_wer_classic_alignment
-      md5: e25ae51f8dc1afe55d5c0b44a67ab8ee.dir
-      size: 20671277
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/ajn__word_wer_classic_metrics
-      md5: 18605657ff9c7ef3221e27b671a3b4d1.dir
-      size: 16835
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/ajn__word_wer_embeddings_alignment
-      md5: 6be0a1c035f4a84a9035bfba1458cdac.dir
-      size: 43099546
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/ajn__word_wer_embeddings_metrics
-      md5: 4f368d2ba1c5a54d5e3ab69a7581549e.dir
-      size: 19326
-      nfiles: 494
+    - path: experiment_data/pipeline/pl_luna/google__word_wer_classic_alignment
+      md5: 5bdee43e646a4e9470310073365ebc37.dir
+      size: 19650202
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/google__word_wer_classic_metrics
+      md5: 9951fb70382cc8bed9aa48d1185c1c7c.dir
+      size: 16989
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/google__word_wer_embeddings_alignment
+      md5: a06bd5f0b4c52c679315b6c6d7478084.dir
+      size: 40586004
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/google__word_wer_embeddings_metrics
+      md5: 351543fb54e2098ac07999482d2280a8.dir
+      size: 34530
+      nfiles: 500
   pipeline_word_wer@7:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs
-      --asr=ajn
+      --asr=google
     deps:
     - path: experiment/pipeline_process_word_wer.py
       md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
@@ -1357,30 +1357,30 @@ stages:
       md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
       size: 236272072
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__result
-      md5: 545e63a6daf9c46387c1d7d40b85499f.dir
-      size: 1413262
-      nfiles: 758
     - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
       md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
       size: 975209
       nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/google__result
+      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
+      size: 1377134
+      nfiles: 758
     outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_classic_alignment
-      md5: 6437952d0ab383e44acca5fa70d02e54.dir
-      size: 3981739
+    - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_classic_alignment
+      md5: 9beb57e9be598275ac9d449618da440e.dir
+      size: 3818553
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_classic_metrics
-      md5: a40ae0901f53a33231a4c3dc18708d2c.dir
-      size: 22935
+    - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_classic_metrics
+      md5: 984770b57ca48fd793099c9ed67791d0.dir
+      size: 23344
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_embeddings_alignment
-      md5: 06e64fca6fc73e1cc9e7e86b21eb11f3.dir
-      size: 8318047
+    - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_embeddings_alignment
+      md5: aeb20f1662b696a6fc11bcd39a02a0de.dir
+      size: 7984964
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_embeddings_metrics
-      md5: 68da1ded61389ae5d866bc762c61363c.dir
-      size: 33029
+    - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_embeddings_metrics
+      md5: b2626f92231431e163813ef7112c119d.dir
+      size: 53916
       nfiles: 758
   pipeline_word_wer@2:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice
@@ -1419,41 +1419,41 @@ stages:
       size: 571563
       nfiles: 8136
   pipeline_word_wer@14:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14
-      --asr=google
+    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna
+      --asr=wav2vec2
     deps:
     - path: experiment/pipeline_process_word_wer.py
       md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
       size: 2125
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__result
-      md5: 3f7a79298a5156fd2b023e673326e72f.dir
-      size: 985004
-      nfiles: 562
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__result
+      md5: 9c63b061ac7763144bca121e163ee7aa.dir
+      size: 20658485
+      nfiles: 456
     outs:
-    - path: experiment_data/pipeline/pl_minds14/google__word_wer_classic_alignment
-      md5: 8469a41c345e0c21accb960782a49e75.dir
-      size: 2725626
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__word_wer_classic_metrics
-      md5: 41621d3845a0035ee77352902c46a186.dir
-      size: 15012
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__word_wer_embeddings_alignment
-      md5: a0e3d4ddda612480de739c898e79bf73.dir
-      size: 5686170
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__word_wer_embeddings_metrics
-      md5: 673f6b46c7735163726f7d9e3b1f2dd4.dir
-      size: 40260
-      nfiles: 562
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_classic_alignment
+      md5: 4d4ae25d4fac2a52893e60389fcd2f9e.dir
+      size: 18462856
+      nfiles: 456
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_classic_metrics
+      md5: f209479afd2482800f1e75bdab9f83b2.dir
+      size: 15604
+      nfiles: 456
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_embeddings_alignment
+      md5: 41ad4c0923986a90fc9ed12aa6e30f73.dir
+      size: 38402586
+      nfiles: 456
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_embeddings_metrics
+      md5: df02c1f9d7e26ed10815175de086e0b6.dir
+      size: 24633
+      nfiles: 456
   pipeline_word_wer@0:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice
       --asr=google
@@ -1492,7 +1492,7 @@ stages:
       nfiles: 8143
   pipeline_word_wer@5:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_voicelab_cbiz
-      --asr=techmo
+      --asr=ajn
     deps:
     - path: experiment/pipeline_process_word_wer.py
       md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
@@ -1501,31 +1501,31 @@ stages:
       md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
       size: 4803739404
       nfiles: 1600
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result
+      md5: 7de1137f44fad26766da0fc309720160.dir
+      size: 22765926
+      nfiles: 800
     - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
       md5: ebffd3814a48564f4e33b9a4e0956af3.dir
       size: 21846798
       nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result
-      md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir
-      size: 39158267
-      nfiles: 800
     outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_classic_alignment
-      md5: 37f0c27869efd72bf3eaedae70e426d5.dir
-      size: 83756423
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_classic_metrics
-      md5: 2fe3288abe85e4a385e2aefa0e8cad7e.dir
-      size: 27780
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_embeddings_alignment
-      md5: 01badff31f4d2f6583481c332fae8abc.dir
-      size: 174322727
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_classic_alignment
+      md5: 1ed03de918f5373afdbdbb020c6161b5.dir
+      size: 78992762
       nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_embeddings_metrics
-      md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir
-      size: 56182
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_classic_metrics
+      md5: 04f6ccbaf94cf08c34ac201ae079c21c.dir
+      size: 25307
       nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_embeddings_alignment
+      md5: a67e7a6e8a6e0755ea35a519d9decf86.dir
+      size: 128258410
+      nfiles: 646
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_embeddings_metrics
+      md5: c83561c448950860f36037c2287a25f5.dir
+      size: 25194
+      nfiles: 646
   pipeline_word_wer@1:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice
       --asr=wav2vec2
@@ -1564,7 +1564,7 @@ stages:
       nfiles: 8154
   pipeline_word_wer@9:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs
-      --asr=wav2vec2
+      --asr=techmo
     deps:
     - path: experiment/pipeline_process_word_wer.py
       md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
@@ -1577,30 +1577,30 @@ stages:
       md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
       size: 975209
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
-      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
-      size: 5137721
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
+      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
+      size: 1880403
       nfiles: 758
     outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_classic_alignment
-      md5: 052fa8f97b2f9d5b8ca4175b4e92dfd1.dir
-      size: 3854678
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_classic_alignment
+      md5: 689776c77c4ecaa11578b53480100ecc.dir
+      size: 3821036
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_classic_metrics
-      md5: d9dec56ef7ddb6a3bc03638e37d1c04c.dir
-      size: 23168
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_classic_metrics
+      md5: 8d0d99fd7d965d4070c0b391cd6fa2b0.dir
+      size: 23030
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_embeddings_alignment
-      md5: 069191de9f84f37303bc202968f26766.dir
-      size: 8069804
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_embeddings_alignment
+      md5: 0ec5fe337bde2254c91146fd16b9c6af.dir
+      size: 7995553
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_embeddings_metrics
-      md5: b45390641698e71fdf420fbb2afec7e0.dir
-      size: 53351
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_embeddings_metrics
+      md5: 55eb6eb0aacc12e3e2caf224b4b89df1.dir
+      size: 53591
       nfiles: 758
   pipeline_word_wer@4:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_voicelab_cbiz
-      --asr=ajn
+      --asr=google
     deps:
     - path: experiment/pipeline_process_word_wer.py
       md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
@@ -1609,65 +1609,65 @@ stages:
       md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
       size: 4803739404
       nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result
-      md5: 7de1137f44fad26766da0fc309720160.dir
-      size: 22765926
-      nfiles: 800
     - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
       md5: ebffd3814a48564f4e33b9a4e0956af3.dir
       size: 21846798
       nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result
+      md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir
+      size: 27432599
+      nfiles: 799
     outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_classic_alignment
-      md5: 1ed03de918f5373afdbdbb020c6161b5.dir
-      size: 78992762
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_classic_metrics
-      md5: 04f6ccbaf94cf08c34ac201ae079c21c.dir
-      size: 25307
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_embeddings_alignment
-      md5: a67e7a6e8a6e0755ea35a519d9decf86.dir
-      size: 128258410
-      nfiles: 646
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_embeddings_metrics
-      md5: c83561c448950860f36037c2287a25f5.dir
-      size: 25194
-      nfiles: 646
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_classic_alignment
+      md5: 184cdfdd465db609f514891b0330ef67.dir
+      size: 83997172
+      nfiles: 799
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_classic_metrics
+      md5: c12eab8acb3cee0219fc8046691b24ab.dir
+      size: 27841
+      nfiles: 799
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_embeddings_alignment
+      md5: 1e7ef6690d2e36f368eb02a8dcba9ab0.dir
+      size: 174732507
+      nfiles: 799
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_embeddings_metrics
+      md5: c15414f2ec9c0a1ab78199861b0b4ce0.dir
+      size: 55765
+      nfiles: 799
   pipeline_spacy_pos_wer@14:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna
       --asr=wav2vec2
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
       size: 1469
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
-      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
-      size: 3523907
-      nfiles: 562
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__result
+      md5: 9c63b061ac7763144bca121e163ee7aa.dir
+      size: 20658485
+      nfiles: 456
     outs:
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_pos_alignment
-      md5: 0d04963621be1dd6a1c81225734de652.dir
-      size: 2724276
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_pos_metrics
-      md5: 818882d685785a9d7d9b8d757c4c8e90.dir
-      size: 6840
-      nfiles: 562
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_pos_alignment
+      md5: a791fae1ca5599b9422b94d4c6ce760a.dir
+      size: 17967467
+      nfiles: 456
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_pos_metrics
+      md5: 7848ddff997fd231f3857ff30dfd7154.dir
+      size: 7940
+      nfiles: 456
   pipeline_spacy_pos_wer@2:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice
       --asr=techmo
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 273f598ac14ea7b894189bf0d553a39a
+      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
       size: 1469
     - path: experiment_data/dataset/pl_common_voice
       md5: d751713988987e9331980363e24189ce.dir
@@ -1683,7 +1683,7 @@ stages:
       nfiles: 8136
     outs:
     - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_pos_alignment
-      md5: 1d2f7fbcca280abdbfb91e83f70e9789.dir
+      md5: c0c77bf3030799c32241555bf4d71e6b.dir
       size: 18192387
       nfiles: 8136
     - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_pos_metrics
@@ -1692,7 +1692,7 @@ stages:
       nfiles: 8136
   pipeline_spacy_pos_wer@16:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14
-      --asr=techmo
+      --asr=ajn
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
@@ -1701,51 +1701,51 @@ stages:
       md5: d751713988987e9331980363e24189ce.dir
       size: 0
       nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/ajn__result
+      md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir
+      size: 974727
+      nfiles: 559
     - path: experiment_data/pipeline/pl_minds14/gold_transcript
       md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
       size: 689374
       nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__result
-      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
-      size: 1336305
-      nfiles: 562
     outs:
-    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_pos_alignment
-      md5: aebbf34f80eb8151d8cf4d1ed840757c.dir
-      size: 2640387
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_pos_metrics
-      md5: 7d555363b27e9c0d0ab7d0e3011c6d13.dir
-      size: 6095
-      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_pos_alignment
+      md5: 013fbb68289e1c35986bcaaa773c396c.dir
+      size: 3178442
+      nfiles: 559
+    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_pos_metrics
+      md5: 014fbe751a4f357f78f073920890e06d.dir
+      size: 7059
+      nfiles: 559
   pipeline_spacy_pos_wer@6:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs
-      --asr=google
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_voicelab_cbiz
+      --asr=techmo
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 1475a345757c2c9d699c0d4bc6c194f6
-      size: 1734
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__result
-      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
-      size: 1377134
-      nfiles: 758
-    outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_pos_alignment
-      md5: 39ad38a46e1489b88629b8512ca09ea3.dir
-      size: 3712618
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_pos_metrics
-      md5: 56dddb48cea2022b91fd4323efd43a8b.dir
-      size: 9213
-      nfiles: 758
+      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
+      size: 1469
+    - path: experiment_data/dataset/pl_voicelab_cbiz
+      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
+      size: 4803739404
+      nfiles: 1600
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
+      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
+      size: 21846798
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result
+      md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir
+      size: 39158267
+      nfiles: 800
+    outs:
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_pos_alignment
+      md5: 678171dbd7c038cd6eaadc5eb331b8b5.dir
+      size: 81650836
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_pos_metrics
+      md5: 11320499f29d2d7bfce68d35fb352b83.dir
+      size: 14334
+      nfiles: 800
   pipeline_spacy_pos_wer@0:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice
       --asr=google
@@ -1776,91 +1776,91 @@ stages:
       nfiles: 8143
   pipeline_spacy_pos_wer@11:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna
-      --asr=ajn
+      --asr=google
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 273f598ac14ea7b894189bf0d553a39a
+      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
       size: 1469
     - path: experiment_data/dataset/pl_luna
       md5: d342155b1871e881797cf7da09d5dc3c.dir
       size: 1578358645
       nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/ajn__result
-      md5: 653d65e186a7d05958ce3cbef219038c.dir
-      size: 6159899
-      nfiles: 494
     - path: experiment_data/pipeline/pl_luna/gold_transcript
       md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
       size: 6706925
       nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/google__result
+      md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir
+      size: 5346497
+      nfiles: 500
     outs:
-    - path: experiment_data/pipeline/pl_luna/ajn__spacy_pos_alignment
-      md5: f12f57b3229bbbd18fb6584d6891b5cc.dir
-      size: 21936929
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/ajn__spacy_pos_metrics
-      md5: 6c6bb673ea5f64c9d851878c9d8a7c09.dir
-      size: 8444
-      nfiles: 494
+    - path: experiment_data/pipeline/pl_luna/google__spacy_pos_alignment
+      md5: 460edffd716b1f46fb863e5b2b0d949e.dir
+      size: 19342263
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/google__spacy_pos_metrics
+      md5: a65dd7d74319da06f4ebaca08cde30ce.dir
+      size: 8659
+      nfiles: 500
   pipeline_spacy_pos_wer@15:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14
-      --asr=ajn
+      --asr=google
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 1475a345757c2c9d699c0d4bc6c194f6
-      size: 1734
+      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
+      size: 1469
     - path: experiment_data/dataset/pl_minds14
       md5: d751713988987e9331980363e24189ce.dir
       size: 0
       nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/ajn__result
-      md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir
-      size: 974727
-      nfiles: 559
     - path: experiment_data/pipeline/pl_minds14/gold_transcript
       md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
       size: 689374
       nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/google__result
+      md5: 3f7a79298a5156fd2b023e673326e72f.dir
+      size: 985004
+      nfiles: 562
     outs:
-    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_pos_alignment
-      md5: 101ee541a794af147e48dd460afc227f.dir
-      size: 3178442
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_pos_metrics
-      md5: 014fbe751a4f357f78f073920890e06d.dir
-      size: 7059
-      nfiles: 559
+    - path: experiment_data/pipeline/pl_minds14/google__spacy_pos_alignment
+      md5: 1c0c5687fc5b80e2a178e2a4618aa04c.dir
+      size: 2659852
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/google__spacy_pos_metrics
+      md5: aeda105b01366dee65935d3c07fe3444.dir
+      size: 4970
+      nfiles: 562
   pipeline_spacy_pos_wer@12:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna
       --asr=ajn
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
       size: 1469
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/ajn__result
-      md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir
-      size: 974727
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
-      size: 689374
-      nfiles: 562
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/ajn__result
+      md5: 653d65e186a7d05958ce3cbef219038c.dir
+      size: 6159899
+      nfiles: 494
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
     outs:
-    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_pos_alignment
-      md5: 013fbb68289e1c35986bcaaa773c396c.dir
-      size: 3178442
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_pos_metrics
-      md5: 014fbe751a4f357f78f073920890e06d.dir
-      size: 7059
-      nfiles: 559
+    - path: experiment_data/pipeline/pl_luna/ajn__spacy_pos_alignment
+      md5: 0f310b8840f2d09ac558b9bac2a0d3b0.dir
+      size: 21936929
+      nfiles: 494
+    - path: experiment_data/pipeline/pl_luna/ajn__spacy_pos_metrics
+      md5: 6c6bb673ea5f64c9d851878c9d8a7c09.dir
+      size: 8444
+      nfiles: 494
   pipeline_spacy_pos_wer@17:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14
-      --asr=wav2vec2
+      --asr=techmo
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
@@ -1873,18 +1873,18 @@ stages:
       md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
       size: 689374
       nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
-      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
-      size: 3523907
+    - path: experiment_data/pipeline/pl_minds14/techmo__result
+      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
+      size: 1336305
       nfiles: 562
     outs:
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_pos_alignment
-      md5: 0d04963621be1dd6a1c81225734de652.dir
-      size: 2724276
+    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_pos_alignment
+      md5: aebbf34f80eb8151d8cf4d1ed840757c.dir
+      size: 2640387
       nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_pos_metrics
-      md5: 818882d685785a9d7d9b8d757c4c8e90.dir
-      size: 6840
+    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_pos_metrics
+      md5: 7d555363b27e9c0d0ab7d0e3011c6d13.dir
+      size: 6095
       nfiles: 562
   pipeline_spacy_pos_wer@1:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice
@@ -1915,92 +1915,36 @@ stages:
       size: 97426
       nfiles: 8154
   pipeline_spacy_pos_wer@7:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs
       --asr=google
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
       size: 1469
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__result
-      md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir
-      size: 5346497
-      nfiles: 500
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/google__result
+      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
+      size: 1377134
+      nfiles: 758
     outs:
-    - path: experiment_data/pipeline/pl_luna/google__spacy_pos_alignment
-      md5: 460edffd716b1f46fb863e5b2b0d949e.dir
-      size: 19342263
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__spacy_pos_metrics
-      md5: a65dd7d74319da06f4ebaca08cde30ce.dir
-      size: 8659
-      nfiles: 500
+    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_pos_alignment
+      md5: 3429e870a0541d01eb85b0a34fd16021.dir
+      size: 3712618
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_pos_metrics
+      md5: 56dddb48cea2022b91fd4323efd43a8b.dir
+      size: 9213
+      nfiles: 758
   pipeline_spacy_pos_wer@10:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__result
-      md5: 9c63b061ac7763144bca121e163ee7aa.dir
-      size: 20658485
-      nfiles: 456
-    outs:
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_pos_alignment
-      md5: a791fae1ca5599b9422b94d4c6ce760a.dir
-      size: 17967467
-      nfiles: 456
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_pos_metrics
-      md5: 7848ddff997fd231f3857ff30dfd7154.dir
-      size: 7940
-      nfiles: 456
-  pipeline_spacy_pos_wer@13:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14
-      --asr=techmo
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__result
-      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
-      size: 1336305
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_pos_alignment
-      md5: aebbf34f80eb8151d8cf4d1ed840757c.dir
-      size: 2640387
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_pos_metrics
-      md5: 7d555363b27e9c0d0ab7d0e3011c6d13.dir
-      size: 6095
-      nfiles: 562
-  pipeline_spacy_pos_wer@5:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs
-      --asr=techmo
+      --asr=wav2vec2
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
@@ -2013,50 +1957,106 @@ stages:
       md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
       size: 975209
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
-      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
-      size: 1880403
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
+      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
+      size: 5137721
       nfiles: 758
     outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_pos_alignment
-      md5: d76701778be18566a4d3a45bc325196e.dir
-      size: 3707699
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_pos_alignment
+      md5: 3b2e3e75c644805681905c97ec37cf28.dir
+      size: 3737151
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_pos_metrics
-      md5: 9cae08bbb8a6331d06a33dbbb4a16301.dir
-      size: 9662
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_pos_metrics
+      md5: 782cc84e9116281dfc28734b2ae4a5ea.dir
+      size: 9004
       nfiles: 758
-  pipeline_spacy_ner_wer@4:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_voicelab_cbiz
-      --asr=ajn
+  pipeline_spacy_pos_wer@13:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna
+      --asr=techmo
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
       size: 1469
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result
-      md5: 7de1137f44fad26766da0fc309720160.dir
-      size: 22765926
-      nfiles: 800
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/techmo__result
+      md5: 0e596570e1502b38588427bc72dcc006.dir
+      size: 9697519
+      nfiles: 500
+    outs:
+    - path: experiment_data/pipeline/pl_luna/techmo__spacy_pos_alignment
+      md5: a5cb7dd59d83b2ec678c6ba5177bc38f.dir
+      size: 20897599
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/techmo__spacy_pos_metrics
+      md5: 1478707020a96496b50eb732207c290e.dir
+      size: 8841
+      nfiles: 500
+  pipeline_spacy_pos_wer@5:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_voicelab_cbiz
+      --asr=ajn
+    deps:
+    - path: experiment/pipeline_process_spacy_pos_wer.py
+      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
+      size: 1469
+    - path: experiment_data/dataset/pl_voicelab_cbiz
+      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
+      size: 4803739404
+      nfiles: 1600
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result
+      md5: 7de1137f44fad26766da0fc309720160.dir
+      size: 22765926
+      nfiles: 800
     - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
       md5: ebffd3814a48564f4e33b9a4e0956af3.dir
       size: 21846798
       nfiles: 800
     outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_ner_alignment
-      md5: 48a512d6811d00ec9dbe60e3131834ac.dir
-      size: 76044699
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_pos_alignment
+      md5: 4f07fe60bf29e2148879db0c903c98f6.dir
+      size: 78539613
       nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_ner_metrics
-      md5: 61799b677b4d6af5e41c947c1d3b02cd.dir
-      size: 14196
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_pos_metrics
+      md5: 7ec3bb7c838e7f06b8a1dbe7a68faac2.dir
+      size: 13753
+      nfiles: 800
+  pipeline_spacy_ner_wer@4:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_voicelab_cbiz
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_spacy_pos_wer.py
+      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
+      size: 1469
+    - path: experiment_data/dataset/pl_voicelab_cbiz
+      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
+      size: 4803739404
+      nfiles: 1600
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
+      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
+      size: 21846798
       nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result
+      md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir
+      size: 27432599
+      nfiles: 799
+    outs:
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_ner_alignment
+      md5: a83b6e086aa207da109fc06293501f52.dir
+      size: 80594165
+      nfiles: 799
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_ner_metrics
+      md5: 85388729ffbed2a328f94758c682c809.dir
+      size: 14708
+      nfiles: 799
   pipeline_spacy_ner_wer@16:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14
-      --asr=techmo
+      --asr=ajn
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
@@ -2065,26 +2065,26 @@ stages:
       md5: d751713988987e9331980363e24189ce.dir
       size: 0
       nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/ajn__result
+      md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir
+      size: 974727
+      nfiles: 559
     - path: experiment_data/pipeline/pl_minds14/gold_transcript
       md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
       size: 689374
       nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__result
-      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
-      size: 1336305
-      nfiles: 562
     outs:
-    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_ner_alignment
-      md5: b9b4f2685f681ad7334ac60236a6ebe7.dir
-      size: 2573182
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_ner_metrics
-      md5: d306e0d18df9ee86f853719244bcfd35.dir
-      size: 5484
-      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_ner_alignment
+      md5: 69280464b63869e12150a53f35f096b0.dir
+      size: 3070948
+      nfiles: 559
+    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_ner_metrics
+      md5: 5e97918fe79741d3d054d3ba77bc1537.dir
+      size: 7316
+      nfiles: 559
   pipeline_spacy_ner_wer@9:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs
-      --asr=wav2vec2
+      --asr=techmo
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
@@ -2097,22 +2097,22 @@ stages:
       md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
       size: 975209
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
-      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
-      size: 5137721
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
+      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
+      size: 1880403
       nfiles: 758
     outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_ner_alignment
-      md5: d04f82c751e6ef28414b35ba4d844470.dir
-      size: 3660762
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_ner_alignment
+      md5: 628a77d036410a40bd358db2fc8ff739.dir
+      size: 3626842
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_ner_metrics
-      md5: 308e3f0f67d855482d7d3634954f4b57.dir
-      size: 7553
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_ner_metrics
+      md5: cda4c4246e8b3b6d57aed57c4fe5594a.dir
+      size: 8311
       nfiles: 758
   pipeline_spacy_ner_wer@15:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14
-      --asr=ajn
+      --asr=google
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
@@ -2121,23 +2121,23 @@ stages:
       md5: d751713988987e9331980363e24189ce.dir
       size: 0
       nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/ajn__result
-      md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir
-      size: 974727
-      nfiles: 559
     - path: experiment_data/pipeline/pl_minds14/gold_transcript
       md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
       size: 689374
       nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/google__result
+      md5: 3f7a79298a5156fd2b023e673326e72f.dir
+      size: 985004
+      nfiles: 562
     outs:
-    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_ner_alignment
-      md5: 69280464b63869e12150a53f35f096b0.dir
-      size: 3070948
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_ner_metrics
-      md5: 5e97918fe79741d3d054d3ba77bc1537.dir
-      size: 7316
-      nfiles: 559
+    - path: experiment_data/pipeline/pl_minds14/google__spacy_ner_alignment
+      md5: b251bb8eda29c1644a26b4db0bcf0766.dir
+      size: 2599569
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/google__spacy_ner_metrics
+      md5: 85361b5e0ac555a688bbc7dd4477e7a8.dir
+      size: 4545
+      nfiles: 562
   pipeline_spacy_ner_wer@1:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice
       --asr=wav2vec2
@@ -2207,173 +2207,173 @@ stages:
       size: 3523907
       nfiles: 562
   pipeline_spacy_ner_wer@12:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna
       --asr=ajn
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
       size: 1469
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/ajn__result
-      md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir
-      size: 974727
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
-      size: 689374
-      nfiles: 562
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/ajn__result
+      md5: 653d65e186a7d05958ce3cbef219038c.dir
+      size: 6159899
+      nfiles: 494
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
     outs:
-    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_ner_alignment
-      md5: 69280464b63869e12150a53f35f096b0.dir
-      size: 3070948
-      nfiles: 559
-    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_ner_metrics
-      md5: 5e97918fe79741d3d054d3ba77bc1537.dir
-      size: 7316
-      nfiles: 559
+    - path: experiment_data/pipeline/pl_luna/ajn__spacy_ner_alignment
+      md5: 68434e48e2e6652ae6c9b17f9eb6fa05.dir
+      size: 21154450
+      nfiles: 494
+    - path: experiment_data/pipeline/pl_luna/ajn__spacy_ner_metrics
+      md5: 288d81ffe41a25d5ab85bbb3ef908979.dir
+      size: 8703
+      nfiles: 494
   pipeline_spacy_ner_wer@13:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna
       --asr=techmo
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
       size: 1469
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__result
-      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
-      size: 1336305
-      nfiles: 562
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/techmo__result
+      md5: 0e596570e1502b38588427bc72dcc006.dir
+      size: 9697519
+      nfiles: 500
     outs:
-    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_ner_alignment
-      md5: b9b4f2685f681ad7334ac60236a6ebe7.dir
-      size: 2573182
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_ner_metrics
-      md5: d306e0d18df9ee86f853719244bcfd35.dir
-      size: 5484
-      nfiles: 562
+    - path: experiment_data/pipeline/pl_luna/techmo__spacy_ner_alignment
+      md5: c301698fa01d07cfb3efb16ffbb06b69.dir
+      size: 20451389
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/techmo__spacy_ner_metrics
+      md5: 06cea0926a325c92a1ff79457db655a8.dir
+      size: 8918
+      nfiles: 500
   pipeline_spacy_ner_wer@11:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna
       --asr=google
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
       size: 1469
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__result
-      md5: 3f7a79298a5156fd2b023e673326e72f.dir
-      size: 985004
-      nfiles: 562
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/google__result
+      md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir
+      size: 5346497
+      nfiles: 500
     outs:
-    - path: experiment_data/pipeline/pl_minds14/google__spacy_ner_alignment
-      md5: b251bb8eda29c1644a26b4db0bcf0766.dir
-      size: 2599569
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__spacy_ner_metrics
-      md5: 85361b5e0ac555a688bbc7dd4477e7a8.dir
-      size: 4545
-      nfiles: 562
+    - path: experiment_data/pipeline/pl_luna/google__spacy_ner_alignment
+      md5: 84a872f90985c615648ce02ea8a6951c.dir
+      size: 18946509
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/google__spacy_ner_metrics
+      md5: 7df5fdeb083c8b1e953e1ff097c69f11.dir
+      size: 8752
+      nfiles: 500
   pipeline_spacy_ner_wer@6:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs
-      --asr=wav2vec2
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_voicelab_cbiz
+      --asr=techmo
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
       size: 1469
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
-      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
-      size: 5137721
-      nfiles: 758
+    - path: experiment_data/dataset/pl_voicelab_cbiz
+      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
+      size: 4803739404
+      nfiles: 1600
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
+      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
+      size: 21846798
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result
+      md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir
+      size: 39158267
+      nfiles: 800
     outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_ner_alignment
-      md5: d04f82c751e6ef28414b35ba4d844470.dir
-      size: 3660762
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_ner_metrics
-      md5: 308e3f0f67d855482d7d3634954f4b57.dir
-      size: 7553
-      nfiles: 758
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_ner_alignment
+      md5: 3c7bf6f59153f0eef4685795dc8c1246.dir
+      size: 79503990
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_ner_metrics
+      md5: f0dd0a8adb67c53d83ad32d152f90365.dir
+      size: 14595
+      nfiles: 800
   pipeline_spacy_ner_wer@5:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs
-      --asr=techmo
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_voicelab_cbiz
+      --asr=ajn
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
       size: 1469
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
-      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
-      size: 1880403
-      nfiles: 758
+    - path: experiment_data/dataset/pl_voicelab_cbiz
+      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
+      size: 4803739404
+      nfiles: 1600
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result
+      md5: 7de1137f44fad26766da0fc309720160.dir
+      size: 22765926
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
+      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
+      size: 21846798
+      nfiles: 800
     outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_ner_alignment
-      md5: 628a77d036410a40bd358db2fc8ff739.dir
-      size: 3626842
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_ner_metrics
-      md5: cda4c4246e8b3b6d57aed57c4fe5594a.dir
-      size: 8311
-      nfiles: 758
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_ner_alignment
+      md5: 48a512d6811d00ec9dbe60e3131834ac.dir
+      size: 76044699
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_ner_metrics
+      md5: 61799b677b4d6af5e41c947c1d3b02cd.dir
+      size: 14196
+      nfiles: 800
   pipeline_spacy_pos_wer@3:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs
-      --asr=google
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice
+      --asr=ajn
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
       size: 1469
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__result
-      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
-      size: 1377134
-      nfiles: 758
+    - path: experiment_data/dataset/pl_common_voice
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_common_voice/ajn__result
+      md5: 77d873041fe2952e3c45ee4ac6458061.dir
+      size: 6667841
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
+      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
+      size: 4093545
+      nfiles: 8155
     outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_pos_alignment
-      md5: 3429e870a0541d01eb85b0a34fd16021.dir
-      size: 3712618
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_pos_metrics
-      md5: 56dddb48cea2022b91fd4323efd43a8b.dir
-      size: 9213
-      nfiles: 758
+    - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_pos_alignment
+      md5: f3b7dd0987fc7be5171478910c950657.dir
+      size: 19159060
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_pos_metrics
+      md5: 6094fb960e2eab979ecb33d40a253531.dir
+      size: 95146
+      nfiles: 8155
   pipeline_spacy_ner_wer@0:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice
       --asr=google
@@ -2403,12 +2403,40 @@ stages:
       size: 97917
       nfiles: 8143
   pipeline_spacy_pos_wer@9:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs
       --asr=techmo
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
       size: 1469
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
+      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
+      size: 1880403
+      nfiles: 758
+    outs:
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_pos_alignment
+      md5: d76701778be18566a4d3a45bc325196e.dir
+      size: 3707699
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_pos_metrics
+      md5: 9cae08bbb8a6331d06a33dbbb4a16301.dir
+      size: 9662
+      nfiles: 758
+  pipeline_wikineiural_ner@11:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_wikineural_ner.py
+      md5: c2c4e92a33346a3c097a321f1f5f2af3
+      size: 1802
     - path: experiment_data/dataset/pl_luna
       md5: d342155b1871e881797cf7da09d5dc3c.dir
       size: 1578358645
@@ -2417,110 +2445,54 @@ stages:
       md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
       size: 6706925
       nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__result
-      md5: 0e596570e1502b38588427bc72dcc006.dir
-      size: 9697519
+    - path: experiment_data/pipeline/pl_luna/google__result
+      md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir
+      size: 5346497
       nfiles: 500
     outs:
-    - path: experiment_data/pipeline/pl_luna/techmo__spacy_pos_alignment
-      md5: a5cb7dd59d83b2ec678c6ba5177bc38f.dir
-      size: 20897599
+    - path: experiment_data/pipeline/pl_luna/google__wikineural_ner_alignment
+      md5: dbfd406215b6d3b8ed5afcceb92034cb.dir
+      size: 34436914
       nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__spacy_pos_metrics
-      md5: 1478707020a96496b50eb732207c290e.dir
-      size: 8841
+    - path: experiment_data/pipeline/pl_luna/google__wikineural_ner_metrics
+      md5: d4ca8091dac498b609dca9fda6d22f07.dir
+      size: 8807
       nfiles: 500
-  pipeline_wikineiural_ner@11:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14
-      --asr=google
-    deps:
-    - path: experiment/pipeline_process_wikineural_ner.py
-      md5: c2c4e92a33346a3c097a321f1f5f2af3
-      size: 1802
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__result
-      md5: 3f7a79298a5156fd2b023e673326e72f.dir
-      size: 985004
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/google__wikineural_ner_alignment
-      md5: e632c40f440b7b59e2d94771e29f2dea.dir
-      size: 5692456
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__wikineural_ner_metrics
-      md5: 0385f2138fd318df8852a7e38c1770ac.dir
-      size: 5322
-      nfiles: 562
   pipeline_wikineiural_ner@8:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs
       --asr=ajn
     deps:
     - path: experiment/pipeline_process_wikineural_ner.py
       md5: c2c4e92a33346a3c097a321f1f5f2af3
       size: 1802
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/ajn__result
-      md5: 653d65e186a7d05958ce3cbef219038c.dir
-      size: 6159899
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__result
+      md5: 545e63a6daf9c46387c1d7d40b85499f.dir
+      size: 1413262
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
     outs:
-    - path: experiment_data/pipeline/pl_luna/ajn__wikineural_ner_alignment
-      md5: 52c8c06ce74db727c4aa5f55a2a865cd.dir
-      size: 37478125
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/ajn__wikineural_ner_metrics
-      md5: 602eaef0046a2a8b551bbbced7123886.dir
-      size: 8816
-      nfiles: 494
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__wikineural_ner_alignment
+      md5: 83f073c2166a9a2fcfc6dc4d2268efb4.dir
+      size: 7530856
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__wikineural_ner_metrics
+      md5: 7def95d54e9428f71fa43a5b9a95b54b.dir
+      size: 12379
+      nfiles: 758
   pipeline_wikineiural_ner@14:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna
       --asr=wav2vec2
     deps:
     - path: experiment/pipeline_process_wikineural_ner.py
       md5: c2c4e92a33346a3c097a321f1f5f2af3
       size: 1802
-    - path: experiment_data/dataset/pl_minds14
-      md5: d751713988987e9331980363e24189ce.dir
-      size: 0
-      nfiles: 0
-    - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
-      size: 689374
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
-      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
-      size: 3523907
-      nfiles: 562
-    outs:
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__wikineural_ner_alignment
-      md5: 5bc3f08dd32f7769c6c6c0b25df52bf8.dir
-      size: 5895431
-      nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/wav2vec2__wikineural_ner_metrics
-      md5: 6535a4779ab3be38804639e16cbbe70f.dir
-      size: 8602
-      nfiles: 562
-  pipeline_spacy_ner_wer@10:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna
-      --asr=wav2vec2
-    deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
     - path: experiment_data/dataset/pl_luna
       md5: d342155b1871e881797cf7da09d5dc3c.dir
       size: 1578358645
@@ -2534,17 +2506,17 @@ stages:
       size: 20658485
       nfiles: 456
     outs:
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_ner_alignment
-      md5: 4530ad915e82cc9668e775d5de219b13.dir
-      size: 17577804
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__wikineural_ner_alignment
+      md5: 4a1ad7d9221851e3735f1c3c2238862b.dir
+      size: 33934003
       nfiles: 456
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_ner_metrics
-      md5: 0ad2dc0beeebd336771228f8751fe028.dir
-      size: 8014
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__wikineural_ner_metrics
+      md5: 0368dfd54954d3aa317e359f32efe59e.dir
+      size: 8244
       nfiles: 456
-  pipeline_spacy_ner_wer@3:
+  pipeline_spacy_ner_wer@10:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs
-      --asr=google
+      --asr=wav2vec2
     deps:
     - path: experiment/pipeline_process_spacy_pos_wer.py
       md5: 3817c96d1d91d2cf2d8ec7fe570f1472
@@ -2557,19 +2529,47 @@ stages:
       md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
       size: 975209
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__result
-      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
-      size: 1377134
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
+      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
+      size: 5137721
       nfiles: 758
     outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_ner_alignment
-      md5: 67ea10ecbb9f5c3bfa14ffb85c843ba3.dir
-      size: 3638477
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_ner_alignment
+      md5: d04f82c751e6ef28414b35ba4d844470.dir
+      size: 3660762
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_ner_metrics
-      md5: 0532a9596f26fd52037b6ecaa838ab9e.dir
-      size: 8500
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_ner_metrics
+      md5: 308e3f0f67d855482d7d3634954f4b57.dir
+      size: 7553
       nfiles: 758
+  pipeline_spacy_ner_wer@3:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice
+      --asr=ajn
+    deps:
+    - path: experiment/pipeline_process_spacy_pos_wer.py
+      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
+      size: 1469
+    - path: experiment_data/dataset/pl_common_voice
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_common_voice/ajn__result
+      md5: 77d873041fe2952e3c45ee4ac6458061.dir
+      size: 6667841
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
+      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
+      size: 4093545
+      nfiles: 8155
+    outs:
+    - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_ner_alignment
+      md5: 4b5e01b54c2ce8118580ad02d6dffca1.dir
+      size: 18576865
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_ner_metrics
+      md5: 15078a7515bc939d08d29d079311b37d.dir
+      size: 91570
+      nfiles: 8155
   pipeline_spacy_ner_wer@2:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice
       --asr=techmo
@@ -2599,40 +2599,180 @@ stages:
       size: 97994
       nfiles: 8136
   pipeline_wikineiural_ner@3:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs
-      --asr=google
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice
+      --asr=ajn
     deps:
     - path: experiment/pipeline_process_wikineural_ner.py
       md5: c2c4e92a33346a3c097a321f1f5f2af3
       size: 1802
+    - path: experiment_data/dataset/pl_common_voice
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_common_voice/ajn__result
+      md5: 77d873041fe2952e3c45ee4ac6458061.dir
+      size: 6667841
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
+      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
+      size: 4093545
+      nfiles: 8155
+    outs:
+    - path: experiment_data/pipeline/pl_common_voice/ajn__wikineural_ner_alignment
+      md5: 493bf2c6d82f18a117f2ae6d438d5c30.dir
+      size: 36352970
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/ajn__wikineural_ner_metrics
+      md5: 2275d851a39aafa755474fe79b16b337.dir
+      size: 117588
+      nfiles: 8155
+  pipeline_spacy_pos_wer@8:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs
+      --asr=ajn
+    deps:
+    - path: experiment/pipeline_process_spacy_pos_wer.py
+      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
+      size: 1469
     - path: experiment_data/dataset/pl_google_fleurs
       md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
       size: 236272072
       nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__result
+      md5: 545e63a6daf9c46387c1d7d40b85499f.dir
+      size: 1413262
+      nfiles: 758
     - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
       md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
       size: 975209
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__result
-      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
-      size: 1377134
-      nfiles: 758
     outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/google__wikineural_ner_alignment
-      md5: 39e078edbc3f65934358787ddbe40eec.dir
-      size: 7572934
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_pos_alignment
+      md5: 210ec7ffdde2c076fe2ab72873f240c7.dir
+      size: 3799802
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__wikineural_ner_metrics
-      md5: f8b3c4183e31fc9e612b189579644f74.dir
-      size: 10796
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_pos_metrics
+      md5: 0d95a4abb4a33aef3e242a304fd58698.dir
+      size: 10569
       nfiles: 758
-  pipeline_spacy_pos_wer@8:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna
+  pipeline_wikineiural_ner@1:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice
+      --asr=wav2vec2
+    deps:
+    - path: experiment/pipeline_process_wikineural_ner.py
+      md5: c2c4e92a33346a3c097a321f1f5f2af3
+      size: 1802
+    - path: experiment_data/dataset/pl_common_voice
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
+      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
+      size: 4093545
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result
+      md5: b0d0042d77d7adce37890ca63ad40091.dir
+      size: 19014997
+      nfiles: 8154
+    outs:
+    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__wikineural_ner_alignment
+      md5: 9613a3fcb28ead3d3d39a16180ccfab8.dir
+      size: 37574594
+      nfiles: 8154
+    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__wikineural_ner_metrics
+      md5: 59ff9a09d1e232c66bd41afa757f9130.dir
+      size: 118556
+      nfiles: 8154
+  pipeline_flair_upos@13:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna
+      --asr=techmo
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/techmo__result
+      md5: 0e596570e1502b38588427bc72dcc006.dir
+      size: 9697519
+      nfiles: 500
+    outs:
+    - path: experiment_data/pipeline/pl_luna/techmo__flair_upos_alignment
+      md5: 4319f089e3987d68b4622f864c17ad6c.dir
+      size: 20898162
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/techmo__flair_upos_metrics
+      md5: 0063866f11c1d82a4ba430f14c81ed8f.dir
+      size: 8844
+      nfiles: 500
+  pipeline_flair_upos@0:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_common_voice
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
+      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
+      size: 4093545
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/google__result
+      md5: afb53476cc93ef4de3591908df41fd2a.dir
+      size: 5854366
+      nfiles: 8143
+    outs:
+    - path: experiment_data/pipeline/pl_common_voice/google__flair_upos_alignment
+      md5: 169eb8cd967ea75404bd3bd7f5a41b5e.dir
+      size: 17519056
+      nfiles: 8143
+    - path: experiment_data/pipeline/pl_common_voice/google__flair_upos_metrics
+      md5: 768158db51050c79dd588c8b32e5b89e.dir
+      size: 95486
+      nfiles: 8143
+  pipeline_wikineiural_ner@13:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna
+      --asr=techmo
+    deps:
+    - path: experiment/pipeline_process_wikineural_ner.py
+      md5: c2c4e92a33346a3c097a321f1f5f2af3
+      size: 1802
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/techmo__result
+      md5: 0e596570e1502b38588427bc72dcc006.dir
+      size: 9697519
+      nfiles: 500
+    outs:
+    - path: experiment_data/pipeline/pl_luna/techmo__wikineural_ner_alignment
+      md5: 6819012ed6e05015753687b12eea426c.dir
+      size: 37042138
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/techmo__wikineural_ner_metrics
+      md5: 47cd0bc5270bf7e3300c91f211b7a6f8.dir
+      size: 9011
+      nfiles: 500
+  pipeline_flair_upos@12:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna
       --asr=ajn
     deps:
-    - path: experiment/pipeline_process_spacy_pos_wer.py
-      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
-      size: 1469
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
     - path: experiment_data/dataset/pl_luna
       md5: d342155b1871e881797cf7da09d5dc3c.dir
       size: 1578358645
@@ -2646,11 +2786,1587 @@ stages:
       size: 6706925
       nfiles: 500
     outs:
-    - path: experiment_data/pipeline/pl_luna/ajn__spacy_pos_alignment
-      md5: 0f310b8840f2d09ac558b9bac2a0d3b0.dir
-      size: 21936929
+    - path: experiment_data/pipeline/pl_luna/ajn__flair_upos_alignment
+      md5: 8aa7d2eeb9deb1bde3394ab5df9c15a0.dir
+      size: 21838304
       nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/ajn__spacy_pos_metrics
-      md5: 6c6bb673ea5f64c9d851878c9d8a7c09.dir
-      size: 8444
+    - path: experiment_data/pipeline/pl_luna/ajn__flair_upos_metrics
+      md5: 4ccf8f54da1b47d0a9e0e7c95020135e.dir
+      size: 8450
       nfiles: 494
+  pipeline_wikineiural_ner@10:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs
+      --asr=wav2vec2
+    deps:
+    - path: experiment/pipeline_process_wikineural_ner.py
+      md5: c2c4e92a33346a3c097a321f1f5f2af3
+      size: 1802
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
+      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
+      size: 5137721
+      nfiles: 758
+    outs:
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__wikineural_ner_alignment
+      md5: 320e1321cb3db403a9ec0bb662abe52c.dir
+      size: 7828240
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__wikineural_ner_metrics
+      md5: 0738885f0f798e8bc0fbb5c9824d9db1.dir
+      size: 9704
+      nfiles: 758
+  pipeline_wikineiural_ner@2:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice
+      --asr=techmo
+    deps:
+    - path: experiment/pipeline_process_wikineural_ner.py
+      md5: c2c4e92a33346a3c097a321f1f5f2af3
+      size: 1802
+    - path: experiment_data/dataset/pl_common_voice
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
+      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
+      size: 4093545
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/techmo__result
+      md5: 9030cf3640f2749d9c1b4439687bdc2f.dir
+      size: 7761880
+      nfiles: 8136
+    outs:
+    - path: experiment_data/pipeline/pl_common_voice/techmo__wikineural_ner_alignment
+      md5: 40094adf33d391cd6abb6199c326dfd7.dir
+      size: 37097148
+      nfiles: 8136
+    - path: experiment_data/pipeline/pl_common_voice/techmo__wikineural_ner_metrics
+      md5: 4570a295cf89a779de66565b0b9a77ad.dir
+      size: 118544
+      nfiles: 8136
+  pipeline_flair_upos@11:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/google__result
+      md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir
+      size: 5346497
+      nfiles: 500
+    outs:
+    - path: experiment_data/pipeline/pl_luna/google__flair_upos_alignment
+      md5: 8a23016ee1f269beec30232dff1f751b.dir
+      size: 19210117
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/google__flair_upos_metrics
+      md5: 891ec9646dea43249aa9cb4eb562b2de.dir
+      size: 8654
+      nfiles: 500
+  pipeline_wikineiural_ner@4:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_voicelab_cbiz
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_wikineural_ner.py
+      md5: c2c4e92a33346a3c097a321f1f5f2af3
+      size: 1802
+    - path: experiment_data/dataset/pl_voicelab_cbiz
+      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
+      size: 4803739404
+      nfiles: 1600
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
+      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
+      size: 21846798
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result
+      md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir
+      size: 27432599
+      nfiles: 799
+    outs:
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__wikineural_ner_alignment
+      md5: f977050b1f0445e9997f902f91634976.dir
+      size: 144157380
+      nfiles: 799
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__wikineural_ner_metrics
+      md5: 9a9056d774f8e901fa2beaa9a874b0a2.dir
+      size: 14800
+      nfiles: 799
+  pipeline_flair_upos@9:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs
+      --asr=techmo
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
+      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
+      size: 1880403
+      nfiles: 758
+    outs:
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__flair_upos_alignment
+      md5: 527561b0790917cc03d782d5bf074294.dir
+      size: 3695215
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__flair_upos_metrics
+      md5: 06a349eee2ca119d3e79bbdbe5c95ef1.dir
+      size: 9384
+      nfiles: 758
+  pipeline_wikineiural_ner@9:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs
+      --asr=techmo
+    deps:
+    - path: experiment/pipeline_process_wikineural_ner.py
+      md5: c2c4e92a33346a3c097a321f1f5f2af3
+      size: 1802
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
+      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
+      size: 1880403
+      nfiles: 758
+    outs:
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__wikineural_ner_alignment
+      md5: 2fdab231aaf8c96a97268d6447f3ca1e.dir
+      size: 7640523
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__wikineural_ner_metrics
+      md5: 14d453272229bc7b8d7c4eb2790e6b32.dir
+      size: 10705
+      nfiles: 758
+  pipeline_flair_upos@14:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna
+      --asr=wav2vec2
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__result
+      md5: 9c63b061ac7763144bca121e163ee7aa.dir
+      size: 20658485
+      nfiles: 456
+    outs:
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__flair_upos_alignment
+      md5: ac11e3b8f28fd268a8bb90f851bc7b5b.dir
+      size: 17899335
+      nfiles: 456
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__flair_upos_metrics
+      md5: 56807b890f4abcb9c41f60ff72a2e6aa.dir
+      size: 8051
+      nfiles: 456
+  pipeline_wikineiural_ner@0:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_wikineural_ner.py
+      md5: c2c4e92a33346a3c097a321f1f5f2af3
+      size: 1802
+    - path: experiment_data/dataset/pl_common_voice
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
+      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
+      size: 4093545
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/google__result
+      md5: afb53476cc93ef4de3591908df41fd2a.dir
+      size: 5854366
+      nfiles: 8143
+    outs:
+    - path: experiment_data/pipeline/pl_common_voice/google__wikineural_ner_alignment
+      md5: 1adc1718f35e14b2aae8de6c6a49de1b.dir
+      size: 37254218
+      nfiles: 8143
+    - path: experiment_data/pipeline/pl_common_voice/google__wikineural_ner_metrics
+      md5: a8b91d2203368be020389a8a02ecbb7b.dir
+      size: 117584
+      nfiles: 8143
+  pipeline_wikineiural_ner@12:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna
+      --asr=ajn
+    deps:
+    - path: experiment/pipeline_process_wikineural_ner.py
+      md5: c2c4e92a33346a3c097a321f1f5f2af3
+      size: 1802
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/ajn__result
+      md5: 653d65e186a7d05958ce3cbef219038c.dir
+      size: 6159899
+      nfiles: 494
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
+    outs:
+    - path: experiment_data/pipeline/pl_luna/ajn__wikineural_ner_alignment
+      md5: 52c8c06ce74db727c4aa5f55a2a865cd.dir
+      size: 37478125
+      nfiles: 494
+    - path: experiment_data/pipeline/pl_luna/ajn__wikineural_ner_metrics
+      md5: 602eaef0046a2a8b551bbbced7123886.dir
+      size: 8816
+      nfiles: 494
+  pipeline_wikineiural_ner@5:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_voicelab_cbiz
+      --asr=ajn
+    deps:
+    - path: experiment/pipeline_process_wikineural_ner.py
+      md5: c2c4e92a33346a3c097a321f1f5f2af3
+      size: 1802
+    - path: experiment_data/dataset/pl_voicelab_cbiz
+      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
+      size: 4803739404
+      nfiles: 1600
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result
+      md5: 7de1137f44fad26766da0fc309720160.dir
+      size: 22765926
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
+      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
+      size: 21846798
+      nfiles: 800
+    outs:
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__wikineural_ner_alignment
+      md5: 3363be14202fdb2577c8ddfce6549751.dir
+      size: 132627376
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__wikineural_ner_metrics
+      md5: b83cd24583e6fa648f4e106239e1ffcd.dir
+      size: 14486
+      nfiles: 800
+  pipeline_flair_upos@3:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice
+      --asr=ajn
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_common_voice
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_common_voice/ajn__result
+      md5: 77d873041fe2952e3c45ee4ac6458061.dir
+      size: 6667841
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
+      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
+      size: 4093545
+      nfiles: 8155
+    outs:
+    - path: experiment_data/pipeline/pl_common_voice/ajn__flair_upos_alignment
+      md5: d8fd6968dfa66671d8fb8cc609782287.dir
+      size: 17420454
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/ajn__flair_upos_metrics
+      md5: 619e9209626ba47702db03fa56670078.dir
+      size: 93408
+      nfiles: 8155
+  pipeline_wikineiural_ner@7:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_wikineural_ner.py
+      md5: c2c4e92a33346a3c097a321f1f5f2af3
+      size: 1802
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/google__result
+      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
+      size: 1377134
+      nfiles: 758
+    outs:
+    - path: experiment_data/pipeline/pl_google_fleurs/google__wikineural_ner_alignment
+      md5: 39e078edbc3f65934358787ddbe40eec.dir
+      size: 7572934
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/google__wikineural_ner_metrics
+      md5: f8b3c4183e31fc9e612b189579644f74.dir
+      size: 10796
+      nfiles: 758
+  pipeline_spacy_ner_wer@8:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs
+      --asr=ajn
+    deps:
+    - path: experiment/pipeline_process_spacy_pos_wer.py
+      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
+      size: 1469
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__result
+      md5: 545e63a6daf9c46387c1d7d40b85499f.dir
+      size: 1413262
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    outs:
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_ner_alignment
+      md5: 34ddaf22d6ba136b8257344899902015.dir
+      size: 3675109
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_ner_metrics
+      md5: 9379479c875f0fe49d6da70c9b189d67.dir
+      size: 10663
+      nfiles: 758
+  pipeline_flair_upos@7:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/google__result
+      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
+      size: 1377134
+      nfiles: 758
+    outs:
+    - path: experiment_data/pipeline/pl_google_fleurs/google__flair_upos_alignment
+      md5: 7a58a3caced592a9f00af9515eb92413.dir
+      size: 3696853
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/google__flair_upos_metrics
+      md5: 680491a1374081f637ade245e79ca627.dir
+      size: 9562
+      nfiles: 758
+  pipeline_flair_upos@4:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_voicelab_cbiz
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_voicelab_cbiz
+      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
+      size: 4803739404
+      nfiles: 1600
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
+      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
+      size: 21846798
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result
+      md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir
+      size: 27432599
+      nfiles: 799
+    outs:
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__flair_upos_alignment
+      md5: 3446f07e21780b9319c2c22d88767dfc.dir
+      size: 81897055
+      nfiles: 799
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__flair_upos_metrics
+      md5: 06fc109e28016f32d68edf992b4c3072.dir
+      size: 14350
+      nfiles: 799
+  pipeline_spacy_pos_wer@4:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_voicelab_cbiz
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_spacy_pos_wer.py
+      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
+      size: 1469
+    - path: experiment_data/dataset/pl_voicelab_cbiz
+      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
+      size: 4803739404
+      nfiles: 1600
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
+      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
+      size: 21846798
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result
+      md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir
+      size: 27432599
+      nfiles: 799
+    outs:
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_pos_alignment
+      md5: 22ccae20301046da3e40e93daa680d53.dir
+      size: 83052124
+      nfiles: 799
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_pos_metrics
+      md5: 43d79d47ba1e91e86daf1f66aa18c941.dir
+      size: 14239
+      nfiles: 799
+  pipeline_flair_upos@1:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice
+      --asr=wav2vec2
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_common_voice
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
+      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
+      size: 4093545
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result
+      md5: b0d0042d77d7adce37890ca63ad40091.dir
+      size: 19014997
+      nfiles: 8154
+    outs:
+    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__flair_upos_alignment
+      md5: c2de0794eab6b838cb889cd218c65651.dir
+      size: 17543992
+      nfiles: 8154
+    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__flair_upos_metrics
+      md5: 5ec6a253f5152fb5215f5d4a1243ca4e.dir
+      size: 95798
+      nfiles: 8154
+  pipeline_flair_upos@5:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_voicelab_cbiz
+      --asr=ajn
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_voicelab_cbiz
+      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
+      size: 4803739404
+      nfiles: 1600
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result
+      md5: 7de1137f44fad26766da0fc309720160.dir
+      size: 22765926
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
+      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
+      size: 21846798
+      nfiles: 800
+    outs:
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__flair_upos_alignment
+      md5: d9c2a02375a3f9974ddf0ec4d64297b7.dir
+      size: 78332367
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__flair_upos_metrics
+      md5: 42f292cac09f5efc71215ac0f9bbf760.dir
+      size: 13841
+      nfiles: 800
+  pipeline_wikineiural_ner@6:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_voicelab_cbiz
+      --asr=techmo
+    deps:
+    - path: experiment/pipeline_process_wikineural_ner.py
+      md5: c2c4e92a33346a3c097a321f1f5f2af3
+      size: 1802
+    - path: experiment_data/dataset/pl_voicelab_cbiz
+      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
+      size: 4803739404
+      nfiles: 1600
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
+      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
+      size: 21846798
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result
+      md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir
+      size: 39158267
+      nfiles: 800
+    outs:
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__wikineural_ner_alignment
+      md5: fc4d6251b6c8d8253b99aa87d4c7a189.dir
+      size: 144096029
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__wikineural_ner_metrics
+      md5: dca61a52df606b9c3b510007a48c53ee.dir
+      size: 14810
+      nfiles: 800
+  pipeline_flair_upos@6:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_voicelab_cbiz
+      --asr=techmo
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_voicelab_cbiz
+      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
+      size: 4803739404
+      nfiles: 1600
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
+      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
+      size: 21846798
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result
+      md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir
+      size: 39158267
+      nfiles: 800
+    outs:
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__flair_upos_alignment
+      md5: b9435b169d923e56d45be4e3a489358d.dir
+      size: 81635416
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__flair_upos_metrics
+      md5: 00f24bd9e609cc89f29cb6c3cc8dba48.dir
+      size: 14348
+      nfiles: 800
+  pipeline_flair_upos@8:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs
+      --asr=ajn
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__result
+      md5: 545e63a6daf9c46387c1d7d40b85499f.dir
+      size: 1413262
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    outs:
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__flair_upos_alignment
+      md5: be13715f9aed5232f08da3be9095a797.dir
+      size: 3675596
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__flair_upos_metrics
+      md5: 27bb131921ad52d2235aeb2b7befe4d1.dir
+      size: 10590
+      nfiles: 758
+  pipeline_spacy_ner_wer@7:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_spacy_pos_wer.py
+      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
+      size: 1469
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/google__result
+      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
+      size: 1377134
+      nfiles: 758
+    outs:
+    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_ner_alignment
+      md5: 67ea10ecbb9f5c3bfa14ffb85c843ba3.dir
+      size: 3638477
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_ner_metrics
+      md5: 0532a9596f26fd52037b6ecaa838ab9e.dir
+      size: 8500
+      nfiles: 758
+  pipeline_spacy_ner_wer@14:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna
+      --asr=wav2vec2
+    deps:
+    - path: experiment/pipeline_process_spacy_pos_wer.py
+      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
+      size: 1469
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__result
+      md5: 9c63b061ac7763144bca121e163ee7aa.dir
+      size: 20658485
+      nfiles: 456
+    outs:
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_ner_alignment
+      md5: 4530ad915e82cc9668e775d5de219b13.dir
+      size: 17577804
+      nfiles: 456
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_ner_metrics
+      md5: 0ad2dc0beeebd336771228f8751fe028.dir
+      size: 8014
+      nfiles: 456
+  pipeline_flair_upos@10:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs
+      --asr=wav2vec2
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
+      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
+      size: 5137721
+      nfiles: 758
+    outs:
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__flair_upos_alignment
+      md5: 07ec971011a67b5e3ad7822fc717612c.dir
+      size: 3720804
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__flair_upos_metrics
+      md5: 642857d546ab39ca2d680fda663eab38.dir
+      size: 7889
+      nfiles: 758
+  pipeline_flair_upos@2:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice
+      --asr=techmo
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_common_voice
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
+      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
+      size: 4093545
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/techmo__result
+      md5: 9030cf3640f2749d9c1b4439687bdc2f.dir
+      size: 7761880
+      nfiles: 8136
+    outs:
+    - path: experiment_data/pipeline/pl_common_voice/techmo__flair_upos_alignment
+      md5: b2f57705be67ced0652ce670e1dde2cd.dir
+      size: 17397042
+      nfiles: 8136
+    - path: experiment_data/pipeline/pl_common_voice/techmo__flair_upos_metrics
+      md5: 09659b49a5c99187bf64cf922138a7c1.dir
+      size: 95391
+      nfiles: 8136
+  pipeline_spacy_ner_wer@17:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14
+      --asr=techmo
+    deps:
+    - path: experiment/pipeline_process_spacy_pos_wer.py
+      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
+      size: 1469
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      size: 689374
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/techmo__result
+      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
+      size: 1336305
+      nfiles: 562
+    outs:
+    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_ner_alignment
+      md5: b9b4f2685f681ad7334ac60236a6ebe7.dir
+      size: 2573182
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_ner_metrics
+      md5: d306e0d18df9ee86f853719244bcfd35.dir
+      size: 5484
+      nfiles: 562
+  pipeline_wikineiural_ner@16:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14
+      --asr=ajn
+    deps:
+    - path: experiment/pipeline_process_wikineural_ner.py
+      md5: c2c4e92a33346a3c097a321f1f5f2af3
+      size: 1802
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/ajn__result
+      md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir
+      size: 974727
+      nfiles: 559
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      size: 689374
+      nfiles: 562
+    outs:
+    - path: experiment_data/pipeline/pl_minds14/ajn__wikineural_ner_alignment
+      md5: 6be03b571a6462a5e3e4c96678f32fce.dir
+      size: 5689217
+      nfiles: 559
+    - path: experiment_data/pipeline/pl_minds14/ajn__wikineural_ner_metrics
+      md5: 6e1b50f31e4bd4f264e6a6a91da6cb05.dir
+      size: 8593
+      nfiles: 559
+  pipeline_flair_upos@17:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14
+      --asr=techmo
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      size: 689374
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/techmo__result
+      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
+      size: 1336305
+      nfiles: 562
+    outs:
+    - path: experiment_data/pipeline/pl_minds14/techmo__flair_upos_alignment
+      md5: fcfda34330dcd231409db4eff282a85a.dir
+      size: 2632853
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/techmo__flair_upos_metrics
+      md5: eda987cf12cfa026c8906eaa434e6090.dir
+      size: 6285
+      nfiles: 562
+  pipeline_wikineiural_ner@17:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14
+      --asr=techmo
+    deps:
+    - path: experiment/pipeline_process_wikineural_ner.py
+      md5: c2c4e92a33346a3c097a321f1f5f2af3
+      size: 1802
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      size: 689374
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/techmo__result
+      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
+      size: 1336305
+      nfiles: 562
+    outs:
+    - path: experiment_data/pipeline/pl_minds14/techmo__wikineural_ner_alignment
+      md5: cc2e98939b0f8ab82a9eb30a546cea2c.dir
+      size: 5641778
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/techmo__wikineural_ner_metrics
+      md5: 1025b67d8420594920a3cef4ed5c1a6a.dir
+      size: 8339
+      nfiles: 562
+  pipeline_flair_upos@15:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      size: 689374
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/google__result
+      md5: 3f7a79298a5156fd2b023e673326e72f.dir
+      size: 985004
+      nfiles: 562
+    outs:
+    - path: experiment_data/pipeline/pl_minds14/google__flair_upos_alignment
+      md5: 71c60e45f4045e34c221d9c22701aab7.dir
+      size: 2641461
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/google__flair_upos_metrics
+      md5: 9f97126702b6268ce448649d4d7a3666.dir
+      size: 5000
+      nfiles: 562
+  pipeline_wikineiural_ner@15:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_wikineural_ner.py
+      md5: c2c4e92a33346a3c097a321f1f5f2af3
+      size: 1802
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      size: 689374
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/google__result
+      md5: 3f7a79298a5156fd2b023e673326e72f.dir
+      size: 985004
+      nfiles: 562
+    outs:
+    - path: experiment_data/pipeline/pl_minds14/google__wikineural_ner_alignment
+      md5: e632c40f440b7b59e2d94771e29f2dea.dir
+      size: 5692456
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/google__wikineural_ner_metrics
+      md5: 0385f2138fd318df8852a7e38c1770ac.dir
+      size: 5322
+      nfiles: 562
+  pipeline_flair_upos@16:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14
+      --asr=ajn
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/ajn__result
+      md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir
+      size: 974727
+      nfiles: 559
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      size: 689374
+      nfiles: 562
+    outs:
+    - path: experiment_data/pipeline/pl_minds14/ajn__flair_upos_alignment
+      md5: 53a0fee0db0e7b4818c328730d33a8bc.dir
+      size: 3084431
+      nfiles: 559
+    - path: experiment_data/pipeline/pl_minds14/ajn__flair_upos_metrics
+      md5: a7791cb5b7b405e98b12834e1146d1bb.dir
+      size: 7147
+      nfiles: 559
+  pipeline_spacy_tag_wer@4:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_voicelab_cbiz
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_voicelab_cbiz
+      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
+      size: 4803739404
+      nfiles: 1600
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
+      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
+      size: 21846798
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result
+      md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir
+      size: 27432599
+      nfiles: 799
+    outs:
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_dep_tag_alignment
+      md5: e2028c14acd625109a465c36ef166e7a.dir
+      size: 83052124
+      nfiles: 799
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_dep_tag_metrics
+      md5: 43d79d47ba1e91e86daf1f66aa18c941.dir
+      size: 14239
+      nfiles: 799
+  pipeline_spacy_tag_wer@17:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14
+      --asr=techmo
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      size: 689374
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/techmo__result
+      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
+      size: 1336305
+      nfiles: 562
+    outs:
+    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_dep_tag_alignment
+      md5: 9ea9473e990f9b1adafd9a6fef5a05ec.dir
+      size: 2640387
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_dep_tag_metrics
+      md5: 7d555363b27e9c0d0ab7d0e3011c6d13.dir
+      size: 6095
+      nfiles: 562
+  pipeline_spacy_tag_wer@10:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs
+      --asr=wav2vec2
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
+      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
+      size: 5137721
+      nfiles: 758
+    outs:
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_dep_tag_alignment
+      md5: 4ca975e9b42db749a368760f5190805b.dir
+      size: 3737151
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_dep_tag_metrics
+      md5: 782cc84e9116281dfc28734b2ae4a5ea.dir
+      size: 9004
+      nfiles: 758
+  pipeline_spacy_tag_wer@0:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_common_voice
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
+      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
+      size: 4093545
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/google__result
+      md5: afb53476cc93ef4de3591908df41fd2a.dir
+      size: 5854366
+      nfiles: 8143
+    outs:
+    - path: experiment_data/pipeline/pl_common_voice/google__spacy_dep_tag_alignment
+      md5: 38e2f031c443eea54bf86af578d2b79d.dir
+      size: 18316770
+      nfiles: 8143
+    - path: experiment_data/pipeline/pl_common_voice/google__spacy_dep_tag_metrics
+      md5: 117611317774e81fb482ba9c71ec806b.dir
+      size: 97235
+      nfiles: 8143
+  pipeline_spacy_tag_wer@18:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14
+      --asr=wav2vec2
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      size: 689374
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
+      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
+      size: 3523907
+      nfiles: 562
+    outs:
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_dep_tag_alignment
+      md5: 0b266094262fe4ee6684527729caed32.dir
+      size: 2724276
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_dep_tag_metrics
+      md5: 818882d685785a9d7d9b8d757c4c8e90.dir
+      size: 6840
+      nfiles: 562
+  pipeline_spacy_tag_wer@15:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      size: 689374
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/google__result
+      md5: 3f7a79298a5156fd2b023e673326e72f.dir
+      size: 985004
+      nfiles: 562
+    outs:
+    - path: experiment_data/pipeline/pl_minds14/google__spacy_dep_tag_alignment
+      md5: 715afeb1c31961d4680f8b98ba61d4ad.dir
+      size: 2659852
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/google__spacy_dep_tag_metrics
+      md5: aeda105b01366dee65935d3c07fe3444.dir
+      size: 4970
+      nfiles: 562
+  pipeline_spacy_tag_wer@6:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_voicelab_cbiz
+      --asr=techmo
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_voicelab_cbiz
+      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
+      size: 4803739404
+      nfiles: 1600
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
+      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
+      size: 21846798
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result
+      md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir
+      size: 39158267
+      nfiles: 800
+    outs:
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_dep_tag_alignment
+      md5: 19c1d8b3e8704af06e943ba6962cf9ad.dir
+      size: 81650836
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_dep_tag_metrics
+      md5: 11320499f29d2d7bfce68d35fb352b83.dir
+      size: 14334
+      nfiles: 800
+  pipeline_spacy_tag_wer@14:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna
+      --asr=wav2vec2
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__result
+      md5: 9c63b061ac7763144bca121e163ee7aa.dir
+      size: 20658485
+      nfiles: 456
+    outs:
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_dep_tag_alignment
+      md5: 6716464936f4f35ba81a43eb2c2f37b0.dir
+      size: 17967467
+      nfiles: 456
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_dep_tag_metrics
+      md5: 7848ddff997fd231f3857ff30dfd7154.dir
+      size: 7940
+      nfiles: 456
+  pipeline_spacy_tag_wer@16:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14
+      --asr=ajn
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/ajn__result
+      md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir
+      size: 974727
+      nfiles: 559
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      size: 689374
+      nfiles: 562
+    outs:
+    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_dep_tag_alignment
+      md5: 372c26bb5e808d7856fed1c06b25de5e.dir
+      size: 3178442
+      nfiles: 559
+    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_dep_tag_metrics
+      md5: 014fbe751a4f357f78f073920890e06d.dir
+      size: 7059
+      nfiles: 559
+  pipeline_spacy_tag_wer@5:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_voicelab_cbiz
+      --asr=ajn
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_voicelab_cbiz
+      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
+      size: 4803739404
+      nfiles: 1600
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result
+      md5: 7de1137f44fad26766da0fc309720160.dir
+      size: 22765926
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
+      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
+      size: 21846798
+      nfiles: 800
+    outs:
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_dep_tag_alignment
+      md5: 3d4a9a912756443a1de46cf91f6e5805.dir
+      size: 78539613
+      nfiles: 800
+    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_dep_tag_metrics
+      md5: 7ec3bb7c838e7f06b8a1dbe7a68faac2.dir
+      size: 13753
+      nfiles: 800
+  pipeline_spacy_tag_wer@2:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice
+      --asr=techmo
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_common_voice
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
+      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
+      size: 4093545
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/techmo__result
+      md5: 9030cf3640f2749d9c1b4439687bdc2f.dir
+      size: 7761880
+      nfiles: 8136
+    outs:
+    - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_dep_tag_alignment
+      md5: 3e1f2b39cd9d82679013649a1ad8d983.dir
+      size: 18192387
+      nfiles: 8136
+    - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_dep_tag_metrics
+      md5: 61c69fcd287051f4ab7d1ffcc68a9aca.dir
+      size: 96845
+      nfiles: 8136
+  pipeline_wikineiural_ner@18:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14
+      --asr=wav2vec2
+    deps:
+    - path: experiment/pipeline_process_wikineural_ner.py
+      md5: c2c4e92a33346a3c097a321f1f5f2af3
+      size: 1802
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      size: 689374
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
+      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
+      size: 3523907
+      nfiles: 562
+    outs:
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__wikineural_ner_alignment
+      md5: 5bc3f08dd32f7769c6c6c0b25df52bf8.dir
+      size: 5895431
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__wikineural_ner_metrics
+      md5: 6535a4779ab3be38804639e16cbbe70f.dir
+      size: 8602
+      nfiles: 562
+  pipeline_spacy_tag_wer@8:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs
+      --asr=ajn
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__result
+      md5: 545e63a6daf9c46387c1d7d40b85499f.dir
+      size: 1413262
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    outs:
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_dep_tag_alignment
+      md5: 8ec2e9fc88d4b8ce5032bf809c1c025f.dir
+      size: 3799802
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_dep_tag_metrics
+      md5: 0d95a4abb4a33aef3e242a304fd58698.dir
+      size: 10569
+      nfiles: 758
+  pipeline_word_wer@18:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14
+      --asr=wav2vec2
+    deps:
+    - path: experiment/pipeline_process_word_wer.py
+      md5: 98c7d6b43efbe0c2e84b5ad44d01fec9
+      size: 2125
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      size: 689374
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
+      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
+      size: 3523907
+      nfiles: 562
+    outs:
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_classic_alignment
+      md5: c1d947bbd7bb1dff8d39e03ad4a1d11e.dir
+      size: 2811807
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_classic_metrics
+      md5: a3477ae81fcba4a8e21b044f279425bb.dir
+      size: 17073
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_embeddings_alignment
+      md5: cbdcbfe7157f28d70ca55b787483e26b.dir
+      size: 5870633
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_embeddings_metrics
+      md5: c3395e582b4562a33e97419134363d30.dir
+      size: 37892
+      nfiles: 562
+  pipeline_spacy_tag_wer@12:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna
+      --asr=ajn
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/ajn__result
+      md5: 653d65e186a7d05958ce3cbef219038c.dir
+      size: 6159899
+      nfiles: 494
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
+    outs:
+    - path: experiment_data/pipeline/pl_luna/ajn__spacy_dep_tag_alignment
+      md5: 034d072825c711a824f1280f4a390f74.dir
+      size: 21936929
+      nfiles: 494
+    - path: experiment_data/pipeline/pl_luna/ajn__spacy_dep_tag_metrics
+      md5: 6c6bb673ea5f64c9d851878c9d8a7c09.dir
+      size: 8444
+      nfiles: 494
+  pipeline_spacy_tag_wer@3:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice
+      --asr=ajn
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_common_voice
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_common_voice/ajn__result
+      md5: 77d873041fe2952e3c45ee4ac6458061.dir
+      size: 6667841
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
+      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
+      size: 4093545
+      nfiles: 8155
+    outs:
+    - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_dep_tag_alignment
+      md5: 10af363d90689138f55e3295f562efc4.dir
+      size: 19159060
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_dep_tag_metrics
+      md5: 6094fb960e2eab979ecb33d40a253531.dir
+      size: 95146
+      nfiles: 8155
+  pipeline_spacy_tag_wer@11:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/google__result
+      md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir
+      size: 5346497
+      nfiles: 500
+    outs:
+    - path: experiment_data/pipeline/pl_luna/google__spacy_dep_tag_alignment
+      md5: 4663cdc1bb88d7d6de3691c734fe0ab6.dir
+      size: 19342263
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/google__spacy_dep_tag_metrics
+      md5: a65dd7d74319da06f4ebaca08cde30ce.dir
+      size: 8659
+      nfiles: 500
+  pipeline_spacy_tag_wer@9:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs
+      --asr=techmo
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
+      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
+      size: 1880403
+      nfiles: 758
+    outs:
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_dep_tag_alignment
+      md5: 18da1eb1ac1485f74337ea502e395b57.dir
+      size: 3707699
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_dep_tag_metrics
+      md5: 9cae08bbb8a6331d06a33dbbb4a16301.dir
+      size: 9662
+      nfiles: 758
+  pipeline_spacy_tag_wer@1:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice
+      --asr=wav2vec2
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_common_voice
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_common_voice/gold_transcript
+      md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir
+      size: 4093545
+      nfiles: 8155
+    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result
+      md5: b0d0042d77d7adce37890ca63ad40091.dir
+      size: 19014997
+      nfiles: 8154
+    outs:
+    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_dep_tag_alignment
+      md5: c46f35654ac42c3ddfd14e0197f36ae5.dir
+      size: 18349138
+      nfiles: 8154
+    - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_dep_tag_metrics
+      md5: 71381fa7fd6c0cdba00f25c17fd6be5e.dir
+      size: 97426
+      nfiles: 8154
+  pipeline_flair_upos@18:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14
+      --asr=wav2vec2
+    deps:
+    - path: experiment/pipeline_process_flair_upos.py
+      md5: 2ce2de99df9c06c5d9b0833ca7cdffda
+      size: 1916
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      size: 689374
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
+      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
+      size: 3523907
+      nfiles: 562
+    outs:
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__flair_upos_alignment
+      md5: e543ccad47d9829fdb6dbf3238ed99cc.dir
+      size: 2702561
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__flair_upos_metrics
+      md5: 103738afc0ef4f24adf3b855181b1acb.dir
+      size: 6757
+      nfiles: 562
+  pipeline_spacy_pos_wer@18:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14
+      --asr=wav2vec2
+    deps:
+    - path: experiment/pipeline_process_spacy_pos_wer.py
+      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
+      size: 1469
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      size: 689374
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
+      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
+      size: 3523907
+      nfiles: 562
+    outs:
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_pos_alignment
+      md5: 0d04963621be1dd6a1c81225734de652.dir
+      size: 2724276
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_pos_metrics
+      md5: 818882d685785a9d7d9b8d757c4c8e90.dir
+      size: 6840
+      nfiles: 562
+  pipeline_spacy_tag_wer@13:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna
+      --asr=techmo
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/techmo__result
+      md5: 0e596570e1502b38588427bc72dcc006.dir
+      size: 9697519
+      nfiles: 500
+    outs:
+    - path: experiment_data/pipeline/pl_luna/techmo__spacy_dep_tag_alignment
+      md5: baefcd5dfadd9c62d6fc71ba0ac31fa9.dir
+      size: 20897599
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/techmo__spacy_dep_tag_metrics
+      md5: 1478707020a96496b50eb732207c290e.dir
+      size: 8841
+      nfiles: 500
+  pipeline_spacy_tag_wer@7:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs
+      --asr=google
+    deps:
+    - path: experiment/pipeline_process_spacy_dep_tag_wer.py
+      md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
+      size: 1489
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/google__result
+      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
+      size: 1377134
+      nfiles: 758
+    outs:
+    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_dep_tag_alignment
+      md5: c15aa30b165152fac6813cd092763242.dir
+      size: 3712618
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_dep_tag_metrics
+      md5: 56dddb48cea2022b91fd4323efd43a8b.dir
+      size: 9213
+      nfiles: 758
+  pipeline_spacy_ner_wer@18:
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14
+      --asr=wav2vec2
+    deps:
+    - path: experiment/pipeline_process_spacy_pos_wer.py
+      md5: 3817c96d1d91d2cf2d8ec7fe570f1472
+      size: 1469
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      size: 689374
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
+      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
+      size: 3523907
+      nfiles: 562
+    outs:
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_ner_alignment
+      md5: 5c0650273cc3a942beb9ec39c01866f1.dir
+      size: 2653625
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_ner_metrics
+      md5: 9aba9dba4721ced4ab9ae20a4f6b44cb.dir
+      size: 6092
+      nfiles: 562
diff --git a/dvc.yaml b/dvc.yaml
index 9b24294..09be1b7 100644
--- a/dvc.yaml
+++ b/dvc.yaml
@@ -99,6 +99,8 @@ stages:
                 asr: wav2vec2
             -   dataset: pl_common_voice
                 asr: techmo
+            -   dataset: pl_common_voice
+                asr: ajn
 
             -   dataset: pl_voicelab_cbiz
                 asr: google
@@ -154,13 +156,15 @@ stages:
                 asr: wav2vec2
             -   dataset: pl_common_voice
                 asr: techmo
+            -   dataset: pl_common_voice
+                asr: ajn
 
-#            -   dataset: pl_voicelab_cbiz
-#                asr: google
-#            -   dataset: pl_voicelab_cbiz
-#                asr: ajn
-#            -   dataset: pl_voicelab_cbiz
-#                asr: techmo
+            -   dataset: pl_voicelab_cbiz
+                asr: google
+            -   dataset: pl_voicelab_cbiz
+                asr: ajn
+            -   dataset: pl_voicelab_cbiz
+                asr: techmo
 
             -   dataset: pl_google_fleurs
                 asr: google
@@ -207,13 +211,15 @@ stages:
                 asr: wav2vec2
             -   dataset: pl_common_voice
                 asr: techmo
+            -   dataset: pl_common_voice
+                asr: ajn
 
-#            -   dataset: pl_voicelab_cbiz
-#                asr: google
-#            -   dataset: pl_voicelab_cbiz
-#                asr: ajn
-#            -   dataset: pl_voicelab_cbiz
-#                asr: techmo
+            -   dataset: pl_voicelab_cbiz
+                asr: google
+            -   dataset: pl_voicelab_cbiz
+                asr: ajn
+            -   dataset: pl_voicelab_cbiz
+                asr: techmo
 
             -   dataset: pl_google_fleurs
                 asr: google
@@ -260,13 +266,15 @@ stages:
                 asr: wav2vec2
             -   dataset: pl_common_voice
                 asr: techmo
+            -   dataset: pl_common_voice
+                asr: ajn
 
-#            -   dataset: pl_voicelab_cbiz
-#                asr: google
-#            -   dataset: pl_voicelab_cbiz
-#                asr: ajn
-#            -   dataset: pl_voicelab_cbiz
-#                asr: techmo
+            -   dataset: pl_voicelab_cbiz
+                asr: google
+            -   dataset: pl_voicelab_cbiz
+                asr: ajn
+            -   dataset: pl_voicelab_cbiz
+                asr: techmo
 
             -   dataset: pl_google_fleurs
                 asr: google
@@ -313,13 +321,15 @@ stages:
                 asr: wav2vec2
             -   dataset: pl_common_voice
                 asr: techmo
+            -   dataset: pl_common_voice
+                asr: ajn
 
-#            -   dataset: pl_voicelab_cbiz
-#                asr: google
-#            -   dataset: pl_voicelab_cbiz
-#                asr: ajn
-#            -   dataset: pl_voicelab_cbiz
-#                asr: techmo
+            -   dataset: pl_voicelab_cbiz
+                asr: google
+            -   dataset: pl_voicelab_cbiz
+                asr: ajn
+            -   dataset: pl_voicelab_cbiz
+                asr: techmo
 
             -   dataset: pl_google_fleurs
                 asr: google
@@ -357,3 +367,58 @@ stages:
             outs:
                 - experiment_data/pipeline/${item.dataset}/${item.asr}__flair_upos_alignment
                 - experiment_data/pipeline/${item.dataset}/${item.asr}__flair_upos_metrics
+
+    pipeline_spacy_tag_wer:  # NOTE(review): stage key says "tag" while the script and outs below say "dep_tag"
+        foreach:  # one stage instance is generated per dataset/ASR pair listed here
+            -   dataset: pl_common_voice
+                asr: google
+            -   dataset: pl_common_voice
+                asr: wav2vec2
+            -   dataset: pl_common_voice
+                asr: techmo
+            -   dataset: pl_common_voice
+                asr: ajn
+
+            -   dataset: pl_voicelab_cbiz  # no wav2vec2 entry is listed for this dataset
+                asr: google
+            -   dataset: pl_voicelab_cbiz
+                asr: ajn
+            -   dataset: pl_voicelab_cbiz
+                asr: techmo
+
+            -   dataset: pl_google_fleurs
+                asr: google
+            -   dataset: pl_google_fleurs
+                asr: ajn
+            -   dataset: pl_google_fleurs
+                asr: techmo
+            -   dataset: pl_google_fleurs
+                asr: wav2vec2
+
+            -   dataset: pl_luna
+                asr: google
+            -   dataset: pl_luna
+                asr: ajn
+            -   dataset: pl_luna
+                asr: techmo
+            -   dataset: pl_luna
+                asr: wav2vec2
+
+            -   dataset: pl_minds14
+                asr: google
+            -   dataset: pl_minds14
+                asr: ajn
+            -   dataset: pl_minds14
+                asr: techmo
+            -   dataset: pl_minds14
+                asr: wav2vec2
+        do:
+            cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=${item.dataset} --asr=${item.asr}
+            deps:  # NOTE(review): the processor module imported by the script is not listed here - confirm intended
+                - experiment/pipeline_process_spacy_dep_tag_wer.py
+                - experiment_data/dataset/${item.dataset}
+                - experiment_data/pipeline/${item.dataset}/gold_transcript
+                - experiment_data/pipeline/${item.dataset}/${item.asr}__result
+            outs:  # names mirror the alignment/WER property names passed in the script
+                - experiment_data/pipeline/${item.dataset}/${item.asr}__spacy_dep_tag_alignment
+                - experiment_data/pipeline/${item.dataset}/${item.asr}__spacy_dep_tag_metrics
diff --git a/experiment/pipeline_process_spacy_dep_tag_wer.py b/experiment/pipeline_process_spacy_dep_tag_wer.py
new file mode 100644
index 0000000..49cba63
--- /dev/null
+++ b/experiment/pipeline_process_spacy_dep_tag_wer.py
@@ -0,0 +1,35 @@
+import argparse
+
+from experiment.const_pipeline_names import GOLD_TRANSCRIPT
+from experiment.experiment_dependency_provider import get_record_provider, get_repository
+from experiment.sentence_wer_processor.spacy_pos_sentence_dep_tag_processor import SpacyDepTagSentenceWerProcessor
+from sziszapangma.integration.experiment_manager import ExperimentManager
+
+
+def run_spacy_pos_wer_pipeline(dataset_name: str, asr_name: str):
+    """Build the SpacyDepTagSentenceWerProcessor task for one dataset/ASR pair and run it."""
+    records = get_record_provider(dataset_name)
+    dep_tag_task = SpacyDepTagSentenceWerProcessor(
+        model_name='pl_core_news_lg',
+        gold_transcript_property_name=GOLD_TRANSCRIPT,
+        asr_property_name=f'{asr_name}__result',
+        alignment_property_name=f'{asr_name}__spacy_dep_tag_alignment',
+        wer_property_name=f'{asr_name}__spacy_dep_tag_metrics',
+        task_name=f'SpacyDepTagSentenceWerProcessor___{dataset_name}___{asr_name}',
+        require_update=False
+    )
+    # The record provider is passed both as the id iterator and as the relation-manager provider.
+    ExperimentManager(
+        record_id_iterator=records,
+        processing_tasks=[dep_tag_task],
+        experiment_repository=get_repository(dataset_name),
+        relation_manager_provider=records
+    ).process()
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--dataset", required=True)  # fail fast instead of handing None to the providers
+    parser.add_argument("--asr", required=True)  # a missing value would otherwise format as 'None__result'
+    args = parser.parse_args()
+    run_spacy_pos_wer_pipeline(args.dataset, args.asr)
diff --git a/experiment/sentence_wer_processor/spacy_pos_sentence_dep_tag_processor.py b/experiment/sentence_wer_processor/spacy_pos_sentence_dep_tag_processor.py
new file mode 100644
index 0000000..ca467e0
--- /dev/null
+++ b/experiment/sentence_wer_processor/spacy_pos_sentence_dep_tag_processor.py
@@ -0,0 +1,28 @@
+from abc import ABC
+from typing import List, Any
+
+import spacy
+
+from experiment.sentence_wer_processor.sentence_wer_processor import SentenceWerProcessor
+
+
+class SpacyDepTagSentenceWerProcessor(SentenceWerProcessor):
+    """Sentence WER processor that turns text into spaCy dependency labels (``token.dep_``), one per token."""
+    _nlp: Any  # spaCy pipeline object returned by spacy.load(model_name)
+
+    def __init__(
+        self,
+        model_name: str,
+        gold_transcript_property_name: str,
+        asr_property_name: str,
+        alignment_property_name: str,
+        wer_property_name: str,
+        task_name: str,
+        require_update: bool
+    ):
+        super().__init__(gold_transcript_property_name, asr_property_name, alignment_property_name, wer_property_name,
+                         task_name, require_update)
+        self._nlp = spacy.load(model_name)
+
+    def process_text(self, text: str) -> List[str]:
+        return [token.dep_ for token in self._nlp(text)]  # dep_, not pos_: this is the dep-tag processor
diff --git a/experiment_data/pipeline/pl_common_voice/.gitignore b/experiment_data/pipeline/pl_common_voice/.gitignore
index 182d144..2fc1cf4 100644
--- a/experiment_data/pipeline/pl_common_voice/.gitignore
+++ b/experiment_data/pipeline/pl_common_voice/.gitignore
@@ -16,3 +16,27 @@
 /wav2vec2__word_wer_embeddings_alignment
 /wav2vec2__spacy_ner_alignment
 /wav2vec2__spacy_ner_metrics
+/techmo__wikineural_ner_alignment
+/techmo__wikineural_ner_metrics
+/google__wikineural_ner_alignment
+/google__wikineural_ner_metrics
+/techmo__spacy_pos_alignment
+/techmo__spacy_pos_metrics
+/wav2vec2__flair_upos_alignment
+/wav2vec2__flair_upos_metrics
+/techmo__flair_upos_alignment
+/techmo__flair_upos_metrics
+/ajn__wikineural_ner_alignment
+/ajn__wikineural_ner_metrics
+/ajn__spacy_dep_tag_alignment
+/ajn__spacy_dep_tag_metrics
+/ajn__word_wer_classic_metrics
+/ajn__word_wer_classic_alignment
+/ajn__word_wer_embeddings_metrics
+/ajn__word_wer_embeddings_alignment
+/ajn__flair_upos_alignment
+/ajn__flair_upos_metrics
+/wav2vec2__spacy_dep_tag_alignment
+/wav2vec2__spacy_dep_tag_metrics
+/ajn__spacy_pos_alignment
+/ajn__spacy_pos_metrics
diff --git a/experiment_data/pipeline/pl_google_fleurs/.gitignore b/experiment_data/pipeline/pl_google_fleurs/.gitignore
index 324088b..5edb9f9 100644
--- a/experiment_data/pipeline/pl_google_fleurs/.gitignore
+++ b/experiment_data/pipeline/pl_google_fleurs/.gitignore
@@ -17,3 +17,43 @@
 /wav2vec2__word_wer_embeddings_alignment
 /wav2vec2__spacy_ner_alignment
 /wav2vec2__spacy_ner_metrics
+/ajn__wikineural_ner_alignment
+/ajn__wikineural_ner_metrics
+/techmo__wikineural_ner_alignment
+/techmo__wikineural_ner_metrics
+/google__flair_upos_alignment
+/google__flair_upos_metrics
+/wav2vec2__spacy_pos_alignment
+/wav2vec2__spacy_pos_metrics
+/ajn__flair_upos_alignment
+/ajn__flair_upos_metrics
+/ajn__spacy_pos_alignment
+/ajn__spacy_pos_metrics
+/techmo__flair_upos_alignment
+/techmo__flair_upos_metrics
+/ajn__spacy_ner_alignment
+/ajn__spacy_ner_metrics
+/wav2vec2__wikineural_ner_alignment
+/wav2vec2__wikineural_ner_metrics
+/wav2vec2__flair_upos_alignment
+/wav2vec2__flair_upos_metrics
+/google__spacy_ner_alignment
+/google__spacy_ner_metrics
+/techmo__spacy_ner_alignment
+/techmo__spacy_ner_metrics
+/google__spacy_pos_alignment
+/google__spacy_pos_metrics
+/techmo__spacy_pos_alignment
+/techmo__spacy_pos_metrics
+/google__wikineural_ner_alignment
+/google__wikineural_ner_metrics
+/ajn__spacy_dep_tag_alignment
+/ajn__spacy_dep_tag_metrics
+/techmo__spacy_dep_tag_alignment
+/techmo__spacy_dep_tag_metrics
+/google__word_wer_classic_metrics
+/google__word_wer_classic_alignment
+/google__word_wer_embeddings_metrics
+/google__word_wer_embeddings_alignment
+/google__spacy_dep_tag_alignment
+/google__spacy_dep_tag_metrics
diff --git a/experiment_data/pipeline/pl_luna/.gitignore b/experiment_data/pipeline/pl_luna/.gitignore
index 715f0fb..e67f426 100644
--- a/experiment_data/pipeline/pl_luna/.gitignore
+++ b/experiment_data/pipeline/pl_luna/.gitignore
@@ -19,3 +19,41 @@
 /ajn__word_wer_classic_alignment
 /ajn__word_wer_embeddings_metrics
 /ajn__word_wer_embeddings_alignment
+/wav2vec2__wikineural_ner_alignment
+/wav2vec2__wikineural_ner_metrics
+/techmo__flair_upos_alignment
+/techmo__flair_upos_metrics
+/techmo__wikineural_ner_alignment
+/techmo__wikineural_ner_metrics
+/techmo__spacy_ner_alignment
+/techmo__spacy_ner_metrics
+/google__wikineural_ner_alignment
+/google__wikineural_ner_metrics
+/google__flair_upos_alignment
+/google__flair_upos_metrics
+/ajn__flair_upos_alignment
+/ajn__flair_upos_metrics
+/google__spacy_ner_alignment
+/google__spacy_ner_metrics
+/wav2vec2__flair_upos_alignment
+/wav2vec2__flair_upos_metrics
+/google__spacy_pos_alignment
+/google__spacy_pos_metrics
+/techmo__spacy_pos_alignment
+/techmo__spacy_pos_metrics
+/ajn__spacy_ner_alignment
+/ajn__spacy_ner_metrics
+/ajn__wikineural_ner_alignment
+/ajn__wikineural_ner_metrics
+/wav2vec2__spacy_pos_alignment
+/wav2vec2__spacy_pos_metrics
+/ajn__spacy_pos_alignment
+/ajn__spacy_pos_metrics
+/wav2vec2__spacy_ner_alignment
+/wav2vec2__spacy_ner_metrics
+/ajn__spacy_dep_tag_alignment
+/ajn__spacy_dep_tag_metrics
+/google__spacy_dep_tag_alignment
+/google__spacy_dep_tag_metrics
+/techmo__spacy_dep_tag_alignment
+/techmo__spacy_dep_tag_metrics
diff --git a/experiment_data/pipeline/pl_minds14/.gitignore b/experiment_data/pipeline/pl_minds14/.gitignore
index f6e42cc..05ce9ab 100644
--- a/experiment_data/pipeline/pl_minds14/.gitignore
+++ b/experiment_data/pipeline/pl_minds14/.gitignore
@@ -15,3 +15,37 @@
 /techmo__spacy_ner_metrics
 /ajn__spacy_ner_alignment
 /ajn__spacy_ner_metrics
+/techmo__flair_upos_alignment
+/techmo__flair_upos_metrics
+/google__flair_upos_alignment
+/google__flair_upos_metrics
+/google__spacy_pos_alignment
+/google__spacy_pos_metrics
+/wav2vec2__flair_upos_alignment
+/wav2vec2__flair_upos_metrics
+/ajn__wikineural_ner_alignment
+/ajn__wikineural_ner_metrics
+/wav2vec2__spacy_ner_alignment
+/wav2vec2__spacy_ner_metrics
+/google__spacy_ner_alignment
+/google__spacy_ner_metrics
+/techmo__wikineural_ner_alignment
+/techmo__wikineural_ner_metrics
+/google__wikineural_ner_alignment
+/google__wikineural_ner_metrics
+/wav2vec2__wikineural_ner_alignment
+/wav2vec2__wikineural_ner_metrics
+/ajn__flair_upos_alignment
+/ajn__flair_upos_metrics
+/ajn__spacy_pos_alignment
+/ajn__spacy_pos_metrics
+/wav2vec2__word_wer_classic_metrics
+/wav2vec2__word_wer_classic_alignment
+/wav2vec2__word_wer_embeddings_metrics
+/wav2vec2__word_wer_embeddings_alignment
+/wav2vec2__spacy_pos_alignment
+/wav2vec2__spacy_pos_metrics
+/ajn__word_wer_classic_metrics
+/ajn__word_wer_classic_alignment
+/ajn__word_wer_embeddings_metrics
+/ajn__word_wer_embeddings_alignment
diff --git a/experiment_data/pipeline/pl_voicelab_cbiz/.gitignore b/experiment_data/pipeline/pl_voicelab_cbiz/.gitignore
index a64acdd..10cac58 100644
--- a/experiment_data/pipeline/pl_voicelab_cbiz/.gitignore
+++ b/experiment_data/pipeline/pl_voicelab_cbiz/.gitignore
@@ -16,3 +16,25 @@
 /ajn__word_wer_embeddings_alignment
 /ajn__spacy_ner_alignment
 /ajn__spacy_ner_metrics
+/google__spacy_pos_alignment
+/google__spacy_pos_metrics
+/techmo__spacy_ner_alignment
+/techmo__spacy_ner_metrics
+/ajn__flair_upos_alignment
+/ajn__flair_upos_metrics
+/google__flair_upos_alignment
+/google__flair_upos_metrics
+/google__spacy_ner_alignment
+/google__spacy_ner_metrics
+/techmo__spacy_pos_alignment
+/techmo__spacy_pos_metrics
+/google__wikineural_ner_alignment
+/google__wikineural_ner_metrics
+/ajn__wikineural_ner_alignment
+/ajn__wikineural_ner_metrics
+/techmo__wikineural_ner_alignment
+/techmo__wikineural_ner_metrics
+/ajn__spacy_pos_alignment
+/ajn__spacy_pos_metrics
+/techmo__flair_upos_alignment
+/techmo__flair_upos_metrics
-- 
GitLab