diff --git a/dvc.lock b/dvc.lock
index 0155b5eeb15e4cbf40a56a8511721880ce15204b..b58cc07cf72ae7d5acff483a339f491f364eceac 100644
--- a/dvc.lock
+++ b/dvc.lock
@@ -483,7 +483,7 @@ stages:
       nfiles: 0
     outs:
     - path: experiment_data/dataset_relation_manager_data/pl_minds14
-      md5: 6608d45aee735eaf0a387c52d01c9fa8.dir
+      md5: 40bb7d02cc76d5b1093955b5046cc3c4.dir
       size: 3545062
       nfiles: 1124
   pipeline_prepare_relation_manager@0:
@@ -580,12 +580,12 @@ stages:
       size: 0
       nfiles: 0
     - path: experiment_data/dataset_relation_manager_data/pl_minds14
-      md5: 6608d45aee735eaf0a387c52d01c9fa8.dir
+      md5: 40bb7d02cc76d5b1093955b5046cc3c4.dir
       size: 3545062
       nfiles: 1124
     outs:
     - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
       size: 689374
       nfiles: 562
   pipeline_gold_transcript@4:
@@ -883,7 +883,7 @@ stages:
       nfiles: 0
     outs:
     - path: experiment_data/pipeline/pl_minds14/ajn__result
-      md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir
+      md5: 4094dd4b22895a0a4ce82793cd6a8b0f.dir
       size: 974727
       nfiles: 559
   pipeline_asr_result@10:
@@ -1015,12 +1015,12 @@ stages:
       size: 22935
       nfiles: 758
     - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_embeddings_alignment
-      md5: 06e64fca6fc73e1cc9e7e86b21eb11f3.dir
-      size: 8318047
+      md5: 54a91c0e615bccc40da99b525cb566bb.dir
+      size: 7785758
       nfiles: 758
     - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_embeddings_metrics
-      md5: 68da1ded61389ae5d866bc762c61363c.dir
-      size: 33029
+      md5: a6c4e1185a8adc98f36b95f421f06c5b.dir
+      size: 52372
       nfiles: 758
   pipeline_word_wer@6:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_voicelab_cbiz
@@ -1159,12 +1159,12 @@ stages:
       size: 16835
       nfiles: 494
     - path: experiment_data/pipeline/pl_luna/ajn__word_wer_embeddings_alignment
-      md5: 6be0a1c035f4a84a9035bfba1458cdac.dir
-      size: 43099546
+      md5: d3c1d515e47c5cb9c71f56e1ae65de29.dir
+      size: 42557665
       nfiles: 494
     - path: experiment_data/pipeline/pl_luna/ajn__word_wer_embeddings_metrics
-      md5: 4f368d2ba1c5a54d5e3ab69a7581549e.dir
-      size: 19326
+      md5: 023357c311e695217fa66463be6f5eb8.dir
+      size: 33868
       nfiles: 494
   pipeline_word_wer@10:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs
@@ -1267,12 +1267,12 @@ stages:
       size: 209927
       nfiles: 8155
     - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_embeddings_alignment
-      md5: efb06f9897e62077366362b2aab25d8c.dir
-      size: 36932578
+      md5: 51798a8e9b7239c8833274dbf6644d1e.dir
+      size: 32293142
       nfiles: 8155
     - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_embeddings_metrics
-      md5: a9132386ed7ccffcba68dfa0a1dca7ee.dir
-      size: 324358
+      md5: 63233cd4c3a18f791f228e0e385cdde1.dir
+      size: 561188
       nfiles: 8155
   pipeline_word_wer@16:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14
@@ -1286,16 +1286,16 @@ stages:
       size: 0
       nfiles: 0
     - path: experiment_data/pipeline/pl_minds14/ajn__result
-      md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir
+      md5: 4094dd4b22895a0a4ce82793cd6a8b0f.dir
       size: 974727
       nfiles: 559
     - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
       size: 689374
       nfiles: 562
     outs:
     - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_classic_alignment
-      md5: c03a478840305afb1eadf4752b3a5678.dir
+      md5: f5fd8a87dfcbf4e998b9a1d215186921.dir
       size: 2851613
       nfiles: 559
     - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_classic_metrics
@@ -1303,12 +1303,12 @@ stages:
       size: 15213
       nfiles: 559
     - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_embeddings_alignment
-      md5: ae577cd5886aced83d7de4ba47bb4457.dir
-      size: 5960904
+      md5: fb57500ec3f203fc88bbe20aa877c735.dir
+      size: 5671751
       nfiles: 559
     - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_embeddings_metrics
-      md5: c599599e5935075cd26ac89e0d3b5f1b.dir
-      size: 22438
+      md5: 55349a39a515ae9b11e49dfc98791a8d.dir
+      size: 38411
       nfiles: 559
   pipeline_word_wer@11:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna
@@ -3691,33 +3691,33 @@ stages:
       size: 7147
       nfiles: 559
   pipeline_spacy_tag_wer@4:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_voicelab_cbiz
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs
       --asr=google
     deps:
     - path: experiment/pipeline_process_spacy_dep_tag_wer.py
       md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
       size: 1489
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result
-      md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir
-      size: 27432599
-      nfiles: 799
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/google__result
+      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
+      size: 1377134
+      nfiles: 758
     outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_dep_tag_alignment
-      md5: e2028c14acd625109a465c36ef166e7a.dir
-      size: 83052124
-      nfiles: 799
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_dep_tag_metrics
-      md5: 43d79d47ba1e91e86daf1f66aa18c941.dir
-      size: 14239
-      nfiles: 799
+    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_dep_tag_alignment
+      md5: 95fcbc37e49ff7f2d5c0e610446f4936.dir
+      size: 3747833
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_dep_tag_metrics
+      md5: 6ee2469b6f6008337564fd05ad07725c.dir
+      size: 9422
+      nfiles: 758
   pipeline_spacy_tag_wer@17:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14
       --asr=techmo
@@ -3747,33 +3747,33 @@ stages:
       size: 6095
       nfiles: 562
   pipeline_spacy_tag_wer@10:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs
-      --asr=wav2vec2
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna
+      --asr=techmo
     deps:
     - path: experiment/pipeline_process_spacy_dep_tag_wer.py
       md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
       size: 1489
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
-      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
-      size: 5137721
-      nfiles: 758
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/techmo__result
+      md5: 0e596570e1502b38588427bc72dcc006.dir
+      size: 9697519
+      nfiles: 500
     outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_dep_tag_alignment
-      md5: 4ca975e9b42db749a368760f5190805b.dir
-      size: 3737151
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_dep_tag_metrics
-      md5: 782cc84e9116281dfc28734b2ae4a5ea.dir
-      size: 9004
-      nfiles: 758
+    - path: experiment_data/pipeline/pl_luna/techmo__spacy_dep_tag_alignment
+      md5: fc7318a6f7511ad1436d71b994cb3aaf.dir
+      size: 21165688
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/techmo__spacy_dep_tag_metrics
+      md5: 238ef4a951d198de3573a67f0fbb2e75.dir
+      size: 8680
+      nfiles: 500
   pipeline_spacy_tag_wer@0:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice
       --asr=google
@@ -3795,12 +3795,12 @@ stages:
       nfiles: 8143
     outs:
     - path: experiment_data/pipeline/pl_common_voice/google__spacy_dep_tag_alignment
-      md5: 38e2f031c443eea54bf86af578d2b79d.dir
-      size: 18316770
+      md5: f043c22d203a7efd123232f1a2a6b4ad.dir
+      size: 18474982
       nfiles: 8143
     - path: experiment_data/pipeline/pl_common_voice/google__spacy_dep_tag_metrics
-      md5: 117611317774e81fb482ba9c71ec806b.dir
-      size: 97235
+      md5: f12ee96b46679884f65c59fef8ce74ea.dir
+      size: 96519
       nfiles: 8143
   pipeline_spacy_tag_wer@18:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14
@@ -3832,7 +3832,7 @@ stages:
       nfiles: 562
   pipeline_spacy_tag_wer@15:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14
-      --asr=google
+      --asr=wav2vec2
     deps:
     - path: experiment/pipeline_process_spacy_dep_tag_wer.py
       md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
@@ -3842,78 +3842,78 @@ stages:
       size: 0
       nfiles: 0
     - path: experiment_data/pipeline/pl_minds14/gold_transcript
-      md5: d2d48495000b3ea7ea6f4212ddb113a5.dir
+      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
       size: 689374
       nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__result
-      md5: 3f7a79298a5156fd2b023e673326e72f.dir
-      size: 985004
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__result
+      md5: 5658da01ecdce39ed99156bbc7f2dc62.dir
+      size: 3523907
       nfiles: 562
     outs:
-    - path: experiment_data/pipeline/pl_minds14/google__spacy_dep_tag_alignment
-      md5: 715afeb1c31961d4680f8b98ba61d4ad.dir
-      size: 2659852
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_dep_tag_alignment
+      md5: fd108bf3d67c339ebc12a0965a6e4c18.dir
+      size: 2752423
       nfiles: 562
-    - path: experiment_data/pipeline/pl_minds14/google__spacy_dep_tag_metrics
-      md5: aeda105b01366dee65935d3c07fe3444.dir
-      size: 4970
+    - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_dep_tag_metrics
+      md5: 88f7aef65d580d59cdc78610dd98e616.dir
+      size: 7100
       nfiles: 562
   pipeline_spacy_tag_wer@6:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_voicelab_cbiz
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs
       --asr=techmo
     deps:
     - path: experiment/pipeline_process_spacy_dep_tag_wer.py
       md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
       size: 1489
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result
-      md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir
-      size: 39158267
-      nfiles: 800
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
+      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
+      size: 1880403
+      nfiles: 758
     outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_dep_tag_alignment
-      md5: 19c1d8b3e8704af06e943ba6962cf9ad.dir
-      size: 81650836
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_dep_tag_metrics
-      md5: 11320499f29d2d7bfce68d35fb352b83.dir
-      size: 14334
-      nfiles: 800
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_dep_tag_alignment
+      md5: 997d8e36cd023245065af9c1c3db1d72.dir
+      size: 3743812
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_dep_tag_metrics
+      md5: 386a8988937349d2ab69a4a335d0d270.dir
+      size: 9672
+      nfiles: 758
   pipeline_spacy_tag_wer@14:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna
-      --asr=wav2vec2
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14
+      --asr=techmo
     deps:
     - path: experiment/pipeline_process_spacy_dep_tag_wer.py
       md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
       size: 1489
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__result
-      md5: 9c63b061ac7763144bca121e163ee7aa.dir
-      size: 20658485
-      nfiles: 456
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
+      size: 689374
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/techmo__result
+      md5: 4c43636b4773f2bf9a2153ef3393a558.dir
+      size: 1336305
+      nfiles: 562
     outs:
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_dep_tag_alignment
-      md5: 6716464936f4f35ba81a43eb2c2f37b0.dir
-      size: 17967467
-      nfiles: 456
-    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_dep_tag_metrics
-      md5: 7848ddff997fd231f3857ff30dfd7154.dir
-      size: 7940
-      nfiles: 456
+    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_dep_tag_alignment
+      md5: 66efb4e5647eda2e2ab3116445bdf9b5.dir
+      size: 2666883
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/techmo__spacy_dep_tag_metrics
+      md5: 15c9ef006daec951119079da2794dcad.dir
+      size: 6736
+      nfiles: 562
   pipeline_spacy_tag_wer@16:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14
       --asr=ajn
@@ -3943,33 +3943,33 @@ stages:
       size: 7059
       nfiles: 559
   pipeline_spacy_tag_wer@5:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_voicelab_cbiz
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs
       --asr=ajn
     deps:
     - path: experiment/pipeline_process_spacy_dep_tag_wer.py
       md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
       size: 1489
-    - path: experiment_data/dataset/pl_voicelab_cbiz
-      md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
-      size: 4803739404
-      nfiles: 1600
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result
-      md5: 7de1137f44fad26766da0fc309720160.dir
-      size: 22765926
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript
-      md5: ebffd3814a48564f4e33b9a4e0956af3.dir
-      size: 21846798
-      nfiles: 800
+    - path: experiment_data/dataset/pl_google_fleurs
+      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
+      size: 236272072
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__result
+      md5: 545e63a6daf9c46387c1d7d40b85499f.dir
+      size: 1413262
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
+      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
+      size: 975209
+      nfiles: 758
     outs:
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_dep_tag_alignment
-      md5: 3d4a9a912756443a1de46cf91f6e5805.dir
-      size: 78539613
-      nfiles: 800
-    - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_dep_tag_metrics
-      md5: 7ec3bb7c838e7f06b8a1dbe7a68faac2.dir
-      size: 13753
-      nfiles: 800
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_dep_tag_alignment
+      md5: 21d9be660fc3037a8dd6dc1c93c0499a.dir
+      size: 3834884
+      nfiles: 758
+    - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_dep_tag_metrics
+      md5: 27dbce8684441f84dd51327f1ed07e7d.dir
+      size: 10594
+      nfiles: 758
   pipeline_spacy_tag_wer@2:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice
       --asr=techmo
@@ -3991,12 +3991,12 @@ stages:
       nfiles: 8136
     outs:
     - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_dep_tag_alignment
-      md5: 3e1f2b39cd9d82679013649a1ad8d983.dir
-      size: 18192387
+      md5: 7f52c28043e0a7b311bd39e877998834.dir
+      size: 18352367
       nfiles: 8136
     - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_dep_tag_metrics
-      md5: 61c69fcd287051f4ab7d1ffcc68a9aca.dir
-      size: 96845
+      md5: 3d8555026f88adec199bcb380e30a0f0.dir
+      size: 96139
       nfiles: 8136
   pipeline_wikineiural_ner@18:
     cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14
@@ -4027,33 +4027,33 @@ stages:
       size: 8602
       nfiles: 562
   pipeline_spacy_tag_wer@8:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs
-      --asr=ajn
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna
+      --asr=google
     deps:
     - path: experiment/pipeline_process_spacy_dep_tag_wer.py
       md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
       size: 1489
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__result
-      md5: 545e63a6daf9c46387c1d7d40b85499f.dir
-      size: 1413262
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/google__result
+      md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir
+      size: 5346497
+      nfiles: 500
     outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_dep_tag_alignment
-      md5: 8ec2e9fc88d4b8ce5032bf809c1c025f.dir
-      size: 3799802
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_dep_tag_metrics
-      md5: 0d95a4abb4a33aef3e242a304fd58698.dir
-      size: 10569
-      nfiles: 758
+    - path: experiment_data/pipeline/pl_luna/google__spacy_dep_tag_alignment
+      md5: 5fc24c54101bce2e858b08f4c47e0667.dir
+      size: 19568605
+      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/google__spacy_dep_tag_metrics
+      md5: f64735e07b7c460895d1ccf8e4d0884c.dir
+      size: 8466
+      nfiles: 500
   pipeline_word_wer@18:
     cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14
       --asr=wav2vec2
@@ -4091,33 +4091,33 @@ stages:
       size: 37892
       nfiles: 562
   pipeline_spacy_tag_wer@12:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna
-      --asr=ajn
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14
+      --asr=google
     deps:
     - path: experiment/pipeline_process_spacy_dep_tag_wer.py
       md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
       size: 1489
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/ajn__result
-      md5: 653d65e186a7d05958ce3cbef219038c.dir
-      size: 6159899
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
+      size: 689374
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/google__result
+      md5: 3f7a79298a5156fd2b023e673326e72f.dir
+      size: 985004
+      nfiles: 562
     outs:
-    - path: experiment_data/pipeline/pl_luna/ajn__spacy_dep_tag_alignment
-      md5: 034d072825c711a824f1280f4a390f74.dir
-      size: 21936929
-      nfiles: 494
-    - path: experiment_data/pipeline/pl_luna/ajn__spacy_dep_tag_metrics
-      md5: 6c6bb673ea5f64c9d851878c9d8a7c09.dir
-      size: 8444
-      nfiles: 494
+    - path: experiment_data/pipeline/pl_minds14/google__spacy_dep_tag_alignment
+      md5: 76be498f539e45e5650695e29f22b436.dir
+      size: 2685018
+      nfiles: 562
+    - path: experiment_data/pipeline/pl_minds14/google__spacy_dep_tag_metrics
+      md5: 96aea021765fd076fc534e9f09b29037.dir
+      size: 5117
+      nfiles: 562
   pipeline_spacy_tag_wer@3:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice
       --asr=ajn
@@ -4139,16 +4139,16 @@ stages:
       nfiles: 8155
     outs:
     - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_dep_tag_alignment
-      md5: 10af363d90689138f55e3295f562efc4.dir
-      size: 19159060
+      md5: a735ec3c634bbe034cb67f7a54fb0d2f.dir
+      size: 19294281
       nfiles: 8155
     - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_dep_tag_metrics
-      md5: 6094fb960e2eab979ecb33d40a253531.dir
-      size: 95146
+      md5: 516d26ee39867a1166c51edb014ad897.dir
+      size: 94253
       nfiles: 8155
   pipeline_spacy_tag_wer@11:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna
-      --asr=google
+      --asr=wav2vec2
     deps:
     - path: experiment/pipeline_process_spacy_dep_tag_wer.py
       md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
@@ -4161,47 +4161,47 @@ stages:
       md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
       size: 6706925
       nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__result
-      md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir
-      size: 5346497
-      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__result
+      md5: 9c63b061ac7763144bca121e163ee7aa.dir
+      size: 20658485
+      nfiles: 456
     outs:
-    - path: experiment_data/pipeline/pl_luna/google__spacy_dep_tag_alignment
-      md5: 4663cdc1bb88d7d6de3691c734fe0ab6.dir
-      size: 19342263
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/google__spacy_dep_tag_metrics
-      md5: a65dd7d74319da06f4ebaca08cde30ce.dir
-      size: 8659
-      nfiles: 500
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_dep_tag_alignment
+      md5: 4edb321a6629205a105e76d48ab834ff.dir
+      size: 18188630
+      nfiles: 456
+    - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_dep_tag_metrics
+      md5: d3afeff3bf782eef2d0d34e8fdebee8f.dir
+      size: 7784
+      nfiles: 456
   pipeline_spacy_tag_wer@9:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs
-      --asr=techmo
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna
+      --asr=ajn
     deps:
     - path: experiment/pipeline_process_spacy_dep_tag_wer.py
       md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
       size: 1489
-    - path: experiment_data/dataset/pl_google_fleurs
-      md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir
-      size: 236272072
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript
-      md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
-      size: 975209
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__result
-      md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir
-      size: 1880403
-      nfiles: 758
+    - path: experiment_data/dataset/pl_luna
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/pl_luna/ajn__result
+      md5: 653d65e186a7d05958ce3cbef219038c.dir
+      size: 6159899
+      nfiles: 494
+    - path: experiment_data/pipeline/pl_luna/gold_transcript
+      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
+      size: 6706925
+      nfiles: 500
     outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_dep_tag_alignment
-      md5: 18da1eb1ac1485f74337ea502e395b57.dir
-      size: 3707699
-      nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_dep_tag_metrics
-      md5: 9cae08bbb8a6331d06a33dbbb4a16301.dir
-      size: 9662
-      nfiles: 758
+    - path: experiment_data/pipeline/pl_luna/ajn__spacy_dep_tag_alignment
+      md5: 39fb1ccfff7b9b87a8d5606d172cbcc9.dir
+      size: 22110364
+      nfiles: 494
+    - path: experiment_data/pipeline/pl_luna/ajn__spacy_dep_tag_metrics
+      md5: 607486e6c531d49e5a093cc5d0dda949.dir
+      size: 8366
+      nfiles: 494
   pipeline_spacy_tag_wer@1:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice
       --asr=wav2vec2
@@ -4223,12 +4223,12 @@ stages:
       nfiles: 8154
     outs:
     - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_dep_tag_alignment
-      md5: c46f35654ac42c3ddfd14e0197f36ae5.dir
-      size: 18349138
+      md5: 09fbe03eafa4948e0d3009ef392e9c40.dir
+      size: 18505763
       nfiles: 8154
     - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_dep_tag_metrics
-      md5: 71381fa7fd6c0cdba00f25c17fd6be5e.dir
-      size: 97426
+      md5: d1bc1925fe39ccb98e8bb085a1b1b24f.dir
+      size: 96041
       nfiles: 8154
   pipeline_flair_upos@18:
     cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14
@@ -4287,36 +4287,36 @@ stages:
       size: 6840
       nfiles: 562
   pipeline_spacy_tag_wer@13:
-    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna
-      --asr=techmo
+    cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14
+      --asr=ajn
     deps:
     - path: experiment/pipeline_process_spacy_dep_tag_wer.py
       md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
       size: 1489
-    - path: experiment_data/dataset/pl_luna
-      md5: d342155b1871e881797cf7da09d5dc3c.dir
-      size: 1578358645
-      nfiles: 4500
-    - path: experiment_data/pipeline/pl_luna/gold_transcript
-      md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir
-      size: 6706925
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__result
-      md5: 0e596570e1502b38588427bc72dcc006.dir
-      size: 9697519
-      nfiles: 500
+    - path: experiment_data/dataset/pl_minds14
+      md5: d751713988987e9331980363e24189ce.dir
+      size: 0
+      nfiles: 0
+    - path: experiment_data/pipeline/pl_minds14/ajn__result
+      md5: 4094dd4b22895a0a4ce82793cd6a8b0f.dir
+      size: 974727
+      nfiles: 559
+    - path: experiment_data/pipeline/pl_minds14/gold_transcript
+      md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir
+      size: 689374
+      nfiles: 562
     outs:
-    - path: experiment_data/pipeline/pl_luna/techmo__spacy_dep_tag_alignment
-      md5: baefcd5dfadd9c62d6fc71ba0ac31fa9.dir
-      size: 20897599
-      nfiles: 500
-    - path: experiment_data/pipeline/pl_luna/techmo__spacy_dep_tag_metrics
-      md5: 1478707020a96496b50eb732207c290e.dir
-      size: 8841
-      nfiles: 500
+    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_dep_tag_alignment
+      md5: 7635cf48e907c8d2939fa9ef6870cd6a.dir
+      size: 3200084
+      nfiles: 559
+    - path: experiment_data/pipeline/pl_minds14/ajn__spacy_dep_tag_metrics
+      md5: 54a8912bab4cd197f01719d5340ef7e9.dir
+      size: 6867
+      nfiles: 559
   pipeline_spacy_tag_wer@7:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs
-      --asr=google
+      --asr=wav2vec2
     deps:
     - path: experiment/pipeline_process_spacy_dep_tag_wer.py
       md5: 83fc16ed68e85cfd89d8d84dc61d6d0f
@@ -4329,18 +4329,18 @@ stages:
       md5: 607f551eca5dabcca0caf31c87bd2ac6.dir
       size: 975209
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__result
-      md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir
-      size: 1377134
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result
+      md5: bf9c77e34376bcda73dbdb6afee55c8c.dir
+      size: 5137721
       nfiles: 758
     outs:
-    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_dep_tag_alignment
-      md5: c15aa30b165152fac6813cd092763242.dir
-      size: 3712618
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_dep_tag_alignment
+      md5: 8503661e0ee89ff96690e245f3144807.dir
+      size: 3772188
       nfiles: 758
-    - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_dep_tag_metrics
-      md5: 56dddb48cea2022b91fd4323efd43a8b.dir
-      size: 9213
+    - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_dep_tag_metrics
+      md5: be82f4400be9dd5f01a7a4f73c357b1c.dir
+      size: 9083
       nfiles: 758
   pipeline_spacy_ner_wer@18:
     cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14
diff --git a/dvc.yaml b/dvc.yaml
index 09be1b7c618bd17725b995911a0bed808ca1fcd3..4f3541c854c716bc7d5256ab91e378ba8399a890 100644
--- a/dvc.yaml
+++ b/dvc.yaml
@@ -379,12 +379,12 @@ stages:
             -   dataset: pl_common_voice
                 asr: ajn
 
-            -   dataset: pl_voicelab_cbiz
-                asr: google
-            -   dataset: pl_voicelab_cbiz
-                asr: ajn
-            -   dataset: pl_voicelab_cbiz
-                asr: techmo
+#            -   dataset: pl_voicelab_cbiz
+#                asr: google
+#            -   dataset: pl_voicelab_cbiz
+#                asr: ajn
+#            -   dataset: pl_voicelab_cbiz
+#                asr: techmo
 
             -   dataset: pl_google_fleurs
                 asr: google
diff --git a/experiment/sentence_wer_processor/spacy_pos_sentence_dep_tag_processor.py b/experiment/sentence_wer_processor/spacy_pos_sentence_dep_tag_processor.py
index ca467e016b1332dc04640028f0078c1c10207b64..8a27e6611e0a986bf90bd1c7c5849f727c5e1bff 100644
--- a/experiment/sentence_wer_processor/spacy_pos_sentence_dep_tag_processor.py
+++ b/experiment/sentence_wer_processor/spacy_pos_sentence_dep_tag_processor.py
@@ -25,4 +25,4 @@ class SpacyDepTagSentenceWerProcessor(SentenceWerProcessor):
 
     def process_text(self, text: str) -> List[str]:
         document = self._nlp(text)
-        return [token.pos_ for token in document]
+        return [token.dep_ for token in document]
diff --git a/experiment_data/pipeline/pl_common_voice/.gitignore b/experiment_data/pipeline/pl_common_voice/.gitignore
index 2fc1cf46d1f96726026d9652a738b0bf53ca7620..156a70b8fe4d7f8aa592243b0902f9891751838f 100644
--- a/experiment_data/pipeline/pl_common_voice/.gitignore
+++ b/experiment_data/pipeline/pl_common_voice/.gitignore
@@ -40,3 +40,7 @@
 /wav2vec2__spacy_dep_tag_metrics
 /ajn__spacy_pos_alignment
 /ajn__spacy_pos_metrics
+/google__spacy_dep_tag_alignment
+/google__spacy_dep_tag_metrics
+/techmo__spacy_dep_tag_alignment
+/techmo__spacy_dep_tag_metrics
diff --git a/experiment_data/pipeline/pl_google_fleurs/.gitignore b/experiment_data/pipeline/pl_google_fleurs/.gitignore
index 5edb9f93066d59baa5a140a25b146448e4c52b00..d9d64b994a438c2605fbeaaf389ceac793f41c73 100644
--- a/experiment_data/pipeline/pl_google_fleurs/.gitignore
+++ b/experiment_data/pipeline/pl_google_fleurs/.gitignore
@@ -57,3 +57,5 @@
 /google__word_wer_embeddings_alignment
 /google__spacy_dep_tag_alignment
 /google__spacy_dep_tag_metrics
+/wav2vec2__spacy_dep_tag_alignment
+/wav2vec2__spacy_dep_tag_metrics
diff --git a/experiment_data/pipeline/pl_luna/.gitignore b/experiment_data/pipeline/pl_luna/.gitignore
index e67f42668889ee34145b969998c8e8d5e77fb861..6c913f2572c08da29113afd8363031f01adbf73c 100644
--- a/experiment_data/pipeline/pl_luna/.gitignore
+++ b/experiment_data/pipeline/pl_luna/.gitignore
@@ -57,3 +57,5 @@
 /google__spacy_dep_tag_metrics
 /techmo__spacy_dep_tag_alignment
 /techmo__spacy_dep_tag_metrics
+/wav2vec2__spacy_dep_tag_alignment
+/wav2vec2__spacy_dep_tag_metrics
diff --git a/experiment_data/pipeline/pl_minds14/.gitignore b/experiment_data/pipeline/pl_minds14/.gitignore
index 05ce9ab47032182a640654489dd27ed3083053f3..777d1ab112c555ac27c97f73ec95f438ced95e02 100644
--- a/experiment_data/pipeline/pl_minds14/.gitignore
+++ b/experiment_data/pipeline/pl_minds14/.gitignore
@@ -49,3 +49,11 @@
 /ajn__word_wer_classic_alignment
 /ajn__word_wer_embeddings_metrics
 /ajn__word_wer_embeddings_alignment
+/wav2vec2__spacy_dep_tag_alignment
+/wav2vec2__spacy_dep_tag_metrics
+/google__spacy_dep_tag_alignment
+/google__spacy_dep_tag_metrics
+/techmo__spacy_dep_tag_alignment
+/techmo__spacy_dep_tag_metrics
+/ajn__spacy_dep_tag_alignment
+/ajn__spacy_dep_tag_metrics
diff --git a/sziszapangma/core/alignment/distance_matrix_calculator.py b/sziszapangma/core/alignment/distance_matrix_calculator.py
index 7fabcb3518d87a4345508c8b97d00067df4747c9..44c10317f64da6826625c5bbe15e2dab4e179245 100644
--- a/sziszapangma/core/alignment/distance_matrix_calculator.py
+++ b/sziszapangma/core/alignment/distance_matrix_calculator.py
@@ -68,7 +68,12 @@ class CosineDistanceCalculator(DistanceCalculator):
             raise RuntimeError("array dimensions {} not right".format(a.ndim))
         similarity = np.dot(a, b.T) / (a_norm * b_norm)
         dist = 1.0 - similarity
-        return float(dist)
+        # return float(dist)
+        float_dist = float(dist)
+        if abs(float_dist) < 0.000001:
+            return 0.0
+        else:
+            return float_dist
 
     def calculate_distance_matrix(
         self, reference: List[Word], hypothesis: List[Word]
diff --git a/sziszapangma/core/wer/__pycache__/wer_calculator.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_calculator.cpython-38.pyc
index 800b2cf0b964c1458fee0d3b9aa1ab514e39441d..7eadfc43e8caede32ebe853909383161106dd9d6 100644
Binary files a/sziszapangma/core/wer/__pycache__/wer_calculator.cpython-38.pyc and b/sziszapangma/core/wer/__pycache__/wer_calculator.cpython-38.pyc differ
diff --git a/sziszapangma/core/wer/wer_calculator.py b/sziszapangma/core/wer/wer_calculator.py
index b5cdab3d025fa6f206bcf968e42925b34e09cfda..ac3ce661815a6acc886e0f4ef3308f232f4bc508 100644
--- a/sziszapangma/core/wer/wer_calculator.py
+++ b/sziszapangma/core/wer/wer_calculator.py
@@ -1,8 +1,11 @@
 from abc import ABC
 from typing import List
 
+import numpy as np
+
 from sziszapangma.core.alignment.alignment_step import AlignmentStep
 from sziszapangma.core.alignment.alignment_util import AlignmentUtil
+from sziszapangma.core.alignment.step_type import StepType
 from sziszapangma.core.wer.span import Span
 
 
@@ -27,7 +30,8 @@ class WerCalculator(ABC):
         steps: List[AlignmentStep],
     ) -> float:
         reference_len = AlignmentUtil.get_reference_length(steps)
-        return sum([step.step_cost for step in steps]) / reference_len
+        fixed_step_costs = [step.step_cost for step in steps]
+        return sum(fixed_step_costs) / reference_len
 
     def calculate_wer(self, steps: List[AlignmentStep]) -> float:
         return self._calculate_wer(steps)
diff --git a/sziszapangma/integration/task/__pycache__/embedding_wer_metrics_task.cpython-38.pyc b/sziszapangma/integration/task/__pycache__/embedding_wer_metrics_task.cpython-38.pyc
index 388f4fe48dd157ff91257c305c0a27b700e0f078..a8a8f78d692af7ebea7c238aeeee2face160921a 100644
Binary files a/sziszapangma/integration/task/__pycache__/embedding_wer_metrics_task.cpython-38.pyc and b/sziszapangma/integration/task/__pycache__/embedding_wer_metrics_task.cpython-38.pyc differ
diff --git a/sziszapangma/integration/task/embedding_wer_metrics_task.py b/sziszapangma/integration/task/embedding_wer_metrics_task.py
index c0f54ad88a57914f115b9589c49b73623a6af2f4..eefc2fd4285adbbb7f0ba7315b959e45f20a937d 100644
--- a/sziszapangma/integration/task/embedding_wer_metrics_task.py
+++ b/sziszapangma/integration/task/embedding_wer_metrics_task.py
@@ -1,3 +1,5 @@
+from typing import List
+
 from sziszapangma.core.alignment.alignment_embedding_calculator import AlignmentEmbeddingCalculator
 from sziszapangma.core.alignment.alignment_soft_calculator import AlignmentSoftCalculator
 from sziszapangma.core.transformer.cached_embedding_transformer import CachedEmbeddingTransformer
@@ -7,6 +9,7 @@ from sziszapangma.integration.mapper.alignment_step_mapper import AlignmentStepM
 from sziszapangma.integration.repository.experiment_repository import ExperimentRepository
 from sziszapangma.integration.task.processing_task import ProcessingTask
 from sziszapangma.integration.task.task_util import TaskUtil
+from sziszapangma.model.model import Word
 from sziszapangma.model.relation_manager import RelationManager
 
 _SOFT_WER = "soft_wer"
@@ -51,6 +54,10 @@ class EmbeddingWerMetricsTask(ProcessingTask):
             is not None
         )
 
+    @staticmethod
+    def filter_empty_words(words: List[Word]) -> List[Word]:
+        return [it for it in words if len(it['text']) > 0]
+
     def run_single_process(
         self,
         record_id: str,
@@ -60,8 +67,8 @@ class EmbeddingWerMetricsTask(ProcessingTask):
         gold_transcript = TaskUtil.get_words_from_record(relation_manager)
         asr_result = experiment_repository.get_property_for_key(record_id, self._asr_property_name)
         if gold_transcript is not None and asr_result is not None and "transcription" in asr_result:
-            gold_transcript_lower = TaskUtil.words_to_lower(gold_transcript)
-            asr_transcript_lower = TaskUtil.words_to_lower(asr_result["transcription"])
+            gold_transcript_lower = self.filter_empty_words(TaskUtil.words_to_lower(gold_transcript))
+            asr_transcript_lower = self.filter_empty_words(TaskUtil.words_to_lower(asr_result["transcription"]))
 
             soft_alignment = self._alignment_soft_calculator.calculate_alignment(
                 gold_transcript_lower, asr_transcript_lower
@@ -80,6 +87,7 @@ class EmbeddingWerMetricsTask(ProcessingTask):
                 ],
             }
             wer_results = {"soft_wer": soft_wer, "embedding_wer": embedding_wer}
+            print(wer_results)
 
             experiment_repository.update_property_for_key(
                 record_id, self._alignment_property_name, alignment_results