From f4762cc2ef07f03d3c2bbfca8ba9aae48252df8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20W=C4=85troba?= <markowanga@gmail.com> Date: Sat, 16 Jul 2022 12:32:01 +0200 Subject: [PATCH] Finish experiments for polish --- docker/docker-compose.yml | 40 +- dvc.lock | 3444 ++++++++++++----- dvc.yaml | 113 +- .../pipeline_process_spacy_dep_tag_wer.py | 35 + .../spacy_pos_sentence_dep_tag_processor.py | 28 + .../pipeline/pl_common_voice/.gitignore | 24 + .../pipeline/pl_google_fleurs/.gitignore | 40 + experiment_data/pipeline/pl_luna/.gitignore | 38 + .../pipeline/pl_minds14/.gitignore | 34 + .../pipeline/pl_voicelab_cbiz/.gitignore | 22 + 10 files changed, 2910 insertions(+), 908 deletions(-) create mode 100644 experiment/pipeline_process_spacy_dep_tag_wer.py create mode 100644 experiment/sentence_wer_processor/spacy_pos_sentence_dep_tag_processor.py diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 2486d9e..ec1cfde 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -44,27 +44,27 @@ services: - /etc/localtime:/etc/localtime:ro - ./embedding_models:/models - ajn_asr: - image: gitlab.clarin-pl.eu:5050/clarin-dialog/clarin-dialog/asr-clarin-pl-service:1.5 - container_name: ajn_asr - restart: always - ports: - - "5431:5000" - environment: - - AUTH_TOKEN=__example_token__ - volumes: - - /etc/localtime:/etc/localtime:ro +# ajn_asr: +# image: gitlab.clarin-pl.eu:5050/clarin-dialog/clarin-dialog/asr-clarin-pl-service:1.5 +# container_name: ajn_asr +# restart: always +# ports: +# - "5431:5000" +# environment: +# - AUTH_TOKEN=__example_token__ +# volumes: +# - /etc/localtime:/etc/localtime:ro - wav2vec2-xls-r-1b-polish: - image: gitlab.clarin-pl.eu:5050/clarin-dialog/clarin-dialog/wav2vec2-xls-r-1b-polish:1.5 - container_name: wav2vec2-xls-r-1b-polish - restart: always - ports: - - "5437:5000" - environment: - - AUTH_TOKEN=__example_token__ - volumes: - - /etc/localtime:/etc/localtime:ro +# wav2vec2-xls-r-1b-polish: +# image: gitlab.clarin-pl.eu:5050/clarin-dialog/clarin-dialog/wav2vec2-xls-r-1b-polish:1.5 +# container_name: wav2vec2-xls-r-1b-polish +# restart: always +# ports: +# - "5437:5000" +# environment: +# - AUTH_TOKEN=__example_token__ +# volumes: +# - /etc/localtime:/etc/localtime:ro # speechbrain_asr: # image: gitlab.clarin-pl.eu:5050/clarin-dialog/clarin-dialog/speechbrain-asr:1.5 diff --git a/dvc.lock b/dvc.lock index 8a56c42..0155b5e 100644 --- a/dvc.lock +++ b/dvc.lock @@ -888,23 +888,23 @@ stages: nfiles: 559 pipeline_asr_result@10: cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs - --asr=techmo + --asr=wav2vec2 deps: - path: experiment/pipeline_process_asr.py md5: efe378505897550fe1a8d423c321ef53 size: 1403 - - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__techmo - md5: 876e8cff4ebd191617fcd3844e411475.dir - size: 859227 + - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__wav2vec2 + md5: 9638746467b0dea972a0be101a7d5c58.dir + size: 4093001 nfiles: 758 - path: experiment_data/dataset/pl_google_fleurs md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir size: 236272072 nfiles: 758 outs: - - path: experiment_data/pipeline/pl_google_fleurs/techmo__result - md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir - size: 1880403 + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result + md5: bf9c77e34376bcda73dbdb6afee55c8c.dir + size: 5137721 nfiles: 758 pipeline_asr_result@4: cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_voicelab_cbiz @@ -948,23 +948,23 @@ stages: nfiles: 456 pipeline_asr_result@9: cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs - --asr=wav2vec2 + --asr=techmo deps: - path: experiment/pipeline_process_asr.py md5: efe378505897550fe1a8d423c321ef53 size: 1403 - - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__wav2vec2 - md5: 9638746467b0dea972a0be101a7d5c58.dir - size: 4093001 + - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__techmo + md5: 876e8cff4ebd191617fcd3844e411475.dir + size: 859227 nfiles: 758 - path: experiment_data/dataset/pl_google_fleurs md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir size: 236272072 nfiles: 758 outs: - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result - md5: bf9c77e34376bcda73dbdb6afee55c8c.dir - size: 5137721 + - path: experiment_data/pipeline/pl_google_fleurs/techmo__result + md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir + size: 1880403 nfiles: 758 pipeline_asr_result@0: cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_common_voice @@ -988,7 +988,7 @@ stages: nfiles: 8143 pipeline_word_wer@8: cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs - --asr=techmo + --asr=ajn deps: - path: experiment/pipeline_process_word_wer.py md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 @@ -997,70 +997,70 @@ stages: md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir size: 236272072 nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/ajn__result + md5: 545e63a6daf9c46387c1d7d40b85499f.dir + size: 1413262 + nfiles: 758 - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript md5: 607f551eca5dabcca0caf31c87bd2ac6.dir size: 975209 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__result - md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir - size: 1880403 - nfiles: 758 outs: - - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_classic_alignment - md5: 689776c77c4ecaa11578b53480100ecc.dir - size: 3821036 + - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_classic_alignment + md5: 6437952d0ab383e44acca5fa70d02e54.dir + size: 3981739 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_classic_metrics - md5: 8d0d99fd7d965d4070c0b391cd6fa2b0.dir - size: 23030 + - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_classic_metrics + md5: a40ae0901f53a33231a4c3dc18708d2c.dir + size: 22935 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_embeddings_alignment - md5: 0ec5fe337bde2254c91146fd16b9c6af.dir - size: 7995553 + - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_embeddings_alignment + md5: 06e64fca6fc73e1cc9e7e86b21eb11f3.dir + size: 8318047 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_embeddings_metrics - md5: 55eb6eb0aacc12e3e2caf224b4b89df1.dir - size: 53591 + - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_embeddings_metrics + md5: 68da1ded61389ae5d866bc762c61363c.dir + size: 33029 nfiles: 758 pipeline_word_wer@6: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs - --asr=google + cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_voicelab_cbiz + --asr=techmo deps: - path: experiment/pipeline_process_word_wer.py md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 size: 2125 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__result - md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir - size: 1377134 - nfiles: 758 + - path: experiment_data/dataset/pl_voicelab_cbiz + md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir + size: 4803739404 + nfiles: 1600 + - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript + md5: ebffd3814a48564f4e33b9a4e0956af3.dir + size: 21846798 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result + md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir + size: 39158267 + nfiles: 800 outs: - - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_classic_alignment - md5: 9beb57e9be598275ac9d449618da440e.dir - size: 3818553 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_classic_metrics - md5: 984770b57ca48fd793099c9ed67791d0.dir - size: 23344 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_embeddings_alignment - md5: aeb20f1662b696a6fc11bcd39a02a0de.dir - size: 7984964 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_embeddings_metrics - md5: b2626f92231431e163813ef7112c119d.dir - size: 53916 - nfiles: 758 + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_classic_alignment + md5: 37f0c27869efd72bf3eaedae70e426d5.dir + size: 83756423 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_classic_metrics + md5: 2fe3288abe85e4a385e2aefa0e8cad7e.dir + size: 27780 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_embeddings_alignment + md5: 01badff31f4d2f6583481c332fae8abc.dir + size: 174322727 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_embeddings_metrics + md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir + size: 56182 + nfiles: 800 pipeline_word_wer@17: cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14 - --asr=wav2vec2 + --asr=techmo deps: - path: experiment/pipeline_process_word_wer.py md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 @@ -1073,30 +1073,30 @@ stages: md5: d2d48495000b3ea7ea6f4212ddb113a5.dir size: 689374 nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__result - md5: 5658da01ecdce39ed99156bbc7f2dc62.dir - size: 3523907 + - path: experiment_data/pipeline/pl_minds14/techmo__result + md5: 4c43636b4773f2bf9a2153ef3393a558.dir + size: 1336305 nfiles: 562 outs: - - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_classic_alignment - md5: c1d947bbd7bb1dff8d39e03ad4a1d11e.dir - size: 2811807 + - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_classic_alignment + md5: bb8b304a25b60f779c27ca4d6c9183ba.dir + size: 2723010 nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_classic_metrics - md5: a3477ae81fcba4a8e21b044f279425bb.dir - size: 17073 + - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_classic_metrics + md5: b6a4f05d71bd3fbed796d54b1ac2b29d.dir + size: 17198 nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_embeddings_alignment - md5: cbdcbfe7157f28d70ca55b787483e26b.dir - size: 5870633 + - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_embeddings_alignment + md5: 9956c918227d6b76f26ac6540a34e931.dir + size: 5673712 nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_embeddings_metrics - md5: c3395e582b4562a33e97419134363d30.dir - size: 37892 + - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_embeddings_metrics + md5: de8e393488ff05187c7866272675fd8e.dir + size: 39775 nfiles: 562 pipeline_word_wer@15: cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14 - --asr=ajn + --asr=google deps: - path: experiment/pipeline_process_word_wer.py md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 @@ -1105,34 +1105,34 @@ stages: md5: d751713988987e9331980363e24189ce.dir size: 0 nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/ajn__result - md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir - size: 974727 - nfiles: 559 - path: experiment_data/pipeline/pl_minds14/gold_transcript md5: d2d48495000b3ea7ea6f4212ddb113a5.dir size: 689374 nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/google__result + md5: 3f7a79298a5156fd2b023e673326e72f.dir + size: 985004 + nfiles: 562 outs: - - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_classic_alignment - md5: c03a478840305afb1eadf4752b3a5678.dir - size: 2851613 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_classic_metrics - md5: 6d6d430e7f3672c67b9c78eb7d0ea296.dir - size: 15213 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_embeddings_alignment - md5: ae577cd5886aced83d7de4ba47bb4457.dir - size: 5960904 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_embeddings_metrics - md5: c599599e5935075cd26ac89e0d3b5f1b.dir - size: 22438 - nfiles: 559 + - path: experiment_data/pipeline/pl_minds14/google__word_wer_classic_alignment + md5: 8469a41c345e0c21accb960782a49e75.dir + size: 2725626 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/google__word_wer_classic_metrics + md5: 41621d3845a0035ee77352902c46a186.dir + size: 15012 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/google__word_wer_embeddings_alignment + md5: a0e3d4ddda612480de739c898e79bf73.dir + size: 5686170 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/google__word_wer_embeddings_metrics + md5: 673f6b46c7735163726f7d9e3b1f2dd4.dir + size: 40260 + nfiles: 562 pipeline_word_wer@12: cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna - --asr=techmo + --asr=ajn deps: - path: experiment/pipeline_process_word_wer.py md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 @@ -1141,70 +1141,70 @@ stages: md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/ajn__result + md5: 653d65e186a7d05958ce3cbef219038c.dir + size: 6159899 + nfiles: 494 - path: experiment_data/pipeline/pl_luna/gold_transcript md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir size: 6706925 nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__result - md5: 0e596570e1502b38588427bc72dcc006.dir - size: 9697519 - nfiles: 500 outs: - - path: experiment_data/pipeline/pl_luna/techmo__word_wer_classic_alignment - md5: 5e90722635a811db67a1f0d917707b0a.dir - size: 21380796 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__word_wer_classic_metrics - md5: 4cfbb2830b280084ece14b1ef815b92a.dir - size: 17298 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__word_wer_embeddings_alignment - md5: 74f8be84e1e913050483713fbc945d80.dir - size: 44326962 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__word_wer_embeddings_metrics - md5: 98a7edeee3b630e8e301acfc578a8393.dir - size: 34869 - nfiles: 500 + - path: experiment_data/pipeline/pl_luna/ajn__word_wer_classic_alignment + md5: e25ae51f8dc1afe55d5c0b44a67ab8ee.dir + size: 20671277 + nfiles: 494 + - path: experiment_data/pipeline/pl_luna/ajn__word_wer_classic_metrics + md5: 18605657ff9c7ef3221e27b671a3b4d1.dir + size: 16835 + nfiles: 494 + - path: experiment_data/pipeline/pl_luna/ajn__word_wer_embeddings_alignment + md5: 6be0a1c035f4a84a9035bfba1458cdac.dir + size: 43099546 + nfiles: 494 + - path: experiment_data/pipeline/pl_luna/ajn__word_wer_embeddings_metrics + md5: 4f368d2ba1c5a54d5e3ab69a7581549e.dir + size: 19326 + nfiles: 494 pipeline_word_wer@10: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna - --asr=google + cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs + --asr=wav2vec2 deps: - path: experiment/pipeline_process_word_wer.py md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 size: 2125 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__result - md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir - size: 5346497 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/google__word_wer_classic_alignment - md5: 5bdee43e646a4e9470310073365ebc37.dir - size: 19650202 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__word_wer_classic_metrics - md5: 9951fb70382cc8bed9aa48d1185c1c7c.dir - size: 16989 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__word_wer_embeddings_alignment - md5: a06bd5f0b4c52c679315b6c6d7478084.dir - size: 40586004 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__word_wer_embeddings_metrics - md5: 351543fb54e2098ac07999482d2280a8.dir - size: 34530 - nfiles: 500 + - path: experiment_data/dataset/pl_google_fleurs + md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir + size: 236272072 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript + md5: 607f551eca5dabcca0caf31c87bd2ac6.dir + size: 975209 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result + md5: bf9c77e34376bcda73dbdb6afee55c8c.dir + size: 5137721 + nfiles: 758 + outs: + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_classic_alignment + md5: 052fa8f97b2f9d5b8ca4175b4e92dfd1.dir + size: 3854678 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_classic_metrics + md5: d9dec56ef7ddb6a3bc03638e37d1c04c.dir + size: 23168 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_embeddings_alignment + md5: 069191de9f84f37303bc202968f26766.dir + size: 8069804 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_embeddings_metrics + md5: b45390641698e71fdf420fbb2afec7e0.dir + size: 53351 + nfiles: 758 pipeline_word_wer@13: cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna - --asr=wav2vec2 + --asr=techmo deps: - path: experiment/pipeline_process_word_wer.py md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 @@ -1217,66 +1217,66 @@ stages: md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir size: 6706925 nfiles: 500 - - path: experiment_data/pipeline/pl_luna/wav2vec2__result - md5: 9c63b061ac7763144bca121e163ee7aa.dir - size: 20658485 - nfiles: 456 + - path: experiment_data/pipeline/pl_luna/techmo__result + md5: 0e596570e1502b38588427bc72dcc006.dir + size: 9697519 + nfiles: 500 outs: - - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_classic_alignment - md5: 4d4ae25d4fac2a52893e60389fcd2f9e.dir - size: 18462856 - nfiles: 456 - - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_classic_metrics - md5: f209479afd2482800f1e75bdab9f83b2.dir - size: 15604 - nfiles: 456 - - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_embeddings_alignment - md5: 41ad4c0923986a90fc9ed12aa6e30f73.dir - size: 38402586 - nfiles: 456 - - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_embeddings_metrics - md5: df02c1f9d7e26ed10815175de086e0b6.dir - size: 24633 - nfiles: 456 + - path: experiment_data/pipeline/pl_luna/techmo__word_wer_classic_alignment + md5: 5e90722635a811db67a1f0d917707b0a.dir + size: 21380796 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/techmo__word_wer_classic_metrics + md5: 4cfbb2830b280084ece14b1ef815b92a.dir + size: 17298 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/techmo__word_wer_embeddings_alignment + md5: 74f8be84e1e913050483713fbc945d80.dir + size: 44326962 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/techmo__word_wer_embeddings_metrics + md5: 98a7edeee3b630e8e301acfc578a8393.dir + size: 34869 + nfiles: 500 pipeline_word_wer@3: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_voicelab_cbiz - --asr=google + cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice + --asr=ajn deps: - path: experiment/pipeline_process_word_wer.py md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 size: 2125 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: ebffd3814a48564f4e33b9a4e0956af3.dir - size: 21846798 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result - md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir - size: 27432599 - nfiles: 799 + - path: experiment_data/dataset/pl_common_voice + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_common_voice/ajn__result + md5: 77d873041fe2952e3c45ee4ac6458061.dir + size: 6667841 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/gold_transcript + md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir + size: 4093545 + nfiles: 8155 outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_classic_alignment - md5: 184cdfdd465db609f514891b0330ef67.dir - size: 83997172 - nfiles: 799 - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_classic_metrics - md5: c12eab8acb3cee0219fc8046691b24ab.dir - size: 27841 - nfiles: 799 - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_embeddings_alignment - md5: 1e7ef6690d2e36f368eb02a8dcba9ab0.dir - size: 174732507 - nfiles: 799 - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_embeddings_metrics - md5: c15414f2ec9c0a1ab78199861b0b4ce0.dir - size: 55765 - nfiles: 799 + - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_classic_alignment + md5: 7b7c5de97970c320cb8cf549839d16fd.dir + size: 17724868 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_classic_metrics + md5: b1bdb80faa8c728ea072632121a2f2d0.dir + size: 209927 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_embeddings_alignment + md5: efb06f9897e62077366362b2aab25d8c.dir + size: 36932578 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_embeddings_metrics + md5: a9132386ed7ccffcba68dfa0a1dca7ee.dir + size: 324358 + nfiles: 8155 pipeline_word_wer@16: cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14 - --asr=techmo + --asr=ajn deps: - path: experiment/pipeline_process_word_wer.py md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 @@ -1285,34 +1285,34 @@ stages: md5: d751713988987e9331980363e24189ce.dir size: 0 nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/ajn__result + md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir + size: 974727 + nfiles: 559 - path: experiment_data/pipeline/pl_minds14/gold_transcript md5: d2d48495000b3ea7ea6f4212ddb113a5.dir size: 689374 nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__result - md5: 4c43636b4773f2bf9a2153ef3393a558.dir - size: 1336305 - nfiles: 562 outs: - - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_classic_alignment - md5: bb8b304a25b60f779c27ca4d6c9183ba.dir - size: 2723010 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_classic_metrics - md5: b6a4f05d71bd3fbed796d54b1ac2b29d.dir - size: 17198 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_embeddings_alignment - md5: 9956c918227d6b76f26ac6540a34e931.dir - size: 5673712 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_embeddings_metrics - md5: de8e393488ff05187c7866272675fd8e.dir - size: 39775 - nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_classic_alignment + md5: c03a478840305afb1eadf4752b3a5678.dir + size: 2851613 + nfiles: 559 + - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_classic_metrics + md5: 6d6d430e7f3672c67b9c78eb7d0ea296.dir + size: 15213 + nfiles: 559 + - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_embeddings_alignment + md5: ae577cd5886aced83d7de4ba47bb4457.dir + size: 5960904 + nfiles: 559 + - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_embeddings_metrics + md5: c599599e5935075cd26ac89e0d3b5f1b.dir + size: 22438 + nfiles: 559 pipeline_word_wer@11: cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna - --asr=ajn + --asr=google deps: - path: experiment/pipeline_process_word_wer.py md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 @@ -1321,34 +1321,34 @@ stages: md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/ajn__result - md5: 653d65e186a7d05958ce3cbef219038c.dir - size: 6159899 - nfiles: 494 - path: experiment_data/pipeline/pl_luna/gold_transcript md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir size: 6706925 nfiles: 500 + - path: experiment_data/pipeline/pl_luna/google__result + md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir + size: 5346497 + nfiles: 500 outs: - - path: experiment_data/pipeline/pl_luna/ajn__word_wer_classic_alignment - md5: e25ae51f8dc1afe55d5c0b44a67ab8ee.dir - size: 20671277 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/ajn__word_wer_classic_metrics - md5: 18605657ff9c7ef3221e27b671a3b4d1.dir - size: 16835 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/ajn__word_wer_embeddings_alignment - md5: 6be0a1c035f4a84a9035bfba1458cdac.dir - size: 43099546 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/ajn__word_wer_embeddings_metrics - md5: 4f368d2ba1c5a54d5e3ab69a7581549e.dir - size: 19326 - nfiles: 494 + - path: experiment_data/pipeline/pl_luna/google__word_wer_classic_alignment + md5: 5bdee43e646a4e9470310073365ebc37.dir + size: 19650202 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/google__word_wer_classic_metrics + md5: 9951fb70382cc8bed9aa48d1185c1c7c.dir + size: 16989 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/google__word_wer_embeddings_alignment + md5: a06bd5f0b4c52c679315b6c6d7478084.dir + size: 40586004 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/google__word_wer_embeddings_metrics + md5: 351543fb54e2098ac07999482d2280a8.dir + size: 34530 + nfiles: 500 pipeline_word_wer@7: cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs - --asr=ajn + --asr=google deps: - path: experiment/pipeline_process_word_wer.py md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 @@ -1357,30 +1357,30 @@ stages: md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir size: 236272072 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__result - md5: 545e63a6daf9c46387c1d7d40b85499f.dir - size: 1413262 - nfiles: 758 - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript md5: 607f551eca5dabcca0caf31c87bd2ac6.dir size: 975209 nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/google__result + md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir + size: 1377134 + nfiles: 758 outs: - - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_classic_alignment - md5: 6437952d0ab383e44acca5fa70d02e54.dir - size: 3981739 + - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_classic_alignment + md5: 9beb57e9be598275ac9d449618da440e.dir + size: 3818553 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_classic_metrics - md5: a40ae0901f53a33231a4c3dc18708d2c.dir - size: 22935 + - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_classic_metrics + md5: 984770b57ca48fd793099c9ed67791d0.dir + size: 23344 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_embeddings_alignment - md5: 06e64fca6fc73e1cc9e7e86b21eb11f3.dir - size: 8318047 + - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_embeddings_alignment + md5: aeb20f1662b696a6fc11bcd39a02a0de.dir + size: 7984964 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_embeddings_metrics - md5: 68da1ded61389ae5d866bc762c61363c.dir - size: 33029 + - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_embeddings_metrics + md5: b2626f92231431e163813ef7112c119d.dir + size: 53916 nfiles: 758 pipeline_word_wer@2: cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice @@ -1419,41 +1419,41 @@ stages: size: 571563 nfiles: 8136 pipeline_word_wer@14: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14 - --asr=google + cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna + --asr=wav2vec2 deps: - path: experiment/pipeline_process_word_wer.py md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 size: 2125 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d2d48495000b3ea7ea6f4212ddb113a5.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__result - md5: 3f7a79298a5156fd2b023e673326e72f.dir - size: 985004 - nfiles: 562 + - path: experiment_data/dataset/pl_luna + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/gold_transcript + md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir + size: 6706925 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/wav2vec2__result + md5: 9c63b061ac7763144bca121e163ee7aa.dir + size: 20658485 + nfiles: 456 outs: - - path: experiment_data/pipeline/pl_minds14/google__word_wer_classic_alignment - md5: 8469a41c345e0c21accb960782a49e75.dir - size: 2725626 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__word_wer_classic_metrics - md5: 41621d3845a0035ee77352902c46a186.dir - size: 15012 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__word_wer_embeddings_alignment - md5: a0e3d4ddda612480de739c898e79bf73.dir - size: 5686170 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__word_wer_embeddings_metrics - md5: 673f6b46c7735163726f7d9e3b1f2dd4.dir - size: 40260 - nfiles: 562 + - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_classic_alignment + md5: 4d4ae25d4fac2a52893e60389fcd2f9e.dir + size: 18462856 + nfiles: 456 + - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_classic_metrics + md5: f209479afd2482800f1e75bdab9f83b2.dir + size: 15604 + nfiles: 456 + - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_embeddings_alignment + md5: 41ad4c0923986a90fc9ed12aa6e30f73.dir + size: 38402586 + nfiles: 456 + - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_embeddings_metrics + md5: df02c1f9d7e26ed10815175de086e0b6.dir + size: 24633 + nfiles: 456 pipeline_word_wer@0: cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice --asr=google @@ -1492,7 +1492,7 @@ stages: nfiles: 8143 pipeline_word_wer@5: cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_voicelab_cbiz - --asr=techmo + --asr=ajn deps: - path: experiment/pipeline_process_word_wer.py md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 @@ -1501,31 +1501,31 @@ stages: md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir size: 4803739404 nfiles: 1600 + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result + md5: 7de1137f44fad26766da0fc309720160.dir + size: 22765926 + nfiles: 800 - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript md5: ebffd3814a48564f4e33b9a4e0956af3.dir size: 21846798 nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result - md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir - size: 39158267 - nfiles: 800 outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_classic_alignment - md5: 37f0c27869efd72bf3eaedae70e426d5.dir - size: 83756423 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_classic_metrics - md5: 2fe3288abe85e4a385e2aefa0e8cad7e.dir - size: 27780 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_embeddings_alignment - md5: 01badff31f4d2f6583481c332fae8abc.dir - size: 174322727 + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_classic_alignment + md5: 1ed03de918f5373afdbdbb020c6161b5.dir + size: 78992762 nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_embeddings_metrics - md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir - size: 56182 + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_classic_metrics + md5: 04f6ccbaf94cf08c34ac201ae079c21c.dir + size: 25307 nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_embeddings_alignment + md5: a67e7a6e8a6e0755ea35a519d9decf86.dir + size: 128258410 + nfiles: 646 + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_embeddings_metrics + md5: c83561c448950860f36037c2287a25f5.dir + size: 25194 + nfiles: 646 pipeline_word_wer@1: cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice --asr=wav2vec2 @@ -1564,7 +1564,7 @@ stages: nfiles: 8154 pipeline_word_wer@9: cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs - --asr=wav2vec2 + --asr=techmo deps: - path: experiment/pipeline_process_word_wer.py md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 @@ -1577,30 +1577,30 @@ stages: md5: 607f551eca5dabcca0caf31c87bd2ac6.dir size: 975209 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result - md5: bf9c77e34376bcda73dbdb6afee55c8c.dir - size: 5137721 + - path: experiment_data/pipeline/pl_google_fleurs/techmo__result + md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir + size: 1880403 nfiles: 758 outs: - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_classic_alignment - md5: 052fa8f97b2f9d5b8ca4175b4e92dfd1.dir - size: 3854678 + - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_classic_alignment + md5: 689776c77c4ecaa11578b53480100ecc.dir + size: 3821036 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_classic_metrics - md5: d9dec56ef7ddb6a3bc03638e37d1c04c.dir - size: 23168 + - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_classic_metrics + md5: 8d0d99fd7d965d4070c0b391cd6fa2b0.dir + size: 23030 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_embeddings_alignment - md5: 069191de9f84f37303bc202968f26766.dir - size: 8069804 + - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_embeddings_alignment + md5: 0ec5fe337bde2254c91146fd16b9c6af.dir + size: 7995553 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_embeddings_metrics - md5: b45390641698e71fdf420fbb2afec7e0.dir - size: 53351 + - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_embeddings_metrics + md5: 55eb6eb0aacc12e3e2caf224b4b89df1.dir + size: 53591 nfiles: 758 pipeline_word_wer@4: cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_voicelab_cbiz - --asr=ajn + --asr=google deps: - path: experiment/pipeline_process_word_wer.py md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 @@ -1609,65 +1609,65 @@ stages: md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir size: 4803739404 nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result - md5: 7de1137f44fad26766da0fc309720160.dir - size: 22765926 - nfiles: 800 - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript md5: ebffd3814a48564f4e33b9a4e0956af3.dir size: 21846798 nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result + md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir + size: 27432599 + nfiles: 799 outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_classic_alignment - md5: 1ed03de918f5373afdbdbb020c6161b5.dir - size: 78992762 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_classic_metrics - md5: 04f6ccbaf94cf08c34ac201ae079c21c.dir - size: 25307 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_embeddings_alignment - md5: a67e7a6e8a6e0755ea35a519d9decf86.dir - size: 128258410 - nfiles: 646 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_embeddings_metrics - md5: c83561c448950860f36037c2287a25f5.dir - size: 25194 - nfiles: 646 + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_classic_alignment + md5: 184cdfdd465db609f514891b0330ef67.dir + size: 83997172 + nfiles: 799 + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_classic_metrics + md5: c12eab8acb3cee0219fc8046691b24ab.dir + size: 27841 + nfiles: 799 + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_embeddings_alignment + md5: 1e7ef6690d2e36f368eb02a8dcba9ab0.dir + size: 174732507 + nfiles: 799 + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_embeddings_metrics + md5: c15414f2ec9c0a1ab78199861b0b4ce0.dir + size: 55765 + nfiles: 799 pipeline_spacy_pos_wer@14: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14 + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna --asr=wav2vec2 deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 size: 1469 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d2d48495000b3ea7ea6f4212ddb113a5.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__result - md5: 5658da01ecdce39ed99156bbc7f2dc62.dir - size: 3523907 - nfiles: 562 + - path: experiment_data/dataset/pl_luna + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/gold_transcript + md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir + size: 6706925 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/wav2vec2__result + md5: 9c63b061ac7763144bca121e163ee7aa.dir + size: 20658485 + nfiles: 456 outs: - - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_pos_alignment - md5: 0d04963621be1dd6a1c81225734de652.dir - size: 2724276 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_pos_metrics - md5: 818882d685785a9d7d9b8d757c4c8e90.dir - size: 6840 - nfiles: 562 + - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_pos_alignment + md5: a791fae1ca5599b9422b94d4c6ce760a.dir + size: 17967467 + nfiles: 456 + - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_pos_metrics + md5: 7848ddff997fd231f3857ff30dfd7154.dir + size: 7940 + nfiles: 456 pipeline_spacy_pos_wer@2: cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice --asr=techmo deps: - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 273f598ac14ea7b894189bf0d553a39a + md5: 3817c96d1d91d2cf2d8ec7fe570f1472 size: 1469 - path: experiment_data/dataset/pl_common_voice md5: d751713988987e9331980363e24189ce.dir @@ -1683,7 +1683,7 @@ stages: nfiles: 8136 outs: - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_pos_alignment - md5: 1d2f7fbcca280abdbfb91e83f70e9789.dir + md5: c0c77bf3030799c32241555bf4d71e6b.dir size: 18192387 nfiles: 8136 - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_pos_metrics @@ -1692,7 +1692,7 @@ stages: nfiles: 8136 pipeline_spacy_pos_wer@16: cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14 - --asr=techmo + --asr=ajn deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 @@ -1701,51 +1701,51 @@ stages: md5: d751713988987e9331980363e24189ce.dir size: 0 nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/ajn__result + md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir + size: 974727 + nfiles: 559 - path: experiment_data/pipeline/pl_minds14/gold_transcript md5: d2d48495000b3ea7ea6f4212ddb113a5.dir size: 689374 nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__result - md5: 4c43636b4773f2bf9a2153ef3393a558.dir - size: 1336305 - nfiles: 562 outs: - - path: experiment_data/pipeline/pl_minds14/techmo__spacy_pos_alignment - md5: aebbf34f80eb8151d8cf4d1ed840757c.dir - size: 2640387 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__spacy_pos_metrics - md5: 7d555363b27e9c0d0ab7d0e3011c6d13.dir - size: 6095 - nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/ajn__spacy_pos_alignment + md5: 013fbb68289e1c35986bcaaa773c396c.dir + size: 3178442 + nfiles: 559 + - path: experiment_data/pipeline/pl_minds14/ajn__spacy_pos_metrics + md5: 014fbe751a4f357f78f073920890e06d.dir + size: 7059 + nfiles: 559 pipeline_spacy_pos_wer@6: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs - --asr=google + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_voicelab_cbiz + --asr=techmo deps: - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 1475a345757c2c9d699c0d4bc6c194f6 - size: 1734 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__result - md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir - size: 1377134 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_pos_alignment - md5: 39ad38a46e1489b88629b8512ca09ea3.dir - size: 3712618 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_pos_metrics - md5: 56dddb48cea2022b91fd4323efd43a8b.dir - size: 9213 - nfiles: 758 + md5: 3817c96d1d91d2cf2d8ec7fe570f1472 + size: 1469 + - path: experiment_data/dataset/pl_voicelab_cbiz + md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir + size: 4803739404 + nfiles: 1600 + - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript + md5: ebffd3814a48564f4e33b9a4e0956af3.dir + size: 21846798 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result + md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir + size: 39158267 + nfiles: 800 + outs: + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_pos_alignment + md5: 678171dbd7c038cd6eaadc5eb331b8b5.dir + size: 81650836 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_pos_metrics + md5: 11320499f29d2d7bfce68d35fb352b83.dir + size: 14334 + nfiles: 800 pipeline_spacy_pos_wer@0: cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice --asr=google @@ -1776,91 +1776,91 @@ stages: nfiles: 8143 pipeline_spacy_pos_wer@11: cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna - --asr=ajn + --asr=google deps: - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 273f598ac14ea7b894189bf0d553a39a + md5: 3817c96d1d91d2cf2d8ec7fe570f1472 size: 1469 - path: experiment_data/dataset/pl_luna md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/ajn__result - md5: 653d65e186a7d05958ce3cbef219038c.dir - size: 6159899 - nfiles: 494 - path: experiment_data/pipeline/pl_luna/gold_transcript md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir size: 6706925 nfiles: 500 + - path: experiment_data/pipeline/pl_luna/google__result + md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir + size: 5346497 + nfiles: 500 outs: - - path: experiment_data/pipeline/pl_luna/ajn__spacy_pos_alignment - md5: f12f57b3229bbbd18fb6584d6891b5cc.dir - size: 21936929 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/ajn__spacy_pos_metrics - md5: 6c6bb673ea5f64c9d851878c9d8a7c09.dir - size: 8444 - nfiles: 494 + - path: experiment_data/pipeline/pl_luna/google__spacy_pos_alignment + md5: 460edffd716b1f46fb863e5b2b0d949e.dir + size: 19342263 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/google__spacy_pos_metrics + md5: a65dd7d74319da06f4ebaca08cde30ce.dir + size: 8659 + nfiles: 500 pipeline_spacy_pos_wer@15: cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14 - --asr=ajn + --asr=google deps: - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 1475a345757c2c9d699c0d4bc6c194f6 - size: 1734 + md5: 3817c96d1d91d2cf2d8ec7fe570f1472 + size: 1469 - path: experiment_data/dataset/pl_minds14 md5: d751713988987e9331980363e24189ce.dir size: 0 nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/ajn__result - md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir - size: 974727 - nfiles: 559 - path: experiment_data/pipeline/pl_minds14/gold_transcript md5: d2d48495000b3ea7ea6f4212ddb113a5.dir size: 689374 nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/google__result + md5: 3f7a79298a5156fd2b023e673326e72f.dir + size: 985004 + nfiles: 562 outs: - - path: experiment_data/pipeline/pl_minds14/ajn__spacy_pos_alignment - md5: 101ee541a794af147e48dd460afc227f.dir - size: 3178442 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/ajn__spacy_pos_metrics - md5: 014fbe751a4f357f78f073920890e06d.dir - size: 7059 - nfiles: 559 + - path: experiment_data/pipeline/pl_minds14/google__spacy_pos_alignment + md5: 1c0c5687fc5b80e2a178e2a4618aa04c.dir + size: 2659852 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/google__spacy_pos_metrics + md5: aeda105b01366dee65935d3c07fe3444.dir + size: 4970 + nfiles: 562 pipeline_spacy_pos_wer@12: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14 + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna --asr=ajn deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 size: 1469 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/ajn__result - md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir - size: 974727 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d2d48495000b3ea7ea6f4212ddb113a5.dir - size: 689374 - nfiles: 562 + - path: experiment_data/dataset/pl_luna + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/ajn__result + md5: 653d65e186a7d05958ce3cbef219038c.dir + size: 6159899 + nfiles: 494 + - path: experiment_data/pipeline/pl_luna/gold_transcript + md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir + size: 6706925 + nfiles: 500 outs: - - path: experiment_data/pipeline/pl_minds14/ajn__spacy_pos_alignment - md5: 013fbb68289e1c35986bcaaa773c396c.dir - size: 3178442 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/ajn__spacy_pos_metrics - md5: 014fbe751a4f357f78f073920890e06d.dir - size: 7059 - nfiles: 559 + - path: experiment_data/pipeline/pl_luna/ajn__spacy_pos_alignment + md5: 0f310b8840f2d09ac558b9bac2a0d3b0.dir + size: 21936929 + nfiles: 494 + - path: experiment_data/pipeline/pl_luna/ajn__spacy_pos_metrics + md5: 6c6bb673ea5f64c9d851878c9d8a7c09.dir + size: 8444 + nfiles: 494 pipeline_spacy_pos_wer@17: cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14 - --asr=wav2vec2 + --asr=techmo deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 @@ -1873,18 +1873,18 @@ stages: md5: d2d48495000b3ea7ea6f4212ddb113a5.dir size: 689374 nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__result - md5: 5658da01ecdce39ed99156bbc7f2dc62.dir - size: 3523907 + - path: experiment_data/pipeline/pl_minds14/techmo__result + md5: 4c43636b4773f2bf9a2153ef3393a558.dir + size: 1336305 nfiles: 562 outs: - - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_pos_alignment - md5: 0d04963621be1dd6a1c81225734de652.dir - size: 2724276 + - path: experiment_data/pipeline/pl_minds14/techmo__spacy_pos_alignment + md5: aebbf34f80eb8151d8cf4d1ed840757c.dir + size: 2640387 nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_pos_metrics - md5: 818882d685785a9d7d9b8d757c4c8e90.dir - size: 6840 + - path: experiment_data/pipeline/pl_minds14/techmo__spacy_pos_metrics + md5: 7d555363b27e9c0d0ab7d0e3011c6d13.dir + size: 6095 nfiles: 562 pipeline_spacy_pos_wer@1: cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice @@ -1915,92 +1915,36 @@ stages: size: 97426 nfiles: 8154 pipeline_spacy_pos_wer@7: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs --asr=google deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 size: 1469 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__result - md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir - size: 5346497 - nfiles: 500 + - path: experiment_data/dataset/pl_google_fleurs + md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir + size: 236272072 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript + md5: 607f551eca5dabcca0caf31c87bd2ac6.dir + size: 975209 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/google__result + md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir + size: 1377134 + nfiles: 758 outs: - - path: experiment_data/pipeline/pl_luna/google__spacy_pos_alignment - md5: 460edffd716b1f46fb863e5b2b0d949e.dir - size: 19342263 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__spacy_pos_metrics - md5: a65dd7d74319da06f4ebaca08cde30ce.dir - size: 8659 - nfiles: 500 + - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_pos_alignment + md5: 3429e870a0541d01eb85b0a34fd16021.dir + size: 3712618 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_pos_metrics + md5: 56dddb48cea2022b91fd4323efd43a8b.dir + size: 9213 + nfiles: 758 pipeline_spacy_pos_wer@10: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/wav2vec2__result - md5: 9c63b061ac7763144bca121e163ee7aa.dir - size: 20658485 - nfiles: 456 - outs: - - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_pos_alignment - md5: a791fae1ca5599b9422b94d4c6ce760a.dir - size: 17967467 - nfiles: 456 - - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_pos_metrics - md5: 7848ddff997fd231f3857ff30dfd7154.dir - size: 7940 - nfiles: 456 - pipeline_spacy_pos_wer@13: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14 - --asr=techmo - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d2d48495000b3ea7ea6f4212ddb113a5.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__result - md5: 4c43636b4773f2bf9a2153ef3393a558.dir - size: 1336305 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/techmo__spacy_pos_alignment - md5: aebbf34f80eb8151d8cf4d1ed840757c.dir - size: 2640387 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__spacy_pos_metrics - md5: 7d555363b27e9c0d0ab7d0e3011c6d13.dir - size: 6095 - nfiles: 562 - pipeline_spacy_pos_wer@5: cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs - --asr=techmo + --asr=wav2vec2 deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 @@ -2013,50 +1957,106 @@ stages: md5: 607f551eca5dabcca0caf31c87bd2ac6.dir size: 975209 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__result - md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir - size: 1880403 + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result + md5: bf9c77e34376bcda73dbdb6afee55c8c.dir + size: 5137721 nfiles: 758 outs: - - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_pos_alignment - md5: d76701778be18566a4d3a45bc325196e.dir - size: 3707699 + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_pos_alignment + md5: 3b2e3e75c644805681905c97ec37cf28.dir + size: 3737151 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_pos_metrics - md5: 9cae08bbb8a6331d06a33dbbb4a16301.dir - size: 9662 + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_pos_metrics + md5: 782cc84e9116281dfc28734b2ae4a5ea.dir + size: 9004 nfiles: 758 - pipeline_spacy_ner_wer@4: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_voicelab_cbiz - --asr=ajn + pipeline_spacy_pos_wer@13: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna + --asr=techmo deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 size: 1469 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result - md5: 7de1137f44fad26766da0fc309720160.dir - size: 22765926 - nfiles: 800 + - path: experiment_data/dataset/pl_luna + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/gold_transcript + md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir + size: 6706925 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/techmo__result + md5: 0e596570e1502b38588427bc72dcc006.dir + size: 9697519 + nfiles: 500 + outs: + - path: experiment_data/pipeline/pl_luna/techmo__spacy_pos_alignment + md5: a5cb7dd59d83b2ec678c6ba5177bc38f.dir + size: 20897599 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/techmo__spacy_pos_metrics + md5: 1478707020a96496b50eb732207c290e.dir + size: 8841 + nfiles: 500 + pipeline_spacy_pos_wer@5: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_voicelab_cbiz + --asr=ajn + deps: + - path: experiment/pipeline_process_spacy_pos_wer.py + md5: 3817c96d1d91d2cf2d8ec7fe570f1472 + size: 1469 + - path: experiment_data/dataset/pl_voicelab_cbiz + md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir + size: 4803739404 + nfiles: 1600 + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result + md5: 7de1137f44fad26766da0fc309720160.dir + size: 22765926 + nfiles: 800 - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript md5: ebffd3814a48564f4e33b9a4e0956af3.dir size: 21846798 nfiles: 800 outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_ner_alignment - md5: 48a512d6811d00ec9dbe60e3131834ac.dir - size: 76044699 + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_pos_alignment + md5: 4f07fe60bf29e2148879db0c903c98f6.dir + size: 78539613 nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_ner_metrics - md5: 61799b677b4d6af5e41c947c1d3b02cd.dir - size: 14196 + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_pos_metrics + md5: 7ec3bb7c838e7f06b8a1dbe7a68faac2.dir + size: 13753 + nfiles: 800 + pipeline_spacy_ner_wer@4: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_voicelab_cbiz + --asr=google + deps: + - path: experiment/pipeline_process_spacy_pos_wer.py + md5: 3817c96d1d91d2cf2d8ec7fe570f1472 + size: 1469 + - path: experiment_data/dataset/pl_voicelab_cbiz + md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir + size: 4803739404 + nfiles: 1600 + - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript + md5: ebffd3814a48564f4e33b9a4e0956af3.dir + size: 21846798 nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result + md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir + size: 27432599 + nfiles: 799 + outs: + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_ner_alignment + md5: a83b6e086aa207da109fc06293501f52.dir + size: 80594165 + nfiles: 799 + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_ner_metrics + md5: 85388729ffbed2a328f94758c682c809.dir + size: 14708 + nfiles: 799 pipeline_spacy_ner_wer@16: cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14 - --asr=techmo + --asr=ajn deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 @@ -2065,26 +2065,26 @@ stages: md5: d751713988987e9331980363e24189ce.dir size: 0 nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/ajn__result + md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir + size: 974727 + nfiles: 559 - path: experiment_data/pipeline/pl_minds14/gold_transcript md5: d2d48495000b3ea7ea6f4212ddb113a5.dir size: 689374 nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__result - md5: 4c43636b4773f2bf9a2153ef3393a558.dir - size: 1336305 - nfiles: 562 outs: - - path: experiment_data/pipeline/pl_minds14/techmo__spacy_ner_alignment - md5: b9b4f2685f681ad7334ac60236a6ebe7.dir - size: 2573182 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__spacy_ner_metrics - md5: d306e0d18df9ee86f853719244bcfd35.dir - size: 5484 - nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/ajn__spacy_ner_alignment + md5: 69280464b63869e12150a53f35f096b0.dir + size: 3070948 + nfiles: 559 + - path: experiment_data/pipeline/pl_minds14/ajn__spacy_ner_metrics + md5: 5e97918fe79741d3d054d3ba77bc1537.dir + size: 7316 + nfiles: 559 pipeline_spacy_ner_wer@9: cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs - --asr=wav2vec2 + --asr=techmo deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 @@ -2097,22 +2097,22 @@ stages: md5: 607f551eca5dabcca0caf31c87bd2ac6.dir size: 975209 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result - md5: bf9c77e34376bcda73dbdb6afee55c8c.dir - size: 5137721 + - path: experiment_data/pipeline/pl_google_fleurs/techmo__result + md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir + size: 1880403 nfiles: 758 outs: - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_ner_alignment - md5: d04f82c751e6ef28414b35ba4d844470.dir - size: 3660762 + - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_ner_alignment + md5: 628a77d036410a40bd358db2fc8ff739.dir + size: 3626842 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_ner_metrics - md5: 308e3f0f67d855482d7d3634954f4b57.dir - size: 7553 + - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_ner_metrics + md5: cda4c4246e8b3b6d57aed57c4fe5594a.dir + size: 8311 nfiles: 758 pipeline_spacy_ner_wer@15: cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14 - --asr=ajn + --asr=google deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 @@ -2121,23 +2121,23 @@ stages: md5: d751713988987e9331980363e24189ce.dir size: 0 nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/ajn__result - md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir - size: 974727 - nfiles: 559 - path: experiment_data/pipeline/pl_minds14/gold_transcript md5: d2d48495000b3ea7ea6f4212ddb113a5.dir size: 689374 nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/google__result + md5: 3f7a79298a5156fd2b023e673326e72f.dir + size: 985004 + nfiles: 562 outs: - - path: experiment_data/pipeline/pl_minds14/ajn__spacy_ner_alignment - md5: 69280464b63869e12150a53f35f096b0.dir - size: 3070948 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/ajn__spacy_ner_metrics - md5: 5e97918fe79741d3d054d3ba77bc1537.dir - size: 7316 - nfiles: 559 + - path: experiment_data/pipeline/pl_minds14/google__spacy_ner_alignment + md5: b251bb8eda29c1644a26b4db0bcf0766.dir + size: 2599569 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/google__spacy_ner_metrics + md5: 85361b5e0ac555a688bbc7dd4477e7a8.dir + size: 4545 + nfiles: 562 pipeline_spacy_ner_wer@1: cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice --asr=wav2vec2 @@ -2207,173 +2207,173 @@ stages: size: 3523907 nfiles: 562 pipeline_spacy_ner_wer@12: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14 + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna --asr=ajn deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 size: 1469 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/ajn__result - md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir - size: 974727 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d2d48495000b3ea7ea6f4212ddb113a5.dir - size: 689374 - nfiles: 562 + - path: experiment_data/dataset/pl_luna + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/ajn__result + md5: 653d65e186a7d05958ce3cbef219038c.dir + size: 6159899 + nfiles: 494 + - path: experiment_data/pipeline/pl_luna/gold_transcript + md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir + size: 6706925 + nfiles: 500 outs: - - path: experiment_data/pipeline/pl_minds14/ajn__spacy_ner_alignment - md5: 69280464b63869e12150a53f35f096b0.dir - size: 3070948 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/ajn__spacy_ner_metrics - md5: 5e97918fe79741d3d054d3ba77bc1537.dir - size: 7316 - nfiles: 559 + - path: experiment_data/pipeline/pl_luna/ajn__spacy_ner_alignment + md5: 68434e48e2e6652ae6c9b17f9eb6fa05.dir + size: 21154450 + nfiles: 494 + - path: experiment_data/pipeline/pl_luna/ajn__spacy_ner_metrics + md5: 288d81ffe41a25d5ab85bbb3ef908979.dir + size: 8703 + nfiles: 494 pipeline_spacy_ner_wer@13: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14 + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna --asr=techmo deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 size: 1469 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d2d48495000b3ea7ea6f4212ddb113a5.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__result - md5: 4c43636b4773f2bf9a2153ef3393a558.dir - size: 1336305 - nfiles: 562 + - path: experiment_data/dataset/pl_luna + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/gold_transcript + md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir + size: 6706925 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/techmo__result + md5: 0e596570e1502b38588427bc72dcc006.dir + size: 9697519 + nfiles: 500 outs: - - path: experiment_data/pipeline/pl_minds14/techmo__spacy_ner_alignment - md5: b9b4f2685f681ad7334ac60236a6ebe7.dir - size: 2573182 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__spacy_ner_metrics - md5: d306e0d18df9ee86f853719244bcfd35.dir - size: 5484 - nfiles: 562 + - path: experiment_data/pipeline/pl_luna/techmo__spacy_ner_alignment + md5: c301698fa01d07cfb3efb16ffbb06b69.dir + size: 20451389 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/techmo__spacy_ner_metrics + md5: 06cea0926a325c92a1ff79457db655a8.dir + size: 8918 + nfiles: 500 pipeline_spacy_ner_wer@11: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14 + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna --asr=google deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 size: 1469 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d2d48495000b3ea7ea6f4212ddb113a5.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__result - md5: 3f7a79298a5156fd2b023e673326e72f.dir - size: 985004 - nfiles: 562 + - path: experiment_data/dataset/pl_luna + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/gold_transcript + md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir + size: 6706925 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/google__result + md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir + size: 5346497 + nfiles: 500 outs: - - path: experiment_data/pipeline/pl_minds14/google__spacy_ner_alignment - md5: b251bb8eda29c1644a26b4db0bcf0766.dir - size: 2599569 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__spacy_ner_metrics - md5: 85361b5e0ac555a688bbc7dd4477e7a8.dir - size: 4545 - nfiles: 562 + - path: experiment_data/pipeline/pl_luna/google__spacy_ner_alignment + md5: 84a872f90985c615648ce02ea8a6951c.dir + size: 18946509 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/google__spacy_ner_metrics + md5: 7df5fdeb083c8b1e953e1ff097c69f11.dir + size: 8752 + nfiles: 500 pipeline_spacy_ner_wer@6: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs - --asr=wav2vec2 + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_voicelab_cbiz + --asr=techmo deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 size: 1469 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result - md5: bf9c77e34376bcda73dbdb6afee55c8c.dir - size: 5137721 - nfiles: 758 + - path: experiment_data/dataset/pl_voicelab_cbiz + md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir + size: 4803739404 + nfiles: 1600 + - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript + md5: ebffd3814a48564f4e33b9a4e0956af3.dir + size: 21846798 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result + md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir + size: 39158267 + nfiles: 800 outs: - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_ner_alignment - md5: d04f82c751e6ef28414b35ba4d844470.dir - size: 3660762 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_ner_metrics - md5: 308e3f0f67d855482d7d3634954f4b57.dir - size: 7553 - nfiles: 758 + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_ner_alignment + md5: 3c7bf6f59153f0eef4685795dc8c1246.dir + size: 79503990 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_ner_metrics + md5: f0dd0a8adb67c53d83ad32d152f90365.dir + size: 14595 + nfiles: 800 pipeline_spacy_ner_wer@5: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs - --asr=techmo + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_voicelab_cbiz + --asr=ajn deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 size: 1469 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__result - md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir - size: 1880403 - nfiles: 758 + - path: experiment_data/dataset/pl_voicelab_cbiz + md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir + size: 4803739404 + nfiles: 1600 + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result + md5: 7de1137f44fad26766da0fc309720160.dir + size: 22765926 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript + md5: ebffd3814a48564f4e33b9a4e0956af3.dir + size: 21846798 + nfiles: 800 outs: - - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_ner_alignment - md5: 628a77d036410a40bd358db2fc8ff739.dir - size: 3626842 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_ner_metrics - md5: cda4c4246e8b3b6d57aed57c4fe5594a.dir - size: 8311 - nfiles: 758 + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_ner_alignment + md5: 48a512d6811d00ec9dbe60e3131834ac.dir + size: 76044699 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_ner_metrics + md5: 61799b677b4d6af5e41c947c1d3b02cd.dir + size: 14196 + nfiles: 800 pipeline_spacy_pos_wer@3: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs - --asr=google + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice + --asr=ajn deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 size: 1469 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__result - md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir - size: 1377134 - nfiles: 758 + - path: experiment_data/dataset/pl_common_voice + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_common_voice/ajn__result + md5: 77d873041fe2952e3c45ee4ac6458061.dir + size: 6667841 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/gold_transcript + md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir + size: 4093545 + nfiles: 8155 outs: - - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_pos_alignment - md5: 3429e870a0541d01eb85b0a34fd16021.dir - size: 3712618 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_pos_metrics - md5: 56dddb48cea2022b91fd4323efd43a8b.dir - size: 9213 - nfiles: 758 + - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_pos_alignment + md5: f3b7dd0987fc7be5171478910c950657.dir + size: 19159060 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_pos_metrics + md5: 6094fb960e2eab979ecb33d40a253531.dir + size: 95146 + nfiles: 8155 pipeline_spacy_ner_wer@0: cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice --asr=google @@ -2403,12 +2403,40 @@ stages: size: 97917 nfiles: 8143 pipeline_spacy_pos_wer@9: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs --asr=techmo deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 size: 1469 + - path: experiment_data/dataset/pl_google_fleurs + md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir + size: 236272072 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript + md5: 607f551eca5dabcca0caf31c87bd2ac6.dir + size: 975209 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/techmo__result + md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir + size: 1880403 + nfiles: 758 + outs: + - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_pos_alignment + md5: d76701778be18566a4d3a45bc325196e.dir + size: 3707699 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_pos_metrics + md5: 9cae08bbb8a6331d06a33dbbb4a16301.dir + size: 9662 + nfiles: 758 + pipeline_wikineiural_ner@11: + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna + --asr=google + deps: + - path: experiment/pipeline_process_wikineural_ner.py + md5: c2c4e92a33346a3c097a321f1f5f2af3 + size: 1802 - path: experiment_data/dataset/pl_luna md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 @@ -2417,110 +2445,54 @@ stages: md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir size: 6706925 nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__result - md5: 0e596570e1502b38588427bc72dcc006.dir - size: 9697519 + - path: experiment_data/pipeline/pl_luna/google__result + md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir + size: 5346497 nfiles: 500 outs: - - path: experiment_data/pipeline/pl_luna/techmo__spacy_pos_alignment - md5: a5cb7dd59d83b2ec678c6ba5177bc38f.dir - size: 20897599 + - path: experiment_data/pipeline/pl_luna/google__wikineural_ner_alignment + md5: dbfd406215b6d3b8ed5afcceb92034cb.dir + size: 34436914 nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__spacy_pos_metrics - md5: 1478707020a96496b50eb732207c290e.dir - size: 8841 + - path: experiment_data/pipeline/pl_luna/google__wikineural_ner_metrics + md5: d4ca8091dac498b609dca9fda6d22f07.dir + size: 8807 nfiles: 500 - pipeline_wikineiural_ner@11: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14 - --asr=google - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d2d48495000b3ea7ea6f4212ddb113a5.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__result - md5: 3f7a79298a5156fd2b023e673326e72f.dir - size: 985004 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/google__wikineural_ner_alignment - md5: e632c40f440b7b59e2d94771e29f2dea.dir - size: 5692456 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__wikineural_ner_metrics - md5: 0385f2138fd318df8852a7e38c1770ac.dir - size: 5322 - nfiles: 562 pipeline_wikineiural_ner@8: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs --asr=ajn deps: - path: experiment/pipeline_process_wikineural_ner.py md5: c2c4e92a33346a3c097a321f1f5f2af3 size: 1802 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/ajn__result - md5: 653d65e186a7d05958ce3cbef219038c.dir - size: 6159899 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 + - path: experiment_data/dataset/pl_google_fleurs + md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir + size: 236272072 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/ajn__result + md5: 545e63a6daf9c46387c1d7d40b85499f.dir + size: 1413262 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript + md5: 607f551eca5dabcca0caf31c87bd2ac6.dir + size: 975209 + nfiles: 758 outs: - - path: experiment_data/pipeline/pl_luna/ajn__wikineural_ner_alignment - md5: 52c8c06ce74db727c4aa5f55a2a865cd.dir - size: 37478125 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/ajn__wikineural_ner_metrics - md5: 602eaef0046a2a8b551bbbced7123886.dir - size: 8816 - nfiles: 494 + - path: experiment_data/pipeline/pl_google_fleurs/ajn__wikineural_ner_alignment + md5: 83f073c2166a9a2fcfc6dc4d2268efb4.dir + size: 7530856 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/ajn__wikineural_ner_metrics + md5: 7def95d54e9428f71fa43a5b9a95b54b.dir + size: 12379 + nfiles: 758 pipeline_wikineiural_ner@14: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14 + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna --asr=wav2vec2 deps: - path: experiment/pipeline_process_wikineural_ner.py md5: c2c4e92a33346a3c097a321f1f5f2af3 size: 1802 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d2d48495000b3ea7ea6f4212ddb113a5.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__result - md5: 5658da01ecdce39ed99156bbc7f2dc62.dir - size: 3523907 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/wav2vec2__wikineural_ner_alignment - md5: 5bc3f08dd32f7769c6c6c0b25df52bf8.dir - size: 5895431 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__wikineural_ner_metrics - md5: 6535a4779ab3be38804639e16cbbe70f.dir - size: 8602 - nfiles: 562 - pipeline_spacy_ner_wer@10: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - path: experiment_data/dataset/pl_luna md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 @@ -2534,17 +2506,17 @@ stages: size: 20658485 nfiles: 456 outs: - - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_ner_alignment - md5: 4530ad915e82cc9668e775d5de219b13.dir - size: 17577804 + - path: experiment_data/pipeline/pl_luna/wav2vec2__wikineural_ner_alignment + md5: 4a1ad7d9221851e3735f1c3c2238862b.dir + size: 33934003 nfiles: 456 - - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_ner_metrics - md5: 0ad2dc0beeebd336771228f8751fe028.dir - size: 8014 + - path: experiment_data/pipeline/pl_luna/wav2vec2__wikineural_ner_metrics + md5: 0368dfd54954d3aa317e359f32efe59e.dir + size: 8244 nfiles: 456 - pipeline_spacy_ner_wer@3: + pipeline_spacy_ner_wer@10: cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs - --asr=google + --asr=wav2vec2 deps: - path: experiment/pipeline_process_spacy_pos_wer.py md5: 3817c96d1d91d2cf2d8ec7fe570f1472 @@ -2557,19 +2529,47 @@ stages: md5: 607f551eca5dabcca0caf31c87bd2ac6.dir size: 975209 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__result - md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir - size: 1377134 + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result + md5: bf9c77e34376bcda73dbdb6afee55c8c.dir + size: 5137721 nfiles: 758 outs: - - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_ner_alignment - md5: 67ea10ecbb9f5c3bfa14ffb85c843ba3.dir - size: 3638477 + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_ner_alignment + md5: d04f82c751e6ef28414b35ba4d844470.dir + size: 3660762 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_ner_metrics - md5: 0532a9596f26fd52037b6ecaa838ab9e.dir - size: 8500 + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_ner_metrics + md5: 308e3f0f67d855482d7d3634954f4b57.dir + size: 7553 nfiles: 758 + pipeline_spacy_ner_wer@3: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice + --asr=ajn + deps: + - path: experiment/pipeline_process_spacy_pos_wer.py + md5: 3817c96d1d91d2cf2d8ec7fe570f1472 + size: 1469 + - path: experiment_data/dataset/pl_common_voice + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_common_voice/ajn__result + md5: 77d873041fe2952e3c45ee4ac6458061.dir + size: 6667841 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/gold_transcript + md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir + size: 4093545 + nfiles: 8155 + outs: + - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_ner_alignment + md5: 4b5e01b54c2ce8118580ad02d6dffca1.dir + size: 18576865 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_ner_metrics + md5: 15078a7515bc939d08d29d079311b37d.dir + size: 91570 + nfiles: 8155 pipeline_spacy_ner_wer@2: cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice --asr=techmo @@ -2599,40 +2599,180 @@ stages: size: 97994 nfiles: 8136 pipeline_wikineiural_ner@3: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs - --asr=google + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice + --asr=ajn deps: - path: experiment/pipeline_process_wikineural_ner.py md5: c2c4e92a33346a3c097a321f1f5f2af3 size: 1802 + - path: experiment_data/dataset/pl_common_voice + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_common_voice/ajn__result + md5: 77d873041fe2952e3c45ee4ac6458061.dir + size: 6667841 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/gold_transcript + md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir + size: 4093545 + nfiles: 8155 + outs: + - path: experiment_data/pipeline/pl_common_voice/ajn__wikineural_ner_alignment + md5: 493bf2c6d82f18a117f2ae6d438d5c30.dir + size: 36352970 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/ajn__wikineural_ner_metrics + md5: 2275d851a39aafa755474fe79b16b337.dir + size: 117588 + nfiles: 8155 + pipeline_spacy_pos_wer@8: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs + --asr=ajn + deps: + - path: experiment/pipeline_process_spacy_pos_wer.py + md5: 3817c96d1d91d2cf2d8ec7fe570f1472 + size: 1469 - path: experiment_data/dataset/pl_google_fleurs md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir size: 236272072 nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/ajn__result + md5: 545e63a6daf9c46387c1d7d40b85499f.dir + size: 1413262 + nfiles: 758 - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript md5: 607f551eca5dabcca0caf31c87bd2ac6.dir size: 975209 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__result - md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir - size: 1377134 - nfiles: 758 outs: - - path: experiment_data/pipeline/pl_google_fleurs/google__wikineural_ner_alignment - md5: 39e078edbc3f65934358787ddbe40eec.dir - size: 7572934 + - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_pos_alignment + md5: 210ec7ffdde2c076fe2ab72873f240c7.dir + size: 3799802 nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__wikineural_ner_metrics - md5: f8b3c4183e31fc9e612b189579644f74.dir - size: 10796 + - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_pos_metrics + md5: 0d95a4abb4a33aef3e242a304fd58698.dir + size: 10569 nfiles: 758 - pipeline_spacy_pos_wer@8: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna + pipeline_wikineiural_ner@1: + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice + --asr=wav2vec2 + deps: + - path: experiment/pipeline_process_wikineural_ner.py + md5: c2c4e92a33346a3c097a321f1f5f2af3 + size: 1802 + - path: experiment_data/dataset/pl_common_voice + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_common_voice/gold_transcript + md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir + size: 4093545 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result + md5: b0d0042d77d7adce37890ca63ad40091.dir + size: 19014997 + nfiles: 8154 + outs: + - path: experiment_data/pipeline/pl_common_voice/wav2vec2__wikineural_ner_alignment + md5: 9613a3fcb28ead3d3d39a16180ccfab8.dir + size: 37574594 + nfiles: 8154 + - path: experiment_data/pipeline/pl_common_voice/wav2vec2__wikineural_ner_metrics + md5: 59ff9a09d1e232c66bd41afa757f9130.dir + size: 118556 + nfiles: 8154 + pipeline_flair_upos@13: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna + --asr=techmo + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_luna + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/gold_transcript + md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir + size: 6706925 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/techmo__result + md5: 0e596570e1502b38588427bc72dcc006.dir + size: 9697519 + nfiles: 500 + outs: + - path: experiment_data/pipeline/pl_luna/techmo__flair_upos_alignment + md5: 4319f089e3987d68b4622f864c17ad6c.dir + size: 20898162 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/techmo__flair_upos_metrics + md5: 0063866f11c1d82a4ba430f14c81ed8f.dir + size: 8844 + nfiles: 500 + pipeline_flair_upos@0: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice + --asr=google + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_common_voice + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_common_voice/gold_transcript + md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir + size: 4093545 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/google__result + md5: afb53476cc93ef4de3591908df41fd2a.dir + size: 5854366 + nfiles: 8143 + outs: + - path: experiment_data/pipeline/pl_common_voice/google__flair_upos_alignment + md5: 169eb8cd967ea75404bd3bd7f5a41b5e.dir + size: 17519056 + nfiles: 8143 + - path: experiment_data/pipeline/pl_common_voice/google__flair_upos_metrics + md5: 768158db51050c79dd588c8b32e5b89e.dir + size: 95486 + nfiles: 8143 + pipeline_wikineiural_ner@13: + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna + --asr=techmo + deps: + - path: experiment/pipeline_process_wikineural_ner.py + md5: c2c4e92a33346a3c097a321f1f5f2af3 + size: 1802 + - path: experiment_data/dataset/pl_luna + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/gold_transcript + md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir + size: 6706925 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/techmo__result + md5: 0e596570e1502b38588427bc72dcc006.dir + size: 9697519 + nfiles: 500 + outs: + - path: experiment_data/pipeline/pl_luna/techmo__wikineural_ner_alignment + md5: 6819012ed6e05015753687b12eea426c.dir + size: 37042138 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/techmo__wikineural_ner_metrics + md5: 47cd0bc5270bf7e3300c91f211b7a6f8.dir + size: 9011 + nfiles: 500 + pipeline_flair_upos@12: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna --asr=ajn deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 - path: experiment_data/dataset/pl_luna md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 @@ -2646,11 +2786,1587 @@ stages: size: 6706925 nfiles: 500 outs: - - path: experiment_data/pipeline/pl_luna/ajn__spacy_pos_alignment - md5: 0f310b8840f2d09ac558b9bac2a0d3b0.dir - size: 21936929 + - path: experiment_data/pipeline/pl_luna/ajn__flair_upos_alignment + md5: 8aa7d2eeb9deb1bde3394ab5df9c15a0.dir + size: 21838304 nfiles: 494 - - path: experiment_data/pipeline/pl_luna/ajn__spacy_pos_metrics - md5: 6c6bb673ea5f64c9d851878c9d8a7c09.dir - size: 8444 + - path: experiment_data/pipeline/pl_luna/ajn__flair_upos_metrics + md5: 4ccf8f54da1b47d0a9e0e7c95020135e.dir + size: 8450 nfiles: 494 + pipeline_wikineiural_ner@10: + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs + --asr=wav2vec2 + deps: + - path: experiment/pipeline_process_wikineural_ner.py + md5: c2c4e92a33346a3c097a321f1f5f2af3 + size: 1802 + - path: experiment_data/dataset/pl_google_fleurs + md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir + size: 236272072 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript + md5: 607f551eca5dabcca0caf31c87bd2ac6.dir + size: 975209 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result + md5: bf9c77e34376bcda73dbdb6afee55c8c.dir + size: 5137721 + nfiles: 758 + outs: + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__wikineural_ner_alignment + md5: 320e1321cb3db403a9ec0bb662abe52c.dir + size: 7828240 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__wikineural_ner_metrics + md5: 0738885f0f798e8bc0fbb5c9824d9db1.dir + size: 9704 + nfiles: 758 + pipeline_wikineiural_ner@2: + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice + --asr=techmo + deps: + - path: experiment/pipeline_process_wikineural_ner.py + md5: c2c4e92a33346a3c097a321f1f5f2af3 + size: 1802 + - path: experiment_data/dataset/pl_common_voice + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_common_voice/gold_transcript + md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir + size: 4093545 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/techmo__result + md5: 9030cf3640f2749d9c1b4439687bdc2f.dir + size: 7761880 + nfiles: 8136 + outs: + - path: experiment_data/pipeline/pl_common_voice/techmo__wikineural_ner_alignment + md5: 40094adf33d391cd6abb6199c326dfd7.dir + size: 37097148 + nfiles: 8136 + - path: experiment_data/pipeline/pl_common_voice/techmo__wikineural_ner_metrics + md5: 4570a295cf89a779de66565b0b9a77ad.dir + size: 118544 + nfiles: 8136 + pipeline_flair_upos@11: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna + --asr=google + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_luna + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/gold_transcript + md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir + size: 6706925 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/google__result + md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir + size: 5346497 + nfiles: 500 + outs: + - path: experiment_data/pipeline/pl_luna/google__flair_upos_alignment + md5: 8a23016ee1f269beec30232dff1f751b.dir + size: 19210117 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/google__flair_upos_metrics + md5: 891ec9646dea43249aa9cb4eb562b2de.dir + size: 8654 + nfiles: 500 + pipeline_wikineiural_ner@4: + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_voicelab_cbiz + --asr=google + deps: + - path: experiment/pipeline_process_wikineural_ner.py + md5: c2c4e92a33346a3c097a321f1f5f2af3 + size: 1802 + - path: experiment_data/dataset/pl_voicelab_cbiz + md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir + size: 4803739404 + nfiles: 1600 + - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript + md5: ebffd3814a48564f4e33b9a4e0956af3.dir + size: 21846798 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result + md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir + size: 27432599 + nfiles: 799 + outs: + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__wikineural_ner_alignment + md5: f977050b1f0445e9997f902f91634976.dir + size: 144157380 + nfiles: 799 + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__wikineural_ner_metrics + md5: 9a9056d774f8e901fa2beaa9a874b0a2.dir + size: 14800 + nfiles: 799 + pipeline_flair_upos@9: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs + --asr=techmo + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_google_fleurs + md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir + size: 236272072 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript + md5: 607f551eca5dabcca0caf31c87bd2ac6.dir + size: 975209 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/techmo__result + md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir + size: 1880403 + nfiles: 758 + outs: + - path: experiment_data/pipeline/pl_google_fleurs/techmo__flair_upos_alignment + md5: 527561b0790917cc03d782d5bf074294.dir + size: 3695215 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/techmo__flair_upos_metrics + md5: 06a349eee2ca119d3e79bbdbe5c95ef1.dir + size: 9384 + nfiles: 758 + pipeline_wikineiural_ner@9: + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs + --asr=techmo + deps: + - path: experiment/pipeline_process_wikineural_ner.py + md5: c2c4e92a33346a3c097a321f1f5f2af3 + size: 1802 + - path: experiment_data/dataset/pl_google_fleurs + md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir + size: 236272072 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript + md5: 607f551eca5dabcca0caf31c87bd2ac6.dir + size: 975209 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/techmo__result + md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir + size: 1880403 + nfiles: 758 + outs: + - path: experiment_data/pipeline/pl_google_fleurs/techmo__wikineural_ner_alignment + md5: 2fdab231aaf8c96a97268d6447f3ca1e.dir + size: 7640523 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/techmo__wikineural_ner_metrics + md5: 14d453272229bc7b8d7c4eb2790e6b32.dir + size: 10705 + nfiles: 758 + pipeline_flair_upos@14: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna + --asr=wav2vec2 + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_luna + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/gold_transcript + md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir + size: 6706925 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/wav2vec2__result + md5: 9c63b061ac7763144bca121e163ee7aa.dir + size: 20658485 + nfiles: 456 + outs: + - path: experiment_data/pipeline/pl_luna/wav2vec2__flair_upos_alignment + md5: ac11e3b8f28fd268a8bb90f851bc7b5b.dir + size: 17899335 + nfiles: 456 + - path: experiment_data/pipeline/pl_luna/wav2vec2__flair_upos_metrics + md5: 56807b890f4abcb9c41f60ff72a2e6aa.dir + size: 8051 + nfiles: 456 + pipeline_wikineiural_ner@0: + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice + --asr=google + deps: + - path: experiment/pipeline_process_wikineural_ner.py + md5: c2c4e92a33346a3c097a321f1f5f2af3 + size: 1802 + - path: experiment_data/dataset/pl_common_voice + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_common_voice/gold_transcript + md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir + size: 4093545 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/google__result + md5: afb53476cc93ef4de3591908df41fd2a.dir + size: 5854366 + nfiles: 8143 + outs: + - path: experiment_data/pipeline/pl_common_voice/google__wikineural_ner_alignment + md5: 1adc1718f35e14b2aae8de6c6a49de1b.dir + size: 37254218 + nfiles: 8143 + - path: experiment_data/pipeline/pl_common_voice/google__wikineural_ner_metrics + md5: a8b91d2203368be020389a8a02ecbb7b.dir + size: 117584 + nfiles: 8143 + pipeline_wikineiural_ner@12: + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna + --asr=ajn + deps: + - path: experiment/pipeline_process_wikineural_ner.py + md5: c2c4e92a33346a3c097a321f1f5f2af3 + size: 1802 + - path: experiment_data/dataset/pl_luna + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/ajn__result + md5: 653d65e186a7d05958ce3cbef219038c.dir + size: 6159899 + nfiles: 494 + - path: experiment_data/pipeline/pl_luna/gold_transcript + md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir + size: 6706925 + nfiles: 500 + outs: + - path: experiment_data/pipeline/pl_luna/ajn__wikineural_ner_alignment + md5: 52c8c06ce74db727c4aa5f55a2a865cd.dir + size: 37478125 + nfiles: 494 + - path: experiment_data/pipeline/pl_luna/ajn__wikineural_ner_metrics + md5: 602eaef0046a2a8b551bbbced7123886.dir + size: 8816 + nfiles: 494 + pipeline_wikineiural_ner@5: + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_voicelab_cbiz + --asr=ajn + deps: + - path: experiment/pipeline_process_wikineural_ner.py + md5: c2c4e92a33346a3c097a321f1f5f2af3 + size: 1802 + - path: experiment_data/dataset/pl_voicelab_cbiz + md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir + size: 4803739404 + nfiles: 1600 + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result + md5: 7de1137f44fad26766da0fc309720160.dir + size: 22765926 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript + md5: ebffd3814a48564f4e33b9a4e0956af3.dir + size: 21846798 + nfiles: 800 + outs: + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__wikineural_ner_alignment + md5: 3363be14202fdb2577c8ddfce6549751.dir + size: 132627376 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__wikineural_ner_metrics + md5: b83cd24583e6fa648f4e106239e1ffcd.dir + size: 14486 + nfiles: 800 + pipeline_flair_upos@3: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice + --asr=ajn + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_common_voice + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_common_voice/ajn__result + md5: 77d873041fe2952e3c45ee4ac6458061.dir + size: 6667841 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/gold_transcript + md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir + size: 4093545 + nfiles: 8155 + outs: + - path: experiment_data/pipeline/pl_common_voice/ajn__flair_upos_alignment + md5: d8fd6968dfa66671d8fb8cc609782287.dir + size: 17420454 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/ajn__flair_upos_metrics + md5: 619e9209626ba47702db03fa56670078.dir + size: 93408 + nfiles: 8155 + pipeline_wikineiural_ner@7: + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs + --asr=google + deps: + - path: experiment/pipeline_process_wikineural_ner.py + md5: c2c4e92a33346a3c097a321f1f5f2af3 + size: 1802 + - path: experiment_data/dataset/pl_google_fleurs + md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir + size: 236272072 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript + md5: 607f551eca5dabcca0caf31c87bd2ac6.dir + size: 975209 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/google__result + md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir + size: 1377134 + nfiles: 758 + outs: + - path: experiment_data/pipeline/pl_google_fleurs/google__wikineural_ner_alignment + md5: 39e078edbc3f65934358787ddbe40eec.dir + size: 7572934 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/google__wikineural_ner_metrics + md5: f8b3c4183e31fc9e612b189579644f74.dir + size: 10796 + nfiles: 758 + pipeline_spacy_ner_wer@8: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs + --asr=ajn + deps: + - path: experiment/pipeline_process_spacy_pos_wer.py + md5: 3817c96d1d91d2cf2d8ec7fe570f1472 + size: 1469 + - path: experiment_data/dataset/pl_google_fleurs + md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir + size: 236272072 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/ajn__result + md5: 545e63a6daf9c46387c1d7d40b85499f.dir + size: 1413262 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript + md5: 607f551eca5dabcca0caf31c87bd2ac6.dir + size: 975209 + nfiles: 758 + outs: + - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_ner_alignment + md5: 34ddaf22d6ba136b8257344899902015.dir + size: 3675109 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_ner_metrics + md5: 9379479c875f0fe49d6da70c9b189d67.dir + size: 10663 + nfiles: 758 + pipeline_flair_upos@7: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs + --asr=google + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_google_fleurs + md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir + size: 236272072 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript + md5: 607f551eca5dabcca0caf31c87bd2ac6.dir + size: 975209 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/google__result + md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir + size: 1377134 + nfiles: 758 + outs: + - path: experiment_data/pipeline/pl_google_fleurs/google__flair_upos_alignment + md5: 7a58a3caced592a9f00af9515eb92413.dir + size: 3696853 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/google__flair_upos_metrics + md5: 680491a1374081f637ade245e79ca627.dir + size: 9562 + nfiles: 758 + pipeline_flair_upos@4: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_voicelab_cbiz + --asr=google + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_voicelab_cbiz + md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir + size: 4803739404 + nfiles: 1600 + - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript + md5: ebffd3814a48564f4e33b9a4e0956af3.dir + size: 21846798 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result + md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir + size: 27432599 + nfiles: 799 + outs: + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__flair_upos_alignment + md5: 3446f07e21780b9319c2c22d88767dfc.dir + size: 81897055 + nfiles: 799 + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__flair_upos_metrics + md5: 06fc109e28016f32d68edf992b4c3072.dir + size: 14350 + nfiles: 799 + pipeline_spacy_pos_wer@4: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_voicelab_cbiz + --asr=google + deps: + - path: experiment/pipeline_process_spacy_pos_wer.py + md5: 3817c96d1d91d2cf2d8ec7fe570f1472 + size: 1469 + - path: experiment_data/dataset/pl_voicelab_cbiz + md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir + size: 4803739404 + nfiles: 1600 + - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript + md5: ebffd3814a48564f4e33b9a4e0956af3.dir + size: 21846798 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result + md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir + size: 27432599 + nfiles: 799 + outs: + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_pos_alignment + md5: 22ccae20301046da3e40e93daa680d53.dir + size: 83052124 + nfiles: 799 + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_pos_metrics + md5: 43d79d47ba1e91e86daf1f66aa18c941.dir + size: 14239 + nfiles: 799 + pipeline_flair_upos@1: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice + --asr=wav2vec2 + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_common_voice + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_common_voice/gold_transcript + md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir + size: 4093545 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result + md5: b0d0042d77d7adce37890ca63ad40091.dir + size: 19014997 + nfiles: 8154 + outs: + - path: experiment_data/pipeline/pl_common_voice/wav2vec2__flair_upos_alignment + md5: c2de0794eab6b838cb889cd218c65651.dir + size: 17543992 + nfiles: 8154 + - path: experiment_data/pipeline/pl_common_voice/wav2vec2__flair_upos_metrics + md5: 5ec6a253f5152fb5215f5d4a1243ca4e.dir + size: 95798 + nfiles: 8154 + pipeline_flair_upos@5: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_voicelab_cbiz + --asr=ajn + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_voicelab_cbiz + md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir + size: 4803739404 + nfiles: 1600 + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result + md5: 7de1137f44fad26766da0fc309720160.dir + size: 22765926 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript + md5: ebffd3814a48564f4e33b9a4e0956af3.dir + size: 21846798 + nfiles: 800 + outs: + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__flair_upos_alignment + md5: d9c2a02375a3f9974ddf0ec4d64297b7.dir + size: 78332367 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__flair_upos_metrics + md5: 42f292cac09f5efc71215ac0f9bbf760.dir + size: 13841 + nfiles: 800 + pipeline_wikineiural_ner@6: + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_voicelab_cbiz + --asr=techmo + deps: + - path: experiment/pipeline_process_wikineural_ner.py + md5: c2c4e92a33346a3c097a321f1f5f2af3 + size: 1802 + - path: experiment_data/dataset/pl_voicelab_cbiz + md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir + size: 4803739404 + nfiles: 1600 + - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript + md5: ebffd3814a48564f4e33b9a4e0956af3.dir + size: 21846798 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result + md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir + size: 39158267 + nfiles: 800 + outs: + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__wikineural_ner_alignment + md5: fc4d6251b6c8d8253b99aa87d4c7a189.dir + size: 144096029 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__wikineural_ner_metrics + md5: dca61a52df606b9c3b510007a48c53ee.dir + size: 14810 + nfiles: 800 + pipeline_flair_upos@6: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_voicelab_cbiz + --asr=techmo + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_voicelab_cbiz + md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir + size: 4803739404 + nfiles: 1600 + - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript + md5: ebffd3814a48564f4e33b9a4e0956af3.dir + size: 21846798 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result + md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir + size: 39158267 + nfiles: 800 + outs: + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__flair_upos_alignment + md5: b9435b169d923e56d45be4e3a489358d.dir + size: 81635416 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__flair_upos_metrics + md5: 00f24bd9e609cc89f29cb6c3cc8dba48.dir + size: 14348 + nfiles: 800 + pipeline_flair_upos@8: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs + --asr=ajn + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_google_fleurs + md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir + size: 236272072 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/ajn__result + md5: 545e63a6daf9c46387c1d7d40b85499f.dir + size: 1413262 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript + md5: 607f551eca5dabcca0caf31c87bd2ac6.dir + size: 975209 + nfiles: 758 + outs: + - path: experiment_data/pipeline/pl_google_fleurs/ajn__flair_upos_alignment + md5: be13715f9aed5232f08da3be9095a797.dir + size: 3675596 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/ajn__flair_upos_metrics + md5: 27bb131921ad52d2235aeb2b7befe4d1.dir + size: 10590 + nfiles: 758 + pipeline_spacy_ner_wer@7: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs + --asr=google + deps: + - path: experiment/pipeline_process_spacy_pos_wer.py + md5: 3817c96d1d91d2cf2d8ec7fe570f1472 + size: 1469 + - path: experiment_data/dataset/pl_google_fleurs + md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir + size: 236272072 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript + md5: 607f551eca5dabcca0caf31c87bd2ac6.dir + size: 975209 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/google__result + md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir + size: 1377134 + nfiles: 758 + outs: + - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_ner_alignment + md5: 67ea10ecbb9f5c3bfa14ffb85c843ba3.dir + size: 3638477 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_ner_metrics + md5: 0532a9596f26fd52037b6ecaa838ab9e.dir + size: 8500 + nfiles: 758 + pipeline_spacy_ner_wer@14: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna + --asr=wav2vec2 + deps: + - path: experiment/pipeline_process_spacy_pos_wer.py + md5: 3817c96d1d91d2cf2d8ec7fe570f1472 + size: 1469 + - path: experiment_data/dataset/pl_luna + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/gold_transcript + md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir + size: 6706925 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/wav2vec2__result + md5: 9c63b061ac7763144bca121e163ee7aa.dir + size: 20658485 + nfiles: 456 + outs: + - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_ner_alignment + md5: 4530ad915e82cc9668e775d5de219b13.dir + size: 17577804 + nfiles: 456 + - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_ner_metrics + md5: 0ad2dc0beeebd336771228f8751fe028.dir + size: 8014 + nfiles: 456 + pipeline_flair_upos@10: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs + --asr=wav2vec2 + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_google_fleurs + md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir + size: 236272072 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript + md5: 607f551eca5dabcca0caf31c87bd2ac6.dir + size: 975209 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result + md5: bf9c77e34376bcda73dbdb6afee55c8c.dir + size: 5137721 + nfiles: 758 + outs: + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__flair_upos_alignment + md5: 07ec971011a67b5e3ad7822fc717612c.dir + size: 3720804 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__flair_upos_metrics + md5: 642857d546ab39ca2d680fda663eab38.dir + size: 7889 + nfiles: 758 + pipeline_flair_upos@2: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice + --asr=techmo + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_common_voice + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_common_voice/gold_transcript + md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir + size: 4093545 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/techmo__result + md5: 9030cf3640f2749d9c1b4439687bdc2f.dir + size: 7761880 + nfiles: 8136 + outs: + - path: experiment_data/pipeline/pl_common_voice/techmo__flair_upos_alignment + md5: b2f57705be67ced0652ce670e1dde2cd.dir + size: 17397042 + nfiles: 8136 + - path: experiment_data/pipeline/pl_common_voice/techmo__flair_upos_metrics + md5: 09659b49a5c99187bf64cf922138a7c1.dir + size: 95391 + nfiles: 8136 + pipeline_spacy_ner_wer@17: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14 + --asr=techmo + deps: + - path: experiment/pipeline_process_spacy_pos_wer.py + md5: 3817c96d1d91d2cf2d8ec7fe570f1472 + size: 1469 + - path: experiment_data/dataset/pl_minds14 + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/gold_transcript + md5: d2d48495000b3ea7ea6f4212ddb113a5.dir + size: 689374 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/techmo__result + md5: 4c43636b4773f2bf9a2153ef3393a558.dir + size: 1336305 + nfiles: 562 + outs: + - path: experiment_data/pipeline/pl_minds14/techmo__spacy_ner_alignment + md5: b9b4f2685f681ad7334ac60236a6ebe7.dir + size: 2573182 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/techmo__spacy_ner_metrics + md5: d306e0d18df9ee86f853719244bcfd35.dir + size: 5484 + nfiles: 562 + pipeline_wikineiural_ner@16: + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14 + --asr=ajn + deps: + - path: experiment/pipeline_process_wikineural_ner.py + md5: c2c4e92a33346a3c097a321f1f5f2af3 + size: 1802 + - path: experiment_data/dataset/pl_minds14 + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/ajn__result + md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir + size: 974727 + nfiles: 559 + - path: experiment_data/pipeline/pl_minds14/gold_transcript + md5: d2d48495000b3ea7ea6f4212ddb113a5.dir + size: 689374 + nfiles: 562 + outs: + - path: experiment_data/pipeline/pl_minds14/ajn__wikineural_ner_alignment + md5: 6be03b571a6462a5e3e4c96678f32fce.dir + size: 5689217 + nfiles: 559 + - path: experiment_data/pipeline/pl_minds14/ajn__wikineural_ner_metrics + md5: 6e1b50f31e4bd4f264e6a6a91da6cb05.dir + size: 8593 + nfiles: 559 + pipeline_flair_upos@17: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14 + --asr=techmo + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_minds14 + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/gold_transcript + md5: d2d48495000b3ea7ea6f4212ddb113a5.dir + size: 689374 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/techmo__result + md5: 4c43636b4773f2bf9a2153ef3393a558.dir + size: 1336305 + nfiles: 562 + outs: + - path: experiment_data/pipeline/pl_minds14/techmo__flair_upos_alignment + md5: fcfda34330dcd231409db4eff282a85a.dir + size: 2632853 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/techmo__flair_upos_metrics + md5: eda987cf12cfa026c8906eaa434e6090.dir + size: 6285 + nfiles: 562 + pipeline_wikineiural_ner@17: + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14 + --asr=techmo + deps: + - path: experiment/pipeline_process_wikineural_ner.py + md5: c2c4e92a33346a3c097a321f1f5f2af3 + size: 1802 + - path: experiment_data/dataset/pl_minds14 + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/gold_transcript + md5: d2d48495000b3ea7ea6f4212ddb113a5.dir + size: 689374 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/techmo__result + md5: 4c43636b4773f2bf9a2153ef3393a558.dir + size: 1336305 + nfiles: 562 + outs: + - path: experiment_data/pipeline/pl_minds14/techmo__wikineural_ner_alignment + md5: cc2e98939b0f8ab82a9eb30a546cea2c.dir + size: 5641778 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/techmo__wikineural_ner_metrics + md5: 1025b67d8420594920a3cef4ed5c1a6a.dir + size: 8339 + nfiles: 562 + pipeline_flair_upos@15: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14 + --asr=google + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_minds14 + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/gold_transcript + md5: d2d48495000b3ea7ea6f4212ddb113a5.dir + size: 689374 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/google__result + md5: 3f7a79298a5156fd2b023e673326e72f.dir + size: 985004 + nfiles: 562 + outs: + - path: experiment_data/pipeline/pl_minds14/google__flair_upos_alignment + md5: 71c60e45f4045e34c221d9c22701aab7.dir + size: 2641461 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/google__flair_upos_metrics + md5: 9f97126702b6268ce448649d4d7a3666.dir + size: 5000 + nfiles: 562 + pipeline_wikineiural_ner@15: + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14 + --asr=google + deps: + - path: experiment/pipeline_process_wikineural_ner.py + md5: c2c4e92a33346a3c097a321f1f5f2af3 + size: 1802 + - path: experiment_data/dataset/pl_minds14 + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/gold_transcript + md5: d2d48495000b3ea7ea6f4212ddb113a5.dir + size: 689374 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/google__result + md5: 3f7a79298a5156fd2b023e673326e72f.dir + size: 985004 + nfiles: 562 + outs: + - path: experiment_data/pipeline/pl_minds14/google__wikineural_ner_alignment + md5: e632c40f440b7b59e2d94771e29f2dea.dir + size: 5692456 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/google__wikineural_ner_metrics + md5: 0385f2138fd318df8852a7e38c1770ac.dir + size: 5322 + nfiles: 562 + pipeline_flair_upos@16: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14 + --asr=ajn + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_minds14 + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/ajn__result + md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir + size: 974727 + nfiles: 559 + - path: experiment_data/pipeline/pl_minds14/gold_transcript + md5: d2d48495000b3ea7ea6f4212ddb113a5.dir + size: 689374 + nfiles: 562 + outs: + - path: experiment_data/pipeline/pl_minds14/ajn__flair_upos_alignment + md5: 53a0fee0db0e7b4818c328730d33a8bc.dir + size: 3084431 + nfiles: 559 + - path: experiment_data/pipeline/pl_minds14/ajn__flair_upos_metrics + md5: a7791cb5b7b405e98b12834e1146d1bb.dir + size: 7147 + nfiles: 559 + pipeline_spacy_tag_wer@4: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_voicelab_cbiz + --asr=google + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_voicelab_cbiz + md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir + size: 4803739404 + nfiles: 1600 + - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript + md5: ebffd3814a48564f4e33b9a4e0956af3.dir + size: 21846798 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result + md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir + size: 27432599 + nfiles: 799 + outs: + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_dep_tag_alignment + md5: e2028c14acd625109a465c36ef166e7a.dir + size: 83052124 + nfiles: 799 + - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_dep_tag_metrics + md5: 43d79d47ba1e91e86daf1f66aa18c941.dir + size: 14239 + nfiles: 799 + pipeline_spacy_tag_wer@17: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14 + --asr=techmo + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_minds14 + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/gold_transcript + md5: d2d48495000b3ea7ea6f4212ddb113a5.dir + size: 689374 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/techmo__result + md5: 4c43636b4773f2bf9a2153ef3393a558.dir + size: 1336305 + nfiles: 562 + outs: + - path: experiment_data/pipeline/pl_minds14/techmo__spacy_dep_tag_alignment + md5: 9ea9473e990f9b1adafd9a6fef5a05ec.dir + size: 2640387 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/techmo__spacy_dep_tag_metrics + md5: 7d555363b27e9c0d0ab7d0e3011c6d13.dir + size: 6095 + nfiles: 562 + pipeline_spacy_tag_wer@10: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs + --asr=wav2vec2 + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_google_fleurs + md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir + size: 236272072 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript + md5: 607f551eca5dabcca0caf31c87bd2ac6.dir + size: 975209 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result + md5: bf9c77e34376bcda73dbdb6afee55c8c.dir + size: 5137721 + nfiles: 758 + outs: + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_dep_tag_alignment + md5: 4ca975e9b42db749a368760f5190805b.dir + size: 3737151 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_dep_tag_metrics + md5: 782cc84e9116281dfc28734b2ae4a5ea.dir + size: 9004 + nfiles: 758 + pipeline_spacy_tag_wer@0: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice + --asr=google + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_common_voice + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_common_voice/gold_transcript + md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir + size: 4093545 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/google__result + md5: afb53476cc93ef4de3591908df41fd2a.dir + size: 5854366 + nfiles: 8143 + outs: + - path: experiment_data/pipeline/pl_common_voice/google__spacy_dep_tag_alignment + md5: 38e2f031c443eea54bf86af578d2b79d.dir + size: 18316770 + nfiles: 8143 + - path: experiment_data/pipeline/pl_common_voice/google__spacy_dep_tag_metrics + md5: 117611317774e81fb482ba9c71ec806b.dir + size: 97235 + nfiles: 8143 + pipeline_spacy_tag_wer@18: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14 + --asr=wav2vec2 + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_minds14 + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/gold_transcript + md5: d2d48495000b3ea7ea6f4212ddb113a5.dir + size: 689374 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/wav2vec2__result + md5: 5658da01ecdce39ed99156bbc7f2dc62.dir + size: 3523907 + nfiles: 562 + outs: + - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_dep_tag_alignment + md5: 0b266094262fe4ee6684527729caed32.dir + size: 2724276 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_dep_tag_metrics + md5: 818882d685785a9d7d9b8d757c4c8e90.dir + size: 6840 + nfiles: 562 + pipeline_spacy_tag_wer@15: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14 + --asr=google + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_minds14 + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/gold_transcript + md5: d2d48495000b3ea7ea6f4212ddb113a5.dir + size: 689374 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/google__result + md5: 3f7a79298a5156fd2b023e673326e72f.dir + size: 985004 + nfiles: 562 + outs: + - path: experiment_data/pipeline/pl_minds14/google__spacy_dep_tag_alignment + md5: 715afeb1c31961d4680f8b98ba61d4ad.dir + size: 2659852 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/google__spacy_dep_tag_metrics + md5: aeda105b01366dee65935d3c07fe3444.dir + size: 4970 + nfiles: 562 + pipeline_spacy_tag_wer@6: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_voicelab_cbiz + --asr=techmo + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_voicelab_cbiz + md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir + size: 4803739404 + nfiles: 1600 + - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript + md5: ebffd3814a48564f4e33b9a4e0956af3.dir + size: 21846798 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result + md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir + size: 39158267 + nfiles: 800 + outs: + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_dep_tag_alignment + md5: 19c1d8b3e8704af06e943ba6962cf9ad.dir + size: 81650836 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_dep_tag_metrics + md5: 11320499f29d2d7bfce68d35fb352b83.dir + size: 14334 + nfiles: 800 + pipeline_spacy_tag_wer@14: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna + --asr=wav2vec2 + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_luna + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/gold_transcript + md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir + size: 6706925 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/wav2vec2__result + md5: 9c63b061ac7763144bca121e163ee7aa.dir + size: 20658485 + nfiles: 456 + outs: + - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_dep_tag_alignment + md5: 6716464936f4f35ba81a43eb2c2f37b0.dir + size: 17967467 + nfiles: 456 + - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_dep_tag_metrics + md5: 7848ddff997fd231f3857ff30dfd7154.dir + size: 7940 + nfiles: 456 + pipeline_spacy_tag_wer@16: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14 + --asr=ajn + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_minds14 + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/ajn__result + md5: 6fd1b042a7cb6d6f200c2eb9a926b7f1.dir + size: 974727 + nfiles: 559 + - path: experiment_data/pipeline/pl_minds14/gold_transcript + md5: d2d48495000b3ea7ea6f4212ddb113a5.dir + size: 689374 + nfiles: 562 + outs: + - path: experiment_data/pipeline/pl_minds14/ajn__spacy_dep_tag_alignment + md5: 372c26bb5e808d7856fed1c06b25de5e.dir + size: 3178442 + nfiles: 559 + - path: experiment_data/pipeline/pl_minds14/ajn__spacy_dep_tag_metrics + md5: 014fbe751a4f357f78f073920890e06d.dir + size: 7059 + nfiles: 559 + pipeline_spacy_tag_wer@5: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_voicelab_cbiz + --asr=ajn + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_voicelab_cbiz + md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir + size: 4803739404 + nfiles: 1600 + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result + md5: 7de1137f44fad26766da0fc309720160.dir + size: 22765926 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript + md5: ebffd3814a48564f4e33b9a4e0956af3.dir + size: 21846798 + nfiles: 800 + outs: + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_dep_tag_alignment + md5: 3d4a9a912756443a1de46cf91f6e5805.dir + size: 78539613 + nfiles: 800 + - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_dep_tag_metrics + md5: 7ec3bb7c838e7f06b8a1dbe7a68faac2.dir + size: 13753 + nfiles: 800 + pipeline_spacy_tag_wer@2: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice + --asr=techmo + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_common_voice + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_common_voice/gold_transcript + md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir + size: 4093545 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/techmo__result + md5: 9030cf3640f2749d9c1b4439687bdc2f.dir + size: 7761880 + nfiles: 8136 + outs: + - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_dep_tag_alignment + md5: 3e1f2b39cd9d82679013649a1ad8d983.dir + size: 18192387 + nfiles: 8136 + - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_dep_tag_metrics + md5: 61c69fcd287051f4ab7d1ffcc68a9aca.dir + size: 96845 + nfiles: 8136 + pipeline_wikineiural_ner@18: + cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14 + --asr=wav2vec2 + deps: + - path: experiment/pipeline_process_wikineural_ner.py + md5: c2c4e92a33346a3c097a321f1f5f2af3 + size: 1802 + - path: experiment_data/dataset/pl_minds14 + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/gold_transcript + md5: d2d48495000b3ea7ea6f4212ddb113a5.dir + size: 689374 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/wav2vec2__result + md5: 5658da01ecdce39ed99156bbc7f2dc62.dir + size: 3523907 + nfiles: 562 + outs: + - path: experiment_data/pipeline/pl_minds14/wav2vec2__wikineural_ner_alignment + md5: 5bc3f08dd32f7769c6c6c0b25df52bf8.dir + size: 5895431 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/wav2vec2__wikineural_ner_metrics + md5: 6535a4779ab3be38804639e16cbbe70f.dir + size: 8602 + nfiles: 562 + pipeline_spacy_tag_wer@8: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs + --asr=ajn + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_google_fleurs + md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir + size: 236272072 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/ajn__result + md5: 545e63a6daf9c46387c1d7d40b85499f.dir + size: 1413262 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript + md5: 607f551eca5dabcca0caf31c87bd2ac6.dir + size: 975209 + nfiles: 758 + outs: + - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_dep_tag_alignment + md5: 8ec2e9fc88d4b8ce5032bf809c1c025f.dir + size: 3799802 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_dep_tag_metrics + md5: 0d95a4abb4a33aef3e242a304fd58698.dir + size: 10569 + nfiles: 758 + pipeline_word_wer@18: + cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14 + --asr=wav2vec2 + deps: + - path: experiment/pipeline_process_word_wer.py + md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 + size: 2125 + - path: experiment_data/dataset/pl_minds14 + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/gold_transcript + md5: d2d48495000b3ea7ea6f4212ddb113a5.dir + size: 689374 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/wav2vec2__result + md5: 5658da01ecdce39ed99156bbc7f2dc62.dir + size: 3523907 + nfiles: 562 + outs: + - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_classic_alignment + md5: c1d947bbd7bb1dff8d39e03ad4a1d11e.dir + size: 2811807 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_classic_metrics + md5: a3477ae81fcba4a8e21b044f279425bb.dir + size: 17073 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_embeddings_alignment + md5: cbdcbfe7157f28d70ca55b787483e26b.dir + size: 5870633 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_embeddings_metrics + md5: c3395e582b4562a33e97419134363d30.dir + size: 37892 + nfiles: 562 + pipeline_spacy_tag_wer@12: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna + --asr=ajn + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_luna + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/ajn__result + md5: 653d65e186a7d05958ce3cbef219038c.dir + size: 6159899 + nfiles: 494 + - path: experiment_data/pipeline/pl_luna/gold_transcript + md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir + size: 6706925 + nfiles: 500 + outs: + - path: experiment_data/pipeline/pl_luna/ajn__spacy_dep_tag_alignment + md5: 034d072825c711a824f1280f4a390f74.dir + size: 21936929 + nfiles: 494 + - path: experiment_data/pipeline/pl_luna/ajn__spacy_dep_tag_metrics + md5: 6c6bb673ea5f64c9d851878c9d8a7c09.dir + size: 8444 + nfiles: 494 + pipeline_spacy_tag_wer@3: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice + --asr=ajn + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_common_voice + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_common_voice/ajn__result + md5: 77d873041fe2952e3c45ee4ac6458061.dir + size: 6667841 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/gold_transcript + md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir + size: 4093545 + nfiles: 8155 + outs: + - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_dep_tag_alignment + md5: 10af363d90689138f55e3295f562efc4.dir + size: 19159060 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_dep_tag_metrics + md5: 6094fb960e2eab979ecb33d40a253531.dir + size: 95146 + nfiles: 8155 + pipeline_spacy_tag_wer@11: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna + --asr=google + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_luna + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/gold_transcript + md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir + size: 6706925 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/google__result + md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir + size: 5346497 + nfiles: 500 + outs: + - path: experiment_data/pipeline/pl_luna/google__spacy_dep_tag_alignment + md5: 4663cdc1bb88d7d6de3691c734fe0ab6.dir + size: 19342263 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/google__spacy_dep_tag_metrics + md5: a65dd7d74319da06f4ebaca08cde30ce.dir + size: 8659 + nfiles: 500 + pipeline_spacy_tag_wer@9: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs + --asr=techmo + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_google_fleurs + md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir + size: 236272072 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript + md5: 607f551eca5dabcca0caf31c87bd2ac6.dir + size: 975209 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/techmo__result + md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir + size: 1880403 + nfiles: 758 + outs: + - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_dep_tag_alignment + md5: 18da1eb1ac1485f74337ea502e395b57.dir + size: 3707699 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_dep_tag_metrics + md5: 9cae08bbb8a6331d06a33dbbb4a16301.dir + size: 9662 + nfiles: 758 + pipeline_spacy_tag_wer@1: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice + --asr=wav2vec2 + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_common_voice + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_common_voice/gold_transcript + md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir + size: 4093545 + nfiles: 8155 + - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result + md5: b0d0042d77d7adce37890ca63ad40091.dir + size: 19014997 + nfiles: 8154 + outs: + - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_dep_tag_alignment + md5: c46f35654ac42c3ddfd14e0197f36ae5.dir + size: 18349138 + nfiles: 8154 + - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_dep_tag_metrics + md5: 71381fa7fd6c0cdba00f25c17fd6be5e.dir + size: 97426 + nfiles: 8154 + pipeline_flair_upos@18: + cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14 + --asr=wav2vec2 + deps: + - path: experiment/pipeline_process_flair_upos.py + md5: 2ce2de99df9c06c5d9b0833ca7cdffda + size: 1916 + - path: experiment_data/dataset/pl_minds14 + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/gold_transcript + md5: d2d48495000b3ea7ea6f4212ddb113a5.dir + size: 689374 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/wav2vec2__result + md5: 5658da01ecdce39ed99156bbc7f2dc62.dir + size: 3523907 + nfiles: 562 + outs: + - path: experiment_data/pipeline/pl_minds14/wav2vec2__flair_upos_alignment + md5: e543ccad47d9829fdb6dbf3238ed99cc.dir + size: 2702561 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/wav2vec2__flair_upos_metrics + md5: 103738afc0ef4f24adf3b855181b1acb.dir + size: 6757 + nfiles: 562 + pipeline_spacy_pos_wer@18: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14 + --asr=wav2vec2 + deps: + - path: experiment/pipeline_process_spacy_pos_wer.py + md5: 3817c96d1d91d2cf2d8ec7fe570f1472 + size: 1469 + - path: experiment_data/dataset/pl_minds14 + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/gold_transcript + md5: d2d48495000b3ea7ea6f4212ddb113a5.dir + size: 689374 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/wav2vec2__result + md5: 5658da01ecdce39ed99156bbc7f2dc62.dir + size: 3523907 + nfiles: 562 + outs: + - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_pos_alignment + md5: 0d04963621be1dd6a1c81225734de652.dir + size: 2724276 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_pos_metrics + md5: 818882d685785a9d7d9b8d757c4c8e90.dir + size: 6840 + nfiles: 562 + pipeline_spacy_tag_wer@13: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna + --asr=techmo + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_luna + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/pl_luna/gold_transcript + md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir + size: 6706925 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/techmo__result + md5: 0e596570e1502b38588427bc72dcc006.dir + size: 9697519 + nfiles: 500 + outs: + - path: experiment_data/pipeline/pl_luna/techmo__spacy_dep_tag_alignment + md5: baefcd5dfadd9c62d6fc71ba0ac31fa9.dir + size: 20897599 + nfiles: 500 + - path: experiment_data/pipeline/pl_luna/techmo__spacy_dep_tag_metrics + md5: 1478707020a96496b50eb732207c290e.dir + size: 8841 + nfiles: 500 + pipeline_spacy_tag_wer@7: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs + --asr=google + deps: + - path: experiment/pipeline_process_spacy_dep_tag_wer.py + md5: 83fc16ed68e85cfd89d8d84dc61d6d0f + size: 1489 + - path: experiment_data/dataset/pl_google_fleurs + md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir + size: 236272072 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript + md5: 607f551eca5dabcca0caf31c87bd2ac6.dir + size: 975209 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/google__result + md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir + size: 1377134 + nfiles: 758 + outs: + - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_dep_tag_alignment + md5: c15aa30b165152fac6813cd092763242.dir + size: 3712618 + nfiles: 758 + - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_dep_tag_metrics + md5: 56dddb48cea2022b91fd4323efd43a8b.dir + size: 9213 + nfiles: 758 + pipeline_spacy_ner_wer@18: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14 + --asr=wav2vec2 + deps: + - path: experiment/pipeline_process_spacy_pos_wer.py + md5: 3817c96d1d91d2cf2d8ec7fe570f1472 + size: 1469 + - path: experiment_data/dataset/pl_minds14 + md5: d751713988987e9331980363e24189ce.dir + size: 0 + nfiles: 0 + - path: experiment_data/pipeline/pl_minds14/gold_transcript + md5: d2d48495000b3ea7ea6f4212ddb113a5.dir + size: 689374 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/wav2vec2__result + md5: 5658da01ecdce39ed99156bbc7f2dc62.dir + size: 3523907 + nfiles: 562 + outs: + - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_ner_alignment + md5: 5c0650273cc3a942beb9ec39c01866f1.dir + size: 2653625 + nfiles: 562 + - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_ner_metrics + md5: 9aba9dba4721ced4ab9ae20a4f6b44cb.dir + size: 6092 + nfiles: 562 diff --git a/dvc.yaml b/dvc.yaml index 9b24294..09be1b7 100644 --- a/dvc.yaml +++ b/dvc.yaml @@ -99,6 +99,8 @@ stages: asr: wav2vec2 - dataset: pl_common_voice asr: techmo + - dataset: pl_common_voice + asr: ajn - dataset: pl_voicelab_cbiz asr: google @@ -154,13 +156,15 @@ stages: asr: wav2vec2 - dataset: pl_common_voice asr: techmo + - dataset: pl_common_voice + asr: ajn -# - dataset: pl_voicelab_cbiz -# asr: google -# - dataset: pl_voicelab_cbiz -# asr: ajn -# - dataset: pl_voicelab_cbiz -# asr: techmo + - dataset: pl_voicelab_cbiz + asr: google + - dataset: pl_voicelab_cbiz + asr: ajn + - dataset: pl_voicelab_cbiz + asr: techmo - dataset: pl_google_fleurs asr: google @@ -207,13 +211,15 @@ stages: asr: wav2vec2 - dataset: pl_common_voice asr: techmo + - dataset: pl_common_voice + asr: ajn -# - dataset: pl_voicelab_cbiz -# asr: google -# - dataset: pl_voicelab_cbiz -# asr: ajn -# - dataset: pl_voicelab_cbiz -# asr: techmo + - dataset: pl_voicelab_cbiz + asr: google + - dataset: pl_voicelab_cbiz + asr: ajn + - dataset: pl_voicelab_cbiz + asr: techmo - dataset: pl_google_fleurs asr: google @@ -260,13 +266,15 @@ stages: asr: wav2vec2 - dataset: pl_common_voice asr: techmo + - dataset: pl_common_voice + asr: ajn -# - dataset: pl_voicelab_cbiz -# asr: google -# - dataset: pl_voicelab_cbiz -# asr: ajn -# - dataset: pl_voicelab_cbiz -# asr: techmo + - dataset: pl_voicelab_cbiz + asr: google + - dataset: pl_voicelab_cbiz + asr: ajn + - dataset: pl_voicelab_cbiz + asr: techmo - dataset: pl_google_fleurs asr: google @@ -313,13 +321,15 @@ stages: asr: wav2vec2 - dataset: pl_common_voice asr: techmo + - dataset: pl_common_voice + asr: ajn -# - dataset: pl_voicelab_cbiz -# asr: google -# - dataset: pl_voicelab_cbiz -# asr: ajn -# - dataset: pl_voicelab_cbiz -# asr: techmo + - dataset: pl_voicelab_cbiz + asr: google + - dataset: pl_voicelab_cbiz + asr: ajn + - dataset: pl_voicelab_cbiz + asr: techmo - dataset: pl_google_fleurs asr: google @@ -357,3 +367,58 @@ stages: outs: - experiment_data/pipeline/${item.dataset}/${item.asr}__flair_upos_alignment - experiment_data/pipeline/${item.dataset}/${item.asr}__flair_upos_metrics + + pipeline_spacy_tag_wer: + foreach: + - dataset: pl_common_voice + asr: google + - dataset: pl_common_voice + asr: wav2vec2 + - dataset: pl_common_voice + asr: techmo + - dataset: pl_common_voice + asr: ajn + + - dataset: pl_voicelab_cbiz + asr: google + - dataset: pl_voicelab_cbiz + asr: ajn + - dataset: pl_voicelab_cbiz + asr: techmo + + - dataset: pl_google_fleurs + asr: google + - dataset: pl_google_fleurs + asr: ajn + - dataset: pl_google_fleurs + asr: techmo + - dataset: pl_google_fleurs + asr: wav2vec2 + + - dataset: pl_luna + asr: google + - dataset: pl_luna + asr: ajn + - dataset: pl_luna + asr: techmo + - dataset: pl_luna + asr: wav2vec2 + + - dataset: pl_minds14 + asr: google + - dataset: pl_minds14 + asr: ajn + - dataset: pl_minds14 + asr: techmo + - dataset: pl_minds14 + asr: wav2vec2 + do: + cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=${item.dataset} --asr=${item.asr} + deps: + - experiment/pipeline_process_spacy_dep_tag_wer.py + - experiment_data/dataset/${item.dataset} + - experiment_data/pipeline/${item.dataset}/gold_transcript + - experiment_data/pipeline/${item.dataset}/${item.asr}__result + outs: + - experiment_data/pipeline/${item.dataset}/${item.asr}__spacy_dep_tag_alignment + - experiment_data/pipeline/${item.dataset}/${item.asr}__spacy_dep_tag_metrics diff --git a/experiment/pipeline_process_spacy_dep_tag_wer.py b/experiment/pipeline_process_spacy_dep_tag_wer.py new file mode 100644 index 0000000..49cba63 --- /dev/null +++ b/experiment/pipeline_process_spacy_dep_tag_wer.py @@ -0,0 +1,35 @@ +import argparse + +from experiment.const_pipeline_names import GOLD_TRANSCRIPT +from experiment.experiment_dependency_provider import get_record_provider, get_repository +from experiment.sentence_wer_processor.spacy_pos_sentence_dep_tag_processor import SpacyDepTagSentenceWerProcessor +from sziszapangma.integration.experiment_manager import ExperimentManager + + +def run_spacy_pos_wer_pipeline(dataset_name: str, asr_name: str): + record_provider = get_record_provider(dataset_name) + experiment_processor = ExperimentManager( + record_id_iterator=record_provider, + processing_tasks=[ + SpacyDepTagSentenceWerProcessor( + model_name='pl_core_news_lg', + gold_transcript_property_name=GOLD_TRANSCRIPT, + asr_property_name=f'{asr_name}__result', + alignment_property_name=f'{asr_name}__spacy_dep_tag_alignment', + wer_property_name=f'{asr_name}__spacy_dep_tag_metrics', + task_name=f'SpacyDepTagSentenceWerProcessor___{dataset_name}___{asr_name}', + require_update=False + ) + ], + experiment_repository=get_repository(dataset_name), + relation_manager_provider=record_provider + ) + experiment_processor.process() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--dataset") + parser.add_argument("--asr") + args = parser.parse_args() + run_spacy_pos_wer_pipeline(args.dataset, args.asr) diff --git a/experiment/sentence_wer_processor/spacy_pos_sentence_dep_tag_processor.py b/experiment/sentence_wer_processor/spacy_pos_sentence_dep_tag_processor.py new file mode 100644 index 0000000..ca467e0 --- /dev/null +++ b/experiment/sentence_wer_processor/spacy_pos_sentence_dep_tag_processor.py @@ -0,0 +1,28 @@ +from abc import ABC +from typing import List, Any + +import spacy + +from experiment.sentence_wer_processor.sentence_wer_processor import SentenceWerProcessor + + +class SpacyDepTagSentenceWerProcessor(SentenceWerProcessor): + _nlp: Any + + def __init__( + self, + model_name: str, + gold_transcript_property_name: str, + asr_property_name: str, + alignment_property_name: str, + wer_property_name: str, + task_name: str, + require_update: bool + ): + super().__init__(gold_transcript_property_name, asr_property_name, alignment_property_name, wer_property_name, + task_name, require_update) + self._nlp = spacy.load(model_name) + + def process_text(self, text: str) -> List[str]: + document = self._nlp(text) + return [token.pos_ for token in document] diff --git a/experiment_data/pipeline/pl_common_voice/.gitignore b/experiment_data/pipeline/pl_common_voice/.gitignore index 182d144..2fc1cf4 100644 --- a/experiment_data/pipeline/pl_common_voice/.gitignore +++ b/experiment_data/pipeline/pl_common_voice/.gitignore @@ -16,3 +16,27 @@ /wav2vec2__word_wer_embeddings_alignment /wav2vec2__spacy_ner_alignment /wav2vec2__spacy_ner_metrics +/techmo__wikineural_ner_alignment +/techmo__wikineural_ner_metrics +/google__wikineural_ner_alignment +/google__wikineural_ner_metrics +/techmo__spacy_pos_alignment +/techmo__spacy_pos_metrics +/wav2vec2__flair_upos_alignment +/wav2vec2__flair_upos_metrics +/techmo__flair_upos_alignment +/techmo__flair_upos_metrics +/ajn__wikineural_ner_alignment +/ajn__wikineural_ner_metrics +/ajn__spacy_dep_tag_alignment +/ajn__spacy_dep_tag_metrics +/ajn__word_wer_classic_metrics +/ajn__word_wer_classic_alignment +/ajn__word_wer_embeddings_metrics +/ajn__word_wer_embeddings_alignment +/ajn__flair_upos_alignment +/ajn__flair_upos_metrics +/wav2vec2__spacy_dep_tag_alignment +/wav2vec2__spacy_dep_tag_metrics +/ajn__spacy_pos_alignment +/ajn__spacy_pos_metrics diff --git a/experiment_data/pipeline/pl_google_fleurs/.gitignore b/experiment_data/pipeline/pl_google_fleurs/.gitignore index 324088b..5edb9f9 100644 --- a/experiment_data/pipeline/pl_google_fleurs/.gitignore +++ b/experiment_data/pipeline/pl_google_fleurs/.gitignore @@ -17,3 +17,43 @@ /wav2vec2__word_wer_embeddings_alignment /wav2vec2__spacy_ner_alignment /wav2vec2__spacy_ner_metrics +/ajn__wikineural_ner_alignment +/ajn__wikineural_ner_metrics +/techmo__wikineural_ner_alignment +/techmo__wikineural_ner_metrics +/google__flair_upos_alignment +/google__flair_upos_metrics +/wav2vec2__spacy_pos_alignment +/wav2vec2__spacy_pos_metrics +/ajn__flair_upos_alignment +/ajn__flair_upos_metrics +/ajn__spacy_pos_alignment +/ajn__spacy_pos_metrics +/techmo__flair_upos_alignment +/techmo__flair_upos_metrics +/ajn__spacy_ner_alignment +/ajn__spacy_ner_metrics +/wav2vec2__wikineural_ner_alignment +/wav2vec2__wikineural_ner_metrics +/wav2vec2__flair_upos_alignment +/wav2vec2__flair_upos_metrics +/google__spacy_ner_alignment +/google__spacy_ner_metrics +/techmo__spacy_ner_alignment +/techmo__spacy_ner_metrics +/google__spacy_pos_alignment +/google__spacy_pos_metrics +/techmo__spacy_pos_alignment +/techmo__spacy_pos_metrics +/google__wikineural_ner_alignment +/google__wikineural_ner_metrics +/ajn__spacy_dep_tag_alignment +/ajn__spacy_dep_tag_metrics +/techmo__spacy_dep_tag_alignment +/techmo__spacy_dep_tag_metrics +/google__word_wer_classic_metrics +/google__word_wer_classic_alignment +/google__word_wer_embeddings_metrics +/google__word_wer_embeddings_alignment +/google__spacy_dep_tag_alignment +/google__spacy_dep_tag_metrics diff --git a/experiment_data/pipeline/pl_luna/.gitignore b/experiment_data/pipeline/pl_luna/.gitignore index 715f0fb..e67f426 100644 --- a/experiment_data/pipeline/pl_luna/.gitignore +++ b/experiment_data/pipeline/pl_luna/.gitignore @@ -19,3 +19,41 @@ /ajn__word_wer_classic_alignment /ajn__word_wer_embeddings_metrics /ajn__word_wer_embeddings_alignment +/wav2vec2__wikineural_ner_alignment +/wav2vec2__wikineural_ner_metrics +/techmo__flair_upos_alignment +/techmo__flair_upos_metrics +/techmo__wikineural_ner_alignment +/techmo__wikineural_ner_metrics +/techmo__spacy_ner_alignment +/techmo__spacy_ner_metrics +/google__wikineural_ner_alignment +/google__wikineural_ner_metrics +/google__flair_upos_alignment +/google__flair_upos_metrics +/ajn__flair_upos_alignment +/ajn__flair_upos_metrics +/google__spacy_ner_alignment +/google__spacy_ner_metrics +/wav2vec2__flair_upos_alignment +/wav2vec2__flair_upos_metrics +/google__spacy_pos_alignment +/google__spacy_pos_metrics +/techmo__spacy_pos_alignment +/techmo__spacy_pos_metrics +/ajn__spacy_ner_alignment +/ajn__spacy_ner_metrics +/ajn__wikineural_ner_alignment +/ajn__wikineural_ner_metrics +/wav2vec2__spacy_pos_alignment +/wav2vec2__spacy_pos_metrics +/ajn__spacy_pos_alignment +/ajn__spacy_pos_metrics +/wav2vec2__spacy_ner_alignment +/wav2vec2__spacy_ner_metrics +/ajn__spacy_dep_tag_alignment +/ajn__spacy_dep_tag_metrics +/google__spacy_dep_tag_alignment +/google__spacy_dep_tag_metrics +/techmo__spacy_dep_tag_alignment +/techmo__spacy_dep_tag_metrics diff --git a/experiment_data/pipeline/pl_minds14/.gitignore b/experiment_data/pipeline/pl_minds14/.gitignore index f6e42cc..05ce9ab 100644 --- a/experiment_data/pipeline/pl_minds14/.gitignore +++ b/experiment_data/pipeline/pl_minds14/.gitignore @@ -15,3 +15,37 @@ /techmo__spacy_ner_metrics /ajn__spacy_ner_alignment /ajn__spacy_ner_metrics +/techmo__flair_upos_alignment +/techmo__flair_upos_metrics +/google__flair_upos_alignment +/google__flair_upos_metrics +/google__spacy_pos_alignment +/google__spacy_pos_metrics +/wav2vec2__flair_upos_alignment +/wav2vec2__flair_upos_metrics +/ajn__wikineural_ner_alignment +/ajn__wikineural_ner_metrics +/wav2vec2__spacy_ner_alignment +/wav2vec2__spacy_ner_metrics +/google__spacy_ner_alignment +/google__spacy_ner_metrics +/techmo__wikineural_ner_alignment +/techmo__wikineural_ner_metrics +/google__wikineural_ner_alignment +/google__wikineural_ner_metrics +/wav2vec2__wikineural_ner_alignment +/wav2vec2__wikineural_ner_metrics +/ajn__flair_upos_alignment +/ajn__flair_upos_metrics +/ajn__spacy_pos_alignment +/ajn__spacy_pos_metrics +/wav2vec2__word_wer_classic_metrics +/wav2vec2__word_wer_classic_alignment +/wav2vec2__word_wer_embeddings_metrics +/wav2vec2__word_wer_embeddings_alignment +/wav2vec2__spacy_pos_alignment +/wav2vec2__spacy_pos_metrics +/ajn__word_wer_classic_metrics +/ajn__word_wer_classic_alignment +/ajn__word_wer_embeddings_metrics +/ajn__word_wer_embeddings_alignment diff --git a/experiment_data/pipeline/pl_voicelab_cbiz/.gitignore b/experiment_data/pipeline/pl_voicelab_cbiz/.gitignore index a64acdd..10cac58 100644 --- a/experiment_data/pipeline/pl_voicelab_cbiz/.gitignore +++ b/experiment_data/pipeline/pl_voicelab_cbiz/.gitignore @@ -16,3 +16,25 @@ /ajn__word_wer_embeddings_alignment /ajn__spacy_ner_alignment /ajn__spacy_ner_metrics +/google__spacy_pos_alignment +/google__spacy_pos_metrics +/techmo__spacy_ner_alignment +/techmo__spacy_ner_metrics +/ajn__flair_upos_alignment +/ajn__flair_upos_metrics +/google__flair_upos_alignment +/google__flair_upos_metrics +/google__spacy_ner_alignment +/google__spacy_ner_metrics +/techmo__spacy_pos_alignment +/techmo__spacy_pos_metrics +/google__wikineural_ner_alignment +/google__wikineural_ner_metrics +/ajn__wikineural_ner_alignment +/ajn__wikineural_ner_metrics +/techmo__wikineural_ner_alignment +/techmo__wikineural_ner_metrics +/ajn__spacy_pos_alignment +/ajn__spacy_pos_metrics +/techmo__flair_upos_alignment +/techmo__flair_upos_metrics -- GitLab