diff --git a/dvc.lock b/dvc.lock index 601d3744c8a31c714f04a460fd9541ea7a3cd411..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 --- a/dvc.lock +++ b/dvc.lock @@ -1,5044 +0,0 @@ -schema: '2.0' -stages: - import_dataset_to_relation_manager@0: - cmd: PYTHONPATH=. python experiment/pl_luna/import_relation_manager.py - deps: - - path: experiment/pl_luna/import_relation_manager.py - md5: 0b72ccf1aa1107d85def09140e76bdf9 - size: 8284 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - outs: - - path: experiment_data/dataset_relation_manager_data/pl_luna - md5: 7c6720b017c9fa82bb1c2264239b3b03.dir - size: 229007155 - nfiles: 1000 - import_dataset_to_relation_manager@1: - cmd: PYTHONPATH=. python experiment/pl_minds14/import_relation_manager.py - deps: - - path: experiment/pl_minds14/import_relation_manager.py - md5: d83011855f3f1b2ddec96713c3c41177 - size: 438 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - outs: - - path: experiment_data/dataset_relation_manager_data/pl_minds14 - md5: b6838bb00cb83d69ae818d8de96a2f18.dir - size: 3545062 - nfiles: 1124 - import_dataset_to_relation_manager@2: - cmd: PYTHONPATH=. python experiment/pl_common_voice/import_relation_manager.py - deps: - - path: experiment/pl_common_voice/import_relation_manager.py - md5: 2d0b47d14088c2c5b61a78ddeeff5293 - size: 547 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - outs: - - path: experiment_data/dataset_relation_manager_data/pl_common_voice - md5: fe73df318524dc33331c62b22a7f5237.dir - size: 21756677 - nfiles: 16310 - process_gold_transcript@1: - cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_minds14 - deps: - - path: experiment/pipeline_process_gold_transcript.py - md5: baac4f80dcdfcc5a49806eea6eec832a - size: 1195 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/dataset_relation_manager_data/pl_minds14 - md5: b6838bb00cb83d69ae818d8de96a2f18.dir - size: 3545062 - nfiles: 1124 - outs: - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: 35661dfd20421a6a6068b7194e483eba.dir - size: 689374 - nfiles: 562 - process_gold_transcript@0: - cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_luna - deps: - - path: experiment/pipeline_process_gold_transcript.py - md5: baac4f80dcdfcc5a49806eea6eec832a - size: 1195 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/dataset_relation_manager_data/pl_luna - md5: 7c6720b017c9fa82bb1c2264239b3b03.dir - size: 229007155 - nfiles: 1000 - outs: - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 7e71aa1b181ad75dd08dd7932bc0fc65.dir - size: 6706925 - nfiles: 500 - process_asr@6: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14 - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_minds14 - md5: 8dbd7480f408f284f4b349c02c991afc.dir - size: 3821899 - nfiles: 2248 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/dataset_relation_manager_data/pl_minds14 - md5: b6838bb00cb83d69ae818d8de96a2f18.dir - size: 3545062 - nfiles: 1124 - outs: - - path: experiment_data/pipeline/pl_minds14/wav2vec2__result - md5: 004f25308b0f5577af589bede338ae9f.dir - size: 3523907 - nfiles: 562 - process_asr@2: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_luna - md5: 8dd7fbde8090469f4a0e41ea1c873cda.dir - size: 22986007 - nfiles: 1951 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/dataset_relation_manager_data/pl_luna - md5: 7c6720b017c9fa82bb1c2264239b3b03.dir - size: 229007155 - nfiles: 1000 - outs: - - path: experiment_data/pipeline/pl_luna/wav2vec2__result - md5: 80994fc18d2cc2a1fa56fd4436c5378b.dir - size: 20658485 - nfiles: 456 - process_asr@5: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14 - --asr=ajn - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_minds14 - md5: 8dbd7480f408f284f4b349c02c991afc.dir - size: 3821899 - nfiles: 2248 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/dataset_relation_manager_data/pl_minds14 - md5: b6838bb00cb83d69ae818d8de96a2f18.dir - size: 3545062 - nfiles: 1124 - outs: - - path: experiment_data/pipeline/pl_minds14/ajn__result - md5: b09eb8176b26586b06093384bbaea770.dir - size: 974727 - nfiles: 559 - process_asr@4: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14 - --asr=google - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_minds14 - md5: 8dbd7480f408f284f4b349c02c991afc.dir - size: 3821899 - nfiles: 2248 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/dataset_relation_manager_data/pl_minds14 - md5: b6838bb00cb83d69ae818d8de96a2f18.dir - size: 3545062 - nfiles: 1124 - outs: - - path: experiment_data/pipeline/pl_minds14/google__result - md5: 3505364058d488815dd90699e71fd00b.dir - size: 985004 - nfiles: 562 - process_asr@0: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna - --asr=google - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_luna - md5: 8dd7fbde8090469f4a0e41ea1c873cda.dir - size: 22986007 - nfiles: 1951 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/dataset_relation_manager_data/pl_luna - md5: 7c6720b017c9fa82bb1c2264239b3b03.dir - size: 229007155 - nfiles: 1000 - outs: - - path: experiment_data/pipeline/pl_luna/google__result - md5: 254e060731d669a14f3a5525ccd1fe28.dir - size: 5346497 - nfiles: 500 - process_asr@1: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna - --asr=ajn - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_luna - md5: 8dd7fbde8090469f4a0e41ea1c873cda.dir - size: 22986007 - nfiles: 1951 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/dataset_relation_manager_data/pl_luna - md5: 7c6720b017c9fa82bb1c2264239b3b03.dir - size: 229007155 - nfiles: 1000 - outs: - - path: experiment_data/pipeline/pl_luna/ajn__result - md5: bca7f2eae18f606867c9880bd0fe1c8d.dir - size: 6159899 - nfiles: 494 - process_asr@3: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna - --asr=techmo - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_luna - md5: 8dd7fbde8090469f4a0e41ea1c873cda.dir - size: 22986007 - nfiles: 1951 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/dataset_relation_manager_data/pl_luna - md5: 7c6720b017c9fa82bb1c2264239b3b03.dir - size: 229007155 - nfiles: 1000 - outs: - - path: experiment_data/pipeline/pl_luna/techmo__result - md5: 4c6609ab50706ad45f3919eaf902a75e.dir - size: 9697519 - nfiles: 500 - process_asr@7: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14 - --asr=techmo - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_minds14 - md5: 8dbd7480f408f284f4b349c02c991afc.dir - size: 3821899 - nfiles: 2248 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/dataset_relation_manager_data/pl_minds14 - md5: b6838bb00cb83d69ae818d8de96a2f18.dir - size: 3545062 - nfiles: 1124 - outs: - - path: experiment_data/pipeline/pl_minds14/techmo__result - md5: 13b6f9b23182b9255a802134c47ab4c4.dir - size: 1336305 - nfiles: 562 - save_wave_files__pl_google_fleurs: - cmd: PYTHONPATH=. python experiment/pl_google_fleurs/save_wav_files.py - deps: - - path: experiment/pl_google_fleurs/save_wav_files.py - md5: 5fd1d6210a9766c35cf11892a9951266 - size: 610 - outs: - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - import_dataset_to_relation_manager@3: - cmd: PYTHONPATH=. python experiment/pl_vpl_voicelab_cbizoicelab/import_relation_manager.py - deps: - - path: experiment/pl_voicelab_cbiz/import_relation_manager.py - md5: 8b100b393470511b789ee4bc722f12ea - size: 2026 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - outs: - - path: experiment_data/dataset_relation_manager_data/pl_voicelab_cbiz - md5: 7ef4be9a4eeef095baf7a4cd765610d0.dir - size: 110711470 - nfiles: 1600 - import_dataset_to_relation_manager@4: - cmd: PYTHONPATH=. python experiment/pl_google_fleurs/import_relation_manager.py - deps: - - path: experiment/pl_google_fleurs/import_relation_manager.py - md5: f5547b678bb8fe0542eaaf047d1447aa - size: 458 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - outs: - - path: experiment_data/dataset_relation_manager_data/pl_google_fleurs - md5: 5a5609426d0a70990792c8a8b1854a48.dir - size: 4991297 - nfiles: 1516 - process_gold_transcript@2: - cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_common_voice - deps: - - path: experiment/pipeline_process_gold_transcript.py - md5: baac4f80dcdfcc5a49806eea6eec832a - size: 1195 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/dataset_relation_manager_data/pl_common_voice - md5: fe73df318524dc33331c62b22a7f5237.dir - size: 21756677 - nfiles: 16310 - outs: - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: 4903cf286f3c1ae49876c4ba52713d3c.dir - size: 4093545 - nfiles: 8155 - process_gold_transcript@3: - cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_voicelab_cbiz - deps: - - path: experiment/pipeline_process_gold_transcript.py - md5: baac4f80dcdfcc5a49806eea6eec832a - size: 1195 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/dataset_relation_manager_data/pl_voicelab_cbiz - md5: 7ef4be9a4eeef095baf7a4cd765610d0.dir - size: 110711470 - nfiles: 1600 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: 3462fc9dea267f7db0e133ced6b5f4e9.dir - size: 21846798 - nfiles: 800 - process_gold_transcript@4: - cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_google_fleurs - deps: - - path: experiment/pipeline_process_gold_transcript.py - md5: baac4f80dcdfcc5a49806eea6eec832a - size: 1195 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/dataset_relation_manager_data/pl_google_fleurs - md5: 5a5609426d0a70990792c8a8b1854a48.dir - size: 4991297 - nfiles: 1516 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 7ea17627b30e788f34c81ca9322f2879.dir - size: 975209 - nfiles: 758 - process_asr@17: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_common_voice - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_common_voice - md5: 43714b76048ec7311d08a7c48c282146.dir - size: 20219523 - nfiles: 24453 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/dataset_relation_manager_data/pl_common_voice - md5: fe73df318524dc33331c62b22a7f5237.dir - size: 21756677 - nfiles: 16310 - outs: - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result - md5: 402f9a70d682d030b8f7b6a36044c651.dir - size: 19014997 - nfiles: 8154 - process_asr@10: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_google_fleurs - md5: dfa15d901991d0cc9634192e21a9cc23.dir - size: 5645552 - nfiles: 3032 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/dataset_relation_manager_data/pl_google_fleurs - md5: 5a5609426d0a70990792c8a8b1854a48.dir - size: 4991297 - nfiles: 1516 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result - md5: 88a7ae4e0399446f996c11d6ffef701a.dir - size: 5137721 - nfiles: 758 - process_asr@8: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs - --asr=google - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_google_fleurs - md5: dfa15d901991d0cc9634192e21a9cc23.dir - size: 5645552 - nfiles: 3032 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/dataset_relation_manager_data/pl_google_fleurs - md5: 5a5609426d0a70990792c8a8b1854a48.dir - size: 4991297 - nfiles: 1516 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/google__result - md5: 9cb6cac07efe8e5d0b33ae7c818c5766.dir - size: 1377134 - nfiles: 758 - save_audio_files: - cmd: PYTHONPATH=. python experiment/dataset_specific/pl_google_fleurs/pipeline_save_wav_files.py - deps: - - path: experiment/dataset_specific/pl_google_fleurs/pipeline_save_wav_files.py - md5: 0a386b55f0ad26109f110a4622690066 - size: 735 - outs: - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - pipeline_prepare_relation_manager@4: - cmd: PYTHONPATH=. python experiment/dataset_specific/pl_voicelab_cbiz/import_relation_manager.py - deps: - - path: experiment/dataset_specific/pl_voicelab_cbiz/import_relation_manager.py - md5: ca3760c3f051a00575bf015322575d1a - size: 2075 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - outs: - - path: experiment_data/dataset_relation_manager_data/pl_voicelab_cbiz - md5: d23bd3824dc9e68a649edc94fbec50fb.dir - size: 110711470 - nfiles: 1600 - pipeline_prepare_relation_manager@3: - cmd: PYTHONPATH=. python experiment/dataset_specific/pl_minds14/import_relation_manager.py - deps: - - path: experiment/dataset_specific/pl_minds14/import_relation_manager.py - md5: 2deae168760fa019e6d19d0111baca41 - size: 455 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - outs: - - path: experiment_data/dataset_relation_manager_data/pl_minds14 - md5: 40bb7d02cc76d5b1093955b5046cc3c4.dir - size: 3545062 - nfiles: 1124 - pipeline_prepare_relation_manager@0: - cmd: PYTHONPATH=. python experiment/dataset_specific/pl_common_voice/import_relation_manager.py - deps: - - path: experiment/dataset_specific/pl_common_voice/import_relation_manager.py - md5: 2d0b47d14088c2c5b61a78ddeeff5293 - size: 547 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - outs: - - path: experiment_data/dataset_relation_manager_data/pl_common_voice - md5: da29f883159b95496c488a3c9c35e632.dir - size: 21756677 - nfiles: 16310 - pipeline_prepare_relation_manager@1: - cmd: PYTHONPATH=. python experiment/dataset_specific/pl_google_fleurs/import_relation_manager.py - deps: - - path: experiment/dataset_specific/pl_google_fleurs/import_relation_manager.py - md5: f5547b678bb8fe0542eaaf047d1447aa - size: 458 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - outs: - - path: experiment_data/dataset_relation_manager_data/pl_google_fleurs - md5: ce176e60d7e55945e402d711b22b0c6a.dir - size: 4991297 - nfiles: 1516 - pipeline_gold_transcript@0: - cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_common_voice - deps: - - path: experiment/pipeline_process_gold_transcript.py - md5: baac4f80dcdfcc5a49806eea6eec832a - size: 1195 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/dataset_relation_manager_data/pl_common_voice - md5: da29f883159b95496c488a3c9c35e632.dir - size: 21756677 - nfiles: 16310 - outs: - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - pipeline_prepare_relation_manager@2: - cmd: PYTHONPATH=. python experiment/dataset_specific/pl_luna/import_relation_manager.py - deps: - - path: experiment/dataset_specific/pl_luna/import_relation_manager.py - md5: 100bd47d6984532b076776702d93fb9f - size: 8301 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - outs: - - path: experiment_data/dataset_relation_manager_data/pl_luna - md5: 95ca6826129400092cdc41365a4f6dcb.dir - size: 229007155 - nfiles: 1000 - pipeline_gold_transcript@2: - cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_luna - deps: - - path: experiment/pipeline_process_gold_transcript.py - md5: baac4f80dcdfcc5a49806eea6eec832a - size: 1195 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/dataset_relation_manager_data/pl_luna - md5: 95ca6826129400092cdc41365a4f6dcb.dir - size: 229007155 - nfiles: 1000 - outs: - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - pipeline_gold_transcript@3: - cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_minds14 - deps: - - path: experiment/pipeline_process_gold_transcript.py - md5: baac4f80dcdfcc5a49806eea6eec832a - size: 1195 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/dataset_relation_manager_data/pl_minds14 - md5: 40bb7d02cc76d5b1093955b5046cc3c4.dir - size: 3545062 - nfiles: 1124 - outs: - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - pipeline_gold_transcript@4: - cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_voicelab_cbiz - deps: - - path: experiment/pipeline_process_gold_transcript.py - md5: baac4f80dcdfcc5a49806eea6eec832a - size: 1195 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/dataset_relation_manager_data/pl_voicelab_cbiz - md5: d23bd3824dc9e68a649edc94fbec50fb.dir - size: 110711470 - nfiles: 1600 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: ebffd3814a48564f4e33b9a4e0956af3.dir - size: 21846798 - nfiles: 800 - pipeline_gold_transcript@1: - cmd: PYTHONPATH=. python experiment/pipeline_process_gold_transcript.py --dataset=pl_google_fleurs - deps: - - path: experiment/pipeline_process_gold_transcript.py - md5: baac4f80dcdfcc5a49806eea6eec832a - size: 1195 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/dataset_relation_manager_data/pl_google_fleurs - md5: ce176e60d7e55945e402d711b22b0c6a.dir - size: 4991297 - nfiles: 1516 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - pipeline_asr_result@7: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_voicelab_cbiz - --asr=techmo - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_voicelab_cbiz/pl_voicelab_cbiz__techmo - md5: 6c3b356723d562c978f84e733b91f5d0.dir - size: 17539259 - nfiles: 800 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result - md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir - size: 39158267 - nfiles: 800 - pipeline_asr_result@3: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_common_voice - --asr=ajn - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_common_voice/pl_common_voice__ajn - md5: 873867be79ac82f04d28bd3419fa95f3.dir - size: 1677089 - nfiles: 8155 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - outs: - - path: experiment_data/pipeline/pl_common_voice/ajn__result - md5: 77d873041fe2952e3c45ee4ac6458061.dir - size: 6667841 - nfiles: 8155 - pipeline_asr_result@17: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_luna/pl_luna__wav2vec2 - md5: 0555f6346ef6332d27fc0979e8ac16f7.dir - size: 16356269 - nfiles: 456 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - outs: - - path: experiment_data/pipeline/pl_luna/wav2vec2__result - md5: 9c63b061ac7763144bca121e163ee7aa.dir - size: 20658485 - nfiles: 456 - pipeline_asr_result@13: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__whisper_tiny - md5: f7ba36aee7148e39c4dccd325ad68228.dir - size: 1037576 - nfiles: 758 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__result - md5: 7220584482e69b3a9f4b43fba97e97cf.dir - size: 2135360 - nfiles: 758 - pipeline_asr_result@15: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna - --asr=ajn - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_luna/pl_luna__ajn - md5: 10454ef4568c2023e9d51ad418db2854.dir - size: 1276562 - nfiles: 495 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - outs: - - path: experiment_data/pipeline/pl_luna/ajn__result - md5: 653d65e186a7d05958ce3cbef219038c.dir - size: 6159899 - nfiles: 494 - pipeline_asr_result@2: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_common_voice - --asr=techmo - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_common_voice/pl_common_voice__techmo - md5: 47a9d744ff9f02d0a82a4a30664270b5.dir - size: 3686716 - nfiles: 8155 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - outs: - - path: experiment_data/pipeline/pl_common_voice/techmo__result - md5: 9030cf3640f2749d9c1b4439687bdc2f.dir - size: 7761880 - nfiles: 8136 - pipeline_asr_result@6: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_voicelab_cbiz - --asr=ajn - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_voicelab_cbiz/pl_voicelab_cbiz__ajn - md5: 0705aafa0969142288cc9baa88d1ed57.dir - size: 6896694 - nfiles: 800 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result - md5: 7de1137f44fad26766da0fc309720160.dir - size: 22765926 - nfiles: 800 - pipeline_asr_result@11: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs - --asr=techmo - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__techmo - md5: 876e8cff4ebd191617fcd3844e411475.dir - size: 859227 - nfiles: 758 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/techmo__result - md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir - size: 1880403 - nfiles: 758 - pipeline_asr_result@1: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_common_voice - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_common_voice/pl_common_voice__wav2vec2 - md5: 8f84125abe861abbcd39499079aa1777.dir - size: 14830897 - nfiles: 8155 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - outs: - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result - md5: b0d0042d77d7adce37890ca63ad40091.dir - size: 19014997 - nfiles: 8154 - pipeline_asr_result@8: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_voicelab_cbiz - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_voicelab_cbiz/pl_voicelab_cbiz__whisper_tiny - md5: 92d2e61aed2ae428dba95fc3356a37d4.dir - size: 19120968 - nfiles: 800 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/whisper_tiny__result - md5: 5d8c8339129de7a9340bb64e3fed22ff.dir - size: 38889432 - nfiles: 800 - pipeline_asr_result@5: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_voicelab_cbiz - --asr=google - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_voicelab_cbiz/pl_voicelab_cbiz__google - md5: 35a79b96887fbf705327937fdd57c0c6.dir - size: 5651375 - nfiles: 799 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result - md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir - size: 27432599 - nfiles: 799 - pipeline_asr_result@12: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__wav2vec2 - md5: 9638746467b0dea972a0be101a7d5c58.dir - size: 4093001 - nfiles: 758 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result - md5: bf9c77e34376bcda73dbdb6afee55c8c.dir - size: 5137721 - nfiles: 758 - pipeline_asr_result@16: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna - --asr=techmo - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_luna/pl_luna__techmo - md5: 033ea7b5434dded73bf869bfdd299462.dir - size: 4256479 - nfiles: 500 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - outs: - - path: experiment_data/pipeline/pl_luna/techmo__result - md5: 0e596570e1502b38588427bc72dcc006.dir - size: 9697519 - nfiles: 500 - pipeline_asr_result@10: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs - --asr=ajn - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__ajn - md5: 2218d65c2c73761ce8825015cf79bacd.dir - size: 343198 - nfiles: 758 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/ajn__result - md5: 545e63a6daf9c46387c1d7d40b85499f.dir - size: 1413262 - nfiles: 758 - pipeline_asr_result@4: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_common_voice - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_common_voice/pl_common_voice__whisper_tiny - md5: b89a21b6106f8bd93068b0e9159c90db.dir - size: 5700329 - nfiles: 8155 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - outs: - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__result - md5: 6310f1b24332777817738abe33676505.dir - size: 10121705 - nfiles: 8155 - pipeline_asr_result@14: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna - --asr=google - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_luna/pl_luna__google - md5: ac191369daf21c45c48da1bcd3e3a540.dir - size: 1096697 - nfiles: 500 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - outs: - - path: experiment_data/pipeline/pl_luna/google__result - md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir - size: 5346497 - nfiles: 500 - pipeline_asr_result@9: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_google_fleurs - --asr=google - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_google_fleurs/pl_google_fleurs__google - md5: c740521abae49feebfaf20c2384c8de5.dir - size: 350126 - nfiles: 758 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/google__result - md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir - size: 1377134 - nfiles: 758 - pipeline_asr_result@0: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_common_voice - --asr=google - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_common_voice/pl_common_voice__google - md5: e640b898f723eed2af42b93596eabecd.dir - size: 1701910 - nfiles: 8143 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - outs: - - path: experiment_data/pipeline/pl_common_voice/google__result - md5: afb53476cc93ef4de3591908df41fd2a.dir - size: 5854366 - nfiles: 8143 - pipeline_word_wer@8: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs - --asr=google - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__result - md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir - size: 1377134 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_classic_alignment - md5: 9beb57e9be598275ac9d449618da440e.dir - size: 3818553 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_classic_metrics - md5: 984770b57ca48fd793099c9ed67791d0.dir - size: 23344 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_embeddings_alignment - md5: aeb20f1662b696a6fc11bcd39a02a0de.dir - size: 7984964 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__word_wer_embeddings_metrics - md5: b2626f92231431e163813ef7112c119d.dir - size: 53916 - nfiles: 758 - pipeline_word_wer@6: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_voicelab_cbiz - --asr=ajn - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result - md5: 7de1137f44fad26766da0fc309720160.dir - size: 22765926 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: ebffd3814a48564f4e33b9a4e0956af3.dir - size: 21846798 - nfiles: 800 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_classic_alignment - md5: 1ed03de918f5373afdbdbb020c6161b5.dir - size: 78992762 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_classic_metrics - md5: 04f6ccbaf94cf08c34ac201ae079c21c.dir - size: 25307 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_embeddings_alignment - md5: a67e7a6e8a6e0755ea35a519d9decf86.dir - size: 128258410 - nfiles: 646 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__word_wer_embeddings_metrics - md5: c83561c448950860f36037c2287a25f5.dir - size: 25194 - nfiles: 646 - pipeline_word_wer@17: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/whisper_tiny__result - md5: 0237d69dfa56d1a29a8bc43f464c4f1e.dir - size: 11789990 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/whisper_tiny__word_wer_classic_alignment - md5: 41df69d888fbcd0e92d58deb676d5101.dir - size: 21939973 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/whisper_tiny__word_wer_classic_metrics - md5: c026954d3d4b240fa177a269530de31b.dir - size: 16939 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/whisper_tiny__word_wer_embeddings_alignment - md5: 28999392709820c671406c03b73b4f38.dir - size: 45305707 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/whisper_tiny__word_wer_embeddings_metrics - md5: c178c61c6e37b6ed729d2300aae65192.dir - size: 32880 - nfiles: 500 - pipeline_word_wer@15: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna - --asr=techmo - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__result - md5: 0e596570e1502b38588427bc72dcc006.dir - size: 9697519 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/techmo__word_wer_classic_alignment - md5: 5e90722635a811db67a1f0d917707b0a.dir - size: 21380796 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__word_wer_classic_metrics - md5: 4cfbb2830b280084ece14b1ef815b92a.dir - size: 17298 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__word_wer_embeddings_alignment - md5: 74f8be84e1e913050483713fbc945d80.dir - size: 44326962 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__word_wer_embeddings_metrics - md5: 98a7edeee3b630e8e301acfc578a8393.dir - size: 34869 - nfiles: 500 - pipeline_word_wer@12: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__result - md5: 7220584482e69b3a9f4b43fba97e97cf.dir - size: 2135360 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__word_wer_classic_alignment - md5: 54e9e094016886220c23e7bb9808ca7a.dir - size: 4024460 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__word_wer_classic_metrics - md5: 4dfcbf74f27bd94910ab72561428a771.dir - size: 22975 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__word_wer_embeddings_alignment - md5: 8d7759d25cd8f31fb37a726e5c5f010f.dir - size: 8296122 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__word_wer_embeddings_metrics - md5: 4b043e5c91dd095be62726a33b2400df.dir - size: 52238 - nfiles: 758 - pipeline_word_wer@10: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs - --asr=techmo - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__result - md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir - size: 1880403 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_classic_alignment - md5: 689776c77c4ecaa11578b53480100ecc.dir - size: 3821036 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_classic_metrics - md5: 8d0d99fd7d965d4070c0b391cd6fa2b0.dir - size: 23030 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_embeddings_alignment - md5: 0ec5fe337bde2254c91146fd16b9c6af.dir - size: 7995553 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__word_wer_embeddings_metrics - md5: 55eb6eb0aacc12e3e2caf224b4b89df1.dir - size: 53591 - nfiles: 758 - pipeline_word_wer@13: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna - --asr=google - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__result - md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir - size: 5346497 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/google__word_wer_classic_alignment - md5: 5bdee43e646a4e9470310073365ebc37.dir - size: 19650202 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__word_wer_classic_metrics - md5: 9951fb70382cc8bed9aa48d1185c1c7c.dir - size: 16989 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__word_wer_embeddings_alignment - md5: a06bd5f0b4c52c679315b6c6d7478084.dir - size: 40586004 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__word_wer_embeddings_metrics - md5: 351543fb54e2098ac07999482d2280a8.dir - size: 34530 - nfiles: 500 - pipeline_word_wer@3: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice - --asr=ajn - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/ajn__result - md5: 77d873041fe2952e3c45ee4ac6458061.dir - size: 6667841 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - outs: - - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_classic_alignment - md5: 7b7c5de97970c320cb8cf549839d16fd.dir - size: 17724868 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_classic_metrics - md5: b1bdb80faa8c728ea072632121a2f2d0.dir - size: 209927 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_embeddings_alignment - md5: 51798a8e9b7239c8833274dbf6644d1e.dir - size: 32293142 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/ajn__word_wer_embeddings_metrics - md5: 63233cd4c3a18f791f228e0e385cdde1.dir - size: 561188 - nfiles: 8155 - pipeline_word_wer@16: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/wav2vec2__result - md5: 9c63b061ac7763144bca121e163ee7aa.dir - size: 20658485 - nfiles: 456 - outs: - - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_classic_alignment - md5: 4d4ae25d4fac2a52893e60389fcd2f9e.dir - size: 18462856 - nfiles: 456 - - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_classic_metrics - md5: f209479afd2482800f1e75bdab9f83b2.dir - size: 15604 - nfiles: 456 - - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_embeddings_alignment - md5: 41ad4c0923986a90fc9ed12aa6e30f73.dir - size: 38402586 - nfiles: 456 - - path: experiment_data/pipeline/pl_luna/wav2vec2__word_wer_embeddings_metrics - md5: df02c1f9d7e26ed10815175de086e0b6.dir - size: 24633 - nfiles: 456 - pipeline_word_wer@11: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result - md5: bf9c77e34376bcda73dbdb6afee55c8c.dir - size: 5137721 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_classic_alignment - md5: 052fa8f97b2f9d5b8ca4175b4e92dfd1.dir - size: 3854678 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_classic_metrics - md5: d9dec56ef7ddb6a3bc03638e37d1c04c.dir - size: 23168 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_embeddings_alignment - md5: 069191de9f84f37303bc202968f26766.dir - size: 8069804 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__word_wer_embeddings_metrics - md5: b45390641698e71fdf420fbb2afec7e0.dir - size: 53351 - nfiles: 758 - pipeline_word_wer@7: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_voicelab_cbiz - --asr=techmo - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: ebffd3814a48564f4e33b9a4e0956af3.dir - size: 21846798 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result - md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir - size: 39158267 - nfiles: 800 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_classic_alignment - md5: 37f0c27869efd72bf3eaedae70e426d5.dir - size: 83756423 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_classic_metrics - md5: 2fe3288abe85e4a385e2aefa0e8cad7e.dir - size: 27780 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_embeddings_alignment - md5: 01badff31f4d2f6583481c332fae8abc.dir - size: 174322727 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__word_wer_embeddings_metrics - md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir - size: 56182 - nfiles: 800 - pipeline_word_wer@2: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice - --asr=techmo - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/techmo__result - md5: 9030cf3640f2749d9c1b4439687bdc2f.dir - size: 7761880 - nfiles: 8136 - outs: - - path: experiment_data/pipeline/pl_common_voice/techmo__word_wer_classic_alignment - md5: 478503dcf96c6162f15466d5455b6271.dir - size: 15696317 - nfiles: 8136 - - path: experiment_data/pipeline/pl_common_voice/techmo__word_wer_classic_metrics - md5: 7b60102e7896228777eb92fdc2aaebc2.dir - size: 223467 - nfiles: 8136 - - path: experiment_data/pipeline/pl_common_voice/techmo__word_wer_embeddings_alignment - md5: 09126e1594234e97cd10858cbc06f4be.dir - size: 33111258 - nfiles: 8136 - - path: experiment_data/pipeline/pl_common_voice/techmo__word_wer_embeddings_metrics - md5: 4fdb7cea64f99b62b20a725929093d4d.dir - size: 571563 - nfiles: 8136 - pipeline_word_wer@14: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_luna - --asr=ajn - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/ajn__result - md5: 653d65e186a7d05958ce3cbef219038c.dir - size: 6159899 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/ajn__word_wer_classic_alignment - md5: e25ae51f8dc1afe55d5c0b44a67ab8ee.dir - size: 20671277 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/ajn__word_wer_classic_metrics - md5: 18605657ff9c7ef3221e27b671a3b4d1.dir - size: 16835 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/ajn__word_wer_embeddings_alignment - md5: 6be0a1c035f4a84a9035bfba1458cdac.dir - size: 43099546 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/ajn__word_wer_embeddings_metrics - md5: 4f368d2ba1c5a54d5e3ab69a7581549e.dir - size: 19326 - nfiles: 494 - pipeline_word_wer@0: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice - --asr=google - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/google__result - md5: afb53476cc93ef4de3591908df41fd2a.dir - size: 5854366 - nfiles: 8143 - outs: - - path: experiment_data/pipeline/pl_common_voice/google__word_wer_classic_alignment - md5: 0562ca30a9415b6a568430f34650f063.dir - size: 15821424 - nfiles: 8143 - - path: experiment_data/pipeline/pl_common_voice/google__word_wer_classic_metrics - md5: deafb9c70dd9bb48c57dd0802bd8c923.dir - size: 225225 - nfiles: 8143 - - path: experiment_data/pipeline/pl_common_voice/google__word_wer_embeddings_alignment - md5: 4b1952cbde7ce121c616f686459ef924.dir - size: 33384670 - nfiles: 8143 - - path: experiment_data/pipeline/pl_common_voice/google__word_wer_embeddings_metrics - md5: e2dcce4d029ffe904d363790da375fac.dir - size: 572632 - nfiles: 8143 - pipeline_word_wer@5: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_voicelab_cbiz - --asr=google - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: ebffd3814a48564f4e33b9a4e0956af3.dir - size: 21846798 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result - md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir - size: 27432599 - nfiles: 799 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_classic_alignment - md5: 184cdfdd465db609f514891b0330ef67.dir - size: 83997172 - nfiles: 799 - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_classic_metrics - md5: c12eab8acb3cee0219fc8046691b24ab.dir - size: 27841 - nfiles: 799 - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_embeddings_alignment - md5: 1e7ef6690d2e36f368eb02a8dcba9ab0.dir - size: 174732507 - nfiles: 799 - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__word_wer_embeddings_metrics - md5: c15414f2ec9c0a1ab78199861b0b4ce0.dir - size: 55765 - nfiles: 799 - pipeline_word_wer@1: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result - md5: b0d0042d77d7adce37890ca63ad40091.dir - size: 19014997 - nfiles: 8154 - outs: - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__word_wer_classic_alignment - md5: 59beaa6beb3dc834c555376a5aeca7b4.dir - size: 15851643 - nfiles: 8154 - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__word_wer_classic_metrics - md5: ccb35fbf94c551765dca2f72babcb7cb.dir - size: 225545 - nfiles: 8154 - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__word_wer_embeddings_alignment - md5: a49f354af5d3f345fe2f85557f47931b.dir - size: 33444109 - nfiles: 8154 - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__word_wer_embeddings_metrics - md5: c829268cd43ce5af3f57be7db6d058ce.dir - size: 573538 - nfiles: 8154 - pipeline_word_wer@9: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_google_fleurs - --asr=ajn - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__result - md5: 545e63a6daf9c46387c1d7d40b85499f.dir - size: 1413262 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_classic_alignment - md5: 6437952d0ab383e44acca5fa70d02e54.dir - size: 3981739 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_classic_metrics - md5: 5b84bc58a25d7d0a4d04e6d5e1abc966.dir - size: 22391 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_embeddings_alignment - md5: f7494807662a0e632c7ae790d85e6c19.dir - size: 8197217 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__word_wer_embeddings_metrics - md5: 3ca4ecc9fec05a9bc00aad4f898751ae.dir - size: 43801 - nfiles: 758 - pipeline_word_wer@4: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_common_voice - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__result - md5: 6310f1b24332777817738abe33676505.dir - size: 10121705 - nfiles: 8155 - outs: - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__word_wer_classic_alignment - md5: d21d7cdfe4d13ec9f6c3ae105528531a.dir - size: 16691829 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__word_wer_classic_metrics - md5: 278bdbc2fa64f8b9b8d59e0cac978d0e.dir - size: 213692 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__word_wer_embeddings_alignment - md5: e19eed8fb2f2cdad4a3f0ed4ec857fb2.dir - size: 34644290 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__word_wer_embeddings_metrics - md5: b169e4d35af45cb82fb4cf86d8d2aa79.dir - size: 553962 - nfiles: 8155 - pipeline_spacy_pos_wer@14: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna - --asr=ajn - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/ajn__result - md5: 653d65e186a7d05958ce3cbef219038c.dir - size: 6159899 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/ajn__spacy_pos_alignment - md5: 0f310b8840f2d09ac558b9bac2a0d3b0.dir - size: 21936929 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/ajn__spacy_pos_metrics - md5: 6c6bb673ea5f64c9d851878c9d8a7c09.dir - size: 8444 - nfiles: 494 - pipeline_spacy_pos_wer@2: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice - --asr=techmo - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/techmo__result - md5: 9030cf3640f2749d9c1b4439687bdc2f.dir - size: 7761880 - nfiles: 8136 - outs: - - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_pos_alignment - md5: c0c77bf3030799c32241555bf4d71e6b.dir - size: 18192387 - nfiles: 8136 - - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_pos_metrics - md5: 61c69fcd287051f4ab7d1ffcc68a9aca.dir - size: 96845 - nfiles: 8136 - pipeline_spacy_pos_wer@16: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/whisper_tiny__result - md5: 0237d69dfa56d1a29a8bc43f464c4f1e.dir - size: 11789990 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/whisper_tiny__spacy_pos_alignment - md5: 95446f56d8b5c32ce540d04452b810f6.dir - size: 25245301 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/whisper_tiny__spacy_pos_metrics - md5: b7cd2c7b8a385a5c97af1efee94c970b.dir - size: 8562 - nfiles: 500 - pipeline_spacy_pos_wer@6: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_voicelab_cbiz - --asr=ajn - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result - md5: 7de1137f44fad26766da0fc309720160.dir - size: 22765926 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: ebffd3814a48564f4e33b9a4e0956af3.dir - size: 21846798 - nfiles: 800 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_pos_alignment - md5: 4f07fe60bf29e2148879db0c903c98f6.dir - size: 78539613 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_pos_metrics - md5: 7ec3bb7c838e7f06b8a1dbe7a68faac2.dir - size: 13753 - nfiles: 800 - pipeline_spacy_pos_wer@0: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice - --asr=google - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/google__result - md5: afb53476cc93ef4de3591908df41fd2a.dir - size: 5854366 - nfiles: 8143 - outs: - - path: experiment_data/pipeline/pl_common_voice/google__spacy_pos_alignment - md5: d7f32c8741318563bdec3f35679cdc9a.dir - size: 18316770 - nfiles: 8143 - - path: experiment_data/pipeline/pl_common_voice/google__spacy_pos_metrics - md5: 117611317774e81fb482ba9c71ec806b.dir - size: 97235 - nfiles: 8143 - pipeline_spacy_pos_wer@11: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result - md5: bf9c77e34376bcda73dbdb6afee55c8c.dir - size: 5137721 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_pos_alignment - md5: 3b2e3e75c644805681905c97ec37cf28.dir - size: 3737151 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_pos_metrics - md5: 782cc84e9116281dfc28734b2ae4a5ea.dir - size: 9004 - nfiles: 758 - pipeline_spacy_pos_wer@15: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna - --asr=techmo - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__result - md5: 0e596570e1502b38588427bc72dcc006.dir - size: 9697519 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/techmo__spacy_pos_alignment - md5: a5cb7dd59d83b2ec678c6ba5177bc38f.dir - size: 20897599 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__spacy_pos_metrics - md5: 1478707020a96496b50eb732207c290e.dir - size: 8841 - nfiles: 500 - pipeline_spacy_pos_wer@12: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__result - md5: 7220584482e69b3a9f4b43fba97e97cf.dir - size: 2135360 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__spacy_pos_alignment - md5: 2f9237dcfa69ed025f290ca40681e28f.dir - size: 4382663 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__spacy_pos_metrics - md5: 15b44c2e516183028c14ee7cc9bf47e1.dir - size: 10407 - nfiles: 758 - pipeline_spacy_pos_wer@17: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14 - --asr=google - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__result - md5: 3f7a79298a5156fd2b023e673326e72f.dir - size: 985004 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/google__spacy_pos_alignment - md5: 26f0d02f96d867426ff26a358b596a4c.dir - size: 2660000 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__spacy_pos_metrics - md5: 44391a580b4a84f33a2fa0cbace4c5d6.dir - size: 5109 - nfiles: 562 - pipeline_spacy_pos_wer@1: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result - md5: b0d0042d77d7adce37890ca63ad40091.dir - size: 19014997 - nfiles: 8154 - outs: - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_pos_alignment - md5: caf518456afa1864c286cd5e15b9e563.dir - size: 18349138 - nfiles: 8154 - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_pos_metrics - md5: 71381fa7fd6c0cdba00f25c17fd6be5e.dir - size: 97426 - nfiles: 8154 - pipeline_spacy_pos_wer@7: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_voicelab_cbiz - --asr=techmo - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: ebffd3814a48564f4e33b9a4e0956af3.dir - size: 21846798 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result - md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir - size: 39158267 - nfiles: 800 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_pos_alignment - md5: 678171dbd7c038cd6eaadc5eb331b8b5.dir - size: 81650836 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_pos_metrics - md5: 11320499f29d2d7bfce68d35fb352b83.dir - size: 14334 - nfiles: 800 - pipeline_spacy_pos_wer@10: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs - --asr=techmo - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__result - md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir - size: 1880403 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_pos_alignment - md5: d76701778be18566a4d3a45bc325196e.dir - size: 3707699 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_pos_metrics - md5: 9cae08bbb8a6331d06a33dbbb4a16301.dir - size: 9662 - nfiles: 758 - pipeline_spacy_pos_wer@13: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_luna - --asr=google - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__result - md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir - size: 5346497 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/google__spacy_pos_alignment - md5: 460edffd716b1f46fb863e5b2b0d949e.dir - size: 19342263 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__spacy_pos_metrics - md5: a65dd7d74319da06f4ebaca08cde30ce.dir - size: 8659 - nfiles: 500 - pipeline_spacy_pos_wer@5: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_voicelab_cbiz - --asr=google - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: ebffd3814a48564f4e33b9a4e0956af3.dir - size: 21846798 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result - md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir - size: 27432599 - nfiles: 799 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_pos_alignment - md5: 22ccae20301046da3e40e93daa680d53.dir - size: 83052124 - nfiles: 799 - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_pos_metrics - md5: 43d79d47ba1e91e86daf1f66aa18c941.dir - size: 14239 - nfiles: 799 - pipeline_spacy_ner_wer@4: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__result - md5: 6310f1b24332777817738abe33676505.dir - size: 10121705 - nfiles: 8155 - outs: - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__spacy_ner_alignment - md5: b0057c661dc1197217535fc74c04bb37.dir - size: 20900270 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__spacy_ner_metrics - md5: 1c0279e4b2fc0e61314e7031b8d00dc1.dir - size: 93934 - nfiles: 8155 - pipeline_spacy_ner_wer@16: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/wav2vec2__result - md5: 9c63b061ac7763144bca121e163ee7aa.dir - size: 20658485 - nfiles: 456 - outs: - - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_ner_alignment - md5: 4530ad915e82cc9668e775d5de219b13.dir - size: 17577804 - nfiles: 456 - - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_ner_metrics - md5: 0ad2dc0beeebd336771228f8751fe028.dir - size: 8014 - nfiles: 456 - pipeline_spacy_ner_wer@9: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs - --asr=ajn - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__result - md5: 545e63a6daf9c46387c1d7d40b85499f.dir - size: 1413262 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_ner_alignment - md5: 34ddaf22d6ba136b8257344899902015.dir - size: 3675109 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_ner_metrics - md5: 9379479c875f0fe49d6da70c9b189d67.dir - size: 10663 - nfiles: 758 - pipeline_spacy_ner_wer@15: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna - --asr=techmo - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__result - md5: 0e596570e1502b38588427bc72dcc006.dir - size: 9697519 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/techmo__spacy_ner_alignment - md5: c301698fa01d07cfb3efb16ffbb06b69.dir - size: 20451389 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__spacy_ner_metrics - md5: 06cea0926a325c92a1ff79457db655a8.dir - size: 8918 - nfiles: 500 - pipeline_spacy_ner_wer@1: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result - md5: b0d0042d77d7adce37890ca63ad40091.dir - size: 19014997 - nfiles: 8154 - outs: - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_ner_alignment - md5: f67a9bd9c25520328ed3c64f149aea17.dir - size: 17938087 - nfiles: 8154 - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_ner_metrics - md5: badc978fc37c2cc9e1c3cafd77dbd064.dir - size: 98038 - nfiles: 8154 - pipeline_asr_result@18: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_luna - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_luna/pl_luna__whisper_tiny - md5: 1214905c22db4c9bd7e439e68b7551cc.dir - size: 6202142 - nfiles: 500 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - outs: - - path: experiment_data/pipeline/pl_luna/whisper_tiny__result - md5: 0237d69dfa56d1a29a8bc43f464c4f1e.dir - size: 11789990 - nfiles: 500 - pipeline_asr_result@19: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14 - --asr=google - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_minds14/pl_minds14__google - md5: bb1acae0c3070424d635ae9d2c6e14f0.dir - size: 247940 - nfiles: 562 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - outs: - - path: experiment_data/pipeline/pl_minds14/google__result - md5: 3f7a79298a5156fd2b023e673326e72f.dir - size: 985004 - nfiles: 562 - pipeline_spacy_ner_wer@12: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__result - md5: 7220584482e69b3a9f4b43fba97e97cf.dir - size: 2135360 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__spacy_ner_alignment - md5: 194c42115d93fddc20442ef0dc592513.dir - size: 4268672 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__spacy_ner_metrics - md5: 7884b951c828e6874b72a6d83cfc5a31.dir - size: 10855 - nfiles: 758 - pipeline_spacy_ner_wer@13: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna - --asr=google - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__result - md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir - size: 5346497 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/google__spacy_ner_alignment - md5: 84a872f90985c615648ce02ea8a6951c.dir - size: 18946509 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__spacy_ner_metrics - md5: 7df5fdeb083c8b1e953e1ff097c69f11.dir - size: 8752 - nfiles: 500 - pipeline_spacy_ner_wer@11: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result - md5: bf9c77e34376bcda73dbdb6afee55c8c.dir - size: 5137721 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_ner_alignment - md5: d04f82c751e6ef28414b35ba4d844470.dir - size: 3660762 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_ner_metrics - md5: 308e3f0f67d855482d7d3634954f4b57.dir - size: 7553 - nfiles: 758 - pipeline_spacy_ner_wer@6: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_voicelab_cbiz - --asr=ajn - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result - md5: 7de1137f44fad26766da0fc309720160.dir - size: 22765926 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: ebffd3814a48564f4e33b9a4e0956af3.dir - size: 21846798 - nfiles: 800 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_ner_alignment - md5: 48a512d6811d00ec9dbe60e3131834ac.dir - size: 76044699 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__spacy_ner_metrics - md5: 61799b677b4d6af5e41c947c1d3b02cd.dir - size: 14196 - nfiles: 800 - pipeline_spacy_ner_wer@5: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_voicelab_cbiz - --asr=google - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: ebffd3814a48564f4e33b9a4e0956af3.dir - size: 21846798 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result - md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir - size: 27432599 - nfiles: 799 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_ner_alignment - md5: a83b6e086aa207da109fc06293501f52.dir - size: 80594165 - nfiles: 799 - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__spacy_ner_metrics - md5: 85388729ffbed2a328f94758c682c809.dir - size: 14708 - nfiles: 799 - pipeline_spacy_pos_wer@3: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice - --asr=ajn - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/ajn__result - md5: 77d873041fe2952e3c45ee4ac6458061.dir - size: 6667841 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - outs: - - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_pos_alignment - md5: f3b7dd0987fc7be5171478910c950657.dir - size: 19159060 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_pos_metrics - md5: 6094fb960e2eab979ecb33d40a253531.dir - size: 95146 - nfiles: 8155 - pipeline_spacy_ner_wer@0: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice - --asr=google - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/google__result - md5: afb53476cc93ef4de3591908df41fd2a.dir - size: 5854366 - nfiles: 8143 - outs: - - path: experiment_data/pipeline/pl_common_voice/google__spacy_ner_alignment - md5: 0534b5b887fc62c82cb98fbac73f0e45.dir - size: 17894226 - nfiles: 8143 - - path: experiment_data/pipeline/pl_common_voice/google__spacy_ner_metrics - md5: da5556dd7ef27958f9ebabf8b71ab477.dir - size: 97917 - nfiles: 8143 - pipeline_spacy_pos_wer@9: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs - --asr=ajn - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__result - md5: 545e63a6daf9c46387c1d7d40b85499f.dir - size: 1413262 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_pos_alignment - md5: 210ec7ffdde2c076fe2ab72873f240c7.dir - size: 3799802 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_pos_metrics - md5: 0d95a4abb4a33aef3e242a304fd58698.dir - size: 10569 - nfiles: 758 - pipeline_wikineiural_ner@11: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result - md5: bf9c77e34376bcda73dbdb6afee55c8c.dir - size: 5137721 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__wikineural_ner_alignment - md5: 320e1321cb3db403a9ec0bb662abe52c.dir - size: 7828240 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__wikineural_ner_metrics - md5: 0738885f0f798e8bc0fbb5c9824d9db1.dir - size: 9704 - nfiles: 758 - pipeline_wikineiural_ner@8: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs - --asr=google - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__result - md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir - size: 1377134 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/google__wikineural_ner_alignment - md5: 39e078edbc3f65934358787ddbe40eec.dir - size: 7572934 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__wikineural_ner_metrics - md5: f8b3c4183e31fc9e612b189579644f74.dir - size: 10796 - nfiles: 758 - pipeline_wikineiural_ner@14: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna - --asr=ajn - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/ajn__result - md5: 653d65e186a7d05958ce3cbef219038c.dir - size: 6159899 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/ajn__wikineural_ner_alignment - md5: 52c8c06ce74db727c4aa5f55a2a865cd.dir - size: 37478125 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/ajn__wikineural_ner_metrics - md5: 602eaef0046a2a8b551bbbced7123886.dir - size: 8816 - nfiles: 494 - pipeline_spacy_ner_wer@10: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs - --asr=techmo - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__result - md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir - size: 1880403 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_ner_alignment - md5: 628a77d036410a40bd358db2fc8ff739.dir - size: 3626842 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_ner_metrics - md5: cda4c4246e8b3b6d57aed57c4fe5594a.dir - size: 8311 - nfiles: 758 - pipeline_spacy_ner_wer@3: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice - --asr=ajn - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/ajn__result - md5: 77d873041fe2952e3c45ee4ac6458061.dir - size: 6667841 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - outs: - - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_ner_alignment - md5: 4b5e01b54c2ce8118580ad02d6dffca1.dir - size: 18576865 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_ner_metrics - md5: 15078a7515bc939d08d29d079311b37d.dir - size: 91570 - nfiles: 8155 - pipeline_spacy_ner_wer@2: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_common_voice - --asr=techmo - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/techmo__result - md5: 9030cf3640f2749d9c1b4439687bdc2f.dir - size: 7761880 - nfiles: 8136 - outs: - - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_ner_alignment - md5: fb116356575fb0589d58449fbf1377f7.dir - size: 17774086 - nfiles: 8136 - - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_ner_metrics - md5: 45eaaef226809a58ecf9180a47dec8aa.dir - size: 97994 - nfiles: 8136 - pipeline_wikineiural_ner@3: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice - --asr=ajn - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/ajn__result - md5: 77d873041fe2952e3c45ee4ac6458061.dir - size: 6667841 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - outs: - - path: experiment_data/pipeline/pl_common_voice/ajn__wikineural_ner_alignment - md5: 493bf2c6d82f18a117f2ae6d438d5c30.dir - size: 36352970 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/ajn__wikineural_ner_metrics - md5: 2275d851a39aafa755474fe79b16b337.dir - size: 117588 - nfiles: 8155 - pipeline_spacy_pos_wer@8: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_google_fleurs - --asr=google - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__result - md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir - size: 1377134 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_pos_alignment - md5: 3429e870a0541d01eb85b0a34fd16021.dir - size: 3712618 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_pos_metrics - md5: 56dddb48cea2022b91fd4323efd43a8b.dir - size: 9213 - nfiles: 758 - pipeline_wikineiural_ner@1: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result - md5: b0d0042d77d7adce37890ca63ad40091.dir - size: 19014997 - nfiles: 8154 - outs: - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__wikineural_ner_alignment - md5: 9613a3fcb28ead3d3d39a16180ccfab8.dir - size: 37574594 - nfiles: 8154 - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__wikineural_ner_metrics - md5: 59ff9a09d1e232c66bd41afa757f9130.dir - size: 118556 - nfiles: 8154 - pipeline_flair_upos@13: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna - --asr=google - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__result - md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir - size: 5346497 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/google__flair_upos_alignment - md5: 8a23016ee1f269beec30232dff1f751b.dir - size: 19210117 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__flair_upos_metrics - md5: 891ec9646dea43249aa9cb4eb562b2de.dir - size: 8654 - nfiles: 500 - pipeline_flair_upos@0: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice - --asr=google - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/google__result - md5: afb53476cc93ef4de3591908df41fd2a.dir - size: 5854366 - nfiles: 8143 - outs: - - path: experiment_data/pipeline/pl_common_voice/google__flair_upos_alignment - md5: 169eb8cd967ea75404bd3bd7f5a41b5e.dir - size: 17519056 - nfiles: 8143 - - path: experiment_data/pipeline/pl_common_voice/google__flair_upos_metrics - md5: 768158db51050c79dd588c8b32e5b89e.dir - size: 95486 - nfiles: 8143 - pipeline_wikineiural_ner@13: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna - --asr=google - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__result - md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir - size: 5346497 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/google__wikineural_ner_alignment - md5: dbfd406215b6d3b8ed5afcceb92034cb.dir - size: 34436914 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__wikineural_ner_metrics - md5: d4ca8091dac498b609dca9fda6d22f07.dir - size: 8807 - nfiles: 500 - pipeline_flair_upos@12: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__result - md5: 7220584482e69b3a9f4b43fba97e97cf.dir - size: 2135360 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__flair_upos_alignment - md5: fc0957cb3c17b47cea93b135220ac9ba.dir - size: 4233187 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__flair_upos_metrics - md5: 76e4bd62eb3a4eb3a34638683f06ba84.dir - size: 10449 - nfiles: 758 - pipeline_wikineiural_ner@10: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs - --asr=techmo - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__result - md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir - size: 1880403 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/techmo__wikineural_ner_alignment - md5: 2fdab231aaf8c96a97268d6447f3ca1e.dir - size: 7640523 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__wikineural_ner_metrics - md5: 14d453272229bc7b8d7c4eb2790e6b32.dir - size: 10705 - nfiles: 758 - pipeline_wikineiural_ner@2: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice - --asr=techmo - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/techmo__result - md5: 9030cf3640f2749d9c1b4439687bdc2f.dir - size: 7761880 - nfiles: 8136 - outs: - - path: experiment_data/pipeline/pl_common_voice/techmo__wikineural_ner_alignment - md5: 40094adf33d391cd6abb6199c326dfd7.dir - size: 37097148 - nfiles: 8136 - - path: experiment_data/pipeline/pl_common_voice/techmo__wikineural_ner_metrics - md5: 4570a295cf89a779de66565b0b9a77ad.dir - size: 118544 - nfiles: 8136 - pipeline_flair_upos@11: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result - md5: bf9c77e34376bcda73dbdb6afee55c8c.dir - size: 5137721 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__flair_upos_alignment - md5: 07ec971011a67b5e3ad7822fc717612c.dir - size: 3720804 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__flair_upos_metrics - md5: 642857d546ab39ca2d680fda663eab38.dir - size: 7889 - nfiles: 758 - pipeline_wikineiural_ner@4: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__result - md5: 6310f1b24332777817738abe33676505.dir - size: 10121705 - nfiles: 8155 - outs: - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__wikineural_ner_alignment - md5: da4492daece0b76c95ea363246ff5414.dir - size: 39870020 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__wikineural_ner_metrics - md5: b7b319a512c38ab0ee0ef5c347b7b80e.dir - size: 113084 - nfiles: 8155 - pipeline_flair_upos@9: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs - --asr=ajn - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__result - md5: 545e63a6daf9c46387c1d7d40b85499f.dir - size: 1413262 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/ajn__flair_upos_alignment - md5: be13715f9aed5232f08da3be9095a797.dir - size: 3675596 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__flair_upos_metrics - md5: 27bb131921ad52d2235aeb2b7befe4d1.dir - size: 10590 - nfiles: 758 - pipeline_wikineiural_ner@9: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs - --asr=ajn - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__result - md5: 545e63a6daf9c46387c1d7d40b85499f.dir - size: 1413262 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/ajn__wikineural_ner_alignment - md5: 83f073c2166a9a2fcfc6dc4d2268efb4.dir - size: 7530856 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__wikineural_ner_metrics - md5: 7def95d54e9428f71fa43a5b9a95b54b.dir - size: 12379 - nfiles: 758 - pipeline_flair_upos@14: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna - --asr=ajn - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/ajn__result - md5: 653d65e186a7d05958ce3cbef219038c.dir - size: 6159899 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/ajn__flair_upos_alignment - md5: 8aa7d2eeb9deb1bde3394ab5df9c15a0.dir - size: 21838304 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/ajn__flair_upos_metrics - md5: 4ccf8f54da1b47d0a9e0e7c95020135e.dir - size: 8450 - nfiles: 494 - pipeline_wikineiural_ner@0: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_common_voice - --asr=google - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/google__result - md5: afb53476cc93ef4de3591908df41fd2a.dir - size: 5854366 - nfiles: 8143 - outs: - - path: experiment_data/pipeline/pl_common_voice/google__wikineural_ner_alignment - md5: 1adc1718f35e14b2aae8de6c6a49de1b.dir - size: 37254218 - nfiles: 8143 - - path: experiment_data/pipeline/pl_common_voice/google__wikineural_ner_metrics - md5: a8b91d2203368be020389a8a02ecbb7b.dir - size: 117584 - nfiles: 8143 - pipeline_wikineiural_ner@12: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_google_fleurs - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__result - md5: 7220584482e69b3a9f4b43fba97e97cf.dir - size: 2135360 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__wikineural_ner_alignment - md5: 1e3b4afa928b74c1d58b0445bdc15a68.dir - size: 8311448 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__wikineural_ner_metrics - md5: 255cb7f51e1075cac6a2fbc65cbeb5fc.dir - size: 12402 - nfiles: 758 - pipeline_wikineiural_ner@5: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_voicelab_cbiz - --asr=google - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: ebffd3814a48564f4e33b9a4e0956af3.dir - size: 21846798 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result - md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir - size: 27432599 - nfiles: 799 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__wikineural_ner_alignment - md5: f977050b1f0445e9997f902f91634976.dir - size: 144157380 - nfiles: 799 - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__wikineural_ner_metrics - md5: 9a9056d774f8e901fa2beaa9a874b0a2.dir - size: 14800 - nfiles: 799 - pipeline_flair_upos@3: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice - --asr=ajn - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/ajn__result - md5: 77d873041fe2952e3c45ee4ac6458061.dir - size: 6667841 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - outs: - - path: experiment_data/pipeline/pl_common_voice/ajn__flair_upos_alignment - md5: d8fd6968dfa66671d8fb8cc609782287.dir - size: 17420454 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/ajn__flair_upos_metrics - md5: 619e9209626ba47702db03fa56670078.dir - size: 93408 - nfiles: 8155 - pipeline_wikineiural_ner@7: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_voicelab_cbiz - --asr=techmo - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: ebffd3814a48564f4e33b9a4e0956af3.dir - size: 21846798 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result - md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir - size: 39158267 - nfiles: 800 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__wikineural_ner_alignment - md5: fc4d6251b6c8d8253b99aa87d4c7a189.dir - size: 144096029 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__wikineural_ner_metrics - md5: dca61a52df606b9c3b510007a48c53ee.dir - size: 14810 - nfiles: 800 - pipeline_spacy_ner_wer@8: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_google_fleurs - --asr=google - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__result - md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir - size: 1377134 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_ner_alignment - md5: 67ea10ecbb9f5c3bfa14ffb85c843ba3.dir - size: 3638477 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_ner_metrics - md5: 0532a9596f26fd52037b6ecaa838ab9e.dir - size: 8500 - nfiles: 758 - pipeline_flair_upos@7: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_voicelab_cbiz - --asr=techmo - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: ebffd3814a48564f4e33b9a4e0956af3.dir - size: 21846798 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result - md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir - size: 39158267 - nfiles: 800 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__flair_upos_alignment - md5: b9435b169d923e56d45be4e3a489358d.dir - size: 81635416 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__flair_upos_metrics - md5: 00f24bd9e609cc89f29cb6c3cc8dba48.dir - size: 14348 - nfiles: 800 - pipeline_flair_upos@4: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__result - md5: 6310f1b24332777817738abe33676505.dir - size: 10121705 - nfiles: 8155 - outs: - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__flair_upos_alignment - md5: 147573fff8b9edf9df71f16aedd2646a.dir - size: 19643758 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__flair_upos_metrics - md5: 2f2b9f5e1e48ad1268d75099c357b26c.dir - size: 90154 - nfiles: 8155 - pipeline_spacy_pos_wer@4: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_common_voice - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__result - md5: 6310f1b24332777817738abe33676505.dir - size: 10121705 - nfiles: 8155 - outs: - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__spacy_pos_alignment - md5: ccb3410d235e8cf849e09a9ebef528bf.dir - size: 21551242 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__spacy_pos_metrics - md5: f43c1cd06a76f9fa8f684a3d22b2ff48.dir - size: 94586 - nfiles: 8155 - pipeline_flair_upos@1: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result - md5: b0d0042d77d7adce37890ca63ad40091.dir - size: 19014997 - nfiles: 8154 - outs: - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__flair_upos_alignment - md5: c2de0794eab6b838cb889cd218c65651.dir - size: 17543992 - nfiles: 8154 - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__flair_upos_metrics - md5: 5ec6a253f5152fb5215f5d4a1243ca4e.dir - size: 95798 - nfiles: 8154 - pipeline_flair_upos@5: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_voicelab_cbiz - --asr=google - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: ebffd3814a48564f4e33b9a4e0956af3.dir - size: 21846798 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__result - md5: cb6322c8c0c6d3cf557b93bf52efd0dc.dir - size: 27432599 - nfiles: 799 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__flair_upos_alignment - md5: 3446f07e21780b9319c2c22d88767dfc.dir - size: 81897055 - nfiles: 799 - - path: experiment_data/pipeline/pl_voicelab_cbiz/google__flair_upos_metrics - md5: 06fc109e28016f32d68edf992b4c3072.dir - size: 14350 - nfiles: 799 - pipeline_wikineiural_ner@6: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_voicelab_cbiz - --asr=ajn - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result - md5: 7de1137f44fad26766da0fc309720160.dir - size: 22765926 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: ebffd3814a48564f4e33b9a4e0956af3.dir - size: 21846798 - nfiles: 800 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__wikineural_ner_alignment - md5: 3363be14202fdb2577c8ddfce6549751.dir - size: 132627376 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__wikineural_ner_metrics - md5: b83cd24583e6fa648f4e106239e1ffcd.dir - size: 14486 - nfiles: 800 - pipeline_flair_upos@6: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_voicelab_cbiz - --asr=ajn - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__result - md5: 7de1137f44fad26766da0fc309720160.dir - size: 22765926 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: ebffd3814a48564f4e33b9a4e0956af3.dir - size: 21846798 - nfiles: 800 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__flair_upos_alignment - md5: d9c2a02375a3f9974ddf0ec4d64297b7.dir - size: 78332367 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/ajn__flair_upos_metrics - md5: 42f292cac09f5efc71215ac0f9bbf760.dir - size: 13841 - nfiles: 800 - pipeline_flair_upos@8: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs - --asr=google - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__result - md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir - size: 1377134 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/google__flair_upos_alignment - md5: 7a58a3caced592a9f00af9515eb92413.dir - size: 3696853 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__flair_upos_metrics - md5: 680491a1374081f637ade245e79ca627.dir - size: 9562 - nfiles: 758 - pipeline_spacy_ner_wer@7: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_voicelab_cbiz - --asr=techmo - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_voicelab_cbiz - md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir - size: 4803739404 - nfiles: 1600 - - path: experiment_data/pipeline/pl_voicelab_cbiz/gold_transcript - md5: ebffd3814a48564f4e33b9a4e0956af3.dir - size: 21846798 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__result - md5: e544489fc21b6a3e6d4fd68ab8c2c069.dir - size: 39158267 - nfiles: 800 - outs: - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_ner_alignment - md5: 3c7bf6f59153f0eef4685795dc8c1246.dir - size: 79503990 - nfiles: 800 - - path: experiment_data/pipeline/pl_voicelab_cbiz/techmo__spacy_ner_metrics - md5: f0dd0a8adb67c53d83ad32d152f90365.dir - size: 14595 - nfiles: 800 - pipeline_spacy_ner_wer@14: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna - --asr=ajn - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/ajn__result - md5: 653d65e186a7d05958ce3cbef219038c.dir - size: 6159899 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/ajn__spacy_ner_alignment - md5: 68434e48e2e6652ae6c9b17f9eb6fa05.dir - size: 21154450 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/ajn__spacy_ner_metrics - md5: 288d81ffe41a25d5ab85bbb3ef908979.dir - size: 8703 - nfiles: 494 - pipeline_flair_upos@10: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_google_fleurs - --asr=techmo - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__result - md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir - size: 1880403 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/techmo__flair_upos_alignment - md5: 527561b0790917cc03d782d5bf074294.dir - size: 3695215 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__flair_upos_metrics - md5: 06a349eee2ca119d3e79bbdbe5c95ef1.dir - size: 9384 - nfiles: 758 - pipeline_flair_upos@2: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_common_voice - --asr=techmo - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/techmo__result - md5: 9030cf3640f2749d9c1b4439687bdc2f.dir - size: 7761880 - nfiles: 8136 - outs: - - path: experiment_data/pipeline/pl_common_voice/techmo__flair_upos_alignment - md5: b2f57705be67ced0652ce670e1dde2cd.dir - size: 17397042 - nfiles: 8136 - - path: experiment_data/pipeline/pl_common_voice/techmo__flair_upos_metrics - md5: 09659b49a5c99187bf64cf922138a7c1.dir - size: 95391 - nfiles: 8136 - pipeline_spacy_ner_wer@17: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_luna - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/whisper_tiny__result - md5: 0237d69dfa56d1a29a8bc43f464c4f1e.dir - size: 11789990 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/whisper_tiny__spacy_ner_alignment - md5: 9c5a0efcc57695b5bdfb4f2ed6d1ce82.dir - size: 24437639 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/whisper_tiny__spacy_ner_metrics - md5: c0b89a3966675c348e9601bc68bfda11.dir - size: 8763 - nfiles: 500 - pipeline_wikineiural_ner@16: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/wav2vec2__result - md5: 9c63b061ac7763144bca121e163ee7aa.dir - size: 20658485 - nfiles: 456 - outs: - - path: experiment_data/pipeline/pl_luna/wav2vec2__wikineural_ner_alignment - md5: 4a1ad7d9221851e3735f1c3c2238862b.dir - size: 33934003 - nfiles: 456 - - path: experiment_data/pipeline/pl_luna/wav2vec2__wikineural_ner_metrics - md5: 0368dfd54954d3aa317e359f32efe59e.dir - size: 8244 - nfiles: 456 - pipeline_flair_upos@17: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/whisper_tiny__result - md5: 0237d69dfa56d1a29a8bc43f464c4f1e.dir - size: 11789990 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/whisper_tiny__flair_upos_alignment - md5: b30b207caed04dd1be8c79313124a6ac.dir - size: 24799648 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/whisper_tiny__flair_upos_metrics - md5: ed1ff9fa572dd43e0229726b3be66c2d.dir - size: 8494 - nfiles: 500 - pipeline_wikineiural_ner@17: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/whisper_tiny__result - md5: 0237d69dfa56d1a29a8bc43f464c4f1e.dir - size: 11789990 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/whisper_tiny__wikineural_ner_alignment - md5: 2ce8893b2a29cb7c4907636e55a962ba.dir - size: 40512588 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/whisper_tiny__wikineural_ner_metrics - md5: 0dc6ffc41d0711297133dd93f4937b6f.dir - size: 8949 - nfiles: 500 - pipeline_flair_upos@15: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna - --asr=techmo - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__result - md5: 0e596570e1502b38588427bc72dcc006.dir - size: 9697519 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/techmo__flair_upos_alignment - md5: 4319f089e3987d68b4622f864c17ad6c.dir - size: 20898162 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__flair_upos_metrics - md5: 0063866f11c1d82a4ba430f14c81ed8f.dir - size: 8844 - nfiles: 500 - pipeline_wikineiural_ner@15: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_luna - --asr=techmo - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__result - md5: 0e596570e1502b38588427bc72dcc006.dir - size: 9697519 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/techmo__wikineural_ner_alignment - md5: 6819012ed6e05015753687b12eea426c.dir - size: 37042138 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__wikineural_ner_metrics - md5: 47cd0bc5270bf7e3300c91f211b7a6f8.dir - size: 9011 - nfiles: 500 - pipeline_flair_upos@16: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_luna - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/wav2vec2__result - md5: 9c63b061ac7763144bca121e163ee7aa.dir - size: 20658485 - nfiles: 456 - outs: - - path: experiment_data/pipeline/pl_luna/wav2vec2__flair_upos_alignment - md5: ac11e3b8f28fd268a8bb90f851bc7b5b.dir - size: 17899335 - nfiles: 456 - - path: experiment_data/pipeline/pl_luna/wav2vec2__flair_upos_metrics - md5: 56807b890f4abcb9c41f60ff72a2e6aa.dir - size: 8051 - nfiles: 456 - pipeline_spacy_tag_wer@4: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__result - md5: 6310f1b24332777817738abe33676505.dir - size: 10121705 - nfiles: 8155 - outs: - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__spacy_dep_tag_alignment - md5: a0ad9d2e145bdd55da93137bcbd89ca0.dir - size: 21688710 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/whisper_tiny__spacy_dep_tag_metrics - md5: efb4cd381d95cfdbe40b09cdc251e3bc.dir - size: 95585 - nfiles: 8155 - pipeline_spacy_tag_wer@17: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14 - --asr=techmo - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__result - md5: 4c43636b4773f2bf9a2153ef3393a558.dir - size: 1336305 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/techmo__spacy_dep_tag_alignment - md5: 66efb4e5647eda2e2ab3116445bdf9b5.dir - size: 2666883 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__spacy_dep_tag_metrics - md5: 15c9ef006daec951119079da2794dcad.dir - size: 6736 - nfiles: 562 - pipeline_spacy_tag_wer@10: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna - --asr=google - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__result - md5: 8e4bf67df4dccd218d4d7c3de69688a4.dir - size: 5346497 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/google__spacy_dep_tag_alignment - md5: 5fc24c54101bce2e858b08f4c47e0667.dir - size: 19568605 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/google__spacy_dep_tag_metrics - md5: f64735e07b7c460895d1ccf8e4d0884c.dir - size: 8466 - nfiles: 500 - pipeline_spacy_tag_wer@0: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice - --asr=google - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/google__result - md5: afb53476cc93ef4de3591908df41fd2a.dir - size: 5854366 - nfiles: 8143 - outs: - - path: experiment_data/pipeline/pl_common_voice/google__spacy_dep_tag_alignment - md5: f043c22d203a7efd123232f1a2a6b4ad.dir - size: 18474982 - nfiles: 8143 - - path: experiment_data/pipeline/pl_common_voice/google__spacy_dep_tag_metrics - md5: f12ee96b46679884f65c59fef8ce74ea.dir - size: 96519 - nfiles: 8143 - pipeline_spacy_tag_wer@18: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14 - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__result - md5: 5658da01ecdce39ed99156bbc7f2dc62.dir - size: 3523907 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_dep_tag_alignment - md5: fd108bf3d67c339ebc12a0965a6e4c18.dir - size: 2752423 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_dep_tag_metrics - md5: 88f7aef65d580d59cdc78610dd98e616.dir - size: 7100 - nfiles: 562 - pipeline_spacy_tag_wer@15: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14 - --asr=google - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__result - md5: 3f7a79298a5156fd2b023e673326e72f.dir - size: 985004 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/google__spacy_dep_tag_alignment - md5: 76be498f539e45e5650695e29f22b436.dir - size: 2685018 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__spacy_dep_tag_metrics - md5: 96aea021765fd076fc534e9f09b29037.dir - size: 5117 - nfiles: 562 - pipeline_spacy_tag_wer@6: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs - --asr=ajn - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__result - md5: 545e63a6daf9c46387c1d7d40b85499f.dir - size: 1413262 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_dep_tag_alignment - md5: 8ec2e9fc88d4b8ce5032bf809c1c025f.dir - size: 3799802 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/ajn__spacy_dep_tag_metrics - md5: 0d95a4abb4a33aef3e242a304fd58698.dir - size: 10569 - nfiles: 758 - pipeline_spacy_tag_wer@14: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/whisper_tiny__result - md5: 0237d69dfa56d1a29a8bc43f464c4f1e.dir - size: 11789990 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/whisper_tiny__spacy_dep_tag_alignment - md5: 639560135c84aab006b3c08f9d3bd77e.dir - size: 25464153 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/whisper_tiny__spacy_dep_tag_metrics - md5: f28d26d045db695e6b236abe5a90ed4f.dir - size: 8555 - nfiles: 500 - pipeline_spacy_tag_wer@16: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14 - --asr=ajn - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/ajn__result - md5: 4094dd4b22895a0a4ce82793cd6a8b0f.dir - size: 974727 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/ajn__spacy_dep_tag_alignment - md5: 7635cf48e907c8d2939fa9ef6870cd6a.dir - size: 3200084 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/ajn__spacy_dep_tag_metrics - md5: 54a8912bab4cd197f01719d5340ef7e9.dir - size: 6867 - nfiles: 559 - pipeline_spacy_tag_wer@5: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs - --asr=google - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__result - md5: 6e0d7eb490eadd8dcc3c5452ba85932b.dir - size: 1377134 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_dep_tag_alignment - md5: c15aa30b165152fac6813cd092763242.dir - size: 3712618 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/google__spacy_dep_tag_metrics - md5: 56dddb48cea2022b91fd4323efd43a8b.dir - size: 9213 - nfiles: 758 - pipeline_spacy_tag_wer@2: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice - --asr=techmo - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/techmo__result - md5: 9030cf3640f2749d9c1b4439687bdc2f.dir - size: 7761880 - nfiles: 8136 - outs: - - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_dep_tag_alignment - md5: 7f52c28043e0a7b311bd39e877998834.dir - size: 18352367 - nfiles: 8136 - - path: experiment_data/pipeline/pl_common_voice/techmo__spacy_dep_tag_metrics - md5: 3d8555026f88adec199bcb380e30a0f0.dir - size: 96139 - nfiles: 8136 - pipeline_wikineiural_ner@18: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14 - --asr=google - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__result - md5: 3f7a79298a5156fd2b023e673326e72f.dir - size: 985004 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/google__wikineural_ner_alignment - md5: 539cc438ec67099bb95f58ccdde9d42f.dir - size: 5692456 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__wikineural_ner_metrics - md5: 0385f2138fd318df8852a7e38c1770ac.dir - size: 5322 - nfiles: 562 - pipeline_spacy_tag_wer@8: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__result - md5: bf9c77e34376bcda73dbdb6afee55c8c.dir - size: 5137721 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_dep_tag_alignment - md5: 8503661e0ee89ff96690e245f3144807.dir - size: 3772188 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/wav2vec2__spacy_dep_tag_metrics - md5: be82f4400be9dd5f01a7a4f73c357b1c.dir - size: 9083 - nfiles: 758 - pipeline_word_wer@18: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14 - --asr=google - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__result - md5: 3f7a79298a5156fd2b023e673326e72f.dir - size: 985004 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/google__word_wer_classic_alignment - md5: 9259147092c1555cd087a4fc4f428b2b.dir - size: 2725626 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__word_wer_classic_metrics - md5: 41621d3845a0035ee77352902c46a186.dir - size: 15012 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__word_wer_embeddings_alignment - md5: 07eff4b993990ceb973120d4515e3b9e.dir - size: 5493309 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__word_wer_embeddings_metrics - md5: e9ff0ac8eac5389bfc6ea7b13cd94a9c.dir - size: 31258 - nfiles: 562 - pipeline_spacy_tag_wer@12: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna - --asr=techmo - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__result - md5: 0e596570e1502b38588427bc72dcc006.dir - size: 9697519 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/techmo__spacy_dep_tag_alignment - md5: baefcd5dfadd9c62d6fc71ba0ac31fa9.dir - size: 20897599 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/techmo__spacy_dep_tag_metrics - md5: 1478707020a96496b50eb732207c290e.dir - size: 8841 - nfiles: 500 - pipeline_spacy_tag_wer@3: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice - --asr=ajn - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/ajn__result - md5: 77d873041fe2952e3c45ee4ac6458061.dir - size: 6667841 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - outs: - - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_dep_tag_alignment - md5: a735ec3c634bbe034cb67f7a54fb0d2f.dir - size: 19294281 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/ajn__spacy_dep_tag_metrics - md5: 516d26ee39867a1166c51edb014ad897.dir - size: 94253 - nfiles: 8155 - pipeline_spacy_tag_wer@11: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna - --asr=ajn - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/ajn__result - md5: 653d65e186a7d05958ce3cbef219038c.dir - size: 6159899 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - outs: - - path: experiment_data/pipeline/pl_luna/ajn__spacy_dep_tag_alignment - md5: 034d072825c711a824f1280f4a390f74.dir - size: 21936929 - nfiles: 494 - - path: experiment_data/pipeline/pl_luna/ajn__spacy_dep_tag_metrics - md5: 6c6bb673ea5f64c9d851878c9d8a7c09.dir - size: 8444 - nfiles: 494 - pipeline_spacy_tag_wer@9: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__result - md5: 7220584482e69b3a9f4b43fba97e97cf.dir - size: 2135360 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__spacy_dep_tag_alignment - md5: ce259eeea86c4eb2188170f3afc3d20b.dir - size: 4422013 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/whisper_tiny__spacy_dep_tag_metrics - md5: ae86609116018717054c6eac6e17a668.dir - size: 10339 - nfiles: 758 - pipeline_spacy_tag_wer@1: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_common_voice - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_common_voice - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_common_voice/gold_transcript - md5: e77ce22b14cb32594ae9cfcd4d6403b3.dir - size: 4093545 - nfiles: 8155 - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__result - md5: b0d0042d77d7adce37890ca63ad40091.dir - size: 19014997 - nfiles: 8154 - outs: - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_dep_tag_alignment - md5: 09fbe03eafa4948e0d3009ef392e9c40.dir - size: 18505763 - nfiles: 8154 - - path: experiment_data/pipeline/pl_common_voice/wav2vec2__spacy_dep_tag_metrics - md5: d1bc1925fe39ccb98e8bb085a1b1b24f.dir - size: 96041 - nfiles: 8154 - pipeline_flair_upos@18: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14 - --asr=google - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__result - md5: 3f7a79298a5156fd2b023e673326e72f.dir - size: 985004 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/google__flair_upos_alignment - md5: 531ca099d7ca7667993016c9237fb875.dir - size: 2641461 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__flair_upos_metrics - md5: 9f97126702b6268ce448649d4d7a3666.dir - size: 5000 - nfiles: 562 - pipeline_spacy_pos_wer@18: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14 - --asr=ajn - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/ajn__result - md5: 4094dd4b22895a0a4ce82793cd6a8b0f.dir - size: 974727 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/ajn__spacy_pos_alignment - md5: e9d82d54f5f44e6bfe9da4af3954f20d.dir - size: 3180963 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/ajn__spacy_pos_metrics - md5: a596f37b4eb1b7f3d04269fc1adb8a26.dir - size: 7003 - nfiles: 559 - pipeline_spacy_tag_wer@13: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_luna - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_luna - md5: d342155b1871e881797cf7da09d5dc3c.dir - size: 1578358645 - nfiles: 4500 - - path: experiment_data/pipeline/pl_luna/gold_transcript - md5: 4c3e09acb7ffac0ef5b117a38515e3a9.dir - size: 6706925 - nfiles: 500 - - path: experiment_data/pipeline/pl_luna/wav2vec2__result - md5: 9c63b061ac7763144bca121e163ee7aa.dir - size: 20658485 - nfiles: 456 - outs: - - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_dep_tag_alignment - md5: 6716464936f4f35ba81a43eb2c2f37b0.dir - size: 17967467 - nfiles: 456 - - path: experiment_data/pipeline/pl_luna/wav2vec2__spacy_dep_tag_metrics - md5: 7848ddff997fd231f3857ff30dfd7154.dir - size: 7940 - nfiles: 456 - pipeline_spacy_tag_wer@7: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_google_fleurs - --asr=techmo - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_google_fleurs - md5: dfcb8cf40b4a1e1a62f9ada00468cca9.dir - size: 236272072 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/gold_transcript - md5: 607f551eca5dabcca0caf31c87bd2ac6.dir - size: 975209 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__result - md5: 33c60c2b8bd57c3aedd7161256ad8cfa.dir - size: 1880403 - nfiles: 758 - outs: - - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_dep_tag_alignment - md5: 997d8e36cd023245065af9c1c3db1d72.dir - size: 3743812 - nfiles: 758 - - path: experiment_data/pipeline/pl_google_fleurs/techmo__spacy_dep_tag_metrics - md5: 386a8988937349d2ab69a4a335d0d270.dir - size: 9672 - nfiles: 758 - pipeline_spacy_ner_wer@18: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14 - --asr=google - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__result - md5: 3f7a79298a5156fd2b023e673326e72f.dir - size: 985004 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/google__spacy_ner_alignment - md5: 42116658821e33fc3360d8957bf68354.dir - size: 2599522 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/google__spacy_ner_metrics - md5: 4e363aa38b90467de244a923ddc14d98.dir - size: 4544 - nfiles: 562 - pipeline_asr_result@22: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14 - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_minds14/pl_minds14__wav2vec2 - md5: 6174de2a0578f456aae4e4e796dec805.dir - size: 2755451 - nfiles: 562 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - outs: - - path: experiment_data/pipeline/pl_minds14/wav2vec2__result - md5: 5658da01ecdce39ed99156bbc7f2dc62.dir - size: 3523907 - nfiles: 562 - pipeline_asr_result@21: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14 - --asr=techmo - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_minds14/pl_minds14__techmo - md5: 5fd62d52b1a39db98d86ff0a11dcfd52.dir - size: 612777 - nfiles: 562 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - outs: - - path: experiment_data/pipeline/pl_minds14/techmo__result - md5: 4c43636b4773f2bf9a2153ef3393a558.dir - size: 1336305 - nfiles: 562 - pipeline_asr_result@20: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14 - --asr=ajn - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_minds14/pl_minds14__ajn - md5: 41b35b832b72e132575e1c8bc777682f.dir - size: 205731 - nfiles: 562 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - outs: - - path: experiment_data/pipeline/pl_minds14/ajn__result - md5: 4094dd4b22895a0a4ce82793cd6a8b0f.dir - size: 974727 - nfiles: 559 - pipeline_asr_result@23: - cmd: PYTHONPATH=. python experiment/pipeline_process_asr.py --dataset=pl_minds14 - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_asr.py - md5: efe378505897550fe1a8d423c321ef53 - size: 1403 - - path: experiment_data/cached_asr/pl_minds14/pl_minds14__whisper_tiny - md5: 602bbd1622c4d9c1f147bafa16cd5811.dir - size: 845870 - nfiles: 562 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - outs: - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__result - md5: 2b54257ba731e5aba5c9264293e6c70d.dir - size: 1631174 - nfiles: 562 - pipeline_wikineiural_ner@21: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14 - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__result - md5: 5658da01ecdce39ed99156bbc7f2dc62.dir - size: 3523907 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/wav2vec2__wikineural_ner_alignment - md5: 926f05dfd25a3c0f1a35da955261f73b.dir - size: 5895431 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__wikineural_ner_metrics - md5: 6535a4779ab3be38804639e16cbbe70f.dir - size: 8602 - nfiles: 562 - pipeline_flair_upos@19: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14 - --asr=ajn - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/ajn__result - md5: 4094dd4b22895a0a4ce82793cd6a8b0f.dir - size: 974727 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/ajn__flair_upos_alignment - md5: 482f5c68c013bf1547b58dc470ba4cc5.dir - size: 3084431 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/ajn__flair_upos_metrics - md5: a7791cb5b7b405e98b12834e1146d1bb.dir - size: 7147 - nfiles: 559 - pipeline_spacy_ner_wer@21: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14 - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__result - md5: 5658da01ecdce39ed99156bbc7f2dc62.dir - size: 3523907 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_ner_alignment - md5: c29a8e30d0dfbf6591d62ea3884f0dc1.dir - size: 2653254 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_ner_metrics - md5: 353ba9400e092ad54b9ee26a39e44df3.dir - size: 6036 - nfiles: 562 - pipeline_spacy_pos_wer@20: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14 - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__result - md5: 5658da01ecdce39ed99156bbc7f2dc62.dir - size: 3523907 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_pos_alignment - md5: f594e5f289f7916ad6a70f81ff4677f8.dir - size: 2723830 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__spacy_pos_metrics - md5: 9cb542718053e6ff28d9e79b813c664e.dir - size: 7001 - nfiles: 562 - pipeline_spacy_tag_wer@19: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_dep_tag_wer.py --dataset=pl_minds14 - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_spacy_dep_tag_wer.py - md5: 83fc16ed68e85cfd89d8d84dc61d6d0f - size: 1489 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__result - md5: 2b54257ba731e5aba5c9264293e6c70d.dir - size: 1631174 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__spacy_dep_tag_alignment - md5: 9e828d1150d2351dc0b84a5294849da3.dir - size: 3281307 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__spacy_dep_tag_metrics - md5: 7ba8d5c7cd580cd8ddaba742ddd1b60a.dir - size: 6749 - nfiles: 562 - pipeline_spacy_ner_wer@19: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14 - --asr=ajn - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/ajn__result - md5: 4094dd4b22895a0a4ce82793cd6a8b0f.dir - size: 974727 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/ajn__spacy_ner_alignment - md5: 57c9eb8cfc9ce3e89e169248a8d46a78.dir - size: 3070194 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/ajn__spacy_ner_metrics - md5: 36cca512956bd2224ef86d126148272d.dir - size: 7332 - nfiles: 559 - pipeline_wikineiural_ner@20: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14 - --asr=techmo - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__result - md5: 4c43636b4773f2bf9a2153ef3393a558.dir - size: 1336305 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/techmo__wikineural_ner_alignment - md5: ff67cff35bc6b045895ad3e14613ab39.dir - size: 5641778 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__wikineural_ner_metrics - md5: 1025b67d8420594920a3cef4ed5c1a6a.dir - size: 8339 - nfiles: 562 - pipeline_flair_upos@20: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14 - --asr=techmo - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__result - md5: 4c43636b4773f2bf9a2153ef3393a558.dir - size: 1336305 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/techmo__flair_upos_alignment - md5: 348e7c7e61df45225b6c28f835a1f1ec.dir - size: 2632853 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__flair_upos_metrics - md5: eda987cf12cfa026c8906eaa434e6090.dir - size: 6285 - nfiles: 562 - pipeline_wikineiural_ner@19: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14 - --asr=ajn - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/ajn__result - md5: 4094dd4b22895a0a4ce82793cd6a8b0f.dir - size: 974727 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/ajn__wikineural_ner_alignment - md5: f9d5e3fce4551db9fa1d3ba32df06ee0.dir - size: 5689217 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/ajn__wikineural_ner_metrics - md5: 6e1b50f31e4bd4f264e6a6a91da6cb05.dir - size: 8593 - nfiles: 559 - pipeline_flair_upos@22: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14 - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__result - md5: 2b54257ba731e5aba5c9264293e6c70d.dir - size: 1631174 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__flair_upos_alignment - md5: 9eb00b7f2b7ad96bd458d2b10db04397.dir - size: 3174118 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__flair_upos_metrics - md5: b2667614ba07f82acb821c6a303380c8.dir - size: 7013 - nfiles: 562 - pipeline_word_wer@19: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14 - --asr=ajn - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/ajn__result - md5: 4094dd4b22895a0a4ce82793cd6a8b0f.dir - size: 974727 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_classic_alignment - md5: f5fd8a87dfcbf4e998b9a1d215186921.dir - size: 2851613 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_classic_metrics - md5: 6d6d430e7f3672c67b9c78eb7d0ea296.dir - size: 15213 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_embeddings_alignment - md5: 57207c7e05493bf4862dda0394d4ccde.dir - size: 5945687 - nfiles: 559 - - path: experiment_data/pipeline/pl_minds14/ajn__word_wer_embeddings_metrics - md5: aca6839fbd1332d3bb26833c95e8e6b0.dir - size: 22441 - nfiles: 559 - pipeline_wikineiural_ner@22: - cmd: PYTHONPATH=. python experiment/pipeline_process_wikineural_ner.py --dataset=pl_minds14 - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_wikineural_ner.py - md5: c2c4e92a33346a3c097a321f1f5f2af3 - size: 1802 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__result - md5: 2b54257ba731e5aba5c9264293e6c70d.dir - size: 1631174 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__wikineural_ner_alignment - md5: 83fb11da981b9d06686117efe97869e0.dir - size: 6168851 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__wikineural_ner_metrics - md5: bd9f3729a2a206312f7863c24353e755.dir - size: 8613 - nfiles: 562 - pipeline_flair_upos@21: - cmd: PYTHONPATH=. python experiment/pipeline_process_flair_upos.py --dataset=pl_minds14 - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_flair_upos.py - md5: 2ce2de99df9c06c5d9b0833ca7cdffda - size: 1916 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__result - md5: 5658da01ecdce39ed99156bbc7f2dc62.dir - size: 3523907 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/wav2vec2__flair_upos_alignment - md5: 2055e7e07fd4979aa284d48b6ea7a713.dir - size: 2702561 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__flair_upos_metrics - md5: 103738afc0ef4f24adf3b855181b1acb.dir - size: 6757 - nfiles: 562 - pipeline_spacy_ner_wer@20: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14 - --asr=techmo - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__result - md5: 4c43636b4773f2bf9a2153ef3393a558.dir - size: 1336305 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/techmo__spacy_ner_alignment - md5: d957f3f6216d2e749226df128b611e26.dir - size: 2573280 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__spacy_ner_metrics - md5: a9bb85e539da902ed87b5c78ab0d3133.dir - size: 5500 - nfiles: 562 - pipeline_word_wer@22: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14 - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__result - md5: 2b54257ba731e5aba5c9264293e6c70d.dir - size: 1631174 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__word_wer_classic_alignment - md5: cf3157d59e7af77248c524e504dc04f4.dir - size: 2916121 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__word_wer_classic_metrics - md5: 51e17aba487d6e86ae41b0a3379bb788.dir - size: 15967 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__word_wer_embeddings_alignment - md5: 10a6352d89a16e482f841d4766058794.dir - size: 6018264 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__word_wer_embeddings_metrics - md5: 365866e7c9c0db5b4041f39d0d9ed8f7.dir - size: 37953 - nfiles: 562 - pipeline_spacy_ner_wer@22: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_ner_wer.py --dataset=pl_minds14 - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__result - md5: 2b54257ba731e5aba5c9264293e6c70d.dir - size: 1631174 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__spacy_ner_alignment - md5: 3c4161f8db8a5f218ead787f77d2ff74.dir - size: 3161511 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__spacy_ner_metrics - md5: c55b2189a6033dd7bb183a479c73875d.dir - size: 7425 - nfiles: 562 - pipeline_spacy_pos_wer@21: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14 - --asr=whisper_tiny - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__result - md5: 2b54257ba731e5aba5c9264293e6c70d.dir - size: 1631174 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__spacy_pos_alignment - md5: 4727dbf6ca0c0867479b67f29041018d.dir - size: 3252193 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/whisper_tiny__spacy_pos_metrics - md5: 5c87afa3d3f1cdeb843416eec888446d.dir - size: 7030 - nfiles: 562 - pipeline_word_wer@20: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14 - --asr=techmo - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__result - md5: 4c43636b4773f2bf9a2153ef3393a558.dir - size: 1336305 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_classic_alignment - md5: 8cc588761bc5bd56fb867953da04ac35.dir - size: 2723010 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_classic_metrics - md5: b6a4f05d71bd3fbed796d54b1ac2b29d.dir - size: 17198 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_embeddings_alignment - md5: c2b20050da963980f2ca5bbed0b1457d.dir - size: 5505715 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__word_wer_embeddings_metrics - md5: e2448cce1cb93fa872b3898c00af62ba.dir - size: 34993 - nfiles: 562 - pipeline_spacy_pos_wer@19: - cmd: PYTHONPATH=. python experiment/pipeline_process_spacy_pos_wer.py --dataset=pl_minds14 - --asr=techmo - deps: - - path: experiment/pipeline_process_spacy_pos_wer.py - md5: 3817c96d1d91d2cf2d8ec7fe570f1472 - size: 1469 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__result - md5: 4c43636b4773f2bf9a2153ef3393a558.dir - size: 1336305 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/techmo__spacy_pos_alignment - md5: c1b9ede77bb9061f1c0507147029efee.dir - size: 2640298 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/techmo__spacy_pos_metrics - md5: 3e56923bc3b678ee5263e5c8436da9ca.dir - size: 6118 - nfiles: 562 - pipeline_word_wer@21: - cmd: PYTHONPATH=. python experiment/pipeline_process_word_wer.py --dataset=pl_minds14 - --asr=wav2vec2 - deps: - - path: experiment/pipeline_process_word_wer.py - md5: 98c7d6b43efbe0c2e84b5ad44d01fec9 - size: 2125 - - path: experiment_data/dataset/pl_minds14 - md5: d751713988987e9331980363e24189ce.dir - size: 0 - nfiles: 0 - - path: experiment_data/pipeline/pl_minds14/gold_transcript - md5: d4da8b3a8c5f044af1403d70d8f60fcf.dir - size: 689374 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__result - md5: 5658da01ecdce39ed99156bbc7f2dc62.dir - size: 3523907 - nfiles: 562 - outs: - - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_classic_alignment - md5: 171d4dddb9fd5b96479c62fa86ccfe57.dir - size: 2811807 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_classic_metrics - md5: a3477ae81fcba4a8e21b044f279425bb.dir - size: 17073 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_embeddings_alignment - md5: d5b7c18b6537344d7e0113ac8a8d9c28.dir - size: 5684553 - nfiles: 562 - - path: experiment_data/pipeline/pl_minds14/wav2vec2__word_wer_embeddings_metrics - md5: 64afcfdbe04279ed8dd3cc2b25e77093.dir - size: 37859 - nfiles: 562 diff --git a/experiment/dataset_helper.py b/experiment/dataset_helper.py index 964c4b8c3ab33c301a9eafd0aa341e3723d30e09..c245b76cd09eefa3328e53c280bd65639ad1119e 100644 --- a/experiment/dataset_helper.py +++ b/experiment/dataset_helper.py @@ -5,5 +5,5 @@ from sziszapangma.integration.record_path_provider import RecordPathProvider from sziszapangma.integration.relation_manager_provider import RelationManagerProvider -class DatasetHelper(RecordIdIterator, RecordPathProvider, RelationManagerProvider, ABC): +class DatasetHelper(RecordIdIterator, RecordPathProvider, ABC): pass diff --git a/new_datasets/import_datasets/upload_audio.py b/new_datasets/import_datasets/upload_audio.py index 3eb99bc7ffe54478b8b543f95f4df60e2f35acf3..51db0205292f2f3c9f496f244dfd1957ea7791ee 100644 --- a/new_datasets/import_datasets/upload_audio.py +++ b/new_datasets/import_datasets/upload_audio.py @@ -8,6 +8,7 @@ from hashlib import sha1 _TEMP_WAV_FILE = '_temp_file.wav' + def process_numpy_array_to_md5_hash(array: np.ndarray) -> str: return sha1(array).hexdigest() diff --git a/new_experiment/new_dependency_provider.py b/new_experiment/new_dependency_provider.py new file mode 100644 index 0000000000000000000000000000000000000000..3ec1412d13307e452e576970112831ba805b45f3 --- /dev/null +++ b/new_experiment/new_dependency_provider.py @@ -0,0 +1,22 @@ +from minio import Minio +from pymongo import MongoClient + +from new_experiment.utils.minio_audio_record_repository import MinioAudioRecordRepository +from sziszapangma.integration.repository.experiment_repository import ExperimentRepository +from sziszapangma.integration.repository.mongo_experiment_repository import MongoExperimentRepository + + +def get_minio_client() -> Minio: + return Minio('minio-asr-benchmarks.theliver.pl', 'minio_user', 'eUxzEQbyYPdzrLxuvvethSbk18kB2s7G') + + +def get_mongo_client() -> MongoClient: + return MongoClient('mongodb://root:example@mongo-asr-benchmarks.theliver.pl:27021/', ssl=True) + + +def get_experiment_repository(dataset_name: str) -> ExperimentRepository: + return MongoExperimentRepository(mongo_client=get_mongo_client(), database_name=dataset_name) + + +def get_minio_audio_record_repository() -> MinioAudioRecordRepository: + return MinioAudioRecordRepository(get_minio_client(), 'dataset-audio') diff --git a/new_experiment/pipeline/dataset_importer/__init__.py b/new_experiment/pipeline/dataset_importer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/new_experiment/pipeline/dataset_importer/fleurs_dataset_importer.py b/new_experiment/pipeline/dataset_importer/fleurs_dataset_importer.py new file mode 100644 index 0000000000000000000000000000000000000000..9b88c89112b4eef3fb620dbda4d34979a79ef133 --- /dev/null +++ b/new_experiment/pipeline/dataset_importer/fleurs_dataset_importer.py @@ -0,0 +1,30 @@ +from pathlib import Path +from typing import Dict, Any, List + +from minio import Minio +from nltk import RegexpTokenizer + +from new_datasets.import_datasets.upload_audio import process_numpy_array_to_md5_hash +from new_experiment.utils.hf_dataset_importer import HfDatasetImporter +from new_experiment.utils.minio_audio_record_repository import MinioAudioRecordRepository +from sziszapangma.integration.repository.experiment_repository import ExperimentRepository + + +class FleursDatasetImporter(HfDatasetImporter): + + def __init__(self, experiment_repository: ExperimentRepository, + minio_audio_record_repository: MinioAudioRecordRepository, dataset_name: str): + super().__init__(experiment_repository, minio_audio_record_repository, dataset_name) + + def get_words(self, record: Dict[str, Any]) -> List[str]: + tokenizer = RegexpTokenizer(r'\w+') + return tokenizer.tokenize(record['transcription']) + + def get_raw_transcription(self, record: Dict[str, Any]) -> str: + return record['transcription'] + + def get_audio_file(self, record: Dict[str, Any]) -> Path: + return record['path'] + + def get_record_id(self, record: Dict[str, Any]) -> str: + return record["id"] diff --git a/new_experiment/pipeline/dataset_importer/minds14_dataset_importer.py b/new_experiment/pipeline/dataset_importer/minds14_dataset_importer.py new file mode 100644 index 0000000000000000000000000000000000000000..b8e8e5e8b7252c9247dcffd6fe09939752a5391c --- /dev/null +++ b/new_experiment/pipeline/dataset_importer/minds14_dataset_importer.py @@ -0,0 +1,30 @@ +from pathlib import Path +from typing import Dict, Any, List + +from minio import Minio +from nltk import RegexpTokenizer + +from new_datasets.import_datasets.upload_audio import process_numpy_array_to_md5_hash +from new_experiment.utils.hf_dataset_importer import HfDatasetImporter +from new_experiment.utils.minio_audio_record_repository import MinioAudioRecordRepository +from sziszapangma.integration.repository.experiment_repository import ExperimentRepository + + +class Minds14DatasetImporter(HfDatasetImporter): + + def __init__(self, experiment_repository: ExperimentRepository, + minio_audio_record_repository: MinioAudioRecordRepository, dataset_name: str): + super().__init__(experiment_repository, minio_audio_record_repository, dataset_name) + + def get_words(self, record: Dict[str, Any]) -> List[str]: + tokenizer = RegexpTokenizer(r'\w+') + return tokenizer.tokenize(record['transcription']) + + def get_raw_transcription(self, record: Dict[str, Any]) -> str: + return record['transcription'] + + def get_audio_file(self, record: Dict[str, Any]) -> Path: + return record['path'] + + def get_record_id(self, record: Dict[str, Any]) -> str: + return process_numpy_array_to_md5_hash(record['audio']['array']) diff --git a/new_experiment/pipeline/dataset_importer/voxpopuli_dataset_importer.py b/new_experiment/pipeline/dataset_importer/voxpopuli_dataset_importer.py new file mode 100644 index 0000000000000000000000000000000000000000..d89ec959205c1a66169bfb29bbfd79b119adcfab --- /dev/null +++ b/new_experiment/pipeline/dataset_importer/voxpopuli_dataset_importer.py @@ -0,0 +1,30 @@ +from pathlib import Path +from typing import Dict, Any, List + +from minio import Minio +from nltk import RegexpTokenizer + +from new_datasets.import_datasets.upload_audio import process_numpy_array_to_md5_hash +from new_experiment.utils.hf_dataset_importer import HfDatasetImporter +from new_experiment.utils.minio_audio_record_repository import MinioAudioRecordRepository +from sziszapangma.integration.repository.experiment_repository import ExperimentRepository + + +class VoxpopuliDatasetImporter(HfDatasetImporter): + + def __init__(self, experiment_repository: ExperimentRepository, + minio_audio_record_repository: MinioAudioRecordRepository, dataset_name: str): + super().__init__(experiment_repository, minio_audio_record_repository, dataset_name) + + def get_words(self, record: Dict[str, Any]) -> List[str]: + tokenizer = RegexpTokenizer(r'\w+') + return tokenizer.tokenize(record['normalized_text']) + + def get_raw_transcription(self, record: Dict[str, Any]) -> str: + return record['raw_text'] + + def get_audio_file(self, record: Dict[str, Any]) -> Path: + return record['audio']['path'] + + def get_record_id(self, record: Dict[str, Any]) -> str: + return process_numpy_array_to_md5_hash(record['audio']['array']) diff --git a/new_experiment/pipeline/import_datasets.py b/new_experiment/pipeline/import_datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..1633088b377f28039d65dd65635580fac6ca23e2 --- /dev/null +++ b/new_experiment/pipeline/import_datasets.py @@ -0,0 +1,35 @@ +from datasets import Dataset, load_dataset + +from new_experiment.new_dependency_provider import get_experiment_repository, get_minio_client, \ + get_minio_audio_record_repository +from new_experiment.pipeline.dataset_importer.fleurs_dataset_importer import FleursDatasetImporter +from new_experiment.pipeline.dataset_importer.minds14_dataset_importer import Minds14DatasetImporter +from new_experiment.utils.hf_dataset_importer import HfDatasetImporter + + +def import_single_dataset(importer: HfDatasetImporter, dataset: Dataset): + importer.process_dataset(dataset) + + +def import_fleurs_dataset(dataset_lang: str, experiment_dataset_name: str): + import_single_dataset(FleursDatasetImporter( + get_experiment_repository(experiment_dataset_name), + get_minio_audio_record_repository(), + experiment_dataset_name, + ), load_dataset('google/fleurs', dataset_lang)) + + +def import_minds14_dataset(dataset_lang: str, experiment_dataset_name: str): + import_single_dataset(Minds14DatasetImporter( + get_experiment_repository(experiment_dataset_name), + get_minio_audio_record_repository(), + experiment_dataset_name, + ), load_dataset('PolyAI/minds14', dataset_lang)) + + +def import_voxpopuli_dataset(dataset_lang: str, experiment_dataset_name: str): + import_single_dataset(Minds14DatasetImporter( + get_experiment_repository(experiment_dataset_name), + get_minio_audio_record_repository(), + experiment_dataset_name, + ), load_dataset('facebook/voxpopuli', dataset_lang)) diff --git a/new_experiment/pipeline/import_fleurs.py b/new_experiment/pipeline/import_fleurs.py new file mode 100644 index 0000000000000000000000000000000000000000..f08197d50132213a8e23b313b04893bb0dae0b81 --- /dev/null +++ b/new_experiment/pipeline/import_fleurs.py @@ -0,0 +1,10 @@ +from new_experiment.pipeline.import_datasets import import_fleurs_dataset + +if __name__ == '__main__': + import_fleurs_dataset('nl_nl', 'nl_google_fleurs') + import_fleurs_dataset('fr_fr', 'fr_google_fleurs') + import_fleurs_dataset('de_de', 'de_google_fleurs') + import_fleurs_dataset('it_it', 'it_google_fleurs') + import_fleurs_dataset('pl_pl', 'pl_google_fleurs') + import_fleurs_dataset('es_419', 'es_google_fleurs') + import_fleurs_dataset('en_us', 'en_google_fleurs') diff --git a/new_experiment/utils/hf_dataset_importer.py b/new_experiment/utils/hf_dataset_importer.py new file mode 100644 index 0000000000000000000000000000000000000000..94db715eb3901c722c83583b7fbf77856247d998 --- /dev/null +++ b/new_experiment/utils/hf_dataset_importer.py @@ -0,0 +1,63 @@ +from abc import ABC, abstractmethod +from hashlib import sha1 +from pathlib import Path +from typing import List, Any, Dict + +import numpy as np +from datasets import Dataset +from minio import Minio + +from new_experiment.utils.minio_audio_record_repository import MinioAudioRecordRepository +from new_experiment.utils.property_helper import PropertyHelper +from sziszapangma.integration.repository.experiment_repository import ExperimentRepository +from sziszapangma.model.model_creators import create_new_word + + +class HfDatasetImporter(ABC): + _experiment_repository: ExperimentRepository + _minio_audio_record_repository: MinioAudioRecordRepository + _experiment_dataset_name: str + + def __init__(self, experiment_repository: ExperimentRepository, + minio_audio_record_repository: MinioAudioRecordRepository, experiment_dataset_name: str): + self._experiment_repository = experiment_repository + self._minio_audio_record_repository = minio_audio_record_repository + self._experiment_dataset_name = experiment_dataset_name + + @abstractmethod + def get_words(self, record: Dict[str, Any]) -> List[str]: + pass + + @abstractmethod + def get_raw_transcription(self, record: Dict[str, Any]) -> str: + pass + + @abstractmethod + def get_audio_file(self, record: Dict[str, Any]) -> Path: + pass + + @abstractmethod + def get_record_id(self, record: Dict[str, Any]) -> str: + pass + + def process_dataset(self, dataset: Dataset): + for it in dataset: + self.process_record(it) + + def process_record(self, record: Dict[str, Any]): + record_id = self.get_record_id(record) + words = [create_new_word(it) for it in self.get_words(record)] + raw_transcription = self.get_raw_transcription(record) + audio_file_path = self.get_audio_file(record) + self._experiment_repository.update_property_for_key( + record_id=record_id, + property_name=PropertyHelper.get_gold_transcript_words(), + property_value=words + ) + self._experiment_repository.update_property_for_key( + record_id=record_id, + property_name=PropertyHelper.get_gold_transcript_raw(), + property_value={'gold_transcript_raw': raw_transcription} + ) + # TODO uncomment + # self._minio_audio_record_repository.save_file(audio_file_path, self._experiment_dataset_name, record_id) diff --git a/new_experiment/utils/id_calculator.py b/new_experiment/utils/id_calculator.py new file mode 100644 index 0000000000000000000000000000000000000000..15419d1850cc9ae323e3bf1854504f2bb37951a4 --- /dev/null +++ b/new_experiment/utils/id_calculator.py @@ -0,0 +1,7 @@ +from hashlib import sha1 + +import numpy as np + + +def process_numpy_array_to_md5_hash(array: np.ndarray) -> str: + return sha1(array).hexdigest() diff --git a/new_experiment/utils/loaded_remote_dataset_helper.py b/new_experiment/utils/loaded_remote_dataset_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..78dc8bf3a6d7a8ef0d2a5c3abd4317e000d950de --- /dev/null +++ b/new_experiment/utils/loaded_remote_dataset_helper.py @@ -0,0 +1,32 @@ +from pathlib import Path +from typing import Set + +from minio import Minio +from urllib3 import HTTPResponse + +from experiment.dataset_helper import DatasetHelper +from new_experiment.utils.minio_audio_record_repository import MinioRecordRepository +from new_experiment.utils.property_helper import PropertyHelper +from sziszapangma.integration.repository.experiment_repository import ExperimentRepository + + +class LoadedRemoteDatasetHelper(DatasetHelper): + _experiment_repository: ExperimentRepository + _minio_record_repository: MinioRecordRepository + _dataset_name: str + + def __init__(self, experiment_repository: ExperimentRepository, minio_record_repository: MinioRecordRepository, + dataset_name: str): + self._experiment_repository = experiment_repository + self._minio_record_repository = minio_record_repository + self._dataset_name = dataset_name + + def get_all_records(self) -> Set[str]: + return self._experiment_repository.get_all_record_ids_for_property(PropertyHelper.get_gold_transcript_words()) + + def get_path(self, record_id: str) -> str: + record_path = Path.home() / f'.cache/asr_benchmark/{self._dataset_name}/{record_id}.wav' + if record_path.exists(): + return record_path.as_posix() + self._minio_record_repository.save_file(record_path, self._dataset_name, record_id) + return record_path.as_posix() diff --git a/new_experiment/utils/minio_audio_record_repository.py b/new_experiment/utils/minio_audio_record_repository.py new file mode 100644 index 0000000000000000000000000000000000000000..0c699c8f5c71566988b05312ae798d7eaca30866 --- /dev/null +++ b/new_experiment/utils/minio_audio_record_repository.py @@ -0,0 +1,28 @@ +from pathlib import Path + +from minio import Minio +from urllib3 import HTTPResponse + + +class MinioAudioRecordRepository: + _minio: Minio + _bucket: str + + def __init__(self, minio: Minio, bucket: str): + self._minio = minio + self._bucket = bucket + + @staticmethod + def _get_record_path(dataset_name: str, record_id: str) -> str: + return f'{dataset_name}/{record_id}.wav' + + def save_file(self, local_path: Path, dataset_name: str, record_id: str): + self._minio.put_object(self._bucket, self._get_record_path(dataset_name, record_id), open(local_path, 'rb'), + len(open(local_path, 'rb').read())) + + def load_file(self, local_path: Path, dataset_name: str, record_id: str): + record_response: HTTPResponse = self._minio.get_object(self._bucket, + self._get_record_path(dataset_name, record_id)) + local_path.parent.mkdir(parents=True, exist_ok=True) + with open(local_path, 'wb') as writer: + writer.write(record_response.data) diff --git a/new_experiment/utils/property_helper.py b/new_experiment/utils/property_helper.py index 7cff60d3bc95a8573d9cab7c90db508db89a8f7d..f00ebe88c470938b03b0dc4303cc5a6fd21ae0b5 100644 --- a/new_experiment/utils/property_helper.py +++ b/new_experiment/utils/property_helper.py @@ -1,8 +1,8 @@ class PropertyHelper: @staticmethod - def get_gold_transcript() -> str: - return 'gold_transcript' + def get_gold_transcript_words() -> str: + return 'gold_transcript_words' @staticmethod def get_gold_transcript_raw() -> str: diff --git a/poetry.lock b/poetry.lock index bd253e19184751c7f9a76128a90ad8d515dd637d..236cdabaa77f7ef4bd318a8011bb4a3b8c9c3fdd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -738,14 +738,14 @@ test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"] [[package]] name = "confection" -version = "0.0.3" +version = "0.0.4" description = "The sweetest config system for Python" category = "main" optional = false python-versions = ">=3.6" files = [ - {file = "confection-0.0.3-py3-none-any.whl", hash = "sha256:51af839c1240430421da2b248541ebc95f9d0ee385bcafa768b8acdbd2b0111d"}, - {file = "confection-0.0.3.tar.gz", hash = "sha256:4fec47190057c43c9acbecb8b1b87a9bf31c469caa0d6888a5b9384432fdba5a"}, + {file = "confection-0.0.4-py3-none-any.whl", hash = "sha256:aeac5919ba770c7b281aa5863bb6b0efed42568a7ad8ea26b6cb632154503fb2"}, + {file = "confection-0.0.4.tar.gz", hash = "sha256:b1ddf5885da635f0e260a40b339730806dfb1bd17d30e08764f35af841b04ecf"}, ] [package.dependencies] @@ -1113,14 +1113,14 @@ wmi = ["wmi (>=1.5.1,<2.0.0)"] [[package]] name = "dpath" -version = "2.1.3" +version = "2.1.4" description = "Filesystem-like pathing and searching for dictionaries" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "dpath-2.1.3-py3-none-any.whl", hash = "sha256:d9560e03ccd83b3c6f29988b0162ce9b34fd28b9d8dbda46663b20c68d9cdae3"}, - {file = "dpath-2.1.3.tar.gz", hash = "sha256:d1a7a0e6427d0a4156c792c82caf1f0109603f68ace792e36ca4596fd2cb8d9d"}, + {file = "dpath-2.1.4-py3-none-any.whl", hash = "sha256:3a4f6cc07e3a1b34bc73baa3a6854ee0a48fb2cf18a8c9b1911b66fd72afaa85"}, + {file = "dpath-2.1.4.tar.gz", hash = "sha256:3380a77d0db4abf104125860ff6eb4bd07c97c65b81aad42a609717089a1bed0"}, ] [[package]] @@ -1207,14 +1207,14 @@ pgp = ["gpg"] [[package]] name = "dvc" -version = "2.39.0" +version = "2.41.1" description = "Git for data scientists - manage your code and data together" category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "dvc-2.39.0-py3-none-any.whl", hash = "sha256:e0765516c85efac36148804c01fadaf19f4b3a862675499814a96a127a2fa9c1"}, - {file = "dvc-2.39.0.tar.gz", hash = "sha256:efb00f96b296446c951107d4d82a2fb0b5be4c457d3c9f2d285fbcbf1f7be266"}, + {file = "dvc-2.41.1-py3-none-any.whl", hash = "sha256:a3fafd63c3e9bedd278add480d62be38b7f51db891e14e37d754c986c6bc284d"}, + {file = "dvc-2.41.1.tar.gz", hash = "sha256:d875358ce2463b0b93de1e4a91ee8ca960793b96462048e72f40d3168ae5e97c"}, ] [package.dependencies] @@ -1223,7 +1223,7 @@ colorama = ">=0.3.9" configobj = ">=5.0.6" distro = ">=1.3.0" dpath = ">=2.0.2,<3" -dvc-data = "0.28.5" +dvc-data = "0.29.0" dvc-http = "*" dvc-render = "0.0.17" dvc-s3 = {version = "2.21.0", optional = true, markers = "extra == \"s3\""} @@ -1270,28 +1270,28 @@ ssh = ["dvc-ssh (==2.20.0)"] ssh-gssapi = ["dvc-ssh[gssapi] (==2.20.0)"] terraform = ["tpi[ssh] (>=2.1.0)"] testing = ["pytest-test-utils (==0.0.8)"] -tests = ["beautifulsoup4 (==4.11.1)", "dvc-ssh (==2.20.0)", "filelock (==3.9.0)", "flaky (==3.7.0)", "mypy (==0.991)", "pylint (==2.15.9)", "pylint-plugin-utils (==0.7)", "pylint-pytest (==1.1.2)", "pytest (==7.2.0)", "pytest-cov (==4.0.0)", "pytest-docker (==0.11.0)", "pytest-lazy-fixture (==0.6.3)", "pytest-mock (==3.10.0)", "pytest-test-utils (==0.0.8)", "pytest-timeout (==2.1.0)", "pytest-xdist (==3.1.0)", "pywin32 (>=225)", "tpi[ssh] (>=2.1.0)", "types-requests", "types-tabulate", "types-toml"] +tests = ["beautifulsoup4 (==4.11.1)", "dvc-ssh (==2.20.0)", "filelock (==3.9.0)", "flaky (==3.7.0)", "mypy (==0.991)", "pylint (==2.15.10)", "pylint-plugin-utils (==0.7)", "pylint-pytest (==1.1.2)", "pytest (==7.2.0)", "pytest-cov (==4.0.0)", "pytest-docker (==0.11.0)", "pytest-lazy-fixture (==0.6.3)", "pytest-mock (==3.10.0)", "pytest-test-utils (==0.0.8)", "pytest-timeout (==2.1.0)", "pytest-xdist (==3.1.0)", "pywin32 (>=225)", "tpi[ssh] (>=2.1.0)", "types-requests", "types-tabulate", "types-toml"] webdav = ["dvc-webdav (==2.19.0)"] webhdfs = ["dvc-webhdfs (==2.19.0)"] webhdfs-kerberos = ["dvc-webhdfs[kerberos] (==2.19.0)"] [[package]] name = "dvc-data" -version = "0.28.5" +version = "0.29.0" description = "dvc data" category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "dvc-data-0.28.5.tar.gz", hash = "sha256:cf6ae7a7d9220fcd223504427829ef7e9b6088e94b83e7d2882a9846eafb85cb"}, - {file = "dvc_data-0.28.5-py3-none-any.whl", hash = "sha256:dc5fc5e4d20e20f595a5b84bf1b623652e3d65804f37cb6c937a75e4fc1c0bad"}, + {file = "dvc-data-0.29.0.tar.gz", hash = "sha256:3385054422b66738948690d96944bc547f2f26edfa5776bc0c0a4f9f64a8cc42"}, + {file = "dvc_data-0.29.0-py3-none-any.whl", hash = "sha256:004ee9c20971e17c26dbf0ef59ce38dde1c03612e55d81a91b1331ff71774bb5"}, ] [package.dependencies] attrs = ">=21.3.0" dictdiffer = ">=0.8.1" diskcache = ">=5.2.1" -dvc-objects = "0.14.0" +dvc-objects = "0.14.1" funcy = ">=1.14" nanotime = ">=0.5.2" pygtrie = ">=2.3.2" @@ -1325,14 +1325,14 @@ tests = ["dvc[testing]", "flaky (==3.7.0)", "mypy (==0.910)", "pylint (==2.15.9) [[package]] name = "dvc-objects" -version = "0.14.0" +version = "0.14.1" description = "dvc objects" category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "dvc-objects-0.14.0.tar.gz", hash = "sha256:039b7be98f4c8325bd90e2869072e3a776bf8e27419723b459704d7cd984e11f"}, - {file = "dvc_objects-0.14.0-py3-none-any.whl", hash = "sha256:8767ab7df049dbd41ed318bdc402976073c51b0d0c95aaa48dd13d337572fcc2"}, + {file = "dvc-objects-0.14.1.tar.gz", hash = "sha256:181ad4eadaee592cd37a5027b2c8b4727ca00aba7ae60e8417075dd4e5ef0d42"}, + {file = "dvc_objects-0.14.1-py3-none-any.whl", hash = "sha256:36587b4a0e1eec1778998070fd804cd5feac33b03fe7f8bbcb191110631977ec"}, ] [package.dependencies] @@ -2757,6 +2757,7 @@ packaging = ">=20.0" pillow = ">=6.2.0" pyparsing = ">=2.2.1" python-dateutil = ">=2.7" +setuptools_scm = ">=7" [[package]] name = "miniful" @@ -3256,14 +3257,14 @@ files = [ [[package]] name = "packaging" -version = "22.0" +version = "23.0" description = "Core utilities for Python packages" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "packaging-22.0-py3-none-any.whl", hash = "sha256:957e2148ba0e1a3b282772e791ef1d8083648bc131c8ab0c1feba110ce1146c3"}, - {file = "packaging-22.0.tar.gz", hash = "sha256:2198ec20bd4c017b8f9717e00f0c8714076fc2fd93816750ab48e2c41de2cfd3"}, + {file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"}, + {file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"}, ] [[package]] @@ -4393,7 +4394,6 @@ files = [ {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-win_amd64.whl", hash = "sha256:d000f258cf42fec2b1bbf2863c61d7b8918d31ffee905da62dede869254d3b8a"}, {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:045e0626baf1c52e5527bd5db361bc83180faaba2ff586e763d3d5982a876a9e"}, {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-macosx_12_6_arm64.whl", hash = "sha256:721bc4ba4525f53f6a611ec0967bdcee61b31df5a56801281027a3a6d1c2daf5"}, - {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:41d0f1fa4c6830176eef5b276af04c89320ea616655d01327d5ce65e50575c94"}, {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:4b3a93bb9bc662fc1f99c5c3ea8e623d8b23ad22f861eb6fce9377ac07ad6072"}, {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-macosx_12_0_arm64.whl", hash = "sha256:a234a20ae07e8469da311e182e70ef6b199d0fbeb6c6cc2901204dd87fb867e8"}, {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:15910ef4f3e537eea7fe45f8a5d19997479940d9196f357152a09031c5be59f3"}, @@ -4649,6 +4649,28 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-g testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] +[[package]] +name = "setuptools-scm" +version = "7.1.0" +description = "the blessed package to manage your versions by scm tags" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "setuptools_scm-7.1.0-py3-none-any.whl", hash = "sha256:73988b6d848709e2af142aa48c986ea29592bbcfca5375678064708205253d8e"}, + {file = "setuptools_scm-7.1.0.tar.gz", hash = "sha256:6c508345a771aad7d56ebff0e70628bf2b0ec7573762be9960214730de278f27"}, +] + +[package.dependencies] +packaging = ">=20.0" +setuptools = "*" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} +typing-extensions = "*" + +[package.extras] +test = ["pytest (>=6.2)", "virtualenv (>20)"] +toml = ["setuptools (>=42)"] + [[package]] name = "shortuuid" version = "1.0.11" @@ -5091,7 +5113,7 @@ testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] name = "tomli" version = "2.0.1" description = "A lil' TOML parser" -category = "dev" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -5306,14 +5328,14 @@ files = [ [[package]] name = "urllib3" -version = "1.26.13" +version = "1.26.14" description = "HTTP library with thread-safe connection pooling, file post, and more." category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ - {file = "urllib3-1.26.13-py2.py3-none-any.whl", hash = "sha256:47cc05d99aaa09c9e72ed5809b60e7ba354e64b59c9c173ac3018642d8bb41fc"}, - {file = "urllib3-1.26.13.tar.gz", hash = "sha256:c083dd0dce68dbfbe1129d5271cb90f9447dea7d52097c6e0126120c521ddea8"}, + {file = "urllib3-1.26.14-py2.py3-none-any.whl", hash = "sha256:75edcdc2f7d85b137124a6c3c9fc3933cdeaa12ecb9a6a959f22797a0feca7e1"}, + {file = "urllib3-1.26.14.tar.gz", hash = "sha256:076907bf8fd355cde77728471316625a4d2f7e713c125f51953bb5b3eecf4f72"}, ] [package.extras] @@ -5419,7 +5441,7 @@ dev = ["pytest"] type = "git" url = "https://github.com/openai/whisper.git" reference = "HEAD" -resolved_reference = "28769fcfe50755a817ab922a7bc83483159600a9" +resolved_reference = "f82bc59f5ea234d4b97fb2860842ed38519f7e65" [[package]] name = "wikipedia-api" diff --git a/pyproject.toml b/pyproject.toml index e7b17bc4e61b85de61028bf6dbfa3549975b51b0..69c306443cf025893bd363b6f9eff254df2f5bd9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ spacy = "^3.4.4" pysoundfile = "^0.9.0.post1" datasets = "^2.8.0" torchaudio = "^0.13.1" -flair = "^0.11.3" +#flair = "^0.11.3" whisper = {git = "https://github.com/openai/whisper.git"} ffmpeg-python = "^0.2.0" pydub = "^0.25.1" diff --git a/sziszapangma/integration/repository/experiment_repository.py b/sziszapangma/integration/repository/experiment_repository.py index 4e01d5565d224faa235f51aa2f1eb58279b2f861..e50666dab0f4553dbe7495f742d935b2ed6d2b55 100644 --- a/sziszapangma/integration/repository/experiment_repository.py +++ b/sziszapangma/integration/repository/experiment_repository.py @@ -30,6 +30,10 @@ class ExperimentRepository(ABC): def get_all_record_ids(self) -> Set[str]: """Methods returns all possible record ids.""" + @abstractmethod + def get_all_record_ids_for_property(self, property_name: str) -> Set[str]: + """Methods returns all possible record ids.""" + @abstractmethod def get_all_properties(self) -> Set[str]: """Methods returns all possible properties.""" diff --git a/sziszapangma/integration/repository/mongo_experiment_repository.py b/sziszapangma/integration/repository/mongo_experiment_repository.py index b169735a55a6e36a1ffb66e0abc1d0901658bb4f..6c87a1dc87b4e83c636b8006bebb5ac8ef3202c5 100644 --- a/sziszapangma/integration/repository/mongo_experiment_repository.py +++ b/sziszapangma/integration/repository/mongo_experiment_repository.py @@ -49,9 +49,12 @@ class MongoExperimentRepository(ExperimentRepository): def get_all_record_ids(self) -> Set[str]: records = set() for collection_name in self.get_all_properties(): - for record in self._get_database()[collection_name].find(): - records.add(record[ID]) + for record_id in self.get_all_record_ids_for_property(collection_name): + records.add(record_id) return records + def get_all_record_ids_for_property(self, property_name: str) -> Set[str]: + return set(record[ID] for record in self._get_database()[property_name].find()) + def get_all_properties(self) -> Set[str]: return set(self._get_database().list_collection_names()) diff --git a/sziszapangma/integration/task/processing_task.py b/sziszapangma/integration/task/processing_task.py index 5c03afb465b099c246e5d95069c7976e0fab935c..be9e52391acc5415adee1700db848a18eafc2a6b 100644 --- a/sziszapangma/integration/task/processing_task.py +++ b/sziszapangma/integration/task/processing_task.py @@ -20,8 +20,7 @@ class ProcessingTask(ABC): def run_single_process( self, record_id: str, - experiment_repository: ExperimentRepository, - relation_manager: RelationManager, + experiment_repository: ExperimentRepository ): pass @@ -32,8 +31,7 @@ class ProcessingTask(ABC): def process( self, record_id_iterator: RecordIdIterator, - experiment_repository: ExperimentRepository, - relation_manager_provider: RelationManagerProvider, + experiment_repository: ExperimentRepository ): records_ids = list(record_id_iterator.get_all_records()) for record_index in range(len(records_ids)): @@ -46,8 +44,7 @@ class ProcessingTask(ABC): if not skip or self._require_update: print(base_log) try: - relation_manager = relation_manager_provider.get_relation_manager(record_id) - self.run_single_process(record_id, experiment_repository, relation_manager) + self.run_single_process(record_id, experiment_repository) except Exception as err: print("Handling run-time error:", err) traceback.print_exc()