diff --git a/docker/ajn_asr/main.py b/docker/ajn_asr/main.py index 150ddde32759f53482c3f395601fe2f967d90b4c..bf69371d360bdde4dec3aa9b08af03b4b357a04b 100644 --- a/docker/ajn_asr/main.py +++ b/docker/ajn_asr/main.py @@ -7,12 +7,20 @@ from sziszapangma.integration.service_core.asr.asr_result import AsrResult class SpeechbrainAsrProcessor(AsrBaseProcessor): + @staticmethod + def _process_file_to_correct_format(file_path: str, extension: str): + temp_file = f'{str(uuid.uuid4())}.{extension}' + os.system(f'ffmpeg -i {file_path} -ar 16000 {temp_file}') + # os.remove(file_path) + os.rename(temp_file, file_path) + def process_asr(self, audio_file_path: str) -> AsrResult: file_tag = str(uuid.uuid4()) file_extension = audio_file_path.split('.')[-1] file_name = f'{file_tag}.{file_extension}' result_file_path = f'processing_flask/{file_tag}.txt' file_path = f'processing_flask/{file_name}' + self._process_file_to_correct_format(audio_file_path, file_extension) # create file in /data/uuid.ext os.system(f"cp {audio_file_path} /data/{file_path}") diff --git a/docker/ajn_asr/prepare_docker.sh b/docker/ajn_asr/prepare_docker.sh index c7e687d087cba4d4c5fd7305b9ec8b6afb8fe431..711edd2c16d03bde15460d7f307599cb3d5bc109 100755 --- a/docker/ajn_asr/prepare_docker.sh +++ b/docker/ajn_asr/prepare_docker.sh @@ -3,5 +3,5 @@ SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" docker build -t asr-clarin-pl-service "$SCRIPT_DIR" -docker tag asr-clarin-pl-service gitlab.clarin-pl.eu:5050/clarin-dialog/clarin-dialog/asr-clarin-pl-service:1.4 -docker push gitlab.clarin-pl.eu:5050/clarin-dialog/clarin-dialog/asr-clarin-pl-service:1.4 +docker tag asr-clarin-pl-service gitlab.clarin-pl.eu:5050/clarin-dialog/clarin-dialog/asr-clarin-pl-service:1.5 +docker push gitlab.clarin-pl.eu:5050/clarin-dialog/clarin-dialog/asr-clarin-pl-service:1.5 diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 
d4486f1d65ce5621a00b1c80b6e63ae16a04e00e..b2fbf23ff98662ccaa5da524f131f7b9ebebd57a 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -18,34 +18,34 @@ services: # - TECHMO_SERVER_URL=156.17.135.34 # - AUTH_TOKEN=__example_token__ - transformers-wav2vec2for_ctc: - image: docker-registry.theliver.pl/transformers-wav2vec2for_ctc:1.0 - container_name: transformers-wav2vec2for_ctc + # transformers-wav2vec2for_ctc: + # image: gitlab.clarin-pl.eu:5050/clarin-dialog/clarin-dialog/transformers-wav2vec2for_ctc:1.0 + # container_name: transformers-wav2vec2for_ctc + # restart: always + # volumes: + # - /etc/localtime:/etc/localtime:ro + # - ./wav2vec2for_ctc_models:/models + # ports: + # - "5430:5000" + # environment: + # - AUTH_TOKEN=__example_token__ + # - MODEL_NAME=jonatasgrosman/wav2vec2-large-xlsr-53-polish + # - SAMPLING_RATE=16000 + + embedding_service: + image: docker-registry.theliver.pl/embedding_docker:1.0 + container_name: embeddings_service restart: always - volumes: - - /etc/localtime:/etc/localtime:ro - - ./wav2vec2for_ctc_models:/models ports: - - "5430:5000" + - "5003:5000" environment: - - AUTH_TOKEN=__example_token__ - - MODEL_NAME=jonatasgrosman/wav2vec2-large-xlsr-53-polish - - SAMPLING_RATE=16000 - - # embedding_service: - # image: docker-registry.theliver.pl/embedding_docker:1.0 - # container_name: embeddings_service - # restart: always - # ports: - # - 5003:5000 - # environment: - # - AUTH_TOKEN=__example_token__ - # volumes: - # - /etc/localtime:/etc/localtime:ro - # - ./embedding_models:/models + - AUTH_TOKEN=fjsd-mkwe-oius-m9h2 + volumes: + - /etc/localtime:/etc/localtime:ro + - ./embedding_models:/models ajn_asr: - image: docker-registry.theliver.pl/asr-clarin-pl-service:1.4 + image: gitlab.clarin-pl.eu:5050/clarin-dialog/clarin-dialog/asr-clarin-pl-service:1.5 container_name: ajn_asr restart: always ports: @@ -55,14 +55,14 @@ services: volumes: - /etc/localtime:/etc/localtime:ro - speechbrain_asr: - image: 
docker-registry.theliver.pl/speechbrain-asr:1.5 - container_name: speechbrain_asr - restart: always - ports: - - "5432:5000" - volumes: - - /etc/localtime:/etc/localtime:ro - - ./speechbrain_asr_models:/models - environment: - - AUTH_TOKEN=__example_token__ +# speechbrain_asr: +# image: gitlab.clarin-pl.eu:5050/clarin-dialog/clarin-dialog/speechbrain-asr:1.5 +# container_name: speechbrain_asr +# restart: always +# ports: +# - "5432:5000" +# volumes: +# - /etc/localtime:/etc/localtime:ro +# - ./speechbrain_asr_models:/models +# environment: +# - AUTH_TOKEN=__example_token__ diff --git a/dvc.lock b/dvc.lock index 7a9261df1e54c57a45cf1d7152c4df94780b4dfd..85522fea163f790c37506b278f906fb85d4a4c8c 100644 --- a/dvc.lock +++ b/dvc.lock @@ -52,7 +52,7 @@ stages: size: 229007155 nfiles: 1000 voicelab_import_to_common_format: - cmd: PYTHONPATH=. python experiment/voicelab/import_data.py + cmd: PYTHONPATH=. python -u experiment/voicelab/import_data.py deps: - path: experiment/voicelab/import_data.py md5: 41acb98a1517e66c052182fe0a1403ba @@ -63,7 +63,7 @@ stages: nfiles: 1600 outs: - path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322 - md5: 926ef9bab4ce41b9de95f2f3d5ab67a0.dir + md5: 4046ea5d80966f0c017b2c4bec0e7c9b.dir size: 110711470 nfiles: 1600 luna_gold_transcript_processing: @@ -93,8 +93,12 @@ stages: cmd: "PYTHONPATH=. 
python experiment/luna/pipeline/luna_ajn_asr_processing.py\n" deps: - path: experiment/luna/pipeline/luna_ajn_asr_processing.py - md5: ec7d7b5384f845173d9fb77e9cfa9907 - size: 2501 + md5: 2d66cb8890c420b55e8b7eb33ac32ba2 + size: 3558 + - path: experiment_data/cached_asr/luna_ajn_polish_asr + md5: 620e178854dbcb69f49a608f34573a88.dir + size: 6159899 + nfiles: 494 - path: experiment_data/dataset/LUNA.PL md5: d342155b1871e881797cf7da09d5dc3c.dir size: 1578358645 @@ -109,35 +113,43 @@ stages: nfiles: 500 outs: - path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr - md5: 620e178854dbcb69f49a608f34573a88.dir - size: 6159899 - nfiles: 494 + md5: fa9d926ae8fd0268c71f19c1d5d39fcf.dir + size: 11080541 + nfiles: 499 - path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy - md5: 312be284d4ec9e38986048e785fcbbc1.dir - size: 6535212 - nfiles: 494 + md5: 417d8f07266eb5da9c4bfbf84f3b4eac.dir + size: 6579351 + nfiles: 499 - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer - md5: 8ad558edb6a8bd2508a7e25bcf53bf94.dir - size: 21936929 - nfiles: 494 + md5: 2bf746c412e6bff4071f689d853b106f.dir + size: 22061350 + nfiles: 499 - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer - md5: 98c74c5bf87637749eac1ed5ff3393b4.dir - size: 16842 - nfiles: 494 + md5: 3147413bdfd36ad91c64303e8705951b.dir + size: 17002 + nfiles: 499 - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer - md5: 1741fff740259398b28bf2a6ba3aec41.dir - size: 20671277 - nfiles: 494 + md5: 2bb11f8a97cdeb18c557fadb49a6f015.dir + size: 25669158 + nfiles: 499 + - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer_embeddings + md5: c2824c0c5cf433dbf864ebbdc2fb3cfc.dir + size: 44326962 + nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer - md5: 18605657ff9c7ef3221e27b671a3b4d1.dir - size: 16835 - nfiles: 494 + md5: c48c74eccf1cfd0768900514d2fcfd1b.dir + size: 10527 + nfiles: 499 + - path: 
experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer_embeddings + md5: 98a7edeee3b630e8e301acfc578a8393.dir + size: 34869 + nfiles: 500 luna_techmo_processing: cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py\n" deps: - path: experiment/luna/pipeline/luna_techmo_processing.py - md5: b4d5ad7a0d7fb0714a2dc02cb457e8c9 - size: 2628 + md5: 75069cd6e3a61dfaaf49c2bdb1e81976 + size: 3416 - path: experiment_data/cached_asr/luna_techmo md5: 033ea7b5434dded73bf869bfdd299462.dir size: 4256479 @@ -156,7 +168,7 @@ stages: nfiles: 500 outs: - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer - md5: c71539f3889c627a371957958bd0907d.dir + md5: 94762d19a853810064afd38319d05a2c.dir size: 20897599 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer @@ -164,21 +176,29 @@ stages: size: 17341 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr - md5: acfaec46b2415ed6a64e3a3464d164f8.dir + md5: 3787c6a4c7941787253165e2ba760e73.dir size: 9697519 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy - md5: e869581816457d1585a7e42d0a18b8b2.dir + md5: 337b6bf947ee47cda30b3cc75f954e8e.dir size: 6124559 nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer - md5: 0dabd65b3981d588cd23d943abc6e231.dir + md5: afc25d6ad22bed4ded5cb07028bff1cf.dir size: 21380796 nfiles: 500 + - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer_embeddings + md5: c2824c0c5cf433dbf864ebbdc2fb3cfc.dir + size: 44326962 + nfiles: 500 - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer md5: 4cfbb2830b280084ece14b1ef815b92a.dir size: 17298 nfiles: 500 + - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer_embeddings + md5: 98a7edeee3b630e8e301acfc578a8393.dir + size: 34869 + nfiles: 500 voicelab_gold_transcript_processing: cmd: "PYTHONPATH=. 
python -u experiment/voicelab/voicelab_pipeline_gold_transcript.py\n" deps: @@ -190,24 +210,24 @@ stages: size: 4803739404 nfiles: 1600 - path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322 - md5: 926ef9bab4ce41b9de95f2f3d5ab67a0.dir + md5: 4046ea5d80966f0c017b2c4bec0e7c9b.dir size: 110711470 nfiles: 1600 outs: - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript - md5: fb6812b2f3044c0285ee6ee2b21d0523.dir + md5: 9edf1e743faa9fc3515790acb6fd8cab.dir size: 21846798 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy - md5: f2e68dcc8842a15e417ae6f5221a802a.dir + md5: c166937f6e8ae9d28412ca1e3e43469e.dir size: 26643278 nfiles: 800 voicelab_techmo_processing: cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_techmo.py\n" deps: - path: experiment/voicelab/voicelab_pipeline_techmo.py - md5: 23c0869d7cc9f0088870362d669ab82e - size: 2685 + md5: 3d6347486055a11e399beac71ce2f877 + size: 3479 - path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo md5: 6c3b356723d562c978f84e733b91f5d0.dir size: 17539259 @@ -217,16 +237,16 @@ stages: size: 4803739404 nfiles: 1600 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript - md5: fb6812b2f3044c0285ee6ee2b21d0523.dir + md5: 9edf1e743faa9fc3515790acb6fd8cab.dir size: 21846798 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy - md5: f2e68dcc8842a15e417ae6f5221a802a.dir + md5: c166937f6e8ae9d28412ca1e3e43469e.dir size: 26643278 nfiles: 800 outs: - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer - md5: 8c5f0380ba2891b3e726d647c2863c60.dir + md5: a15a7a19f46e329c8b77eeecdda9d7b4.dir size: 81650836 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer @@ -234,18 
+254,81 @@ stages: size: 27934 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr - md5: c45e29b08af7bb13cdf54da9655bd96c.dir + md5: da32e6fa9d986deddb594cb66e649864.dir size: 39158267 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy - md5: a39c82666419c2b7791952a1fa116d61.dir + md5: cd89a91a33629088ba6fc30ef8427dee.dir size: 24482297 nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer - md5: 72ff86c7cb2e89ac7e04677f532255b2.dir + md5: 0b714391682432408d74beee1cd5a14a.dir size: 83756423 nfiles: 800 + - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer_embeddings + md5: 93d34d82f8536014ddbe0cf0645dd837.dir + size: 174322727 + nfiles: 800 - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer md5: 2fe3288abe85e4a385e2aefa0e8cad7e.dir size: 27780 nfiles: 800 + - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer_embeddings + md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir + size: 56182 + nfiles: 800 + voicelab_ajn_processing: + cmd: "PYTHONPATH=. 
python -u experiment/voicelab/voicelab_pipeline_ajn_asr.py\n" + deps: + - path: experiment/voicelab/voicelab_pipeline_ajn_asr.py + md5: 85e8d3d79379e6d5db751e03c5ebae75 + size: 4161 + - path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_ajn + md5: 49a38b90f1265a61b90b54f820415011.dir + size: 32601414 + nfiles: 800 + - path: experiment_data/dataset/voicelab_cbiz_testset_20220322 + md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir + size: 4803739404 + nfiles: 1600 + - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript + md5: 9edf1e743faa9fc3515790acb6fd8cab.dir + size: 21846798 + nfiles: 800 + - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy + md5: c166937f6e8ae9d28412ca1e3e43469e.dir + size: 26643278 + nfiles: 800 + outs: + - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr + md5: 94181d7a0731e8defbdcb4b477ad72a2.dir + size: 48470646 + nfiles: 800 + - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy + md5: ef8be18b8acca299f9b9542ac8643a87.dir + size: 20536889 + nfiles: 800 + - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_alignment_wer + md5: b2d3a9872e6016cfde8e6d025bef373b.dir + size: 78539613 + nfiles: 800 + - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_metrics_wer + md5: d0e1ef5f57de27a2356d2f2050a93349.dir + size: 27353 + nfiles: 800 + - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer + md5: acb5337346e70bed974dfe7ca7947d79.dir + size: 104789466 + nfiles: 800 + - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer_embeddings + md5: 93d34d82f8536014ddbe0cf0645dd837.dir + size: 174322727 + nfiles: 800 + - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer + md5: 
903096554a3ea6896c4abaa5e2c71d4c.dir + size: 16505 + nfiles: 800 + - path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer_embeddings + md5: 1fc2985ad4c3cb00d05b1865ad5b22d4.dir + size: 56182 + nfiles: 800 diff --git a/dvc.yaml b/dvc.yaml index da6552da4616572e8a7680ea957eb585a1a11fe6..e2f3e5d29374acdce61d0676cd4afe8f77add483 100644 --- a/dvc.yaml +++ b/dvc.yaml @@ -35,6 +35,8 @@ stages: - experiment_data/pipeline/asr_benchmark_luna/techmo_spacy - experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer - experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer + - experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer_embeddings + - experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer_embeddings luna_ajn_processing: cmd: | @@ -42,6 +44,7 @@ stages: deps: - experiment/luna/pipeline/luna_ajn_asr_processing.py - experiment_data/dataset/LUNA.PL + - experiment_data/cached_asr/luna_ajn_polish_asr - experiment_data/pipeline/asr_benchmark_luna/gold_transcript - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy outs: @@ -51,9 +54,11 @@ stages: - experiment_data/pipeline/asr_benchmark_luna/ajn_spacy - experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer - experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer + - experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer_embeddings + - experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer_embeddings voicelab_import_to_common_format: - cmd: PYTHONPATH=. python experiment/voicelab/import_data.py + cmd: PYTHONPATH=. 
python -u experiment/voicelab/import_data.py deps: - experiment/voicelab/import_data.py - experiment_data/dataset/voicelab_cbiz_testset_20220322 @@ -87,21 +92,26 @@ stages: - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer_embeddings + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer_embeddings -# voicelab_ajn_processing: -# cmd: | -# PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_ajn_asr.py -# deps: -# - experiment/voicelab/voicelab_pipeline_ajn_asr.py -# - experiment_data/dataset/voicelab_cbiz_testset_20220322 -# - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript -# - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy -# outs: -# - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr -# - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer -# - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer -# - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy -# - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_alignment_wer -# - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_metrics_wer + voicelab_ajn_processing: + cmd: | + PYTHONPATH=. 
python -u experiment/voicelab/voicelab_pipeline_ajn_asr.py + deps: + - experiment/voicelab/voicelab_pipeline_ajn_asr.py + - experiment_data/cached_asr/voicelab_cbiz_testset_20220322_ajn + - experiment_data/dataset/voicelab_cbiz_testset_20220322 + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy + outs: + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_alignment_wer + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_metrics_wer + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer_embeddings + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer_embeddings # concurrent features, multiprocessing diff --git a/experiment/luna/pipeline/dependency_provider.py b/experiment/luna/pipeline/dependency_provider.py index 7557f511e1585a1d5b443ca181a607e9595e60c2..78a4e7ab05cf82d1de8980e97905958b0210bab9 100644 --- a/experiment/luna/pipeline/dependency_provider.py +++ b/experiment/luna/pipeline/dependency_provider.py @@ -8,18 +8,22 @@ GOLD_TRANSCRIPT = 'gold_transcript' GOLD_TRANSCRIPT_SPACY = 'gold_transcript_spacy' TECHMO_POLISH_ASR = 'techmo_polish_asr' -WORD_TECHMO_MERTICS_WER = 'word_techmo_metrics_wer' +WORD_TECHMO_METRICS_WER = 'word_techmo_metrics_wer' WORD_TECHMO_ALIGNMENT_WER = 'word_techmo_alignment_wer' TECHMO_SPACY = 'techmo_spacy' POS_TECHMO_ALIGNMENT_WER = 'pos_techmo_alignment_wer' POS_TECHMO_METRICS_WER = 'pos_techmo_metrics_wer' 
+WORD_TECHMO_METRICS_WER_EMBEDDINGS = 'word_techmo_metrics_wer_embeddings' +WORD_TECHMO_ALIGNMENT_WER_EMBEDDINGS = 'word_techmo_alignment_wer_embeddings' AJN_POLISH_ASR = 'ajn_polish_asr' -WORD_AJN_MERTICS_WER = 'word_ajn_metrics_wer' +WORD_AJN_METRICS_WER = 'word_ajn_metrics_wer' WORD_AJN_ALIGNMENT_WER = 'word_ajn_alignment_wer' AJN_SPACY = 'ajn_spacy' POS_AJN_ALIGNMENT_WER = 'pos_ajn_alignment_wer' POS_AJN_METRICS_WER = 'pos_ajn_metrics_wer' +WORD_AJN_METRICS_WER_EMBEDDINGS = 'word_ajn_metrics_wer_embeddings' +WORD_AJN_ALIGNMENT_WER_EMBEDDINGS = 'word_ajn_alignment_wer_embeddings' def get_record_provider() -> LunaRecordProvider: diff --git a/experiment/luna/pipeline/luna_ajn_asr_processing.py b/experiment/luna/pipeline/luna_ajn_asr_processing.py index 7ad6122475444b57eb1f209bf31945bcf7277ad8..662858dd2b6e1bfdfb706717e754b9259ac6d21f 100644 --- a/experiment/luna/pipeline/luna_ajn_asr_processing.py +++ b/experiment/luna/pipeline/luna_ajn_asr_processing.py @@ -1,13 +1,15 @@ from experiment.luna.pipeline.dependency_provider import get_record_provider, GOLD_TRANSCRIPT, \ get_multiple_files_repository, \ - GOLD_TRANSCRIPT_SPACY, AJN_POLISH_ASR, WORD_AJN_MERTICS_WER, WORD_AJN_ALIGNMENT_WER, AJN_SPACY, \ - POS_AJN_ALIGNMENT_WER, POS_AJN_METRICS_WER + GOLD_TRANSCRIPT_SPACY, AJN_POLISH_ASR, WORD_AJN_METRICS_WER, WORD_AJN_ALIGNMENT_WER, AJN_SPACY, \ + POS_AJN_ALIGNMENT_WER, POS_AJN_METRICS_WER, WORD_AJN_METRICS_WER_EMBEDDINGS, WORD_AJN_ALIGNMENT_WER_EMBEDDINGS from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask +from sziszapangma.core.transformer.web_embedding_transformer import WebEmbeddingTransformer from sziszapangma.integration.experiment_manager import ExperimentManager from sziszapangma.integration.task.asr_task import AsrTask from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask -from 
sziszapangma.integration.asr_processor import AsrWebClient +from sziszapangma.integration.asr_processor import AsrWebClient, MultipleSourcesAsrProcessor, AsrPathCacheClient +from sziszapangma.integration.task.embedding_wer_metrics_task import EmbeddingWerMetricsTask def run_luna_experiment(): @@ -17,7 +19,11 @@ def run_luna_experiment(): processing_tasks=[ AsrTask( task_name='ajn_polish_asr_task', - asr_processor=AsrWebClient('http://localhost:5431/process_asr', '__example_token__'), + asr_processor=MultipleSourcesAsrProcessor([ + AsrPathCacheClient('experiment_data/cached_asr/luna_ajn_polish_asr', record_provider, + record_provider), + AsrWebClient('http://localhost:5431/process_asr', '__example_token__') + ]), asr_property_name=AJN_POLISH_ASR, require_update=False, record_path_provider=record_provider @@ -26,7 +32,7 @@ def run_luna_experiment(): task_name='techmo_word_wer_processing', asr_property_name=AJN_POLISH_ASR, gold_transcript_property_name=GOLD_TRANSCRIPT, - metrics_property_name=WORD_AJN_MERTICS_WER, + metrics_property_name=WORD_AJN_METRICS_WER, require_update=False, alignment_property_name=WORD_AJN_ALIGNMENT_WER ), @@ -43,6 +49,15 @@ def run_luna_experiment(): asr_pos_property_name=AJN_SPACY, pos_alignment_wer=POS_AJN_ALIGNMENT_WER, pos_metrics_wer=POS_AJN_METRICS_WER + ), + EmbeddingWerMetricsTask( + task_name='EmbeddingWerMetricsTask', + asr_property_name=AJN_POLISH_ASR, + gold_transcript_property_name=GOLD_TRANSCRIPT, + metrics_property_name=WORD_AJN_METRICS_WER_EMBEDDINGS, + require_update=False, + embedding_transformer=WebEmbeddingTransformer('pl', 'http://localhost:5003', 'fjsd-mkwe-oius-m9h2'), + alignment_property_name=WORD_AJN_ALIGNMENT_WER_EMBEDDINGS ) ], experiment_repository=get_multiple_files_repository(), diff --git a/experiment/luna/pipeline/luna_techmo_processing.py b/experiment/luna/pipeline/luna_techmo_processing.py index ee1229687f1a20903267ee015a15b8dd6e42124a..c219440bbd0893443d4014e8f891ae5b9efa84ea 100644 --- 
a/experiment/luna/pipeline/luna_techmo_processing.py +++ b/experiment/luna/pipeline/luna_techmo_processing.py @@ -1,13 +1,15 @@ from experiment.luna.pipeline.dependency_provider import get_record_provider, GOLD_TRANSCRIPT, TECHMO_POLISH_ASR, \ get_multiple_files_repository, \ - GOLD_TRANSCRIPT_SPACY, POS_TECHMO_ALIGNMENT_WER, POS_TECHMO_METRICS_WER, WORD_TECHMO_MERTICS_WER, \ - WORD_TECHMO_ALIGNMENT_WER, TECHMO_SPACY + GOLD_TRANSCRIPT_SPACY, POS_TECHMO_ALIGNMENT_WER, POS_TECHMO_METRICS_WER, WORD_TECHMO_METRICS_WER, \ + WORD_TECHMO_ALIGNMENT_WER, TECHMO_SPACY, WORD_TECHMO_METRICS_WER_EMBEDDINGS, WORD_TECHMO_ALIGNMENT_WER_EMBEDDINGS from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask +from sziszapangma.core.transformer.web_embedding_transformer import WebEmbeddingTransformer from sziszapangma.integration.asr_processor import AsrPathCacheClient from sziszapangma.integration.experiment_manager import ExperimentManager from sziszapangma.integration.task.asr_task import AsrTask from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask +from sziszapangma.integration.task.embedding_wer_metrics_task import EmbeddingWerMetricsTask def run_luna_experiment(): @@ -27,7 +29,7 @@ def run_luna_experiment(): task_name='techmo_word_wer_processing', asr_property_name=TECHMO_POLISH_ASR, gold_transcript_property_name=GOLD_TRANSCRIPT, - metrics_property_name=WORD_TECHMO_MERTICS_WER, + metrics_property_name=WORD_TECHMO_METRICS_WER, require_update=False, alignment_property_name=WORD_TECHMO_ALIGNMENT_WER ), @@ -44,6 +46,15 @@ def run_luna_experiment(): asr_pos_property_name=TECHMO_SPACY, pos_alignment_wer=POS_TECHMO_ALIGNMENT_WER, pos_metrics_wer=POS_TECHMO_METRICS_WER + ), + EmbeddingWerMetricsTask( + task_name='EmbeddingWerMetricsTask', + asr_property_name='techmo_polish_asr', + 
gold_transcript_property_name=GOLD_TRANSCRIPT, + metrics_property_name=WORD_TECHMO_METRICS_WER_EMBEDDINGS, + require_update=False, + embedding_transformer=WebEmbeddingTransformer('pl', 'http://localhost:5003', 'fjsd-mkwe-oius-m9h2'), + alignment_property_name=WORD_TECHMO_ALIGNMENT_WER_EMBEDDINGS ) ], experiment_repository=get_multiple_files_repository(), diff --git a/experiment/voicelab/voicelab_dependency.py b/experiment/voicelab/voicelab_dependency.py index 58642e403a4c2f0bb7a7db59d5013723e01453d2..4b528c9c2689c8457186c72e0a9a497202bbe369 100644 --- a/experiment/voicelab/voicelab_dependency.py +++ b/experiment/voicelab/voicelab_dependency.py @@ -8,19 +8,22 @@ GOLD_TRANSCRIPT = 'gold_transcript' GOLD_TRANSCRIPT_SPACY = 'gold_transcript_spacy' TECHMO_POLISH_ASR = 'techmo_polish_asr' -WORD_TECHMO_MERTICS_WER = 'word_techmo_metrics_wer' +WORD_TECHMO_METRICS_WER = 'word_techmo_metrics_wer' WORD_TECHMO_ALIGNMENT_WER = 'word_techmo_alignment_wer' TECHMO_SPACY = 'techmo_spacy' POS_TECHMO_ALIGNMENT_WER = 'pos_techmo_alignment_wer' POS_TECHMO_METRICS_WER = 'pos_techmo_metrics_wer' +WORD_TECHMO_METRICS_WER_EMBEDDINGS = 'word_techmo_metrics_wer_embeddings' +WORD_TECHMO_ALIGNMENT_WER_EMBEDDINGS = 'word_techmo_alignment_wer_embeddings' AJN_POLISH_ASR = 'ajn_polish_asr' -WORD_AJN_MERTICS_WER = 'word_ajn_metrics_wer' +WORD_AJN_METRICS_WER = 'word_ajn_metrics_wer' WORD_AJN_ALIGNMENT_WER = 'word_ajn_alignment_wer' AJN_SPACY = 'ajn_spacy' POS_AJN_ALIGNMENT_WER = 'pos_ajn_alignment_wer' POS_AJN_METRICS_WER = 'pos_ajn_metrics_wer' - +WORD_AJN_METRICS_WER_EMBEDDINGS = 'word_ajn_metrics_wer_embeddings' +WORD_AJN_ALIGNMENT_WER_EMBEDDINGS = 'word_ajn_alignment_wer_embeddings' PIPELINE_DATA_DIRECTORY = 'experiment_data/pipeline' EXPERIMENT_NAME = 'asr_benchmark_voicelab_cbiz_testset_20220322' diff --git a/experiment/voicelab/voicelab_pipeline.py b/experiment/voicelab/voicelab_pipeline.py deleted file mode 100644 index 
d58f92e6e14816641c67dc6e03fc8a35373c549b..0000000000000000000000000000000000000000 --- a/experiment/voicelab/voicelab_pipeline.py +++ /dev/null @@ -1,96 +0,0 @@ -from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask -from experiment.pos_processing.gold_transcript_spacy_token_pos_processing_task import \ - GoldTranscriptSpacyTokenPosProcessingTask -from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask -from experiment.voicelab.voicelab_dependency import get_record_provider, get_repository -from experiment.voicelab.voicelab_gold_transcript_processor import VoicelabGoldTranscriptProcessor -from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider -from sziszapangma.integration.asr_processor import AsrPathCacheClient -from sziszapangma.integration.experiment_manager import ExperimentManager -from sziszapangma.integration.path_filter import ExtensionPathFilter -from sziszapangma.integration.repository.experiment_repository import ExperimentRepository -from sziszapangma.integration.repository.multi_files_experiment_repository import \ - MultiFilesExperimentRepository -from sziszapangma.integration.task.asr_task import AsrTask -from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask -from sziszapangma.integration.task.gold_transcript_task import GoldTranscriptTask - -GOLD_TRANSCRIPT = 'gold_transcript' -TECHMO_POLISH_ASR = 'techmo_polish_asr' -TECHMO_POLISH_CLASSIC_WER_METRIC = 'techmo_polish_classic_wer_metric' -TECHMO_POLISH_CLASSIC_ALIGNMENT = 'techmo_polish_classic_alignment' -TECHMO_SPACY = 'techmo_spacy' -GOLD_TRANSCRIPT_SPACY = 'gold_transcript_spacy' -POS_ALIGNMENT_WER = 'pos_alignment_wer' -POS_METRICS_WER = 'pos_metrics_wer' - -DATASET_DIRECTORY = 'experiment_data/dataset/voicelab_cbiz_testset_20220322' -PIPELINE_DATA_DIRECTORY = 'experiment_data/pipeline' -EXPERIMENT_NAME = 
'asr_benchmark_voicelab_cbiz_testset_20220322' -RELATION_MANAGER_ROOT_PATH = 'experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322' - - -def run_voicelab_experiment(): - record_provider = get_record_provider() - experiment_processor = ExperimentManager( - record_id_iterator=record_provider, - processing_tasks=[ - GoldTranscriptTask( - task_name='gold_transcript_task', - gold_transcript_processor=VoicelabGoldTranscriptProcessor(record_provider), - gold_transcript_property_name=GOLD_TRANSCRIPT, - require_update=False - ), - AsrTask( - task_name='techmo_polish_task', - # asr_processor=AsrWebClient('http://192.168.0.124:4999/process_asr', 'test1234'), - asr_processor=AsrPathCacheClient( - 'experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo', - record_provider, - record_provider - ), - asr_property_name=TECHMO_POLISH_ASR, - require_update=False, - record_path_provider=record_provider - ), - ClassicWerMetricTask( - task_name='classic_wer_metric_task', - asr_property_name=TECHMO_POLISH_ASR, - gold_transcript_property_name=GOLD_TRANSCRIPT, - metrics_property_name=TECHMO_POLISH_CLASSIC_WER_METRIC, - require_update=False, - alignment_property_name=TECHMO_POLISH_CLASSIC_ALIGNMENT - ), - GoldTranscriptSpacyTokenPosProcessingTask( - task_name='gold_transcript_spacy_task', - input_property_name=GOLD_TRANSCRIPT, - spacy_property_name=GOLD_TRANSCRIPT_SPACY, - require_update=True - ), - AsrSpacyTokenPosProcessingTask( - task_name='techmo_spacy_task', - input_property_name=TECHMO_POLISH_ASR, - spacy_property_name=TECHMO_SPACY, - require_update=True - ), - SpacyPosWerProcessingTask( - task_name='PosWerProcessor', - require_update=False, - gold_transcript_pos_property_name=GOLD_TRANSCRIPT_SPACY, - asr_pos_property_name=TECHMO_SPACY, - pos_alignment_wer=POS_ALIGNMENT_WER, - pos_metrics_wer=POS_METRICS_WER - ) - ], - experiment_repository=get_repository(), - relation_manager_provider=record_provider - ) - experiment_processor.process() - - -def 
example_run(): - run_voicelab_experiment() - - -if __name__ == '__main__': - example_run() diff --git a/experiment/voicelab/voicelab_pipeline_ajn_asr.py b/experiment/voicelab/voicelab_pipeline_ajn_asr.py index d650082aaa8210b16f35d5d0075a919f30241e64..b6c4c2c441b8e1681d2418312de745ef1588c12b 100644 --- a/experiment/voicelab/voicelab_pipeline_ajn_asr.py +++ b/experiment/voicelab/voicelab_pipeline_ajn_asr.py @@ -3,11 +3,12 @@ from experiment.pos_processing.gold_transcript_spacy_token_pos_processing_task i GoldTranscriptSpacyTokenPosProcessingTask from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask from experiment.voicelab.voicelab_dependency import get_record_provider, get_repository, GOLD_TRANSCRIPT, \ - GOLD_TRANSCRIPT_SPACY, TECHMO_POLISH_ASR, AJN_POLISH_ASR, WORD_AJN_MERTICS_WER, WORD_AJN_ALIGNMENT_WER, AJN_SPACY, \ - POS_AJN_ALIGNMENT_WER, POS_AJN_METRICS_WER + GOLD_TRANSCRIPT_SPACY, TECHMO_POLISH_ASR, AJN_POLISH_ASR, WORD_AJN_METRICS_WER, WORD_AJN_ALIGNMENT_WER, AJN_SPACY, \ + POS_AJN_ALIGNMENT_WER, POS_AJN_METRICS_WER, WORD_AJN_METRICS_WER_EMBEDDINGS, WORD_AJN_ALIGNMENT_WER_EMBEDDINGS from experiment.voicelab.voicelab_gold_transcript_processor import VoicelabGoldTranscriptProcessor from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider -from sziszapangma.integration.asr_processor import AsrPathCacheClient, AsrWebClient +from sziszapangma.core.transformer.web_embedding_transformer import WebEmbeddingTransformer +from sziszapangma.integration.asr_processor import AsrPathCacheClient, AsrWebClient, MultipleSourcesAsrProcessor from sziszapangma.integration.experiment_manager import ExperimentManager from sziszapangma.integration.path_filter import ExtensionPathFilter from sziszapangma.integration.repository.experiment_repository import ExperimentRepository @@ -15,6 +16,7 @@ from sziszapangma.integration.repository.multi_files_experiment_repository impor MultiFilesExperimentRepository from 
sziszapangma.integration.task.asr_task import AsrTask from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask +from sziszapangma.integration.task.embedding_wer_metrics_task import EmbeddingWerMetricsTask from sziszapangma.integration.task.gold_transcript_task import GoldTranscriptTask @@ -25,7 +27,11 @@ def run_voicelab_experiment(): processing_tasks=[ AsrTask( task_name='ajn_polish_asr_task', - asr_processor=AsrWebClient('http://localhost:5431/process_asr', '__example_token__'), + asr_processor=AsrPathCacheClient( + 'experiment_data/cached_asr/voicelab_cbiz_testset_20220322_ajn', + record_provider, + record_provider + ), asr_property_name=AJN_POLISH_ASR, require_update=False, record_path_provider=record_provider @@ -34,7 +40,7 @@ def run_voicelab_experiment(): task_name='techmo_word_wer_processing', asr_property_name=AJN_POLISH_ASR, gold_transcript_property_name=GOLD_TRANSCRIPT, - metrics_property_name=WORD_AJN_MERTICS_WER, + metrics_property_name=WORD_AJN_METRICS_WER, require_update=False, alignment_property_name=WORD_AJN_ALIGNMENT_WER ), @@ -51,6 +57,15 @@ def run_voicelab_experiment(): asr_pos_property_name=AJN_SPACY, pos_alignment_wer=POS_AJN_ALIGNMENT_WER, pos_metrics_wer=POS_AJN_METRICS_WER + ), + EmbeddingWerMetricsTask( + task_name='EmbeddingWerMetricsTask', + asr_property_name='techmo_polish_asr', + gold_transcript_property_name=GOLD_TRANSCRIPT, + metrics_property_name=WORD_AJN_METRICS_WER_EMBEDDINGS, + require_update=False, + embedding_transformer=WebEmbeddingTransformer('pl', 'http://localhost:5003', 'fjsd-mkwe-oius-m9h2'), + alignment_property_name=WORD_AJN_ALIGNMENT_WER_EMBEDDINGS ) ], experiment_repository=get_repository(), @@ -64,6 +79,4 @@ def example_run(): if __name__ == '__main__': - # example_run() - path = '/home/marcinwatroba/PWR_ASR/asr-benchmarks/experiment_data/dataset/voicelab_cbiz_testset_20220322/bankowe/cbiz_tc_2.agnt.wav' - print(AsrWebClient('http://localhost:5431/process_asr', 
'__example_token__').call_recognise(path)) + example_run() diff --git a/experiment/voicelab/voicelab_pipeline_techmo.py b/experiment/voicelab/voicelab_pipeline_techmo.py index 03bdd64f82f71b0230c93d095f73dc6774eae0b1..c99e09b1b95e5a33d19d7857289c6c966eabacd7 100644 --- a/experiment/voicelab/voicelab_pipeline_techmo.py +++ b/experiment/voicelab/voicelab_pipeline_techmo.py @@ -1,12 +1,15 @@ from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask from experiment.voicelab.voicelab_dependency import get_record_provider, get_repository, GOLD_TRANSCRIPT, \ - GOLD_TRANSCRIPT_SPACY, TECHMO_POLISH_ASR, WORD_TECHMO_MERTICS_WER, WORD_TECHMO_ALIGNMENT_WER, TECHMO_SPACY, \ - POS_TECHMO_METRICS_WER, POS_TECHMO_ALIGNMENT_WER + GOLD_TRANSCRIPT_SPACY, TECHMO_POLISH_ASR, WORD_TECHMO_METRICS_WER, WORD_TECHMO_ALIGNMENT_WER, TECHMO_SPACY, \ + POS_TECHMO_METRICS_WER, POS_TECHMO_ALIGNMENT_WER, WORD_TECHMO_METRICS_WER_EMBEDDINGS, \ + WORD_TECHMO_ALIGNMENT_WER_EMBEDDINGS +from sziszapangma.core.transformer.web_embedding_transformer import WebEmbeddingTransformer from sziszapangma.integration.asr_processor import AsrPathCacheClient from sziszapangma.integration.experiment_manager import ExperimentManager from sziszapangma.integration.task.asr_task import AsrTask from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask +from sziszapangma.integration.task.embedding_wer_metrics_task import EmbeddingWerMetricsTask def run_voicelab_experiment(): @@ -29,7 +32,7 @@ def run_voicelab_experiment(): task_name='techmo_word_wer_processing', asr_property_name=TECHMO_POLISH_ASR, gold_transcript_property_name=GOLD_TRANSCRIPT, - metrics_property_name=WORD_TECHMO_MERTICS_WER, + metrics_property_name=WORD_TECHMO_METRICS_WER, require_update=False, alignment_property_name=WORD_TECHMO_ALIGNMENT_WER ), @@ -46,6 +49,15 @@ def 
run_voicelab_experiment(): asr_pos_property_name=TECHMO_SPACY, pos_alignment_wer=POS_TECHMO_ALIGNMENT_WER, pos_metrics_wer=POS_TECHMO_METRICS_WER + ), + EmbeddingWerMetricsTask( + task_name='EmbeddingWerMetricsTask', + asr_property_name='techmo_polish_asr', + gold_transcript_property_name=GOLD_TRANSCRIPT, + metrics_property_name=WORD_TECHMO_METRICS_WER_EMBEDDINGS, + require_update=False, + embedding_transformer=WebEmbeddingTransformer('pl', 'http://localhost:5003', 'fjsd-mkwe-oius-m9h2'), + alignment_property_name=WORD_TECHMO_ALIGNMENT_WER_EMBEDDINGS ) ], experiment_repository=get_repository(), diff --git a/experiment_data/cached_asr/.gitignore b/experiment_data/cached_asr/.gitignore index 127221d8df92f47e05599556664cc65c59c7ac14..b8177bca9f52566f1d35f3450716eda975a1af67 100644 --- a/experiment_data/cached_asr/.gitignore +++ b/experiment_data/cached_asr/.gitignore @@ -1,3 +1,4 @@ /luna_techmo /voicelab_cbiz_testset_20220322_techmo /luna_ajn_polish_asr +/voicelab_cbiz_testset_20220322_ajn diff --git a/experiment_data/cached_asr/voicelab_cbiz_testset_20220322_ajn.dvc b/experiment_data/cached_asr/voicelab_cbiz_testset_20220322_ajn.dvc new file mode 100644 index 0000000000000000000000000000000000000000..107d8c08f496a369d57ef80e2bc54d8c9bdf6446 --- /dev/null +++ b/experiment_data/cached_asr/voicelab_cbiz_testset_20220322_ajn.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 49a38b90f1265a61b90b54f820415011.dir + size: 32601414 + nfiles: 800 + path: voicelab_cbiz_testset_20220322_ajn diff --git a/experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/.gitignore b/experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/.gitignore index 7aff3d637c2b7ca712c0f00c80540e765928aa22..e07b32490887af2a908649169653413934bb654e 100644 --- a/experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/.gitignore +++ b/experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/.gitignore @@ -6,3 +6,13 @@ /techmo_spacy /pos_techmo_alignment_wer 
/pos_techmo_metrics_wer +/ajn_polish_asr +/word_ajn_metrics_wer +/word_ajn_alignment_wer +/ajn_spacy +/pos_ajn_alignment_wer +/pos_ajn_metrics_wer +/word_techmo_metrics_wer_embeddings +/word_techmo_alignment_wer_embeddings +/word_ajn_metrics_wer_embeddings +/word_ajn_alignment_wer_embeddings diff --git a/run_repro_in_background.sh b/run_repro_in_background.sh new file mode 100755 index 0000000000000000000000000000000000000000..35d3286fae10f5d9711f80c3293115784f15db59 --- /dev/null +++ b/run_repro_in_background.sh @@ -0,0 +1,2 @@ +#!/bin/bash +dvc repro > repro.log 2>&1 & diff --git a/sziszapangma/core/transformer/__pycache__/cached_embedding_transformer.cpython-38.pyc b/sziszapangma/core/transformer/__pycache__/cached_embedding_transformer.cpython-38.pyc index 0ce5df23cfc1255b9563c274caccbf4fb6fc0826..e69f01a8dbd67b79d3cb0386b27827ac6d3021b2 100644 Binary files a/sziszapangma/core/transformer/__pycache__/cached_embedding_transformer.cpython-38.pyc and b/sziszapangma/core/transformer/__pycache__/cached_embedding_transformer.cpython-38.pyc differ diff --git a/sziszapangma/integration/__pycache__/asr_processor.cpython-38.pyc b/sziszapangma/integration/__pycache__/asr_processor.cpython-38.pyc index 79aba6f4c6f20a2b241734f0e21c01cd2656659d..adb286664a8b35871a52844a1c97afdc946c595d 100644 Binary files a/sziszapangma/integration/__pycache__/asr_processor.cpython-38.pyc and b/sziszapangma/integration/__pycache__/asr_processor.cpython-38.pyc differ diff --git a/sziszapangma/integration/asr_processor.py b/sziszapangma/integration/asr_processor.py index 56a8786b56e78adb5cb2942a2615cd8677be4ea6..4d01c10c7aa94d34378570e739b24b8833a7a9d7 100644 --- a/sziszapangma/integration/asr_processor.py +++ b/sziszapangma/integration/asr_processor.py @@ -1,7 +1,7 @@ import json from abc import ABC, abstractmethod from pathlib import Path -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, List import requests @@ -59,4 +59,28 @@ class 
AsrPathCacheClient(AsrProcessor): def call_recognise(self, file_path: str) -> Dict[str, Any]: path = Path(self._cache_path).joinpath(f"{self.path_to_id[file_path]}.json") + print(f'cache path {path}') + if not path.exists(): + raise Exception(f"path not exists {path}") return json.load(open(path, "r")) + + +class MultipleSourcesAsrProcessor(AsrProcessor): + processors: List[AsrProcessor] + + def __init__(self, processors: List[AsrProcessor]): + self.processors = processors + + def call_recognise(self, file_path: str) -> Dict[str, Any]: + value = None + for it in self.processors: + if value is None: + try: + value = it.call_recognise(file_path) + except Exception as e: + print(e) + if value is None: + raise Exception("All processors can not process record") + else: + print(value) + return value diff --git a/sziszapangma/integration/repository/multi_files_experiment_repository.py b/sziszapangma/integration/repository/multi_files_experiment_repository.py index 238585824d7e071285ce8d89ed9ee93824beb6aa..09394959903bfbeb1845a46d48278b1496086318 100644 --- a/sziszapangma/integration/repository/multi_files_experiment_repository.py +++ b/sziszapangma/integration/repository/multi_files_experiment_repository.py @@ -23,9 +23,13 @@ class MultiFilesExperimentRepository(ExperimentRepository): return self._get_file_path(property_name, record_id).exists() def update_property_for_key(self, record_id: str, property_name: str, property_value: Any): + print(f'update {record_id} {property_name}') path = self._get_file_path(property_name, record_id) + print(path) path.parent.mkdir(parents=True, exist_ok=True) + print('created path') json.dump(property_value, open(path, "w")) + print('value saved') def delete_property_for_key(self, record_id: str, property_name: str): self._get_file_path(property_name, record_id).unlink() diff --git a/sziszapangma/integration/task/__pycache__/asr_task.cpython-38.pyc b/sziszapangma/integration/task/__pycache__/asr_task.cpython-38.pyc index 
a34960b88f1c6e4bbe7fb65ec4b80fc16a9658e7..2ec6d5594835708ca2244cb10668efea26f93dc6 100644 Binary files a/sziszapangma/integration/task/__pycache__/asr_task.cpython-38.pyc and b/sziszapangma/integration/task/__pycache__/asr_task.cpython-38.pyc differ diff --git a/sziszapangma/integration/task/__pycache__/embedding_wer_metrics_task.cpython-38.pyc b/sziszapangma/integration/task/__pycache__/embedding_wer_metrics_task.cpython-38.pyc index 4918ee5191cb5b9f9f58c552c124e883bb704be7..388f4fe48dd157ff91257c305c0a27b700e0f078 100644 Binary files a/sziszapangma/integration/task/__pycache__/embedding_wer_metrics_task.cpython-38.pyc and b/sziszapangma/integration/task/__pycache__/embedding_wer_metrics_task.cpython-38.pyc differ diff --git a/sziszapangma/integration/task/asr_task.py b/sziszapangma/integration/task/asr_task.py index bbb60b2e5aa98a4156d7ca61100ff531624e5613..3c8f444426c7565037603c02c5e3f8d8d1201b8b 100644 --- a/sziszapangma/integration/task/asr_task.py +++ b/sziszapangma/integration/task/asr_task.py @@ -36,8 +36,11 @@ class AsrTask(ProcessingTask): relation_manager: RelationManager, ) -> None: file_record_path = self._record_path_provider.get_path(record_id) + print('before call_recognise', flush=True) asr_result = self._asr_processor.call_recognise(file_record_path) + print('after call_recognise', flush=True) asr_result["transcription"] = [create_new_word(it) for it in asr_result["transcription"]] + print('after create_new_word', flush=True) experiment_repository.update_property_for_key( record_id, self._asr_property_name, asr_result )