From 3be71a1b7f37ccff2a702eb3df1736ed5bd00d50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20W=C4=85troba?= <markowanga@gmail.com> Date: Fri, 29 Apr 2022 08:38:05 +0200 Subject: [PATCH] Add voicelab pipeline stages --- docker/docker-compose.yml | 122 +++++++++--------- dvc.lock | 113 ++++++++++++++++ dvc.yaml | 84 ++++++++++-- .../luna/pipeline/dependency_provider.py | 36 ++++++ .../luna/pipeline/luna_ajn_asr_processing.py | 55 ++++++++ .../luna_gold_transcript_processing.py | 34 +++++ experiment/luna/pipeline/luna_main.py | 96 -------------- .../luna/pipeline/luna_techmo_processing.py | 56 ++++++++ experiment/luna/pipeline/task/__init__.py | 0 .../luna_gold_transcript_processor.py | 2 +- experiment/voicelab/import_data.py | 9 +- experiment/voicelab/voicelab_dependency.py | 41 ++++++ experiment/voicelab/voicelab_pipeline.py | 17 +-- .../voicelab/voicelab_pipeline_ajn_asr.py | 67 ++++++++++ .../voicelab_pipeline_gold_transcript.py | 49 +++++++ .../voicelab/voicelab_pipeline_techmo.py | 62 +++++++++ experiment_data/cached_asr/.gitignore | 1 + .../cached_asr/luna_ajn_polish_asr.dvc | 5 + .../__pycache__/asr_processor.cpython-38.pyc | Bin 2855 -> 2865 bytes .../experiment_manager.cpython-38.pyc | Bin 1361 -> 1361 bytes 20 files changed, 662 insertions(+), 187 deletions(-) create mode 100644 experiment/luna/pipeline/dependency_provider.py create mode 100644 experiment/luna/pipeline/luna_ajn_asr_processing.py create mode 100644 experiment/luna/pipeline/luna_gold_transcript_processing.py delete mode 100644 experiment/luna/pipeline/luna_main.py create mode 100644 experiment/luna/pipeline/luna_techmo_processing.py create mode 100644 experiment/luna/pipeline/task/__init__.py rename experiment/luna/pipeline/{ => task}/luna_gold_transcript_processor.py (98%) create mode 100644 experiment/voicelab/voicelab_dependency.py create mode 100644 experiment/voicelab/voicelab_pipeline_ajn_asr.py create mode 100644 experiment/voicelab/voicelab_pipeline_gold_transcript.py create mode 
100644 experiment/voicelab/voicelab_pipeline_techmo.py create mode 100644 experiment_data/cached_asr/luna_ajn_polish_asr.dvc diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 430cc92..d4486f1 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -1,68 +1,68 @@ version: "3.8" services: - techmo_asr: - image: docker-registry.theliver.pl/techmo-asr:1.1 - container_name: techmo_asr - restart: always - ports: - - 5001:5000 - volumes: - - /etc/localtime:/etc/localtime:ro - - /home/marcinwatroba/.ssh/keys/techmo_asr_server:/keys/techmo_rsa_key:ro - environment: - - TECHMO_SSH_SERVER_USERNAME=mwatroba - - TECHMO_SSH_SERVER_URL=jankocon.clarin-pl.eu - - TECHMO_SERVER_SSH_PORT=9222 - - TECHMO_REMOTE_SERVICE_PORT=12321 - - TECHMO_SERVER_URL=156.17.135.34 - - AUTH_TOKEN=__example_token__ + # techmo_asr: + # image: docker-registry.theliver.pl/techmo-asr:1.1 + # container_name: techmo_asr + # restart: always + # ports: + # - 5001:5000 + # volumes: + # - /etc/localtime:/etc/localtime:ro + # - /home/marcinwatroba/.ssh/keys/techmo_asr_server:/keys/techmo_rsa_key:ro + # environment: + # - TECHMO_SSH_SERVER_USERNAME=mwatroba + # - TECHMO_SSH_SERVER_URL=jankocon.clarin-pl.eu + # - TECHMO_SERVER_SSH_PORT=9222 + # - TECHMO_REMOTE_SERVICE_PORT=12321 + # - TECHMO_SERVER_URL=156.17.135.34 + # - AUTH_TOKEN=__example_token__ - transformers-wav2vec2for_ctc: - image: docker-registry.theliver.pl/transformers-wav2vec2for_ctc:1.0 - container_name: transformers-wav2vec2for_ctc - restart: always - volumes: - - /etc/localtime:/etc/localtime:ro - - ./wav2vec2for_ctc_models:/models - ports: - - 5002:5000 - environment: - - AUTH_TOKEN=__example_token__ - - MODEL_NAME=jonatasgrosman/wav2vec2-large-xlsr-53-polish - - SAMPLING_RATE=16000 + transformers-wav2vec2for_ctc: + image: docker-registry.theliver.pl/transformers-wav2vec2for_ctc:1.0 + container_name: transformers-wav2vec2for_ctc + restart: always + volumes: + - /etc/localtime:/etc/localtime:ro + - 
./wav2vec2for_ctc_models:/models + ports: + - "5430:5000" + environment: + - AUTH_TOKEN=__example_token__ + - MODEL_NAME=jonatasgrosman/wav2vec2-large-xlsr-53-polish + - SAMPLING_RATE=16000 - embedding_service: - image: docker-registry.theliver.pl/embedding_docker:1.0 - container_name: embeddings_service - restart: always - ports: - - 5003:5000 - environment: - - AUTH_TOKEN=__example_token__ - volumes: - - /etc/localtime:/etc/localtime:ro - - ./embedding_models:/models + # embedding_service: + # image: docker-registry.theliver.pl/embedding_docker:1.0 + # container_name: embeddings_service + # restart: always + # ports: + # - 5003:5000 + # environment: + # - AUTH_TOKEN=__example_token__ + # volumes: + # - /etc/localtime:/etc/localtime:ro + # - ./embedding_models:/models - ajn_asr: - image: docker-registry.theliver.pl/asr-clarin-pl-service:1.4 - container_name: ajn_asr - restart: always - ports: - - 5004:5000 - environment: - - AUTH_TOKEN=__example_token__ - volumes: - - /etc/localtime:/etc/localtime:ro + ajn_asr: + image: docker-registry.theliver.pl/asr-clarin-pl-service:1.4 + container_name: ajn_asr + restart: always + ports: + - "5431:5000" + environment: + - AUTH_TOKEN=__example_token__ + volumes: + - /etc/localtime:/etc/localtime:ro - speechbrain_asr: - image: docker-registry.theliver.pl/speechbrain-asr:1.5 - container_name: speechbrain_asr - restart: always - ports: - - 5005:5000 - volumes: - - /etc/localtime:/etc/localtime:ro - - ./speechbrain_asr_models:/models - environment: - - AUTH_TOKEN=__example_token__ + speechbrain_asr: + image: docker-registry.theliver.pl/speechbrain-asr:1.5 + container_name: speechbrain_asr + restart: always + ports: + - "5432:5000" + volumes: + - /etc/localtime:/etc/localtime:ro + - ./speechbrain_asr_models:/models + environment: + - AUTH_TOKEN=__example_token__ diff --git a/dvc.lock b/dvc.lock index 686566c..5da2911 100644 --- a/dvc.lock +++ b/dvc.lock @@ -66,3 +66,116 @@ stages: md5: 6d56f24b0ff78c0d44ade2114158150d.dir size: 
110711470 nfiles: 1600 + luna_gold_transcript_processing: + cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_gold_transcript_processing.py\n" + deps: + - path: experiment/luna/pipeline/luna_gold_transcript_processing.py + md5: 2bae24d511febebb26b3264b204784f5 + size: 1466 + - path: experiment_data/dataset/LUNA.PL + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/dataset_relation_manager_data/luna + md5: ff680a49296818460a49bd0c70089a4a.dir + size: 229007155 + nfiles: 1000 + outs: + - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript + md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir + size: 6706925 + nfiles: 500 + - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy + md5: 24a399475b752737db0f2a8671507014.dir + size: 6785648 + nfiles: 500 + luna_ajn_processing: + cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_ajn_asr_processing.py\n" + deps: + - path: experiment/luna/pipeline/luna_ajn_asr_processing.py + md5: ec7d7b5384f845173d9fb77e9cfa9907 + size: 2501 + - path: experiment_data/dataset/LUNA.PL + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript + md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir + size: 6706925 + nfiles: 500 + - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy + md5: 24a399475b752737db0f2a8671507014.dir + size: 6785648 + nfiles: 500 + outs: + - path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr + md5: 620e178854dbcb69f49a608f34573a88.dir + size: 6159899 + nfiles: 494 + - path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy + md5: 312be284d4ec9e38986048e785fcbbc1.dir + size: 6535212 + nfiles: 494 + - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer + md5: 8ad558edb6a8bd2508a7e25bcf53bf94.dir + size: 21936929 + nfiles: 494 + - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer + 
md5: 98c74c5bf87637749eac1ed5ff3393b4.dir + size: 16842 + nfiles: 494 + - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer + md5: 1741fff740259398b28bf2a6ba3aec41.dir + size: 20671277 + nfiles: 494 + - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer + md5: 18605657ff9c7ef3221e27b671a3b4d1.dir + size: 16835 + nfiles: 494 + luna_techmo_processing: + cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py\n" + deps: + - path: experiment/luna/pipeline/luna_techmo_processing.py + md5: b4d5ad7a0d7fb0714a2dc02cb457e8c9 + size: 2628 + - path: experiment_data/cached_asr/luna_techmo + md5: 033ea7b5434dded73bf869bfdd299462.dir + size: 4256479 + nfiles: 500 + - path: experiment_data/dataset/LUNA.PL + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript + md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir + size: 6706925 + nfiles: 500 + - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy + md5: 24a399475b752737db0f2a8671507014.dir + size: 6785648 + nfiles: 500 + outs: + - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer + md5: c71539f3889c627a371957958bd0907d.dir + size: 20897599 + nfiles: 500 + - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer + md5: 4efbe309674d9d494bae3dac057025ba.dir + size: 17341 + nfiles: 500 + - path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr + md5: acfaec46b2415ed6a64e3a3464d164f8.dir + size: 9697519 + nfiles: 500 + - path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy + md5: e869581816457d1585a7e42d0a18b8b2.dir + size: 6124559 + nfiles: 500 + - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer + md5: 0dabd65b3981d588cd23d943abc6e231.dir + size: 21380796 + nfiles: 500 + - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer + md5: 
4cfbb2830b280084ece14b1ef815b92a.dir + size: 17298 + nfiles: 500 diff --git a/dvc.yaml b/dvc.yaml index f2151c0..590a16c 100644 --- a/dvc.yaml +++ b/dvc.yaml @@ -8,17 +8,49 @@ stages: outs: - experiment_data/dataset_relation_manager_data/luna - luna_main_pipeline: + luna_gold_transcript_processing: cmd: | - python -m spacy download pl_core_news_lg - PYTHONPATH=. python experiment/luna/pipeline/luna_main.py + PYTHONPATH=. python experiment/luna/pipeline/luna_gold_transcript_processing.py deps: - - experiment/luna/pipeline/luna_main.py + - experiment/luna/pipeline/luna_gold_transcript_processing.py - experiment_data/dataset_relation_manager_data/luna - experiment_data/dataset/LUNA.PL + outs: + - experiment_data/pipeline/asr_benchmark_luna/gold_transcript + - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy + + luna_techmo_processing: + cmd: | + PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py + deps: + - experiment/luna/pipeline/luna_techmo_processing.py + - experiment_data/dataset/LUNA.PL - experiment_data/cached_asr/luna_techmo + - experiment_data/pipeline/asr_benchmark_luna/gold_transcript + - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy + outs: + - experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr + - experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer + - experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer + - experiment_data/pipeline/asr_benchmark_luna/techmo_spacy + - experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer + - experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer + + luna_ajn_processing: + cmd: | + PYTHONPATH=. 
python experiment/luna/pipeline/luna_ajn_asr_processing.py + deps: + - experiment/luna/pipeline/luna_ajn_asr_processing.py + - experiment_data/dataset/LUNA.PL + - experiment_data/pipeline/asr_benchmark_luna/gold_transcript + - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy outs: - - experiment_data/pipeline/asr_benchmark_luna + - experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr + - experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer + - experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer + - experiment_data/pipeline/asr_benchmark_luna/ajn_spacy + - experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer + - experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer voicelab_import_to_common_format: cmd: PYTHONPATH=. python experiment/voicelab/import_data.py @@ -28,16 +60,48 @@ stages: outs: - experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322 - voicelab_main_pipeline: + voicelab_gold_transcript_processing: cmd: | - python -m spacy download pl_core_news_lg - PYTHONPATH=. python experiment/voicelab/voicelab_pipeline.py + PYTHONPATH=. python experiment/voicelab/voicelab_pipeline_gold_transcript.py deps: - - experiment/voicelab/voicelab_pipeline.py + - experiment/voicelab/voicelab_pipeline_gold_transcript.py - experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322 - experiment_data/dataset/voicelab_cbiz_testset_20220322 + outs: + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy + + voicelab_techmo_processing: + cmd: | + PYTHONPATH=. 
python experiment/voicelab/voicelab_pipeline_techmo.py + deps: + - experiment/voicelab/voicelab_pipeline_techmo.py + - experiment_data/dataset/voicelab_cbiz_testset_20220322 - experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy + outs: + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer + + voicelab_ajn_processing: + cmd: | + PYTHONPATH=. 
python experiment/voicelab/voicelab_pipeline_ajn_asr.py + deps: + - experiment/voicelab/voicelab_pipeline_ajn_asr.py + - experiment_data/dataset/voicelab_cbiz_testset_20220322 + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy outs: - - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322 + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_alignment_wer + - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_metrics_wer # concurrent features, multiprocessing diff --git a/experiment/luna/pipeline/dependency_provider.py b/experiment/luna/pipeline/dependency_provider.py new file mode 100644 index 0000000..7557f51 --- /dev/null +++ b/experiment/luna/pipeline/dependency_provider.py @@ -0,0 +1,36 @@ +from experiment.luna.luna_record_provider import LunaRecordProvider +from sziszapangma.integration.path_filter import ExtensionPathFilter +from sziszapangma.integration.repository.multi_files_experiment_repository import MultiFilesExperimentRepository + +LUNA_DIRECTORY = 'experiment_data/dataset/LUNA.PL' + +GOLD_TRANSCRIPT = 'gold_transcript' +GOLD_TRANSCRIPT_SPACY = 'gold_transcript_spacy' + +TECHMO_POLISH_ASR = 'techmo_polish_asr' +WORD_TECHMO_MERTICS_WER = 'word_techmo_metrics_wer' +WORD_TECHMO_ALIGNMENT_WER = 'word_techmo_alignment_wer' +TECHMO_SPACY = 'techmo_spacy' +POS_TECHMO_ALIGNMENT_WER = 'pos_techmo_alignment_wer' +POS_TECHMO_METRICS_WER = 'pos_techmo_metrics_wer' + +AJN_POLISH_ASR = 'ajn_polish_asr' 
+WORD_AJN_MERTICS_WER = 'word_ajn_metrics_wer' +WORD_AJN_ALIGNMENT_WER = 'word_ajn_alignment_wer' +AJN_SPACY = 'ajn_spacy' +POS_AJN_ALIGNMENT_WER = 'pos_ajn_alignment_wer' +POS_AJN_METRICS_WER = 'pos_ajn_metrics_wer' + + +def get_record_provider() -> LunaRecordProvider: + return LunaRecordProvider( + ExtensionPathFilter( + root_directory=f'{LUNA_DIRECTORY}/LUNA.PL', + extension='wav' + ), + relation_manager_root_path='experiment_data/dataset_relation_manager_data/luna' + ) + + +def get_multiple_files_repository() -> MultiFilesExperimentRepository: + return MultiFilesExperimentRepository('experiment_data/pipeline', 'asr_benchmark_luna') diff --git a/experiment/luna/pipeline/luna_ajn_asr_processing.py b/experiment/luna/pipeline/luna_ajn_asr_processing.py new file mode 100644 index 0000000..7ad6122 --- /dev/null +++ b/experiment/luna/pipeline/luna_ajn_asr_processing.py @@ -0,0 +1,55 @@ +from experiment.luna.pipeline.dependency_provider import get_record_provider, GOLD_TRANSCRIPT, \ + get_multiple_files_repository, \ + GOLD_TRANSCRIPT_SPACY, AJN_POLISH_ASR, WORD_AJN_MERTICS_WER, WORD_AJN_ALIGNMENT_WER, AJN_SPACY, \ + POS_AJN_ALIGNMENT_WER, POS_AJN_METRICS_WER +from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask +from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask +from sziszapangma.integration.experiment_manager import ExperimentManager +from sziszapangma.integration.task.asr_task import AsrTask +from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask +from sziszapangma.integration.asr_processor import AsrWebClient + + +def run_luna_experiment(): + record_provider = get_record_provider() + experiment_processor = ExperimentManager( + record_id_iterator=record_provider, + processing_tasks=[ + AsrTask( + task_name='ajn_polish_asr_task', + asr_processor=AsrWebClient('http://localhost:5431/process_asr', '__example_token__'), + 
asr_property_name=AJN_POLISH_ASR, + require_update=False, + record_path_provider=record_provider + ), + ClassicWerMetricTask( + task_name='techmo_word_wer_processing', + asr_property_name=AJN_POLISH_ASR, + gold_transcript_property_name=GOLD_TRANSCRIPT, + metrics_property_name=WORD_AJN_MERTICS_WER, + require_update=False, + alignment_property_name=WORD_AJN_ALIGNMENT_WER + ), + AsrSpacyTokenPosProcessingTask( + task_name='techmo_spacy_task', + input_property_name=AJN_POLISH_ASR, + spacy_property_name=AJN_SPACY, + require_update=True + ), + SpacyPosWerProcessingTask( + task_name='techmo_pos_wer_processing', + require_update=False, + gold_transcript_pos_property_name=GOLD_TRANSCRIPT_SPACY, + asr_pos_property_name=AJN_SPACY, + pos_alignment_wer=POS_AJN_ALIGNMENT_WER, + pos_metrics_wer=POS_AJN_METRICS_WER + ) + ], + experiment_repository=get_multiple_files_repository(), + relation_manager_provider=record_provider + ) + experiment_processor.process() + + +if __name__ == '__main__': + run_luna_experiment() diff --git a/experiment/luna/pipeline/luna_gold_transcript_processing.py b/experiment/luna/pipeline/luna_gold_transcript_processing.py new file mode 100644 index 0000000..6f311f1 --- /dev/null +++ b/experiment/luna/pipeline/luna_gold_transcript_processing.py @@ -0,0 +1,34 @@ +from experiment.luna.pipeline.dependency_provider import get_record_provider, GOLD_TRANSCRIPT, \ + get_multiple_files_repository, GOLD_TRANSCRIPT_SPACY +from experiment.luna.pipeline.task.luna_gold_transcript_processor import LunaGoldTranscriptProcessor +from experiment.pos_processing.gold_transcript_spacy_token_pos_processing_task import \ + GoldTranscriptSpacyTokenPosProcessingTask +from sziszapangma.integration.experiment_manager import ExperimentManager +from sziszapangma.integration.task.gold_transcript_task import GoldTranscriptTask + + +def run_luna_experiment(): + record_provider = get_record_provider() + ExperimentManager( + record_id_iterator=record_provider, + processing_tasks=[ + 
GoldTranscriptTask( + task_name='gold_transcript_task', + gold_transcript_processor=LunaGoldTranscriptProcessor(record_provider), + gold_transcript_property_name=GOLD_TRANSCRIPT, + require_update=False + ), + GoldTranscriptSpacyTokenPosProcessingTask( + task_name='gold_transcript_spacy_task', + input_property_name=GOLD_TRANSCRIPT, + spacy_property_name=GOLD_TRANSCRIPT_SPACY, + require_update=True + ), + ], + experiment_repository=get_multiple_files_repository(), + relation_manager_provider=record_provider + ).process() + + +if __name__ == '__main__': + run_luna_experiment() diff --git a/experiment/luna/pipeline/luna_main.py b/experiment/luna/pipeline/luna_main.py deleted file mode 100644 index d707202..0000000 --- a/experiment/luna/pipeline/luna_main.py +++ /dev/null @@ -1,96 +0,0 @@ -from experiment.luna.pipeline.luna_gold_transcript_processor import LunaGoldTranscriptProcessor -from experiment.luna.luna_record_provider import LunaRecordProvider -from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask -from experiment.pos_processing.gold_transcript_spacy_token_pos_processing_task import \ - GoldTranscriptSpacyTokenPosProcessingTask -from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask -from sziszapangma.integration.asr_processor import AsrPathCacheClient -from sziszapangma.integration.experiment_manager import ExperimentManager -from sziszapangma.integration.path_filter import ExtensionPathFilter -from sziszapangma.integration.repository.experiment_repository import ExperimentRepository -from sziszapangma.integration.repository.multi_files_experiment_repository import \ - MultiFilesExperimentRepository -from sziszapangma.integration.task.asr_task import AsrTask -from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask -from sziszapangma.integration.task.gold_transcript_task import GoldTranscriptTask - -LUNA_DIRECTORY = 
'experiment_data/dataset/LUNA.PL' -GOLD_TRANSCRIPT = 'gold_transcript' -TECHMO_POLISH_ASR = 'techmo_polish_asr' -TECHMO_POLISH_CLASSIC_WER_METRIC = 'techmo_polish_classic_wer_metric' -TECHMO_POLISH_CLASSIC_ALIGNMENT = 'techmo_polish_classic_alignment' -TECHMO_SPACY = 'techmo_spacy' -GOLD_TRANSCRIPT_SPACY = 'gold_transcript_spacy' -POS_ALIGNMENT_WER = 'pos_alignment_wer' -POS_METRICS_WER = 'pos_metrics_wer' - - -def run_luna_experiment(experiment_repository: ExperimentRepository): - record_provider = LunaRecordProvider( - ExtensionPathFilter( - root_directory=f'{LUNA_DIRECTORY}/LUNA.PL', - extension='wav' - ), - relation_manager_root_path='experiment_data/dataset_relation_manager_data/luna' - ) - experiment_processor = ExperimentManager( - record_id_iterator=record_provider, - processing_tasks=[ - GoldTranscriptTask( - task_name='gold_transcript_task', - gold_transcript_processor=LunaGoldTranscriptProcessor(record_provider), - gold_transcript_property_name=GOLD_TRANSCRIPT, - require_update=False - ), - AsrTask( - task_name='techmo_polish_task', - # asr_processor=AsrWebClient('http://192.168.0.124:4999/process_asr', 'test1234'), - asr_processor=AsrPathCacheClient('experiment_data/cached_asr/luna_techmo', record_provider, - record_provider), - asr_property_name=TECHMO_POLISH_ASR, - require_update=False, - record_path_provider=record_provider - ), - ClassicWerMetricTask( - task_name='classic_wer_metric_task', - asr_property_name=TECHMO_POLISH_ASR, - gold_transcript_property_name=GOLD_TRANSCRIPT, - metrics_property_name=TECHMO_POLISH_CLASSIC_WER_METRIC, - require_update=False, - alignment_property_name=TECHMO_POLISH_CLASSIC_ALIGNMENT - ), - GoldTranscriptSpacyTokenPosProcessingTask( - task_name='gold_transcript_spacy_task', - input_property_name=GOLD_TRANSCRIPT, - spacy_property_name=GOLD_TRANSCRIPT_SPACY, - require_update=True - ), - AsrSpacyTokenPosProcessingTask( - task_name='techmo_spacy_task', - input_property_name=TECHMO_POLISH_ASR, - 
spacy_property_name=TECHMO_SPACY, - require_update=True - ), - SpacyPosWerProcessingTask( - task_name='PosWerProcessor', - require_update=False, - gold_transcript_pos_property_name=GOLD_TRANSCRIPT_SPACY, - asr_pos_property_name=TECHMO_SPACY, - pos_alignment_wer=POS_ALIGNMENT_WER, - pos_metrics_wer=POS_METRICS_WER - ) - ], - experiment_repository=experiment_repository, - relation_manager_provider=record_provider - ) - experiment_processor.process() - - -def example_run(): - experiment_repository = MultiFilesExperimentRepository( - 'experiment_data/pipeline', 'asr_benchmark_luna') - run_luna_experiment(experiment_repository) - - -if __name__ == '__main__': - example_run() diff --git a/experiment/luna/pipeline/luna_techmo_processing.py b/experiment/luna/pipeline/luna_techmo_processing.py new file mode 100644 index 0000000..ee12296 --- /dev/null +++ b/experiment/luna/pipeline/luna_techmo_processing.py @@ -0,0 +1,56 @@ +from experiment.luna.pipeline.dependency_provider import get_record_provider, GOLD_TRANSCRIPT, TECHMO_POLISH_ASR, \ + get_multiple_files_repository, \ + GOLD_TRANSCRIPT_SPACY, POS_TECHMO_ALIGNMENT_WER, POS_TECHMO_METRICS_WER, WORD_TECHMO_MERTICS_WER, \ + WORD_TECHMO_ALIGNMENT_WER, TECHMO_SPACY +from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask +from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask +from sziszapangma.integration.asr_processor import AsrPathCacheClient +from sziszapangma.integration.experiment_manager import ExperimentManager +from sziszapangma.integration.task.asr_task import AsrTask +from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask + + +def run_luna_experiment(): + record_provider = get_record_provider() + experiment_processor = ExperimentManager( + record_id_iterator=record_provider, + processing_tasks=[ + AsrTask( + task_name='techmo_polish_asr_task', + 
asr_processor=AsrPathCacheClient('experiment_data/cached_asr/luna_techmo', record_provider, + record_provider), + asr_property_name=TECHMO_POLISH_ASR, + require_update=False, + record_path_provider=record_provider + ), + ClassicWerMetricTask( + task_name='techmo_word_wer_processing', + asr_property_name=TECHMO_POLISH_ASR, + gold_transcript_property_name=GOLD_TRANSCRIPT, + metrics_property_name=WORD_TECHMO_MERTICS_WER, + require_update=False, + alignment_property_name=WORD_TECHMO_ALIGNMENT_WER + ), + AsrSpacyTokenPosProcessingTask( + task_name='techmo_spacy_task', + input_property_name=TECHMO_POLISH_ASR, + spacy_property_name=TECHMO_SPACY, + require_update=True + ), + SpacyPosWerProcessingTask( + task_name='techmo_pos_wer_processing', + require_update=False, + gold_transcript_pos_property_name=GOLD_TRANSCRIPT_SPACY, + asr_pos_property_name=TECHMO_SPACY, + pos_alignment_wer=POS_TECHMO_ALIGNMENT_WER, + pos_metrics_wer=POS_TECHMO_METRICS_WER + ) + ], + experiment_repository=get_multiple_files_repository(), + relation_manager_provider=record_provider + ) + experiment_processor.process() + + +if __name__ == '__main__': + run_luna_experiment() diff --git a/experiment/luna/pipeline/task/__init__.py b/experiment/luna/pipeline/task/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/experiment/luna/pipeline/luna_gold_transcript_processor.py b/experiment/luna/pipeline/task/luna_gold_transcript_processor.py similarity index 98% rename from experiment/luna/pipeline/luna_gold_transcript_processor.py rename to experiment/luna/pipeline/task/luna_gold_transcript_processor.py index e7e36b2..4c5c5b9 100644 --- a/experiment/luna/pipeline/luna_gold_transcript_processor.py +++ b/experiment/luna/pipeline/task/luna_gold_transcript_processor.py @@ -11,7 +11,7 @@ class LunaGoldTranscriptProcessor(GoldTranscriptProcessor): def __init__(self, record_provider: LunaRecordProvider): self._record_provider = record_provider - def parse_word(self, word, relation_manager: 
RelationManager): + def parse_word(self, word, relation_manager: RelationManager) -> Dict[str, str]: all_relations = relation_manager.get_all_relations_for_item(word['id']) pos_id = [it['second_id'] for it in all_relations if it['second_type'] in ['pos']][0] return { diff --git a/experiment/voicelab/import_data.py b/experiment/voicelab/import_data.py index a08b49e..a169468 100644 --- a/experiment/voicelab/import_data.py +++ b/experiment/voicelab/import_data.py @@ -1,6 +1,7 @@ import os.path from typing import List +from experiment.voicelab.voicelab_dependency import get_record_provider from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider from sziszapangma.integration.path_filter import ExtensionPathFilter from sziszapangma.model.model import Word @@ -48,13 +49,7 @@ class VoicelabAdapter: if __name__ == '__main__': - voicelab_record_provider = VoicelabTelcoRecordProvider( - ExtensionPathFilter( - 'experiment_data/dataset/voicelab_cbiz_testset_20220322', - 'wav' - ), - 'experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322' - ) + voicelab_record_provider = get_record_provider() adapter = VoicelabAdapter(voicelab_record_provider) for it in voicelab_record_provider.get_all_records(): adapter.import_record(it) diff --git a/experiment/voicelab/voicelab_dependency.py b/experiment/voicelab/voicelab_dependency.py new file mode 100644 index 0000000..58642e4 --- /dev/null +++ b/experiment/voicelab/voicelab_dependency.py @@ -0,0 +1,41 @@ +from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider +from sziszapangma.integration.path_filter import ExtensionPathFilter +from sziszapangma.integration.repository.multi_files_experiment_repository import MultiFilesExperimentRepository + +DATASET_DIRECTORY = 'experiment_data/dataset/voicelab_cbiz_testset_20220322' + +GOLD_TRANSCRIPT = 'gold_transcript' +GOLD_TRANSCRIPT_SPACY = 'gold_transcript_spacy' + +TECHMO_POLISH_ASR = 'techmo_polish_asr' 
+WORD_TECHMO_MERTICS_WER = 'word_techmo_metrics_wer' +WORD_TECHMO_ALIGNMENT_WER = 'word_techmo_alignment_wer' +TECHMO_SPACY = 'techmo_spacy' +POS_TECHMO_ALIGNMENT_WER = 'pos_techmo_alignment_wer' +POS_TECHMO_METRICS_WER = 'pos_techmo_metrics_wer' + +AJN_POLISH_ASR = 'ajn_polish_asr' +WORD_AJN_MERTICS_WER = 'word_ajn_metrics_wer' +WORD_AJN_ALIGNMENT_WER = 'word_ajn_alignment_wer' +AJN_SPACY = 'ajn_spacy' +POS_AJN_ALIGNMENT_WER = 'pos_ajn_alignment_wer' +POS_AJN_METRICS_WER = 'pos_ajn_metrics_wer' + + +PIPELINE_DATA_DIRECTORY = 'experiment_data/pipeline' +EXPERIMENT_NAME = 'asr_benchmark_voicelab_cbiz_testset_20220322' +RELATION_MANAGER_ROOT_PATH = 'experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322' + + +def get_record_provider() -> VoicelabTelcoRecordProvider: + return VoicelabTelcoRecordProvider( + ExtensionPathFilter( + root_directory=DATASET_DIRECTORY, + extension='wav' + ), + relation_manager_root_path=RELATION_MANAGER_ROOT_PATH + ) + + +def get_repository() -> MultiFilesExperimentRepository: + return MultiFilesExperimentRepository(PIPELINE_DATA_DIRECTORY, EXPERIMENT_NAME) diff --git a/experiment/voicelab/voicelab_pipeline.py b/experiment/voicelab/voicelab_pipeline.py index 37ef974..d58f92e 100644 --- a/experiment/voicelab/voicelab_pipeline.py +++ b/experiment/voicelab/voicelab_pipeline.py @@ -2,6 +2,7 @@ from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpa from experiment.pos_processing.gold_transcript_spacy_token_pos_processing_task import \ GoldTranscriptSpacyTokenPosProcessingTask from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask +from experiment.voicelab.voicelab_dependency import get_record_provider, get_repository from experiment.voicelab.voicelab_gold_transcript_processor import VoicelabGoldTranscriptProcessor from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider from sziszapangma.integration.asr_processor import 
AsrPathCacheClient @@ -29,14 +30,8 @@ EXPERIMENT_NAME = 'asr_benchmark_voicelab_cbiz_testset_20220322' RELATION_MANAGER_ROOT_PATH = 'experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322' -def run_voicelab_experiment(experiment_repository: ExperimentRepository): - record_provider = VoicelabTelcoRecordProvider( - ExtensionPathFilter( - root_directory=DATASET_DIRECTORY, - extension='wav' - ), - relation_manager_root_path=RELATION_MANAGER_ROOT_PATH - ) +def run_voicelab_experiment(): + record_provider = get_record_provider() experiment_processor = ExperimentManager( record_id_iterator=record_provider, processing_tasks=[ @@ -87,16 +82,14 @@ def run_voicelab_experiment(experiment_repository: ExperimentRepository): pos_metrics_wer=POS_METRICS_WER ) ], - experiment_repository=experiment_repository, + experiment_repository=get_repository(), relation_manager_provider=record_provider ) experiment_processor.process() def example_run(): - experiment_repository = MultiFilesExperimentRepository( - PIPELINE_DATA_DIRECTORY, EXPERIMENT_NAME) - run_voicelab_experiment(experiment_repository) + run_voicelab_experiment() if __name__ == '__main__': diff --git a/experiment/voicelab/voicelab_pipeline_ajn_asr.py b/experiment/voicelab/voicelab_pipeline_ajn_asr.py new file mode 100644 index 0000000..285208b --- /dev/null +++ b/experiment/voicelab/voicelab_pipeline_ajn_asr.py @@ -0,0 +1,67 @@ +from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask +from experiment.pos_processing.gold_transcript_spacy_token_pos_processing_task import \ + GoldTranscriptSpacyTokenPosProcessingTask +from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask +from experiment.voicelab.voicelab_dependency import get_record_provider, get_repository, GOLD_TRANSCRIPT, \ + GOLD_TRANSCRIPT_SPACY, TECHMO_POLISH_ASR, AJN_POLISH_ASR, WORD_AJN_MERTICS_WER, WORD_AJN_ALIGNMENT_WER, AJN_SPACY, \ + POS_AJN_ALIGNMENT_WER, 
POS_AJN_METRICS_WER +from experiment.voicelab.voicelab_gold_transcript_processor import VoicelabGoldTranscriptProcessor +from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider +from sziszapangma.integration.asr_processor import AsrPathCacheClient, AsrWebClient +from sziszapangma.integration.experiment_manager import ExperimentManager +from sziszapangma.integration.path_filter import ExtensionPathFilter +from sziszapangma.integration.repository.experiment_repository import ExperimentRepository +from sziszapangma.integration.repository.multi_files_experiment_repository import \ + MultiFilesExperimentRepository +from sziszapangma.integration.task.asr_task import AsrTask +from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask +from sziszapangma.integration.task.gold_transcript_task import GoldTranscriptTask + + +def run_voicelab_experiment(): + record_provider = get_record_provider() + experiment_processor = ExperimentManager( + record_id_iterator=record_provider, + processing_tasks=[ + AsrTask( + task_name='ajn_polish_asr_task', + asr_processor=AsrWebClient('http://localhost:5431/process_asr', '__example_token__'), + asr_property_name=AJN_POLISH_ASR, + require_update=False, + record_path_provider=record_provider + ), + ClassicWerMetricTask( + task_name='ajn_word_wer_processing', + asr_property_name=AJN_POLISH_ASR, + gold_transcript_property_name=GOLD_TRANSCRIPT, + metrics_property_name=WORD_AJN_MERTICS_WER, + require_update=False, + alignment_property_name=WORD_AJN_ALIGNMENT_WER + ), + AsrSpacyTokenPosProcessingTask( + task_name='ajn_spacy_task', + input_property_name=AJN_POLISH_ASR, + spacy_property_name=AJN_SPACY, + require_update=True + ), + SpacyPosWerProcessingTask( + task_name='ajn_pos_wer_processing', + require_update=False, + gold_transcript_pos_property_name=GOLD_TRANSCRIPT_SPACY, + asr_pos_property_name=AJN_SPACY, + pos_alignment_wer=POS_AJN_ALIGNMENT_WER, +
pos_metrics_wer=POS_AJN_METRICS_WER + ) + ], + experiment_repository=get_repository(), + relation_manager_provider=record_provider + ) + experiment_processor.process() + + +def example_run(): + run_voicelab_experiment() + + +if __name__ == '__main__': + example_run() diff --git a/experiment/voicelab/voicelab_pipeline_gold_transcript.py b/experiment/voicelab/voicelab_pipeline_gold_transcript.py new file mode 100644 index 0000000..a9c90ee --- /dev/null +++ b/experiment/voicelab/voicelab_pipeline_gold_transcript.py @@ -0,0 +1,49 @@ +from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask +from experiment.pos_processing.gold_transcript_spacy_token_pos_processing_task import \ + GoldTranscriptSpacyTokenPosProcessingTask +from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask +from experiment.voicelab.voicelab_dependency import get_record_provider, get_repository, GOLD_TRANSCRIPT, \ + GOLD_TRANSCRIPT_SPACY +from experiment.voicelab.voicelab_gold_transcript_processor import VoicelabGoldTranscriptProcessor +from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider +from sziszapangma.integration.asr_processor import AsrPathCacheClient +from sziszapangma.integration.experiment_manager import ExperimentManager +from sziszapangma.integration.path_filter import ExtensionPathFilter +from sziszapangma.integration.repository.experiment_repository import ExperimentRepository +from sziszapangma.integration.repository.multi_files_experiment_repository import \ + MultiFilesExperimentRepository +from sziszapangma.integration.task.asr_task import AsrTask +from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask +from sziszapangma.integration.task.gold_transcript_task import GoldTranscriptTask + + +def run_voicelab_experiment(): + record_provider = get_record_provider() + experiment_processor = ExperimentManager( + 
record_id_iterator=record_provider, + processing_tasks=[ + GoldTranscriptTask( + task_name='gold_transcript_task', + gold_transcript_processor=VoicelabGoldTranscriptProcessor(record_provider), + gold_transcript_property_name=GOLD_TRANSCRIPT, + require_update=False + ), + GoldTranscriptSpacyTokenPosProcessingTask( + task_name='gold_transcript_spacy_task', + input_property_name=GOLD_TRANSCRIPT, + spacy_property_name=GOLD_TRANSCRIPT_SPACY, + require_update=True + ) + ], + experiment_repository=get_repository(), + relation_manager_provider=record_provider + ) + experiment_processor.process() + + +def example_run(): + run_voicelab_experiment() + + +if __name__ == '__main__': + example_run() diff --git a/experiment/voicelab/voicelab_pipeline_techmo.py b/experiment/voicelab/voicelab_pipeline_techmo.py new file mode 100644 index 0000000..03bdd64 --- /dev/null +++ b/experiment/voicelab/voicelab_pipeline_techmo.py @@ -0,0 +1,62 @@ +from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask +from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask +from experiment.voicelab.voicelab_dependency import get_record_provider, get_repository, GOLD_TRANSCRIPT, \ + GOLD_TRANSCRIPT_SPACY, TECHMO_POLISH_ASR, WORD_TECHMO_MERTICS_WER, WORD_TECHMO_ALIGNMENT_WER, TECHMO_SPACY, \ + POS_TECHMO_METRICS_WER, POS_TECHMO_ALIGNMENT_WER +from sziszapangma.integration.asr_processor import AsrPathCacheClient +from sziszapangma.integration.experiment_manager import ExperimentManager +from sziszapangma.integration.task.asr_task import AsrTask +from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask + + +def run_voicelab_experiment(): + record_provider = get_record_provider() + experiment_processor = ExperimentManager( + record_id_iterator=record_provider, + processing_tasks=[ + AsrTask( + task_name='techmo_polish_task', + asr_processor=AsrPathCacheClient( + 
'experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo', + record_provider, + record_provider + ), + asr_property_name=TECHMO_POLISH_ASR, + require_update=False, + record_path_provider=record_provider + ), + ClassicWerMetricTask( + task_name='techmo_word_wer_processing', + asr_property_name=TECHMO_POLISH_ASR, + gold_transcript_property_name=GOLD_TRANSCRIPT, + metrics_property_name=WORD_TECHMO_MERTICS_WER, + require_update=False, + alignment_property_name=WORD_TECHMO_ALIGNMENT_WER + ), + AsrSpacyTokenPosProcessingTask( + task_name='techmo_spacy_task', + input_property_name=TECHMO_POLISH_ASR, + spacy_property_name=TECHMO_SPACY, + require_update=True + ), + SpacyPosWerProcessingTask( + task_name='techmo_pos_wer_processing', + require_update=False, + gold_transcript_pos_property_name=GOLD_TRANSCRIPT_SPACY, + asr_pos_property_name=TECHMO_SPACY, + pos_alignment_wer=POS_TECHMO_ALIGNMENT_WER, + pos_metrics_wer=POS_TECHMO_METRICS_WER + ) + ], + experiment_repository=get_repository(), + relation_manager_provider=record_provider + ) + experiment_processor.process() + + +def example_run(): + run_voicelab_experiment() + + +if __name__ == '__main__': + example_run() diff --git a/experiment_data/cached_asr/.gitignore b/experiment_data/cached_asr/.gitignore index c6c1670..127221d 100644 --- a/experiment_data/cached_asr/.gitignore +++ b/experiment_data/cached_asr/.gitignore @@ -1,2 +1,3 @@ /luna_techmo /voicelab_cbiz_testset_20220322_techmo +/luna_ajn_polish_asr diff --git a/experiment_data/cached_asr/luna_ajn_polish_asr.dvc b/experiment_data/cached_asr/luna_ajn_polish_asr.dvc new file mode 100644 index 0000000..702810e --- /dev/null +++ b/experiment_data/cached_asr/luna_ajn_polish_asr.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 620e178854dbcb69f49a608f34573a88.dir + size: 6159899 + nfiles: 494 + path: luna_ajn_polish_asr diff --git a/sziszapangma/integration/__pycache__/asr_processor.cpython-38.pyc b/sziszapangma/integration/__pycache__/asr_processor.cpython-38.pyc index 
26c7df6c9e9c9e96fc61e0704d394bc1e4d08456..79aba6f4c6f20a2b241734f0e21c01cd2656659d 100644 GIT binary patch delta 86 zcmZ23wo!~Xl$V!_0SMf0W+hc`<dxvywgB>q*?<Ha<75Ml8GK-AHa<o+MlMFS|C29q hoMN<@JeRXuh!3cSi;;s-fQ5+>2>%H1Og7>A3;?`*5FP*k delta 76 zcmdlewp@%il$V!_0SN5VGn4E#@=9=U8v}X8Y(RpIVX^_o3@#9pfsv1qjdAidj#G@L ZlNWM!^KbygxEMJY1(?KGC);s-1_1f&4Lbk; diff --git a/sziszapangma/integration/__pycache__/experiment_manager.cpython-38.pyc b/sziszapangma/integration/__pycache__/experiment_manager.cpython-38.pyc index b999f14b73a56f40a5df21939781357eed62bf5f..1eea679377d1d71135df739087e833c710ea2c8d 100644 GIT binary patch delta 22 ccmcb}b&-oVl$V!_0SMf0W+icL<aJ>M06xJ5IsgCw delta 22 ccmcb}b&-oVl$V!_0SF#MXC`rO<aJ>M06-=MQvd(} -- GitLab