Skip to content
Snippets Groups Projects
Commit 51f72cdf authored by Marcin Wątroba
Browse files

Update pipeline

parent 428b93f6
No related branches found
No related tags found
1 merge request: !13 Change data model
......@@ -55,15 +55,15 @@ stages:
cmd: PYTHONPATH=. python experiment/voicelab/import_data.py
deps:
- path: experiment/voicelab/import_data.py
md5: 0cf7cf604b202489ce3b0cb51bb47fa2
size: 2264
md5: 41acb98a1517e66c052182fe0a1403ba
size: 2108
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
outs:
- path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
md5: 6d56f24b0ff78c0d44ade2114158150d.dir
md5: 926ef9bab4ce41b9de95f2f3d5ab67a0.dir
size: 110711470
nfiles: 1600
luna_gold_transcript_processing:
......@@ -179,3 +179,73 @@ stages:
md5: 4cfbb2830b280084ece14b1ef815b92a.dir
size: 17298
nfiles: 500
voicelab_gold_transcript_processing:
cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_gold_transcript.py\n"
deps:
- path: experiment/voicelab/voicelab_pipeline_gold_transcript.py
md5: 4ba38fdfac616f8a0818cedabf66b94d
size: 2312
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
- path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
md5: 926ef9bab4ce41b9de95f2f3d5ab67a0.dir
size: 110711470
nfiles: 1600
outs:
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
md5: fb6812b2f3044c0285ee6ee2b21d0523.dir
size: 21846798
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
md5: f2e68dcc8842a15e417ae6f5221a802a.dir
size: 26643278
nfiles: 800
voicelab_techmo_processing:
cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_techmo.py\n"
deps:
- path: experiment/voicelab/voicelab_pipeline_techmo.py
md5: 23c0869d7cc9f0088870362d669ab82e
size: 2685
- path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo
md5: 6c3b356723d562c978f84e733b91f5d0.dir
size: 17539259
nfiles: 800
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
md5: fb6812b2f3044c0285ee6ee2b21d0523.dir
size: 21846798
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
md5: f2e68dcc8842a15e417ae6f5221a802a.dir
size: 26643278
nfiles: 800
outs:
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer
md5: 8c5f0380ba2891b3e726d647c2863c60.dir
size: 81650836
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer
md5: b1a674826142a44095a4c6439ac49024.dir
size: 27934
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
md5: c45e29b08af7bb13cdf54da9655bd96c.dir
size: 39158267
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
md5: a39c82666419c2b7791952a1fa116d61.dir
size: 24482297
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer
md5: 72ff86c7cb2e89ac7e04677f532255b2.dir
size: 83756423
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer
md5: 2fe3288abe85e4a385e2aefa0e8cad7e.dir
size: 27780
nfiles: 800
......@@ -62,9 +62,9 @@ stages:
voicelab_gold_transcript_processing:
cmd: |
PYTHONPATH=. python experiment/voicelab/voicelab_gold_transcript_processor.py
PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_gold_transcript.py
deps:
- experiment/voicelab/voicelab_gold_transcript_processor.py
- experiment/voicelab/voicelab_pipeline_gold_transcript.py
- experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
- experiment_data/dataset/voicelab_cbiz_testset_20220322
outs:
......@@ -73,7 +73,7 @@ stages:
voicelab_techmo_processing:
cmd: |
PYTHONPATH=. python experiment/voicelab/voicelab_pipeline_techmo.py
PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_techmo.py
deps:
- experiment/voicelab/voicelab_pipeline_techmo.py
- experiment_data/dataset/voicelab_cbiz_testset_20220322
......@@ -88,20 +88,20 @@ stages:
- experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer
- experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer
voicelab_ajn_processing:
cmd: |
PYTHONPATH=. python experiment/voicelab/voicelab_pipeline_ajn_asr.py
deps:
- experiment/voicelab/voicelab_pipeline_ajn_asr.py
- experiment_data/dataset/voicelab_cbiz_testset_20220322
- experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
- experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
outs:
- experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
- experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer
- experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer
- experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
- experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_alignment_wer
- experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_metrics_wer
# voicelab_ajn_processing:
# cmd: |
# PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_ajn_asr.py
# deps:
# - experiment/voicelab/voicelab_pipeline_ajn_asr.py
# - experiment_data/dataset/voicelab_cbiz_testset_20220322
# - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
# - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
# outs:
# - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
# - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer
# - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer
# - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
# - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_alignment_wer
# - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_metrics_wer
# concurrent features, multiprocessing
This diff is collapsed.
......@@ -64,4 +64,6 @@ def example_run():
if __name__ == '__main__':
example_run()
# example_run()
path = '/home/marcinwatroba/PWR_ASR/asr-benchmarks/experiment_data/dataset/voicelab_cbiz_testset_20220322/bankowe/cbiz_tc_2.agnt.wav'
print(AsrWebClient('http://localhost:5431/process_asr', '__example_token__').call_recognise(path))
outs:
- md5: 94b1709c05bd09b77c5a6850e2f2f373.dir
size: 34654307
- md5: 6c3b356723d562c978f84e733b91f5d0.dir
size: 17539259
nfiles: 800
path: voicelab_cbiz_testset_20220322_techmo
/gold_transcript
/gold_transcript_spacy
/techmo_polish_asr
/word_techmo_metrics_wer
/word_techmo_alignment_wer
/techmo_spacy
/pos_techmo_alignment_wer
/pos_techmo_metrics_wer
import json
import os.path
from pathlib import Path
from typing import Any, List, Optional, Set
......@@ -48,7 +49,7 @@ class MultiFilesExperimentRepository(ExperimentRepository):
def get_all_properties(self) -> Set[str]:
experiment_path = Path(self._root_directory).joinpath(self._experiment_name)
return set([it.name for it in experiment_path.iterdir()])
return set([it.name for it in experiment_path.iterdir() if os.path.isdir(it)])
def _get_file_path(self, property_name: str, record_id: str) -> Path:
return (
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment