From 3be71a1b7f37ccff2a702eb3df1736ed5bd00d50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20W=C4=85troba?= <markowanga@gmail.com>
Date: Fri, 29 Apr 2022 08:38:05 +0200
Subject: [PATCH] Add voicelab pipeline stages

---
 docker/docker-compose.yml                     | 122 +++++++++---------
 dvc.lock                                      | 113 ++++++++++++++++
 dvc.yaml                                      |  84 ++++++++++--
 .../luna/pipeline/dependency_provider.py      |  36 ++++++
 .../luna/pipeline/luna_ajn_asr_processing.py  |  55 ++++++++
 .../luna_gold_transcript_processing.py        |  34 +++++
 experiment/luna/pipeline/luna_main.py         |  96 --------------
 .../luna/pipeline/luna_techmo_processing.py   |  56 ++++++++
 experiment/luna/pipeline/task/__init__.py     |   0
 .../luna_gold_transcript_processor.py         |   2 +-
 experiment/voicelab/import_data.py            |   9 +-
 experiment/voicelab/voicelab_dependency.py    |  41 ++++++
 experiment/voicelab/voicelab_pipeline.py      |  17 +--
 .../voicelab/voicelab_pipeline_ajn_asr.py     |  67 ++++++++++
 .../voicelab_pipeline_gold_transcript.py      |  49 +++++++
 .../voicelab/voicelab_pipeline_techmo.py      |  62 +++++++++
 experiment_data/cached_asr/.gitignore         |   1 +
 .../cached_asr/luna_ajn_polish_asr.dvc        |   5 +
 .../__pycache__/asr_processor.cpython-38.pyc  | Bin 2855 -> 2865 bytes
 .../experiment_manager.cpython-38.pyc         | Bin 1361 -> 1361 bytes
 20 files changed, 662 insertions(+), 187 deletions(-)
 create mode 100644 experiment/luna/pipeline/dependency_provider.py
 create mode 100644 experiment/luna/pipeline/luna_ajn_asr_processing.py
 create mode 100644 experiment/luna/pipeline/luna_gold_transcript_processing.py
 delete mode 100644 experiment/luna/pipeline/luna_main.py
 create mode 100644 experiment/luna/pipeline/luna_techmo_processing.py
 create mode 100644 experiment/luna/pipeline/task/__init__.py
 rename experiment/luna/pipeline/{ => task}/luna_gold_transcript_processor.py (98%)
 create mode 100644 experiment/voicelab/voicelab_dependency.py
 create mode 100644 experiment/voicelab/voicelab_pipeline_ajn_asr.py
 create mode 100644 experiment/voicelab/voicelab_pipeline_gold_transcript.py
 create mode 100644 experiment/voicelab/voicelab_pipeline_techmo.py
 create mode 100644 experiment_data/cached_asr/luna_ajn_polish_asr.dvc

diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index 430cc92..d4486f1 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -1,68 +1,68 @@
 version: "3.8"
 services:
 
-  techmo_asr:
-    image: docker-registry.theliver.pl/techmo-asr:1.1
-    container_name: techmo_asr
-    restart: always
-    ports:
-      - 5001:5000
-    volumes:
-      - /etc/localtime:/etc/localtime:ro
-      - /home/marcinwatroba/.ssh/keys/techmo_asr_server:/keys/techmo_rsa_key:ro
-    environment:
-      - TECHMO_SSH_SERVER_USERNAME=mwatroba
-      - TECHMO_SSH_SERVER_URL=jankocon.clarin-pl.eu
-      - TECHMO_SERVER_SSH_PORT=9222
-      - TECHMO_REMOTE_SERVICE_PORT=12321
-      - TECHMO_SERVER_URL=156.17.135.34
-      - AUTH_TOKEN=__example_token__
+    #  techmo_asr:
+    #    image: docker-registry.theliver.pl/techmo-asr:1.1
+    #    container_name: techmo_asr
+    #    restart: always
+    #    ports:
+    #      - 5001:5000
+    #    volumes:
+    #      - /etc/localtime:/etc/localtime:ro
+    #      - /home/marcinwatroba/.ssh/keys/techmo_asr_server:/keys/techmo_rsa_key:ro
+    #    environment:
+    #      - TECHMO_SSH_SERVER_USERNAME=mwatroba
+    #      - TECHMO_SSH_SERVER_URL=jankocon.clarin-pl.eu
+    #      - TECHMO_SERVER_SSH_PORT=9222
+    #      - TECHMO_REMOTE_SERVICE_PORT=12321
+    #      - TECHMO_SERVER_URL=156.17.135.34
+    #      - AUTH_TOKEN=__example_token__
 
-  transformers-wav2vec2for_ctc:
-    image: docker-registry.theliver.pl/transformers-wav2vec2for_ctc:1.0
-    container_name: transformers-wav2vec2for_ctc
-    restart: always
-    volumes:
-      - /etc/localtime:/etc/localtime:ro
-      - ./wav2vec2for_ctc_models:/models
-    ports:
-      - 5002:5000
-    environment:
-      - AUTH_TOKEN=__example_token__
-      - MODEL_NAME=jonatasgrosman/wav2vec2-large-xlsr-53-polish
-      - SAMPLING_RATE=16000
+    transformers-wav2vec2for_ctc:
+        image: docker-registry.theliver.pl/transformers-wav2vec2for_ctc:1.0
+        container_name: transformers-wav2vec2for_ctc
+        restart: always
+        volumes:
+            - /etc/localtime:/etc/localtime:ro
+            - ./wav2vec2for_ctc_models:/models
+        ports:
+            - "5430:5000"
+        environment:
+            - AUTH_TOKEN=__example_token__
+            - MODEL_NAME=jonatasgrosman/wav2vec2-large-xlsr-53-polish
+            - SAMPLING_RATE=16000
 
-  embedding_service:
-    image: docker-registry.theliver.pl/embedding_docker:1.0
-    container_name: embeddings_service
-    restart: always
-    ports:
-      - 5003:5000
-    environment:
-      - AUTH_TOKEN=__example_token__
-    volumes:
-      - /etc/localtime:/etc/localtime:ro
-      - ./embedding_models:/models
+    #  embedding_service:
+    #    image: docker-registry.theliver.pl/embedding_docker:1.0
+    #    container_name: embeddings_service
+    #    restart: always
+    #    ports:
+    #      - 5003:5000
+    #    environment:
+    #      - AUTH_TOKEN=__example_token__
+    #    volumes:
+    #      - /etc/localtime:/etc/localtime:ro
+    #      - ./embedding_models:/models
 
-  ajn_asr:
-    image: docker-registry.theliver.pl/asr-clarin-pl-service:1.4
-    container_name: ajn_asr
-    restart: always
-    ports:
-      - 5004:5000
-    environment:
-      - AUTH_TOKEN=__example_token__
-    volumes:
-      - /etc/localtime:/etc/localtime:ro
+    ajn_asr:
+        image: docker-registry.theliver.pl/asr-clarin-pl-service:1.4
+        container_name: ajn_asr
+        restart: always
+        ports:
+            - "5431:5000"
+        environment:
+            - AUTH_TOKEN=__example_token__
+        volumes:
+            - /etc/localtime:/etc/localtime:ro
 
-  speechbrain_asr:
-    image: docker-registry.theliver.pl/speechbrain-asr:1.5
-    container_name: speechbrain_asr
-    restart: always
-    ports:
-      - 5005:5000
-    volumes:
-      - /etc/localtime:/etc/localtime:ro
-      - ./speechbrain_asr_models:/models
-    environment:
-      - AUTH_TOKEN=__example_token__
+    speechbrain_asr:
+        image: docker-registry.theliver.pl/speechbrain-asr:1.5
+        container_name: speechbrain_asr
+        restart: always
+        ports:
+            - "5432:5000"
+        volumes:
+            - /etc/localtime:/etc/localtime:ro
+            - ./speechbrain_asr_models:/models
+        environment:
+            - AUTH_TOKEN=__example_token__
diff --git a/dvc.lock b/dvc.lock
index 686566c..5da2911 100644
--- a/dvc.lock
+++ b/dvc.lock
@@ -66,3 +66,116 @@ stages:
       md5: 6d56f24b0ff78c0d44ade2114158150d.dir
       size: 110711470
       nfiles: 1600
+  luna_gold_transcript_processing:
+    cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_gold_transcript_processing.py\n"
+    deps:
+    - path: experiment/luna/pipeline/luna_gold_transcript_processing.py
+      md5: 2bae24d511febebb26b3264b204784f5
+      size: 1466
+    - path: experiment_data/dataset/LUNA.PL
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/dataset_relation_manager_data/luna
+      md5: ff680a49296818460a49bd0c70089a4a.dir
+      size: 229007155
+      nfiles: 1000
+    outs:
+    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
+      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
+      md5: 24a399475b752737db0f2a8671507014.dir
+      size: 6785648
+      nfiles: 500
+  luna_ajn_processing:
+    cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_ajn_asr_processing.py\n"
+    deps:
+    - path: experiment/luna/pipeline/luna_ajn_asr_processing.py
+      md5: ec7d7b5384f845173d9fb77e9cfa9907
+      size: 2501
+    - path: experiment_data/dataset/LUNA.PL
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
+      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
+      md5: 24a399475b752737db0f2a8671507014.dir
+      size: 6785648
+      nfiles: 500
+    outs:
+    - path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
+      md5: 620e178854dbcb69f49a608f34573a88.dir
+      size: 6159899
+      nfiles: 494
+    - path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
+      md5: 312be284d4ec9e38986048e785fcbbc1.dir
+      size: 6535212
+      nfiles: 494
+    - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer
+      md5: 8ad558edb6a8bd2508a7e25bcf53bf94.dir
+      size: 21936929
+      nfiles: 494
+    - path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer
+      md5: 98c74c5bf87637749eac1ed5ff3393b4.dir
+      size: 16842
+      nfiles: 494
+    - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer
+      md5: 1741fff740259398b28bf2a6ba3aec41.dir
+      size: 20671277
+      nfiles: 494
+    - path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer
+      md5: 18605657ff9c7ef3221e27b671a3b4d1.dir
+      size: 16835
+      nfiles: 494
+  luna_techmo_processing:
+    cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py\n"
+    deps:
+    - path: experiment/luna/pipeline/luna_techmo_processing.py
+      md5: b4d5ad7a0d7fb0714a2dc02cb457e8c9
+      size: 2628
+    - path: experiment_data/cached_asr/luna_techmo
+      md5: 033ea7b5434dded73bf869bfdd299462.dir
+      size: 4256479
+      nfiles: 500
+    - path: experiment_data/dataset/LUNA.PL
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
+      md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
+      size: 6706925
+      nfiles: 500
+    - path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
+      md5: 24a399475b752737db0f2a8671507014.dir
+      size: 6785648
+      nfiles: 500
+    outs:
+    - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer
+      md5: c71539f3889c627a371957958bd0907d.dir
+      size: 20897599
+      nfiles: 500
+    - path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer
+      md5: 4efbe309674d9d494bae3dac057025ba.dir
+      size: 17341
+      nfiles: 500
+    - path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
+      md5: acfaec46b2415ed6a64e3a3464d164f8.dir
+      size: 9697519
+      nfiles: 500
+    - path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
+      md5: e869581816457d1585a7e42d0a18b8b2.dir
+      size: 6124559
+      nfiles: 500
+    - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer
+      md5: 0dabd65b3981d588cd23d943abc6e231.dir
+      size: 21380796
+      nfiles: 500
+    - path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer
+      md5: 4cfbb2830b280084ece14b1ef815b92a.dir
+      size: 17298
+      nfiles: 500
diff --git a/dvc.yaml b/dvc.yaml
index f2151c0..590a16c 100644
--- a/dvc.yaml
+++ b/dvc.yaml
@@ -8,17 +8,49 @@ stages:
         outs:
             - experiment_data/dataset_relation_manager_data/luna
 
-    luna_main_pipeline:
+    luna_gold_transcript_processing:
         cmd: |
-            python -m spacy download pl_core_news_lg
-            PYTHONPATH=. python experiment/luna/pipeline/luna_main.py
+            PYTHONPATH=. python experiment/luna/pipeline/luna_gold_transcript_processing.py
         deps:
-            - experiment/luna/pipeline/luna_main.py
+            - experiment/luna/pipeline/luna_gold_transcript_processing.py
             - experiment_data/dataset_relation_manager_data/luna
             - experiment_data/dataset/LUNA.PL
+        outs:
+            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
+            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
+
+    luna_techmo_processing:
+        cmd: |
+            PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py
+        deps:
+            - experiment/luna/pipeline/luna_techmo_processing.py
+            - experiment_data/dataset/LUNA.PL
             - experiment_data/cached_asr/luna_techmo
+            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
+            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
+        outs:
+            - experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
+            - experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer
+            - experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer
+            - experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
+            - experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer
+            - experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer
+
+    luna_ajn_processing:
+        cmd: |
+            PYTHONPATH=. python experiment/luna/pipeline/luna_ajn_asr_processing.py
+        deps:
+            - experiment/luna/pipeline/luna_ajn_asr_processing.py
+            - experiment_data/dataset/LUNA.PL
+            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript
+            - experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
         outs:
-            - experiment_data/pipeline/asr_benchmark_luna
+            - experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
+            - experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer
+            - experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer
+            - experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
+            - experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer
+            - experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer
 
     voicelab_import_to_common_format:
         cmd: PYTHONPATH=. python experiment/voicelab/import_data.py
@@ -28,16 +60,48 @@ stages:
         outs:
             - experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
 
-    voicelab_main_pipeline:
+    voicelab_gold_transcript_processing:  # runs the voicelab gold-transcript pipeline script added by this patch
         cmd: |
-            python -m spacy download pl_core_news_lg
-            PYTHONPATH=. python experiment/voicelab/voicelab_pipeline.py
+            PYTHONPATH=. python experiment/voicelab/voicelab_pipeline_gold_transcript.py
         deps:
-            - experiment/voicelab/voicelab_pipeline.py
+            - experiment/voicelab/voicelab_pipeline_gold_transcript.py
             - experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
             - experiment_data/dataset/voicelab_cbiz_testset_20220322
+        outs:
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
+
+    voicelab_techmo_processing:
+        cmd: |
+            PYTHONPATH=. python experiment/voicelab/voicelab_pipeline_techmo.py
+        deps:
+            - experiment/voicelab/voicelab_pipeline_techmo.py
+            - experiment_data/dataset/voicelab_cbiz_testset_20220322
             - experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
+        outs:
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer
+
+    voicelab_ajn_processing:
+        cmd: |
+            PYTHONPATH=. python experiment/voicelab/voicelab_pipeline_ajn_asr.py
+        deps:
+            - experiment/voicelab/voicelab_pipeline_ajn_asr.py
+            - experiment_data/dataset/voicelab_cbiz_testset_20220322
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
         outs:
-            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_polish_asr
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_metrics_wer
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_ajn_alignment_wer
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/ajn_spacy
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_alignment_wer
+            - experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_ajn_metrics_wer
 
 # concurrent features, multiprocessing
diff --git a/experiment/luna/pipeline/dependency_provider.py b/experiment/luna/pipeline/dependency_provider.py
new file mode 100644
index 0000000..7557f51
--- /dev/null
+++ b/experiment/luna/pipeline/dependency_provider.py
@@ -0,0 +1,36 @@
+from experiment.luna.luna_record_provider import LunaRecordProvider
+from sziszapangma.integration.path_filter import ExtensionPathFilter
+from sziszapangma.integration.repository.multi_files_experiment_repository import MultiFilesExperimentRepository
+
+LUNA_DIRECTORY = 'experiment_data/dataset/LUNA.PL'  # dataset root; also listed as a stage dep in dvc.yaml
+
+GOLD_TRANSCRIPT = 'gold_transcript'  # each value below matches an `outs` dir under asr_benchmark_luna in dvc.yaml
+GOLD_TRANSCRIPT_SPACY = 'gold_transcript_spacy'
+
+TECHMO_POLISH_ASR = 'techmo_polish_asr'
+WORD_TECHMO_MERTICS_WER = 'word_techmo_metrics_wer'  # NOTE: "MERTICS" is a typo for "METRICS"; kept, importers use it
+WORD_TECHMO_ALIGNMENT_WER = 'word_techmo_alignment_wer'
+TECHMO_SPACY = 'techmo_spacy'
+POS_TECHMO_ALIGNMENT_WER = 'pos_techmo_alignment_wer'
+POS_TECHMO_METRICS_WER = 'pos_techmo_metrics_wer'
+
+AJN_POLISH_ASR = 'ajn_polish_asr'
+WORD_AJN_MERTICS_WER = 'word_ajn_metrics_wer'  # NOTE: same "MERTICS" typo as above; kept, importers use it
+WORD_AJN_ALIGNMENT_WER = 'word_ajn_alignment_wer'
+AJN_SPACY = 'ajn_spacy'
+POS_AJN_ALIGNMENT_WER = 'pos_ajn_alignment_wer'
+POS_AJN_METRICS_WER = 'pos_ajn_metrics_wer'
+
+
+def get_record_provider() -> LunaRecordProvider:  # record provider shared by the LUNA stage scripts
+    return LunaRecordProvider(
+        ExtensionPathFilter(
+            root_directory=f'{LUNA_DIRECTORY}/LUNA.PL',  # i.e. experiment_data/dataset/LUNA.PL/LUNA.PL
+            extension='wav'
+        ),
+        relation_manager_root_path='experiment_data/dataset_relation_manager_data/luna'
+    )
+
+
+def get_multiple_files_repository() -> MultiFilesExperimentRepository:  # repository shared by the LUNA stage scripts
+    return MultiFilesExperimentRepository('experiment_data/pipeline', 'asr_benchmark_luna')
diff --git a/experiment/luna/pipeline/luna_ajn_asr_processing.py b/experiment/luna/pipeline/luna_ajn_asr_processing.py
new file mode 100644
index 0000000..7ad6122
--- /dev/null
+++ b/experiment/luna/pipeline/luna_ajn_asr_processing.py
@@ -0,0 +1,55 @@
+from experiment.luna.pipeline.dependency_provider import get_record_provider, GOLD_TRANSCRIPT, \
+    get_multiple_files_repository, \
+    GOLD_TRANSCRIPT_SPACY, AJN_POLISH_ASR, WORD_AJN_MERTICS_WER, WORD_AJN_ALIGNMENT_WER, AJN_SPACY, \
+    POS_AJN_ALIGNMENT_WER, POS_AJN_METRICS_WER
+from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask
+from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask
+from sziszapangma.integration.experiment_manager import ExperimentManager
+from sziszapangma.integration.task.asr_task import AsrTask
+from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask
+from sziszapangma.integration.asr_processor import AsrWebClient
+
+
+def run_luna_experiment():  # runs the AJN ASR, word WER, spaCy POS and POS WER tasks over the LUNA records
+    record_provider = get_record_provider()
+    experiment_processor = ExperimentManager(
+        record_id_iterator=record_provider,
+        processing_tasks=[
+            AsrTask(
+                task_name='ajn_polish_asr_task',
+                asr_processor=AsrWebClient('http://localhost:5431/process_asr', '__example_token__'),  # ajn_asr port in docker-compose.yml
+                asr_property_name=AJN_POLISH_ASR,
+                require_update=False,
+                record_path_provider=record_provider
+            ),
+            ClassicWerMetricTask(
+                task_name='ajn_word_wer_processing',  # was 'techmo_...': copy-paste from the Techmo script
+                asr_property_name=AJN_POLISH_ASR,
+                gold_transcript_property_name=GOLD_TRANSCRIPT,
+                metrics_property_name=WORD_AJN_MERTICS_WER,
+                require_update=False,
+                alignment_property_name=WORD_AJN_ALIGNMENT_WER
+            ),
+            AsrSpacyTokenPosProcessingTask(
+                task_name='ajn_spacy_task',
+                input_property_name=AJN_POLISH_ASR,
+                spacy_property_name=AJN_SPACY,
+                require_update=True
+            ),
+            SpacyPosWerProcessingTask(
+                task_name='ajn_pos_wer_processing',
+                require_update=False,
+                gold_transcript_pos_property_name=GOLD_TRANSCRIPT_SPACY,
+                asr_pos_property_name=AJN_SPACY,
+                pos_alignment_wer=POS_AJN_ALIGNMENT_WER,
+                pos_metrics_wer=POS_AJN_METRICS_WER
+            )
+        ],
+        experiment_repository=get_multiple_files_repository(),
+        relation_manager_provider=record_provider
+    )
+    experiment_processor.process()
+
+
+if __name__ == '__main__':
+    run_luna_experiment()
diff --git a/experiment/luna/pipeline/luna_gold_transcript_processing.py b/experiment/luna/pipeline/luna_gold_transcript_processing.py
new file mode 100644
index 0000000..6f311f1
--- /dev/null
+++ b/experiment/luna/pipeline/luna_gold_transcript_processing.py
@@ -0,0 +1,34 @@
+from experiment.luna.pipeline.dependency_provider import get_record_provider, GOLD_TRANSCRIPT, \
+    get_multiple_files_repository, GOLD_TRANSCRIPT_SPACY
+from experiment.luna.pipeline.task.luna_gold_transcript_processor import LunaGoldTranscriptProcessor
+from experiment.pos_processing.gold_transcript_spacy_token_pos_processing_task import \
+    GoldTranscriptSpacyTokenPosProcessingTask
+from sziszapangma.integration.experiment_manager import ExperimentManager
+from sziszapangma.integration.task.gold_transcript_task import GoldTranscriptTask
+
+
+def run_luna_experiment():
+    record_provider = get_record_provider()
+    ExperimentManager(
+        record_id_iterator=record_provider,
+        processing_tasks=[
+            GoldTranscriptTask(
+                task_name='gold_transcript_task',
+                gold_transcript_processor=LunaGoldTranscriptProcessor(record_provider),
+                gold_transcript_property_name=GOLD_TRANSCRIPT,
+                require_update=False
+            ),
+            GoldTranscriptSpacyTokenPosProcessingTask(
+                task_name='gold_transcript_spacy_task',
+                input_property_name=GOLD_TRANSCRIPT,
+                spacy_property_name=GOLD_TRANSCRIPT_SPACY,
+                require_update=True
+            ),
+        ],
+        experiment_repository=get_multiple_files_repository(),
+        relation_manager_provider=record_provider
+    ).process()
+
+
+if __name__ == '__main__':
+    run_luna_experiment()
diff --git a/experiment/luna/pipeline/luna_main.py b/experiment/luna/pipeline/luna_main.py
deleted file mode 100644
index d707202..0000000
--- a/experiment/luna/pipeline/luna_main.py
+++ /dev/null
@@ -1,96 +0,0 @@
-from experiment.luna.pipeline.luna_gold_transcript_processor import LunaGoldTranscriptProcessor
-from experiment.luna.luna_record_provider import LunaRecordProvider
-from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask
-from experiment.pos_processing.gold_transcript_spacy_token_pos_processing_task import \
-    GoldTranscriptSpacyTokenPosProcessingTask
-from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask
-from sziszapangma.integration.asr_processor import AsrPathCacheClient
-from sziszapangma.integration.experiment_manager import ExperimentManager
-from sziszapangma.integration.path_filter import ExtensionPathFilter
-from sziszapangma.integration.repository.experiment_repository import ExperimentRepository
-from sziszapangma.integration.repository.multi_files_experiment_repository import \
-    MultiFilesExperimentRepository
-from sziszapangma.integration.task.asr_task import AsrTask
-from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask
-from sziszapangma.integration.task.gold_transcript_task import GoldTranscriptTask
-
-LUNA_DIRECTORY = 'experiment_data/dataset/LUNA.PL'
-GOLD_TRANSCRIPT = 'gold_transcript'
-TECHMO_POLISH_ASR = 'techmo_polish_asr'
-TECHMO_POLISH_CLASSIC_WER_METRIC = 'techmo_polish_classic_wer_metric'
-TECHMO_POLISH_CLASSIC_ALIGNMENT = 'techmo_polish_classic_alignment'
-TECHMO_SPACY = 'techmo_spacy'
-GOLD_TRANSCRIPT_SPACY = 'gold_transcript_spacy'
-POS_ALIGNMENT_WER = 'pos_alignment_wer'
-POS_METRICS_WER = 'pos_metrics_wer'
-
-
-def run_luna_experiment(experiment_repository: ExperimentRepository):
-    record_provider = LunaRecordProvider(
-        ExtensionPathFilter(
-            root_directory=f'{LUNA_DIRECTORY}/LUNA.PL',
-            extension='wav'
-        ),
-        relation_manager_root_path='experiment_data/dataset_relation_manager_data/luna'
-    )
-    experiment_processor = ExperimentManager(
-        record_id_iterator=record_provider,
-        processing_tasks=[
-            GoldTranscriptTask(
-                task_name='gold_transcript_task',
-                gold_transcript_processor=LunaGoldTranscriptProcessor(record_provider),
-                gold_transcript_property_name=GOLD_TRANSCRIPT,
-                require_update=False
-            ),
-            AsrTask(
-                task_name='techmo_polish_task',
-                # asr_processor=AsrWebClient('http://192.168.0.124:4999/process_asr', 'test1234'),
-                asr_processor=AsrPathCacheClient('experiment_data/cached_asr/luna_techmo', record_provider,
-                                                 record_provider),
-                asr_property_name=TECHMO_POLISH_ASR,
-                require_update=False,
-                record_path_provider=record_provider
-            ),
-            ClassicWerMetricTask(
-                task_name='classic_wer_metric_task',
-                asr_property_name=TECHMO_POLISH_ASR,
-                gold_transcript_property_name=GOLD_TRANSCRIPT,
-                metrics_property_name=TECHMO_POLISH_CLASSIC_WER_METRIC,
-                require_update=False,
-                alignment_property_name=TECHMO_POLISH_CLASSIC_ALIGNMENT
-            ),
-            GoldTranscriptSpacyTokenPosProcessingTask(
-                task_name='gold_transcript_spacy_task',
-                input_property_name=GOLD_TRANSCRIPT,
-                spacy_property_name=GOLD_TRANSCRIPT_SPACY,
-                require_update=True
-            ),
-            AsrSpacyTokenPosProcessingTask(
-                task_name='techmo_spacy_task',
-                input_property_name=TECHMO_POLISH_ASR,
-                spacy_property_name=TECHMO_SPACY,
-                require_update=True
-            ),
-            SpacyPosWerProcessingTask(
-                task_name='PosWerProcessor',
-                require_update=False,
-                gold_transcript_pos_property_name=GOLD_TRANSCRIPT_SPACY,
-                asr_pos_property_name=TECHMO_SPACY,
-                pos_alignment_wer=POS_ALIGNMENT_WER,
-                pos_metrics_wer=POS_METRICS_WER
-            )
-        ],
-        experiment_repository=experiment_repository,
-        relation_manager_provider=record_provider
-    )
-    experiment_processor.process()
-
-
-def example_run():
-    experiment_repository = MultiFilesExperimentRepository(
-        'experiment_data/pipeline', 'asr_benchmark_luna')
-    run_luna_experiment(experiment_repository)
-
-
-if __name__ == '__main__':
-    example_run()
diff --git a/experiment/luna/pipeline/luna_techmo_processing.py b/experiment/luna/pipeline/luna_techmo_processing.py
new file mode 100644
index 0000000..ee12296
--- /dev/null
+++ b/experiment/luna/pipeline/luna_techmo_processing.py
@@ -0,0 +1,56 @@
+from experiment.luna.pipeline.dependency_provider import get_record_provider, GOLD_TRANSCRIPT, TECHMO_POLISH_ASR, \
+    get_multiple_files_repository, \
+    GOLD_TRANSCRIPT_SPACY, POS_TECHMO_ALIGNMENT_WER, POS_TECHMO_METRICS_WER, WORD_TECHMO_MERTICS_WER, \
+    WORD_TECHMO_ALIGNMENT_WER, TECHMO_SPACY
+from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask
+from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask
+from sziszapangma.integration.asr_processor import AsrPathCacheClient
+from sziszapangma.integration.experiment_manager import ExperimentManager
+from sziszapangma.integration.task.asr_task import AsrTask
+from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask
+
+
+def run_luna_experiment():
+    record_provider = get_record_provider()
+    experiment_processor = ExperimentManager(
+        record_id_iterator=record_provider,
+        processing_tasks=[
+            AsrTask(
+                task_name='techmo_polish_asr_task',
+                asr_processor=AsrPathCacheClient('experiment_data/cached_asr/luna_techmo', record_provider,
+                                                 record_provider),
+                asr_property_name=TECHMO_POLISH_ASR,
+                require_update=False,
+                record_path_provider=record_provider
+            ),
+            ClassicWerMetricTask(
+                task_name='techmo_word_wer_processing',
+                asr_property_name=TECHMO_POLISH_ASR,
+                gold_transcript_property_name=GOLD_TRANSCRIPT,
+                metrics_property_name=WORD_TECHMO_MERTICS_WER,
+                require_update=False,
+                alignment_property_name=WORD_TECHMO_ALIGNMENT_WER
+            ),
+            AsrSpacyTokenPosProcessingTask(
+                task_name='techmo_spacy_task',
+                input_property_name=TECHMO_POLISH_ASR,
+                spacy_property_name=TECHMO_SPACY,
+                require_update=True
+            ),
+            SpacyPosWerProcessingTask(
+                task_name='techmo_pos_wer_processing',
+                require_update=False,
+                gold_transcript_pos_property_name=GOLD_TRANSCRIPT_SPACY,
+                asr_pos_property_name=TECHMO_SPACY,
+                pos_alignment_wer=POS_TECHMO_ALIGNMENT_WER,
+                pos_metrics_wer=POS_TECHMO_METRICS_WER
+            )
+        ],
+        experiment_repository=get_multiple_files_repository(),
+        relation_manager_provider=record_provider
+    )
+    experiment_processor.process()
+
+
+if __name__ == '__main__':
+    run_luna_experiment()
diff --git a/experiment/luna/pipeline/task/__init__.py b/experiment/luna/pipeline/task/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/experiment/luna/pipeline/luna_gold_transcript_processor.py b/experiment/luna/pipeline/task/luna_gold_transcript_processor.py
similarity index 98%
rename from experiment/luna/pipeline/luna_gold_transcript_processor.py
rename to experiment/luna/pipeline/task/luna_gold_transcript_processor.py
index e7e36b2..4c5c5b9 100644
--- a/experiment/luna/pipeline/luna_gold_transcript_processor.py
+++ b/experiment/luna/pipeline/task/luna_gold_transcript_processor.py
@@ -11,7 +11,7 @@ class LunaGoldTranscriptProcessor(GoldTranscriptProcessor):
     def __init__(self, record_provider: LunaRecordProvider):
         self._record_provider = record_provider
 
-    def parse_word(self, word, relation_manager: RelationManager):
+    def parse_word(self, word, relation_manager: RelationManager) -> Dict[str, str]:
         all_relations = relation_manager.get_all_relations_for_item(word['id'])
         pos_id = [it['second_id'] for it in all_relations if it['second_type'] in ['pos']][0]
         return {
diff --git a/experiment/voicelab/import_data.py b/experiment/voicelab/import_data.py
index a08b49e..a169468 100644
--- a/experiment/voicelab/import_data.py
+++ b/experiment/voicelab/import_data.py
@@ -1,6 +1,7 @@
 import os.path
 from typing import List
 
+from experiment.voicelab.voicelab_dependency import get_record_provider
 from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider
 from sziszapangma.integration.path_filter import ExtensionPathFilter
 from sziszapangma.model.model import Word
@@ -48,13 +49,7 @@ class VoicelabAdapter:
 
 
 if __name__ == '__main__':
-    voicelab_record_provider = VoicelabTelcoRecordProvider(
-        ExtensionPathFilter(
-            'experiment_data/dataset/voicelab_cbiz_testset_20220322',
-            'wav'
-        ),
-        'experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322'
-    )
+    voicelab_record_provider = get_record_provider()
     adapter = VoicelabAdapter(voicelab_record_provider)
     for it in voicelab_record_provider.get_all_records():
         adapter.import_record(it)
diff --git a/experiment/voicelab/voicelab_dependency.py b/experiment/voicelab/voicelab_dependency.py
new file mode 100644
index 0000000..58642e4
--- /dev/null
+++ b/experiment/voicelab/voicelab_dependency.py
@@ -0,0 +1,51 @@
+"""Shared constants and factory helpers for the voicelab_cbiz_testset_20220322 pipeline stages."""
+from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider
+from sziszapangma.integration.path_filter import ExtensionPathFilter
+from sziszapangma.integration.repository.multi_files_experiment_repository import MultiFilesExperimentRepository
+
+# Root directory handed to the wav ExtensionPathFilter in get_record_provider().
+DATASET_DIRECTORY = 'experiment_data/dataset/voicelab_cbiz_testset_20220322'
+
+# Property names shared by the pipeline stage scripts.
+GOLD_TRANSCRIPT = 'gold_transcript'
+GOLD_TRANSCRIPT_SPACY = 'gold_transcript_spacy'
+
+TECHMO_POLISH_ASR = 'techmo_polish_asr'
+WORD_TECHMO_MERTICS_WER = 'word_techmo_metrics_wer'
+# Correctly spelled alias; the misspelled name stays because stage scripts import it.
+WORD_TECHMO_METRICS_WER = WORD_TECHMO_MERTICS_WER
+WORD_TECHMO_ALIGNMENT_WER = 'word_techmo_alignment_wer'
+TECHMO_SPACY = 'techmo_spacy'
+POS_TECHMO_ALIGNMENT_WER = 'pos_techmo_alignment_wer'
+POS_TECHMO_METRICS_WER = 'pos_techmo_metrics_wer'
+
+AJN_POLISH_ASR = 'ajn_polish_asr'
+WORD_AJN_MERTICS_WER = 'word_ajn_metrics_wer'
+# Correctly spelled alias; the misspelled name stays because stage scripts import it.
+WORD_AJN_METRICS_WER = WORD_AJN_MERTICS_WER
+WORD_AJN_ALIGNMENT_WER = 'word_ajn_alignment_wer'
+AJN_SPACY = 'ajn_spacy'
+POS_AJN_ALIGNMENT_WER = 'pos_ajn_alignment_wer'
+POS_AJN_METRICS_WER = 'pos_ajn_metrics_wer'
+
+
+# Arguments for get_repository() and for the record provider's relation manager root.
+PIPELINE_DATA_DIRECTORY = 'experiment_data/pipeline'
+EXPERIMENT_NAME = 'asr_benchmark_voicelab_cbiz_testset_20220322'
+RELATION_MANAGER_ROOT_PATH = 'experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322'
+
+
+def get_record_provider() -> VoicelabTelcoRecordProvider:
+    """Build the record provider from a wav ExtensionPathFilter rooted at DATASET_DIRECTORY."""
+    return VoicelabTelcoRecordProvider(
+        ExtensionPathFilter(
+            root_directory=DATASET_DIRECTORY,
+            extension='wav'
+        ),
+        relation_manager_root_path=RELATION_MANAGER_ROOT_PATH
+    )
+
+
+def get_repository() -> MultiFilesExperimentRepository:
+    """Build the experiment repository from PIPELINE_DATA_DIRECTORY and EXPERIMENT_NAME."""
+    return MultiFilesExperimentRepository(PIPELINE_DATA_DIRECTORY, EXPERIMENT_NAME)
diff --git a/experiment/voicelab/voicelab_pipeline.py b/experiment/voicelab/voicelab_pipeline.py
index 37ef974..d58f92e 100644
--- a/experiment/voicelab/voicelab_pipeline.py
+++ b/experiment/voicelab/voicelab_pipeline.py
@@ -2,6 +2,7 @@ from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpa
 from experiment.pos_processing.gold_transcript_spacy_token_pos_processing_task import \
     GoldTranscriptSpacyTokenPosProcessingTask
 from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask
+from experiment.voicelab.voicelab_dependency import get_record_provider, get_repository
 from experiment.voicelab.voicelab_gold_transcript_processor import VoicelabGoldTranscriptProcessor
 from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider
 from sziszapangma.integration.asr_processor import AsrPathCacheClient
@@ -29,14 +30,8 @@ EXPERIMENT_NAME = 'asr_benchmark_voicelab_cbiz_testset_20220322'
 RELATION_MANAGER_ROOT_PATH = 'experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322'
 
 
-def run_voicelab_experiment(experiment_repository: ExperimentRepository):
-    record_provider = VoicelabTelcoRecordProvider(
-        ExtensionPathFilter(
-            root_directory=DATASET_DIRECTORY,
-            extension='wav'
-        ),
-        relation_manager_root_path=RELATION_MANAGER_ROOT_PATH
-    )
+def run_voicelab_experiment():
+    record_provider = get_record_provider()
     experiment_processor = ExperimentManager(
         record_id_iterator=record_provider,
         processing_tasks=[
@@ -87,16 +82,14 @@ def run_voicelab_experiment(experiment_repository: ExperimentRepository):
                 pos_metrics_wer=POS_METRICS_WER
             )
         ],
-        experiment_repository=experiment_repository,
+        experiment_repository=get_repository(),
         relation_manager_provider=record_provider
     )
     experiment_processor.process()
 
 
 def example_run():
-    experiment_repository = MultiFilesExperimentRepository(
-        PIPELINE_DATA_DIRECTORY, EXPERIMENT_NAME)
-    run_voicelab_experiment(experiment_repository)
+    run_voicelab_experiment()
 
 
 if __name__ == '__main__':
diff --git a/experiment/voicelab/voicelab_pipeline_ajn_asr.py b/experiment/voicelab/voicelab_pipeline_ajn_asr.py
new file mode 100644
index 0000000..285208b
--- /dev/null
+++ b/experiment/voicelab/voicelab_pipeline_ajn_asr.py
@@ -0,0 +1,80 @@
+from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask
+from experiment.pos_processing.gold_transcript_spacy_token_pos_processing_task import \
+    GoldTranscriptSpacyTokenPosProcessingTask
+from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask
+from experiment.voicelab.voicelab_dependency import get_record_provider, get_repository, GOLD_TRANSCRIPT, \
+    GOLD_TRANSCRIPT_SPACY, TECHMO_POLISH_ASR, AJN_POLISH_ASR, WORD_AJN_MERTICS_WER, WORD_AJN_ALIGNMENT_WER, AJN_SPACY, \
+    POS_AJN_ALIGNMENT_WER, POS_AJN_METRICS_WER
+from experiment.voicelab.voicelab_gold_transcript_processor import VoicelabGoldTranscriptProcessor
+from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider
+from sziszapangma.integration.asr_processor import AsrPathCacheClient, AsrWebClient
+from sziszapangma.integration.experiment_manager import ExperimentManager
+from sziszapangma.integration.path_filter import ExtensionPathFilter
+from sziszapangma.integration.repository.experiment_repository import ExperimentRepository
+from sziszapangma.integration.repository.multi_files_experiment_repository import \
+    MultiFilesExperimentRepository
+from sziszapangma.integration.task.asr_task import AsrTask
+from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask
+from sziszapangma.integration.task.gold_transcript_task import GoldTranscriptTask
+
+
+def run_voicelab_experiment(asr_url: str = 'http://localhost:5431/process_asr',
+                            auth_token: str = '__example_token__'):
+    """Run the AJN ASR stage of the voicelab pipeline.
+
+    Chains AsrTask (backed by AsrWebClient), ClassicWerMetricTask,
+    AsrSpacyTokenPosProcessingTask and SpacyPosWerProcessingTask over the
+    records from get_record_provider(), using get_repository() as the
+    experiment repository.
+
+    :param asr_url: first argument passed to AsrWebClient.
+    :param auth_token: second argument passed to AsrWebClient; the default
+        looks like a placeholder value rather than a real credential.
+    """
+    record_provider = get_record_provider()
+    experiment_processor = ExperimentManager(
+        record_id_iterator=record_provider,
+        processing_tasks=[
+            AsrTask(
+                task_name='ajn_polish_asr_task',
+                asr_processor=AsrWebClient(asr_url, auth_token),
+                asr_property_name=AJN_POLISH_ASR,
+                require_update=False,
+                record_path_provider=record_provider
+            ),
+            ClassicWerMetricTask(
+                task_name='ajn_word_wer_processing',
+                asr_property_name=AJN_POLISH_ASR,
+                gold_transcript_property_name=GOLD_TRANSCRIPT,
+                metrics_property_name=WORD_AJN_MERTICS_WER,
+                require_update=False,
+                alignment_property_name=WORD_AJN_ALIGNMENT_WER
+            ),
+            AsrSpacyTokenPosProcessingTask(
+                task_name='ajn_spacy_task',
+                input_property_name=AJN_POLISH_ASR,
+                spacy_property_name=AJN_SPACY,
+                require_update=True
+            ),
+            SpacyPosWerProcessingTask(
+                task_name='ajn_pos_wer_processing',
+                require_update=False,
+                gold_transcript_pos_property_name=GOLD_TRANSCRIPT_SPACY,
+                asr_pos_property_name=AJN_SPACY,
+                pos_alignment_wer=POS_AJN_ALIGNMENT_WER,
+                pos_metrics_wer=POS_AJN_METRICS_WER
+            )
+        ],
+        experiment_repository=get_repository(),
+        relation_manager_provider=record_provider
+    )
+    experiment_processor.process()
+
+
+def example_run():
+    """Run the stage with default arguments; called when the module is executed as a script."""
+    run_voicelab_experiment()
+
+
+if __name__ == '__main__':
+    example_run()
diff --git a/experiment/voicelab/voicelab_pipeline_gold_transcript.py b/experiment/voicelab/voicelab_pipeline_gold_transcript.py
new file mode 100644
index 0000000..a9c90ee
--- /dev/null
+++ b/experiment/voicelab/voicelab_pipeline_gold_transcript.py
@@ -0,0 +1,53 @@
+from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask
+from experiment.pos_processing.gold_transcript_spacy_token_pos_processing_task import \
+    GoldTranscriptSpacyTokenPosProcessingTask
+from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask
+from experiment.voicelab.voicelab_dependency import get_record_provider, get_repository, GOLD_TRANSCRIPT, \
+    GOLD_TRANSCRIPT_SPACY
+from experiment.voicelab.voicelab_gold_transcript_processor import VoicelabGoldTranscriptProcessor
+from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider
+from sziszapangma.integration.asr_processor import AsrPathCacheClient
+from sziszapangma.integration.experiment_manager import ExperimentManager
+from sziszapangma.integration.path_filter import ExtensionPathFilter
+from sziszapangma.integration.repository.experiment_repository import ExperimentRepository
+from sziszapangma.integration.repository.multi_files_experiment_repository import \
+    MultiFilesExperimentRepository
+from sziszapangma.integration.task.asr_task import AsrTask
+from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask
+from sziszapangma.integration.task.gold_transcript_task import GoldTranscriptTask
+
+
+def run_voicelab_experiment():
+    """Run the gold-transcript stage of the voicelab pipeline.
+
+    Builds GoldTranscriptTask followed by GoldTranscriptSpacyTokenPosProcessingTask
+    and processes them with an ExperimentManager.
+    """
+    provider = get_record_provider()
+    gold_task = GoldTranscriptTask(
+        task_name='gold_transcript_task',
+        gold_transcript_processor=VoicelabGoldTranscriptProcessor(provider),
+        gold_transcript_property_name=GOLD_TRANSCRIPT,
+        require_update=False
+    )
+    gold_spacy_task = GoldTranscriptSpacyTokenPosProcessingTask(
+        task_name='gold_transcript_spacy_task',
+        input_property_name=GOLD_TRANSCRIPT,
+        spacy_property_name=GOLD_TRANSCRIPT_SPACY,
+        require_update=True
+    )
+    ExperimentManager(
+        record_id_iterator=provider,
+        processing_tasks=[gold_task, gold_spacy_task],
+        experiment_repository=get_repository(),
+        relation_manager_provider=provider
+    ).process()
+
+
+def example_run():
+    """Run the stage; called when the module is executed as a script."""
+    run_voicelab_experiment()
+
+
+if __name__ == '__main__':
+    example_run()
diff --git a/experiment/voicelab/voicelab_pipeline_techmo.py b/experiment/voicelab/voicelab_pipeline_techmo.py
new file mode 100644
index 0000000..03bdd64
--- /dev/null
+++ b/experiment/voicelab/voicelab_pipeline_techmo.py
@@ -0,0 +1,71 @@
+from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask
+from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask
+from experiment.voicelab.voicelab_dependency import get_record_provider, get_repository, GOLD_TRANSCRIPT, \
+    GOLD_TRANSCRIPT_SPACY, TECHMO_POLISH_ASR, WORD_TECHMO_MERTICS_WER, WORD_TECHMO_ALIGNMENT_WER, TECHMO_SPACY, \
+    POS_TECHMO_METRICS_WER, POS_TECHMO_ALIGNMENT_WER
+from sziszapangma.integration.asr_processor import AsrPathCacheClient
+from sziszapangma.integration.experiment_manager import ExperimentManager
+from sziszapangma.integration.task.asr_task import AsrTask
+from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask
+
+
+def run_voicelab_experiment():
+    """Run the Techmo ASR stage of the voicelab pipeline.
+
+    Builds AsrTask (backed by AsrPathCacheClient), ClassicWerMetricTask,
+    AsrSpacyTokenPosProcessingTask and SpacyPosWerProcessingTask, then
+    processes them with an ExperimentManager.
+    """
+    provider = get_record_provider()
+    cached_asr = AsrPathCacheClient(
+        'experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo',
+        provider,
+        provider
+    )
+    tasks = [
+        AsrTask(
+            task_name='techmo_polish_task',
+            asr_processor=cached_asr,
+            asr_property_name=TECHMO_POLISH_ASR,
+            require_update=False,
+            record_path_provider=provider
+        ),
+        ClassicWerMetricTask(
+            task_name='techmo_word_wer_processing',
+            asr_property_name=TECHMO_POLISH_ASR,
+            gold_transcript_property_name=GOLD_TRANSCRIPT,
+            metrics_property_name=WORD_TECHMO_MERTICS_WER,
+            require_update=False,
+            alignment_property_name=WORD_TECHMO_ALIGNMENT_WER
+        ),
+        AsrSpacyTokenPosProcessingTask(
+            task_name='techmo_spacy_task',
+            input_property_name=TECHMO_POLISH_ASR,
+            spacy_property_name=TECHMO_SPACY,
+            require_update=True
+        ),
+        SpacyPosWerProcessingTask(
+            task_name='techmo_pos_wer_processing',
+            require_update=False,
+            gold_transcript_pos_property_name=GOLD_TRANSCRIPT_SPACY,
+            asr_pos_property_name=TECHMO_SPACY,
+            pos_alignment_wer=POS_TECHMO_ALIGNMENT_WER,
+            pos_metrics_wer=POS_TECHMO_METRICS_WER
+        ),
+    ]
+    manager = ExperimentManager(
+        record_id_iterator=provider,
+        processing_tasks=tasks,
+        experiment_repository=get_repository(),
+        relation_manager_provider=provider
+    )
+    manager.process()
+
+
+def example_run():
+    """Run the stage; called when the module is executed as a script."""
+    run_voicelab_experiment()
+
+
+if __name__ == '__main__':
+    example_run()
diff --git a/experiment_data/cached_asr/.gitignore b/experiment_data/cached_asr/.gitignore
index c6c1670..127221d 100644
--- a/experiment_data/cached_asr/.gitignore
+++ b/experiment_data/cached_asr/.gitignore
@@ -1,2 +1,3 @@
 /luna_techmo
 /voicelab_cbiz_testset_20220322_techmo
+/luna_ajn_polish_asr
diff --git a/experiment_data/cached_asr/luna_ajn_polish_asr.dvc b/experiment_data/cached_asr/luna_ajn_polish_asr.dvc
new file mode 100644
index 0000000..702810e
--- /dev/null
+++ b/experiment_data/cached_asr/luna_ajn_polish_asr.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: 620e178854dbcb69f49a608f34573a88.dir
+  size: 6159899
+  nfiles: 494
+  path: luna_ajn_polish_asr
diff --git a/sziszapangma/integration/__pycache__/asr_processor.cpython-38.pyc b/sziszapangma/integration/__pycache__/asr_processor.cpython-38.pyc
index 26c7df6c9e9c9e96fc61e0704d394bc1e4d08456..79aba6f4c6f20a2b241734f0e21c01cd2656659d 100644
GIT binary patch
delta 86
zcmZ23wo!~Xl$V!_0SMf0W+hc`<dxvywgB>q*?<Ha<75Ml8GK-AHa<o+MlMFS|C29q
hoMN<@JeRXuh!3cSi;;s-fQ5+>2>%H1Og7>A3;?`*5FP*k

delta 76
zcmdlewp@%il$V!_0SN5VGn4E#@=9=U8v}X8Y(RpIVX^_o3@#9pfsv1qjdAidj#G@L
ZlNWM!^KbygxEMJY1(?KGC);s-1_1f&4Lbk;

diff --git a/sziszapangma/integration/__pycache__/experiment_manager.cpython-38.pyc b/sziszapangma/integration/__pycache__/experiment_manager.cpython-38.pyc
index b999f14b73a56f40a5df21939781357eed62bf5f..1eea679377d1d71135df739087e833c710ea2c8d 100644
GIT binary patch
delta 22
ccmcb}b&-oVl$V!_0SMf0W+icL<aJ>M06xJ5IsgCw

delta 22
ccmcb}b&-oVl$V!_0SF#MXC`rO<aJ>M06-=MQvd(}

-- 
GitLab