From c67a89c30cb26c5d39f1d6664d2420ff935eccd6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20W=C4=85troba?= <markowanga@gmail.com>
Date: Wed, 27 Apr 2022 19:00:25 +0200
Subject: [PATCH] Finish processing luna

---
 dvc.lock                                      |  31 +-
 dvc.yaml                                      |  31 +-
 experiment/luna/pipeline/luna_main.py         |   3 +-
 experiment_data/pipeline/.gitignore           |   1 +
 poetry.lock                                   | 443 +++++++++++++++++-
 pyproject.toml                                |   1 +
 .../__pycache__/__init__.cpython-38.pyc       | Bin 307 -> 306 bytes
 .../core/__pycache__/__init__.cpython-38.pyc  | Bin 167 -> 163 bytes
 .../__pycache__/__init__.cpython-38.pyc       | Bin 179 -> 175 bytes
 .../embedding_transformer.cpython-38.pyc      | Bin 947 -> 946 bytes
 .../wer/__pycache__/__init__.cpython-38.pyc   | Bin 171 -> 167 bytes
 .../__pycache__/wer_calculator.cpython-38.pyc | Bin 2274 -> 2273 bytes
 .../__pycache__/__init__.cpython-38.pyc       | Bin 174 -> 170 bytes
 .../__pycache__/asr_processor.cpython-38.pyc  | Bin 2207 -> 2855 bytes
 .../experiment_manager.cpython-38.pyc         | Bin 1362 -> 1361 bytes
 .../gold_transcript_processor.cpython-38.pyc  | Bin 837 -> 836 bytes
 .../__pycache__/path_filter.cpython-38.pyc    | Bin 1966 -> 1965 bytes
 .../record_id_iterator.cpython-38.pyc         | Bin 1304 -> 1303 bytes
 .../record_path_provider.cpython-38.pyc       | Bin 587 -> 583 bytes
 sziszapangma/integration/asr_processor.py     |  23 +-
 .../__pycache__/__init__.cpython-38.pyc       | Bin 185 -> 181 bytes
 .../experiment_repository.cpython-38.pyc      | Bin 2151 -> 2150 bytes
 .../task/__pycache__/__init__.cpython-38.pyc  | Bin 179 -> 175 bytes
 .../task/__pycache__/asr_task.cpython-38.pyc  | Bin 2149 -> 2148 bytes
 .../classic_wer_metric_task.cpython-38.pyc    | Bin 3429 -> 3428 bytes
 .../processing_task.cpython-38.pyc            | Bin 2207 -> 2206 bytes
 26 files changed, 496 insertions(+), 37 deletions(-)
 create mode 100644 experiment_data/pipeline/.gitignore

diff --git a/dvc.lock b/dvc.lock
index 96eddc5..f611d3c 100644
--- a/dvc.lock
+++ b/dvc.lock
@@ -4,14 +4,35 @@ stages:
     cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
     deps:
     - path: experiment/luna/import_dataset/import_luna.py
-      md5: d938162187616f7e7390983ecb9e120b
-      size: 8269
+      md5: f40adccbf0b51094a71b876c9ccad751
+      size: 8265
     - path: experiment_data/dataset/LUNA.PL
       md5: d342155b1871e881797cf7da09d5dc3c.dir
       size: 1578358645
       nfiles: 4500
     outs:
     - path: experiment_data/dataset_relation_manager_data/luna
-      md5: c68722cc69375259a4d1a4b6a2bd4dc3.dir
-      size: 3016826
-      nfiles: 10
+      md5: 773f92667e16efd915ec6384d06aa4fb.dir
+      size: 229007155
+      nfiles: 1000
+  luna_main_pipeline:
+    cmd: "PYTHONPATH=. python -m spacy download pl_core_news_lg\nPYTHONPATH=. python\
+      \ experiment/luna/pipeline/luna_main.py\n"
+    deps:
+    - path: experiment_data/cached_asr/luna_techmo
+      md5: 033ea7b5434dded73bf869bfdd299462.dir
+      size: 4256479
+      nfiles: 500
+    - path: experiment_data/dataset/LUNA.PL
+      md5: d342155b1871e881797cf7da09d5dc3c.dir
+      size: 1578358645
+      nfiles: 4500
+    - path: experiment_data/dataset_relation_manager_data/luna
+      md5: 773f92667e16efd915ec6384d06aa4fb.dir
+      size: 229007155
+      nfiles: 1000
+    outs:
+    - path: experiment_data/pipeline/asr_benchmark_luna
+      md5: 2e334734387ab4579b7b5269d5029e81.dir
+      size: 71627685
+      nfiles: 4000
diff --git a/dvc.yaml b/dvc.yaml
index ef4354e..7fae901 100644
--- a/dvc.yaml
+++ b/dvc.yaml
@@ -1,17 +1,20 @@
 stages:
 
-  import_luna_to_common_format:
-    cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
-    deps:
-      - experiment/luna/import_dataset/import_luna.py
-      - experiment_data/dataset/LUNA.PL
-    outs:
-      - experiment_data/dataset_relation_manager_data/luna
+    import_luna_to_common_format:
+        cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
+        deps:
+            - experiment/luna/import_dataset/import_luna.py
+            - experiment_data/dataset/LUNA.PL
+        outs:
+            - experiment_data/dataset_relation_manager_data/luna
 
-#  luna_main_pipeline:
-#    cmd: PYTHONPATH=. python experiment/luna/pipeline/luna_main.py
-#    deps:
-#      - experiment_data/dataset_relation_manager_data/luna
-#      - experiment_data/dataset/LUNA.PL
-#    outs:
-#      - experiment_data/pipeline/luna
+    luna_main_pipeline:
+        cmd: |
+            PYTHONPATH=. python -m spacy download pl_core_news_lg
+            PYTHONPATH=. python experiment/luna/pipeline/luna_main.py
+        deps:
+            - experiment_data/dataset_relation_manager_data/luna
+            - experiment_data/dataset/LUNA.PL
+            - experiment_data/cached_asr/luna_techmo
+        outs:
+            - experiment_data/pipeline/asr_benchmark_luna
diff --git a/experiment/luna/pipeline/luna_main.py b/experiment/luna/pipeline/luna_main.py
index 7b6b719..cc556e7 100644
--- a/experiment/luna/pipeline/luna_main.py
+++ b/experiment/luna/pipeline/luna_main.py
@@ -47,7 +47,8 @@ def run_luna_experiment(experiment_repository: ExperimentRepository):
             AsrTask(
                 task_name='techmo_polish_task',
                 # asr_processor=AsrWebClient('http://192.168.0.124:4999/process_asr', 'test1234'),
-                asr_processor=AsrPathCacheClient(),
+                asr_processor=AsrPathCacheClient('experiment_data/cached_asr/luna_techmo', record_provider,
+                                                 record_provider),
                 asr_property_name=TECHMO_POLISH_ASR,
                 require_update=False,
                 record_path_provider=record_provider
diff --git a/experiment_data/pipeline/.gitignore b/experiment_data/pipeline/.gitignore
new file mode 100644
index 0000000..03b97ac
--- /dev/null
+++ b/experiment_data/pipeline/.gitignore
@@ -0,0 +1 @@
+/asr_benchmark_luna
diff --git a/poetry.lock b/poetry.lock
index 6eeb9b4..990acb4 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -156,6 +156,17 @@ typing-extensions = ">=3.7.4"
 colorama = ["colorama (>=0.4.3)"]
 d = ["aiohttp (>=3.3.2)", "aiohttp-cors"]
 
+[[package]]
+name = "blis"
+version = "0.7.7"
+description = "The Blis BLAS-like linear algebra library, as a self-contained C-extension."
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+numpy = ">=1.15.0"
+
 [[package]]
 name = "boto3"
 version = "1.21.21"
@@ -188,6 +199,14 @@ urllib3 = ">=1.25.4,<1.27"
 [package.extras]
 crt = ["awscrt (==0.13.5)"]
 
+[[package]]
+name = "catalogue"
+version = "2.0.7"
+description = "Super lightweight function registries for your library"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
 [[package]]
 name = "certifi"
 version = "2021.10.8"
@@ -278,6 +297,14 @@ sdist = ["setuptools_rust (>=0.11.4)"]
 ssh = ["bcrypt (>=3.1.5)"]
 test = ["pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-subtests", "pytest-xdist", "pretend", "iso8601", "pytz", "hypothesis (>=1.11.4,!=3.79.2)"]
 
+[[package]]
+name = "cymem"
+version = "2.0.6"
+description = "Manage calls to calloc/free through Cython"
+category = "main"
+optional = false
+python-versions = "*"
+
 [[package]]
 name = "dictdiffer"
 version = "0.9.0"
@@ -640,6 +667,17 @@ category = "main"
 optional = false
 python-versions = ">=3.7"
 
+[[package]]
+name = "langcodes"
+version = "3.3.0"
+description = "Tools for labeling human languages with IETF language tags"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.extras]
+data = ["language-data (>=1.1,<2.0)"]
+
 [[package]]
 name = "mailchecker"
 version = "4.1.16"
@@ -684,6 +722,14 @@ category = "main"
 optional = false
 python-versions = ">=3.7"
 
+[[package]]
+name = "murmurhash"
+version = "1.0.7"
+description = "Cython bindings for MurmurHash"
+category = "main"
+optional = false
+python-versions = "*"
+
 [[package]]
 name = "mypy"
 version = "0.812"
@@ -787,6 +833,24 @@ category = "main"
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
 
+[[package]]
+name = "pathy"
+version = "0.6.1"
+description = "pathlib.Path subclasses for local and cloud bucket storage"
+category = "main"
+optional = false
+python-versions = ">= 3.6"
+
+[package.dependencies]
+smart-open = ">=5.0.0,<6.0.0"
+typer = ">=0.3.0,<1.0.0"
+
+[package.extras]
+all = ["google-cloud-storage (>=1.26.0,<2.0.0)", "boto3", "pytest", "pytest-coverage", "mock", "typer-cli"]
+gcs = ["google-cloud-storage (>=1.26.0,<2.0.0)"]
+s3 = ["boto3"]
+test = ["pytest", "pytest-coverage", "mock", "typer-cli"]
+
 [[package]]
 name = "phonenumbers"
 version = "8.12.47"
@@ -818,6 +882,18 @@ python-versions = ">=3.6,<4.0"
 pastel = ">=0.2.0,<0.3.0"
 tomlkit = ">=0.6.0,<1.0.0"
 
+[[package]]
+name = "preshed"
+version = "3.0.6"
+description = "Cython hash table that trusts the keys are pre-hashed"
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+cymem = ">=2.0.2,<2.1.0"
+murmurhash = ">=0.28.0,<1.1.0"
+
 [[package]]
 name = "psutil"
 version = "5.9.0"
@@ -845,6 +921,21 @@ category = "main"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
 
+[[package]]
+name = "pydantic"
+version = "1.8.2"
+description = "Data validation and settings management using python 3.6 type hinting"
+category = "main"
+optional = false
+python-versions = ">=3.6.1"
+
+[package.dependencies]
+typing-extensions = ">=3.7.4.3"
+
+[package.extras]
+dotenv = ["python-dotenv (>=0.10.4)"]
+email = ["email-validator (>=1.0.3)"]
+
 [[package]]
 name = "pydot"
 version = "1.4.2"
@@ -1152,6 +1243,23 @@ category = "main"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
 
+[[package]]
+name = "smart-open"
+version = "5.2.1"
+description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)"
+category = "main"
+optional = false
+python-versions = ">=3.6,<4.0"
+
+[package.extras]
+all = ["boto3", "google-cloud-storage", "azure-storage-blob", "azure-common", "azure-core", "requests"]
+azure = ["azure-storage-blob", "azure-common", "azure-core"]
+gcs = ["google-cloud-storage"]
+http = ["requests"]
+s3 = ["boto3"]
+test = ["boto3", "google-cloud-storage", "azure-storage-blob", "azure-common", "azure-core", "requests", "moto[server] (==1.3.14)", "pathlib2", "responses", "paramiko", "parameterizedtestcase", "pytest", "pytest-rerunfailures"]
+webhdfs = ["requests"]
+
 [[package]]
 name = "smmap"
 version = "5.0.0"
@@ -1160,6 +1268,89 @@ category = "main"
 optional = false
 python-versions = ">=3.6"
 
+[[package]]
+name = "spacy"
+version = "3.2.4"
+description = "Industrial-strength Natural Language Processing (NLP) in Python"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+blis = ">=0.4.0,<0.8.0"
+catalogue = ">=2.0.6,<2.1.0"
+click = "<8.1.0"
+cymem = ">=2.0.2,<2.1.0"
+jinja2 = "*"
+langcodes = ">=3.2.0,<4.0.0"
+murmurhash = ">=0.28.0,<1.1.0"
+numpy = ">=1.15.0"
+packaging = ">=20.0"
+pathy = ">=0.3.5"
+preshed = ">=3.0.2,<3.1.0"
+pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<1.9.0"
+requests = ">=2.13.0,<3.0.0"
+spacy-legacy = ">=3.0.8,<3.1.0"
+spacy-loggers = ">=1.0.0,<2.0.0"
+srsly = ">=2.4.1,<3.0.0"
+thinc = ">=8.0.12,<8.1.0"
+tqdm = ">=4.38.0,<5.0.0"
+typer = ">=0.3.0,<0.5.0"
+wasabi = ">=0.8.1,<1.1.0"
+
+[package.extras]
+apple = ["thinc-apple-ops (>=0.0.4,<1.0.0)"]
+cuda = ["cupy (>=5.0.0b4,<11.0.0)"]
+cuda100 = ["cupy-cuda100 (>=5.0.0b4,<11.0.0)"]
+cuda101 = ["cupy-cuda101 (>=5.0.0b4,<11.0.0)"]
+cuda102 = ["cupy-cuda102 (>=5.0.0b4,<11.0.0)"]
+cuda110 = ["cupy-cuda110 (>=5.0.0b4,<11.0.0)"]
+cuda111 = ["cupy-cuda111 (>=5.0.0b4,<11.0.0)"]
+cuda112 = ["cupy-cuda112 (>=5.0.0b4,<11.0.0)"]
+cuda113 = ["cupy-cuda113 (>=5.0.0b4,<11.0.0)"]
+cuda114 = ["cupy-cuda114 (>=5.0.0b4,<11.0.0)"]
+cuda115 = ["cupy-cuda115 (>=5.0.0b4,<11.0.0)"]
+cuda80 = ["cupy-cuda80 (>=5.0.0b4,<11.0.0)"]
+cuda90 = ["cupy-cuda90 (>=5.0.0b4,<11.0.0)"]
+cuda91 = ["cupy-cuda91 (>=5.0.0b4,<11.0.0)"]
+cuda92 = ["cupy-cuda92 (>=5.0.0b4,<11.0.0)"]
+ja = ["sudachipy (>=0.5.2,!=0.6.1)", "sudachidict-core (>=20211220)"]
+ko = ["natto-py (==0.9.0)"]
+lookups = ["spacy-lookups-data (>=1.0.3,<1.1.0)"]
+ray = ["spacy-ray (>=0.1.0,<1.0.0)"]
+th = ["pythainlp (>=2.0)"]
+transformers = ["spacy-transformers (>=1.1.2,<1.2.0)"]
+
+[[package]]
+name = "spacy-legacy"
+version = "3.0.9"
+description = "Legacy registered functions for spaCy backwards compatibility"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[[package]]
+name = "spacy-loggers"
+version = "1.0.2"
+description = "Logging utilities for SpaCy"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+wasabi = ">=0.8.1,<1.1.0"
+
+[[package]]
+name = "srsly"
+version = "2.4.3"
+description = "Modern high-performance serialization utilities for Python"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+catalogue = ">=2.0.3,<2.1.0"
+
 [[package]]
 name = "tabulate"
 version = "0.8.9"
@@ -1179,6 +1370,45 @@ category = "main"
 optional = false
 python-versions = "*"
 
+[[package]]
+name = "thinc"
+version = "8.0.15"
+description = "A refreshing functional take on deep learning, compatible with your favorite libraries"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+blis = ">=0.4.0,<0.8.0"
+catalogue = ">=2.0.4,<2.1.0"
+cymem = ">=2.0.2,<2.1.0"
+murmurhash = ">=1.0.2,<1.1.0"
+numpy = ">=1.15.0"
+preshed = ">=3.0.2,<3.1.0"
+pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<1.9.0"
+srsly = ">=2.4.0,<3.0.0"
+wasabi = ">=0.8.1,<1.1.0"
+
+[package.extras]
+cuda = ["cupy (>=5.0.0b4)"]
+cuda100 = ["cupy-cuda100 (>=5.0.0b4)"]
+cuda101 = ["cupy-cuda101 (>=5.0.0b4)"]
+cuda102 = ["cupy-cuda102 (>=5.0.0b4)"]
+cuda110 = ["cupy-cuda110 (>=5.0.0b4)"]
+cuda111 = ["cupy-cuda111 (>=5.0.0b4)"]
+cuda112 = ["cupy-cuda112 (>=5.0.0b4)"]
+cuda113 = ["cupy-cuda113 (>=5.0.0b4)"]
+cuda114 = ["cupy-cuda114 (>=5.0.0b4)"]
+cuda115 = ["cupy-cuda115 (>=5.0.0b4)"]
+cuda80 = ["cupy-cuda80 (>=5.0.0b4)"]
+cuda90 = ["cupy-cuda90 (>=5.0.0b4)"]
+cuda91 = ["cupy-cuda91 (>=5.0.0b4)"]
+cuda92 = ["cupy-cuda92 (>=5.0.0b4)"]
+datasets = ["ml-datasets (>=0.2.0,<0.3.0)"]
+mxnet = ["mxnet (>=1.5.1,<1.6.0)"]
+tensorflow = ["tensorflow (>=2.0.0,<2.6.0)"]
+torch = ["torch (>=1.6.0)"]
+
 [[package]]
 name = "toml"
 version = "0.10.2"
@@ -1220,6 +1450,23 @@ category = "dev"
 optional = false
 python-versions = "*"
 
+[[package]]
+name = "typer"
+version = "0.4.1"
+description = "Typer, build great CLIs. Easy to code. Based on Python type hints."
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+click = ">=7.1.1,<9.0.0"
+
+[package.extras]
+all = ["colorama (>=0.4.3,<0.5.0)", "shellingham (>=1.3.0,<2.0.0)"]
+dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)"]
+doc = ["mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "mdx-include (>=1.4.1,<2.0.0)"]
+test = ["shellingham (>=1.3.0,<2.0.0)", "pytest (>=4.4.0,<5.4.0)", "pytest-cov (>=2.10.0,<3.0.0)", "coverage (>=5.2,<6.0)", "pytest-xdist (>=1.32.0,<2.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "mypy (==0.910)", "black (>=22.3.0,<23.0.0)", "isort (>=5.0.6,<6.0.0)"]
+
 [[package]]
 name = "typing-extensions"
 version = "3.10.0.2"
@@ -1249,6 +1496,14 @@ category = "main"
 optional = false
 python-versions = "*"
 
+[[package]]
+name = "wasabi"
+version = "0.9.1"
+description = "A lightweight console printing and formatting toolkit"
+category = "main"
+optional = false
+python-versions = "*"
+
 [[package]]
 name = "wcwidth"
 version = "0.2.5"
@@ -1325,7 +1580,7 @@ developer = []
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.8"
-content-hash = "e7d2c642bcdf256de73261802793f95ef45b1e63ed2a6811a7e89dc62505d862"
+content-hash = "709709b64fd48bb5f03a52cfe742f27668502a4242dcb1123475e435ce13e128"
 
 [metadata.files]
 aiobotocore = [
@@ -1444,6 +1699,24 @@ attrs = [
 black = [
     {file = "black-20.8b1.tar.gz", hash = "sha256:1c02557aa099101b9d21496f8a914e9ed2222ef70336404eeeac8edba836fbea"},
 ]
+blis = [
+    {file = "blis-0.7.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f4109cce38e644e81d923836b34024905d59e88c8fb48b89b420f4d7661cd89f"},
+    {file = "blis-0.7.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b5e0acc760daf5c3b45bce44653943e3a04d81c21c5b92213ed51664525dc24e"},
+    {file = "blis-0.7.7-cp310-cp310-win_amd64.whl", hash = "sha256:bead485e5d79d3eb62a8df55618743878fb3cba606aaf926153db5803270b185"},
+    {file = "blis-0.7.7-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:1667db8439d9ca41c0c1f0ea954d87462be01b125436c4b264f73603c9fb4e82"},
+    {file = "blis-0.7.7-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e024f103522e72a27019cfcfe14569522a394f5d651565560a18040fdd69a6c"},
+    {file = "blis-0.7.7-cp36-cp36m-win_amd64.whl", hash = "sha256:64bef63b1abd5b41819ea53897bdbc03c631a59c1757a9393e6ae0828692f31c"},
+    {file = "blis-0.7.7-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cfb7d730fef706f3ea4389196ce5f610f24cc83f828c498a275c12f05f0cf5c4"},
+    {file = "blis-0.7.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:148f59a0a47a38ce82e3afc50c709494d5e5a494bef28ce1519c7a17346c645b"},
+    {file = "blis-0.7.7-cp37-cp37m-win_amd64.whl", hash = "sha256:a0183760604b14e8eb671a431d06606594def03c36aaaa2a2e7b7f88382dac76"},
+    {file = "blis-0.7.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:680480dfa16b354f2e4d584edb8d36f0505ed8df12939beee2d161aea7bb3609"},
+    {file = "blis-0.7.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a48eeaa506f176bcac306378f5e8063697c93e26d2418fcbe053e8912019090"},
+    {file = "blis-0.7.7-cp38-cp38-win_amd64.whl", hash = "sha256:7865e39cac4e10506afc49213938fb7e13bf73ca980c9c20ffad2de4ef858f43"},
+    {file = "blis-0.7.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:76d13dbcd648ca33dfc83569bb219d0696e4f6e5ad00b9f538332a3bdb28ff30"},
+    {file = "blis-0.7.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e22145110864bcffb1d52cb57050b67b8a8ecd43c7c0a1ac0bcdb2c85c8bf416"},
+    {file = "blis-0.7.7-cp39-cp39-win_amd64.whl", hash = "sha256:d6055ced65d6581ab4f1da0d3f6ec14c60512474c5c9b3210c9f30dd7dd1447d"},
+    {file = "blis-0.7.7.tar.gz", hash = "sha256:5d4a81f9438db7a19ac8e64ad41331f65a659ea8f3bb1889a9c2088cfd9fe104"},
+]
 boto3 = [
     {file = "boto3-1.21.21-py3-none-any.whl", hash = "sha256:8fa32fcc8be38327bd667237223d71e5e4b2475f39d6882aca4dbad19fff8c29"},
     {file = "boto3-1.21.21.tar.gz", hash = "sha256:6fa0622f308cfd1da758966fc98b52fbd74b80606d14586c8ad82c7a6c4f32d0"},
@@ -1452,6 +1725,10 @@ botocore = [
     {file = "botocore-1.24.21-py3-none-any.whl", hash = "sha256:92daca8775e738a9db9b465d533019285f09d541e903233261299fd87c2f842c"},
     {file = "botocore-1.24.21.tar.gz", hash = "sha256:7e976cfd0a61601e74624ef8f5246b40a01f2cce73a011ef29cf80a6e371d0fa"},
 ]
+catalogue = [
+    {file = "catalogue-2.0.7-py3-none-any.whl", hash = "sha256:cab4feda641fe05da1e6a1a9d123b0869d5ca324dcd93d4a5c384408ab62e7fb"},
+    {file = "catalogue-2.0.7.tar.gz", hash = "sha256:535d33ae79ebd21ca298551d85da186ae8b8e1df36b0fb0246da774163ec2d6b"},
+]
 certifi = [
     {file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"},
     {file = "certifi-2021.10.8.tar.gz", hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"},
@@ -1551,6 +1828,24 @@ cryptography = [
     {file = "cryptography-37.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:cab59c774125596fa72f1decc5805894313b40f370a7c75597e37f0211027944"},
     {file = "cryptography-37.0.0.tar.gz", hash = "sha256:5a761fc1ff0eae360a80656bea462c3163dfaa8093b2fa0f72af929217b14a97"},
 ]
+cymem = [
+    {file = "cymem-2.0.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:700540b68e96a7056d0691d467df2bbaaf0934a3e6fe2383669998cbee19580a"},
+    {file = "cymem-2.0.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:971cf0a8437dfb4185c3049c086e463612fe849efadc0f5cc153fc81c501da7d"},
+    {file = "cymem-2.0.6-cp310-cp310-win_amd64.whl", hash = "sha256:6b0d1a6b0a1296f31fa9e4b7ae5ea49394084ecc883b1ae6fec4844403c43468"},
+    {file = "cymem-2.0.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b8e1c18bb00800425576710468299153caad20c64ddb6819d40a6a34e21ee21c"},
+    {file = "cymem-2.0.6-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:492084aef23ac2ff3da3729e9d36340bc91a96c2dc8c3a82a1926e384ab52412"},
+    {file = "cymem-2.0.6-cp36-cp36m-win_amd64.whl", hash = "sha256:af3c01e6b20f9e6c07c7d7cdb7f710e49889d3906c9a3e039546ee6636a34b9a"},
+    {file = "cymem-2.0.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d7a59cef8f2fa25d12e2c30138f8623acbd43ad2715e730a709e49c5eef8e1b0"},
+    {file = "cymem-2.0.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd52d8a81881804625df88453611175ab7e0099b34f52204da1f6940cf2e83c9"},
+    {file = "cymem-2.0.6-cp37-cp37m-win_amd64.whl", hash = "sha256:4749f220e4c06ec44eb10de13794ff0508cdc4f8eff656cf49cab2cdb3122c0c"},
+    {file = "cymem-2.0.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2aa3fa467d906cd2c27fa0a2e2952dd7925f5fcc7973fab6d815ef6acb25aad8"},
+    {file = "cymem-2.0.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea535f74ab6024e7416f93de564e5c81fb7c0964b96280de66f60aeb05f0cf53"},
+    {file = "cymem-2.0.6-cp38-cp38-win_amd64.whl", hash = "sha256:4f87fe087f2ae36c3e20e2b1a29d7f76a28c035372d0a97655f26223d975235a"},
+    {file = "cymem-2.0.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a93fba62fe79dbf6fc4d5b6d804a6e114b44af3ff3d40a28833ee39f21bd336b"},
+    {file = "cymem-2.0.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:04676d696596b0db3f3c5a3936bab12fb6f24278921a6622bb185e61765b2b4d"},
+    {file = "cymem-2.0.6-cp39-cp39-win_amd64.whl", hash = "sha256:c59293b232b53ebb47427f16cf648e937022f489cff36c11d1d8a1f0075b6609"},
+    {file = "cymem-2.0.6.tar.gz", hash = "sha256:169725b5816959d34de2545b33fee6a8021a6e08818794a426c5a4f981f17e5e"},
+]
 dictdiffer = [
     {file = "dictdiffer-0.9.0-py2.py3-none-any.whl", hash = "sha256:442bfc693cfcadaf46674575d2eba1c53b42f5e404218ca2c2ff549f2df56595"},
     {file = "dictdiffer-0.9.0.tar.gz", hash = "sha256:17bacf5fbfe613ccf1b6d512bd766e6b21fb798822a133aa86098b8ac9997578"},
@@ -1729,6 +2024,10 @@ jmespath = [
     {file = "jmespath-1.0.0-py3-none-any.whl", hash = "sha256:e8dcd576ed616f14ec02eed0005c85973b5890083313860136657e24784e4c04"},
     {file = "jmespath-1.0.0.tar.gz", hash = "sha256:a490e280edd1f57d6de88636992d05b71e97d69a26a19f058ecf7d304474bf5e"},
 ]
+langcodes = [
+    {file = "langcodes-3.3.0-py3-none-any.whl", hash = "sha256:4d89fc9acb6e9c8fdef70bcdf376113a3db09b67285d9e1d534de6d8818e7e69"},
+    {file = "langcodes-3.3.0.tar.gz", hash = "sha256:794d07d5a28781231ac335a1561b8442f8648ca07cd518310aeb45d6f0807ef6"},
+]
 mailchecker = [
     {file = "mailchecker-4.1.16.tar.gz", hash = "sha256:03e961f8182031227f3c871831f5f7bbf625295aa11b92316eb1cf6f553a2e5e"},
 ]
@@ -1872,6 +2171,24 @@ multidict = [
     {file = "multidict-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:4bae31803d708f6f15fd98be6a6ac0b6958fcf68fda3c77a048a4f9073704aae"},
     {file = "multidict-6.0.2.tar.gz", hash = "sha256:5ff3bd75f38e4c43f1f470f2df7a4d430b821c4ce22be384e1459cb57d6bb013"},
 ]
+murmurhash = [
+    {file = "murmurhash-1.0.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:966d2efec6e01aa32c5774c44906724efca00da3507f06faa11acafb47ea1230"},
+    {file = "murmurhash-1.0.7-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13723aeb2b0f5ebc96bbcf133410481b28bfc7341ce65ae86fc32f02b54a68c1"},
+    {file = "murmurhash-1.0.7-cp310-cp310-win_amd64.whl", hash = "sha256:f53f16ef143f93127e9aa920a30cda11a799e172b28508c32fb538a82b487a0c"},
+    {file = "murmurhash-1.0.7-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f7da66418c84982eca3494528b54ded4185d10a6b3231d53e1a2c83751e701e6"},
+    {file = "murmurhash-1.0.7-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e0837d2e02186eeac5aacb1e7ce7a8cada0da6fa7b366100e70c7d6c119206f"},
+    {file = "murmurhash-1.0.7-cp36-cp36m-win_amd64.whl", hash = "sha256:78adacef6767250cb7422e36d4e3f0d7359176f17f79fef9d1529656f8e73167"},
+    {file = "murmurhash-1.0.7-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3d2cc5e8ff2ee28b124bf32a944e31e5e164000233b772d72281f6b62568dc86"},
+    {file = "murmurhash-1.0.7-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b317021f38505d48a9ab89ce32e3a06d7f3f32b06b16ceda8bb93c82eb6aea8"},
+    {file = "murmurhash-1.0.7-cp37-cp37m-win_amd64.whl", hash = "sha256:2f0ef0c80b590b4ad5cd474771f0bed81ecdb1942c549319d6895fa985d96dc3"},
+    {file = "murmurhash-1.0.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:400c6a0a96f4fee3a3a384091044acb54f509af8b582d339de329d82ac4585f6"},
+    {file = "murmurhash-1.0.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2165e2d0e8fa806e5aacc7fd9e7e79c633618b23c11aa724192fad5dda6599ef"},
+    {file = "murmurhash-1.0.7-cp38-cp38-win_amd64.whl", hash = "sha256:65d9c6c39bb4c71689963109a1a3519acfa074280a94501c64f5e8d2a0cc257a"},
+    {file = "murmurhash-1.0.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b151ab593132cae6529575515ed664b618095590c08b41beda9f47689408623"},
+    {file = "murmurhash-1.0.7-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dee39a6f4067cdfefb2156374de230f49405850bc3280eb787e8f6c8daefeb8d"},
+    {file = "murmurhash-1.0.7-cp39-cp39-win_amd64.whl", hash = "sha256:fe79b40470885c5accfa7e378a0405407ebf0d8b0cd06a726748dcfd2d8dfa50"},
+    {file = "murmurhash-1.0.7.tar.gz", hash = "sha256:630a396ebd31ca44d89b4eca36fa74ea8aae724adf0afaa2de7680c350b2936f"},
+]
 mypy = [
     {file = "mypy-0.812-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:a26f8ec704e5a7423c8824d425086705e381b4f1dfdef6e3a1edab7ba174ec49"},
     {file = "mypy-0.812-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:28fb5479c494b1bab244620685e2eb3c3f988d71fd5d64cc753195e8ed53df7c"},
@@ -1976,6 +2293,10 @@ pathspec = [
     {file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"},
     {file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"},
 ]
+pathy = [
+    {file = "pathy-0.6.1-py3-none-any.whl", hash = "sha256:25fd04cec6393661113086730ce69c789d121bea83ab1aa18452e8fd42faf29a"},
+    {file = "pathy-0.6.1.tar.gz", hash = "sha256:838624441f799a06b446a657e4ecc9ebc3fdd05234397e044a7c87e8f6e76b1c"},
+]
 phonenumbers = [
     {file = "phonenumbers-8.12.47-py2.py3-none-any.whl", hash = "sha256:065fc5930ceff3147f50beb4c6d253c25ab0a467ac461174c62696c119593f7e"},
     {file = "phonenumbers-8.12.47.tar.gz", hash = "sha256:56fd605d2f5460e1df2117085b653bb38322295ec658e6acaafc9c976867d522"},
@@ -1988,6 +2309,24 @@ poethepoet = [
     {file = "poethepoet-0.10.0-py3-none-any.whl", hash = "sha256:6fb3021603d4421c6fcc40072bbcf150a6c52ef70ff4d3be089b8b04e015ef5a"},
     {file = "poethepoet-0.10.0.tar.gz", hash = "sha256:70b97cb194b978dc464c70793e85e6f746cddf82b84a38bfb135946ad71ae19c"},
 ]
+preshed = [
+    {file = "preshed-3.0.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:66a71ced487516cf81fd0431a3a843514262ae2f33e9a7688b87562258fa75d5"},
+    {file = "preshed-3.0.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c98f725d8478f3ade4ab1ea00f50a92d2d9406d37276bc46fd8bab1d47452c4"},
+    {file = "preshed-3.0.6-cp310-cp310-win_amd64.whl", hash = "sha256:ea8aa9610837e907e8442e79300df0a861bfdb4dcaf026a5d9642a688ad04815"},
+    {file = "preshed-3.0.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e03ae3eee961106a517fcd827b5a7c51f7317236b3e665c989054ab8dc381d28"},
+    {file = "preshed-3.0.6-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58661bea8d0d63a648588511407285e43d43627e27f836e30819801fb3c75d70"},
+    {file = "preshed-3.0.6-cp36-cp36m-win_amd64.whl", hash = "sha256:5f99837e7353ce1fa81f0074d4b15f36e0af5af60a2a54d4d11e13cb09768a9e"},
+    {file = "preshed-3.0.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8c60a400babfc5b25ba371fda7041be227f7c625e1fb7a43329c2c08fe00a53b"},
+    {file = "preshed-3.0.6-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:61b2ea656cb1c38d544cc774f1c2ad1cdab23167b46b35310a7e211d4ba9c6d0"},
+    {file = "preshed-3.0.6-cp37-cp37m-win_amd64.whl", hash = "sha256:87e1add41b7f6236a3ccc34788f47ab8682bc28e8a2d369089062e274494c1a0"},
+    {file = "preshed-3.0.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a279c138ad1d5be02547b1545254929588414b01571fe637016367f6a1aa11de"},
+    {file = "preshed-3.0.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3af09f4cfcdaca085fd87dac8107617c4e2bb0ad1458f953841b71e9728287f5"},
+    {file = "preshed-3.0.6-cp38-cp38-win_amd64.whl", hash = "sha256:f92e752a868ea2690e1b38c4b775251a145e0fce36b9bdd972539e8271b7a23a"},
+    {file = "preshed-3.0.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eaffbc71fdb8625f9aac4fe7e19e20bf318d1421ea05903bebe3e6ffef27b587"},
+    {file = "preshed-3.0.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cfe1495fcfc7f479de840ddc4f426dbb55351e218ae5c8712c1269183a4d0060"},
+    {file = "preshed-3.0.6-cp39-cp39-win_amd64.whl", hash = "sha256:92a8f49d17a63537a8beed48a049b62ef168ca07e0042a5b2bcdf178a1fb5d48"},
+    {file = "preshed-3.0.6.tar.gz", hash = "sha256:fb3b7588a3a0f2f2f1bf3fe403361b2b031212b73a37025aea1df7215af3772a"},
+]
 psutil = [
     {file = "psutil-5.9.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:55ce319452e3d139e25d6c3f85a1acf12d1607ddedea5e35fb47a552c051161b"},
     {file = "psutil-5.9.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:7336292a13a80eb93c21f36bde4328aa748a04b68c13d01dfddd67fc13fd0618"},
@@ -2030,6 +2369,30 @@ pycparser = [
     {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"},
     {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"},
 ]
+pydantic = [
+    {file = "pydantic-1.8.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:05ddfd37c1720c392f4e0d43c484217b7521558302e7069ce8d318438d297739"},
+    {file = "pydantic-1.8.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:a7c6002203fe2c5a1b5cbb141bb85060cbff88c2d78eccbc72d97eb7022c43e4"},
+    {file = "pydantic-1.8.2-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:589eb6cd6361e8ac341db97602eb7f354551482368a37f4fd086c0733548308e"},
+    {file = "pydantic-1.8.2-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:10e5622224245941efc193ad1d159887872776df7a8fd592ed746aa25d071840"},
+    {file = "pydantic-1.8.2-cp36-cp36m-win_amd64.whl", hash = "sha256:99a9fc39470010c45c161a1dc584997f1feb13f689ecf645f59bb4ba623e586b"},
+    {file = "pydantic-1.8.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a83db7205f60c6a86f2c44a61791d993dff4b73135df1973ecd9eed5ea0bda20"},
+    {file = "pydantic-1.8.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:41b542c0b3c42dc17da70554bc6f38cbc30d7066d2c2815a94499b5684582ecb"},
+    {file = "pydantic-1.8.2-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:ea5cb40a3b23b3265f6325727ddfc45141b08ed665458be8c6285e7b85bd73a1"},
+    {file = "pydantic-1.8.2-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:18b5ea242dd3e62dbf89b2b0ec9ba6c7b5abaf6af85b95a97b00279f65845a23"},
+    {file = "pydantic-1.8.2-cp37-cp37m-win_amd64.whl", hash = "sha256:234a6c19f1c14e25e362cb05c68afb7f183eb931dd3cd4605eafff055ebbf287"},
+    {file = "pydantic-1.8.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:021ea0e4133e8c824775a0cfe098677acf6fa5a3cbf9206a376eed3fc09302cd"},
+    {file = "pydantic-1.8.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:e710876437bc07bd414ff453ac8ec63d219e7690128d925c6e82889d674bb505"},
+    {file = "pydantic-1.8.2-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:ac8eed4ca3bd3aadc58a13c2aa93cd8a884bcf21cb019f8cfecaae3b6ce3746e"},
+    {file = "pydantic-1.8.2-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:4a03cbbe743e9c7247ceae6f0d8898f7a64bb65800a45cbdc52d65e370570820"},
+    {file = "pydantic-1.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:8621559dcf5afacf0069ed194278f35c255dc1a1385c28b32dd6c110fd6531b3"},
+    {file = "pydantic-1.8.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8b223557f9510cf0bfd8b01316bf6dd281cf41826607eada99662f5e4963f316"},
+    {file = "pydantic-1.8.2-cp39-cp39-manylinux1_i686.whl", hash = "sha256:244ad78eeb388a43b0c927e74d3af78008e944074b7d0f4f696ddd5b2af43c62"},
+    {file = "pydantic-1.8.2-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:05ef5246a7ffd2ce12a619cbb29f3307b7c4509307b1b49f456657b43529dc6f"},
+    {file = "pydantic-1.8.2-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:54cd5121383f4a461ff7644c7ca20c0419d58052db70d8791eacbbe31528916b"},
+    {file = "pydantic-1.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:4be75bebf676a5f0f87937c6ddb061fa39cbea067240d98e298508c1bda6f3f3"},
+    {file = "pydantic-1.8.2-py3-none-any.whl", hash = "sha256:fec866a0b59f372b7e776f2d7308511784dace622e0992a0b59ea3ccee0ae833"},
+    {file = "pydantic-1.8.2.tar.gz", hash = "sha256:26464e57ccaafe72b7ad156fdaa4e9b9ef051f69e175dbbb463283000c05ab7b"},
+]
 pydot = [
     {file = "pydot-1.4.2-py2.py3-none-any.whl", hash = "sha256:66c98190c65b8d2e2382a441b4c0edfdb4f4c025ef9cb9874de478fb0793a451"},
     {file = "pydot-1.4.2.tar.gz", hash = "sha256:248081a39bcb56784deb018977e428605c1c758f10897a339fce1dd728ff007d"},
@@ -2338,10 +2701,6 @@ rich = [
     {file = "ruamel.yaml-0.17.21.tar.gz", hash = "sha256:8b7ce697a2f212752a35c1ac414471dc16c424c9573be4926b56ff3f5d23b7af"},
 ]
 "ruamel.yaml.clib" = [
-    {file = "ruamel.yaml.clib-0.2.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6e7be2c5bcb297f5b82fee9c665eb2eb7001d1050deaba8471842979293a80b0"},
-    {file = "ruamel.yaml.clib-0.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:221eca6f35076c6ae472a531afa1c223b9c29377e62936f61bc8e6e8bdc5f9e7"},
-    {file = "ruamel.yaml.clib-0.2.6-cp310-cp310-win32.whl", hash = "sha256:1070ba9dd7f9370d0513d649420c3b362ac2d687fe78c6e888f5b12bf8bc7bee"},
-    {file = "ruamel.yaml.clib-0.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:77df077d32921ad46f34816a9a16e6356d8100374579bc35e15bab5d4e9377de"},
     {file = "ruamel.yaml.clib-0.2.6-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:cfdb9389d888c5b74af297e51ce357b800dd844898af9d4a547ffc143fa56751"},
     {file = "ruamel.yaml.clib-0.2.6-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:7b2927e92feb51d830f531de4ccb11b320255ee95e791022555971c466af4527"},
     {file = "ruamel.yaml.clib-0.2.6-cp35-cp35m-win32.whl", hash = "sha256:ada3f400d9923a190ea8b59c8f60680c4ef8a4b0dfae134d2f2ff68429adfab5"},
@@ -2388,10 +2747,58 @@ six = [
     {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
     {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
 ]
+smart-open = [
+    {file = "smart_open-5.2.1-py3-none-any.whl", hash = "sha256:71d14489da58b60ce12fc3ecb823facc59a8b23cd1b58edb97175640350d3a62"},
+    {file = "smart_open-5.2.1.tar.gz", hash = "sha256:75abf758717a92a8f53aa96953f0c245c8cedf8e1e4184903db3659b419d4c17"},
+]
 smmap = [
     {file = "smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"},
     {file = "smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"},
 ]
+spacy = [
+    {file = "spacy-3.2.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7e20c63ba47eaa33ebd4b2cc6eefa3e8906505273799138ad8ab231b146d8875"},
+    {file = "spacy-3.2.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc9184973c9052e1bb9eeb975801e6906aacbe0c009533ec0c34f443832473fd"},
+    {file = "spacy-3.2.4-cp310-cp310-win_amd64.whl", hash = "sha256:0168d97e7fbbddd3258016e4d3c10d1593b7129dddff146c14f3b103ade6b1cd"},
+    {file = "spacy-3.2.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cff47cdaa824802cd38ae94fe98af9cde6810d86334cd283659c868e0011831a"},
+    {file = "spacy-3.2.4-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:400af3490c36c1b6c895de526ec06f6c7655af5ca595743c07e09e9bc8f378ea"},
+    {file = "spacy-3.2.4-cp36-cp36m-win_amd64.whl", hash = "sha256:87bd072ccacedbf8bc5a692fea1d5c320abd26821c63af157a7c95baa47dc36d"},
+    {file = "spacy-3.2.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:36e9ef5a32834383d37bbd27fca49388e31e9b53f77c91ba8ccbf19af10e3aef"},
+    {file = "spacy-3.2.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e9a98999b0fce03d4f483112837ac7111378449ace069c7cd050908f0fa5d9f"},
+    {file = "spacy-3.2.4-cp37-cp37m-win_amd64.whl", hash = "sha256:89be328ff378e4cdcfb4dcf38ca2fad740f87213825ed10e8ce9f54b822277b8"},
+    {file = "spacy-3.2.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ed29278fc89f07c1999ceca5f6702b379589c8e884a57816bdaeb05a1a7b2bbb"},
+    {file = "spacy-3.2.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:090e684eec551b5b7d56d9242cea18742515a706191ad158e32e16e8f2fe15ac"},
+    {file = "spacy-3.2.4-cp38-cp38-win_amd64.whl", hash = "sha256:2053cb78bcf4eec38aa266890a5700167a284d1a26197f851710d29f3d7071b3"},
+    {file = "spacy-3.2.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6db861f69f18ba5e00d0bd44744cf1662e00cc3b564d17a1ccdc4625ec3d5c3d"},
+    {file = "spacy-3.2.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac2288e87de1066ad65676e930f53978d6ee97c34044dca4d24f64a24e2a88b6"},
+    {file = "spacy-3.2.4-cp39-cp39-win_amd64.whl", hash = "sha256:e759e27da39e469b6367b82281a10eb4e50de04260ba49d42091cbdfe2d99633"},
+    {file = "spacy-3.2.4.tar.gz", hash = "sha256:3e4c6f298d54044582daca1142b082ee38831bb3d7bb931d2ee601e8b8dce64f"},
+]
+spacy-legacy = [
+    {file = "spacy-legacy-3.0.9.tar.gz", hash = "sha256:4f7dcbc4e6c8e8cb4eadbb009f9c0a1a2a67442e0032c8d6776c9470c3759903"},
+    {file = "spacy_legacy-3.0.9-py2.py3-none-any.whl", hash = "sha256:dfd58b0cc65b3596cb06f7b95e7bf4fff34668297c59eb179eb050db07b199df"},
+]
+spacy-loggers = [
+    {file = "spacy-loggers-1.0.2.tar.gz", hash = "sha256:e75d44f4cf99e6763d7132ca7c8c420e0a92790222a08bc8eb9e24ea2c13536e"},
+    {file = "spacy_loggers-1.0.2-py3-none-any.whl", hash = "sha256:d48c9313a577ad1818da961cf6db71a73fd1e556ae47e6e68d7e28b541d11e18"},
+]
+srsly = [
+    {file = "srsly-2.4.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2d0236feafe3805b384532221596e6749a54d0ff10ba022b333dc1de7aa1b2f7"},
+    {file = "srsly-2.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62630dbf20e240610fa64b6717545fcc28d9f18a6085ee93656be000678592a6"},
+    {file = "srsly-2.4.3-cp310-cp310-win_amd64.whl", hash = "sha256:27b3f693296d8a24c306aacd5df38a565ec43214f2aeb51a38170af5dc8b48bc"},
+    {file = "srsly-2.4.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d3b93531f086c516a26f729beac9b052c2ad0528d72e80f9d193de26aa2202be"},
+    {file = "srsly-2.4.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5ddcc5f36eb318d011c6f142e826c1ca15cb34bd5beab2f21fee62d4ae4d590"},
+    {file = "srsly-2.4.3-cp36-cp36m-win_amd64.whl", hash = "sha256:82cbf1ec388ed0c16f8062fee30dc54ba8513bd51aae0602570143c6d9218e4c"},
+    {file = "srsly-2.4.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:acbb14546da9bdf287dfefa0883e793ac563c7868eca32cd65504463980022fa"},
+    {file = "srsly-2.4.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab31586fd89e5e5fe6f38664209577b03e85fb834f238c928c15ed3c80ab9c73"},
+    {file = "srsly-2.4.3-cp37-cp37m-win_amd64.whl", hash = "sha256:a906c9b1f62c109ddcfaeaf242b19b2ebc5d2f865eb38ef4af35959027c5185b"},
+    {file = "srsly-2.4.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0d2b92c40f9aa9ba7cb0d8048bd7bfaa13d79d02e9ad6808ca7a8879ba5ed50b"},
+    {file = "srsly-2.4.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97a67c8f86ce3207e5e810b998a94ea49d439139adc21d9aadbd0bfab9faa64b"},
+    {file = "srsly-2.4.3-cp38-cp38-win_amd64.whl", hash = "sha256:cffec31143c6e1c783ead11245c08938cae859115d4cb0f4cf423e2895707b74"},
+    {file = "srsly-2.4.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:61e31a72370238387a8ff2a4cebea402227215a1450648b852cad9e511a8b59e"},
+    {file = "srsly-2.4.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1d13dc2133d5a83d30774793adb2c3fd9be905da339e2d54e2c79d55248c1a5"},
+    {file = "srsly-2.4.3-cp39-cp39-win_amd64.whl", hash = "sha256:fb08416fd6ef04c51fdeefd6d28592b64563b2853243c571a9b0d67403b5be7f"},
+    {file = "srsly-2.4.3.tar.gz", hash = "sha256:dbe91f6dd4aea9e819493628356dc715bd9c606486297bb7ca5748e6e003841c"},
+]
 tabulate = [
     {file = "tabulate-0.8.9-py3-none-any.whl", hash = "sha256:d7c013fe7abbc5e491394e10fa845f8f32fe54f8dc60c6622c6cf482d25d47e4"},
     {file = "tabulate-0.8.9.tar.gz", hash = "sha256:eb1d13f25760052e8931f2ef80aaf6045a6cceb47514db8beab24cded16f13a7"},
@@ -2400,6 +2807,24 @@ text-unidecode = [
     {file = "text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93"},
     {file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"},
 ]
+thinc = [
+    {file = "thinc-8.0.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0368c0b279492c0ed0b5b1bc79614e8a335ae1ccc3b1617de46f04eb74dc9a43"},
+    {file = "thinc-8.0.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4276b64a8cd91197f30382c0874f59fa6c94ef533150d845b2f30998aae87cc"},
+    {file = "thinc-8.0.15-cp310-cp310-win_amd64.whl", hash = "sha256:72cec290eb1b54ba6144b05d96f3247ea34eb41c66842961b05b408b93f2ba9b"},
+    {file = "thinc-8.0.15-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a4ee24a6505d63b6f0161f25d0f73f87ab569e0e1a9799a6baca97352788a91f"},
+    {file = "thinc-8.0.15-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:376b196da6c69c8efaaf26fb99f6997543d80ea4bc5f4ab8600e9d1d521a7dc9"},
+    {file = "thinc-8.0.15-cp36-cp36m-win_amd64.whl", hash = "sha256:bed92be72516b1511fecaf616ea31ff1c2e972a7ec4ad991c212f9b2f5c94183"},
+    {file = "thinc-8.0.15-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:42641f021f4fdc47eaec4b9ff66246b153b9783ef24e2c266bf0f51eccd40db5"},
+    {file = "thinc-8.0.15-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0557791e73865fa81f09623dd1f9b98b6d4ab80c63fca5f141530536516aac98"},
+    {file = "thinc-8.0.15-cp37-cp37m-win_amd64.whl", hash = "sha256:f9ba4e4dac98e166950e004c87a0f57b8f8796ecd0e3b6973beb6febc20257ff"},
+    {file = "thinc-8.0.15-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:489521ca3cca469d67432fc30f14c7c13c17320b179bf8e362319313feaafbb7"},
+    {file = "thinc-8.0.15-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ddda1aa1432eef8bab5c83e4cf2020f1ed891771a6dd86729f1aa6078f25f2c"},
+    {file = "thinc-8.0.15-cp38-cp38-win_amd64.whl", hash = "sha256:70781a0802fbb62a27217ccb80e744e80a5b43f9107ac596c5cd2dc9878ae258"},
+    {file = "thinc-8.0.15-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1f19dd9a7121d332d16446db39b4999abb4f040ce7c71bc86ea05664c86d361"},
+    {file = "thinc-8.0.15-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecd8eab82598b079e901f16567818dc955481326c01d84b819c3c05801b97e07"},
+    {file = "thinc-8.0.15-cp39-cp39-win_amd64.whl", hash = "sha256:5d98e6b3bf220c1068442d09d7c34dd8e52bbdfa43ea32f773747c5909a1c011"},
+    {file = "thinc-8.0.15.tar.gz", hash = "sha256:2e315020da85c3791e191fbf37c4a2433f57cf322e27380da0cd4de99d96053b"},
+]
 toml = [
     {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
     {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
@@ -2444,6 +2869,10 @@ typed-ast = [
     {file = "typed_ast-1.4.3-cp39-cp39-win_amd64.whl", hash = "sha256:9c6d1a54552b5330bc657b7ef0eae25d00ba7ffe85d9ea8ae6540d2197a3788c"},
     {file = "typed_ast-1.4.3.tar.gz", hash = "sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65"},
 ]
+typer = [
+    {file = "typer-0.4.1-py3-none-any.whl", hash = "sha256:e8467f0ebac0c81366c2168d6ad9f888efdfb6d4e1d3d5b4a004f46fa444b5c3"},
+    {file = "typer-0.4.1.tar.gz", hash = "sha256:5646aef0d936b2c761a10393f0384ee6b5c7fe0bb3e5cd710b17134ca1d99cff"},
+]
 typing-extensions = [
     {file = "typing_extensions-3.10.0.2-py2-none-any.whl", hash = "sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7"},
     {file = "typing_extensions-3.10.0.2-py3-none-any.whl", hash = "sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34"},
@@ -2457,6 +2886,10 @@ voluptuous = [
     {file = "voluptuous-0.13.1-py3-none-any.whl", hash = "sha256:4b838b185f5951f2d6e8752b68fcf18bd7a9c26ded8f143f92d6d28f3921a3e6"},
     {file = "voluptuous-0.13.1.tar.gz", hash = "sha256:e8d31c20601d6773cb14d4c0f42aee29c6821bbd1018039aac7ac5605b489723"},
 ]
+wasabi = [
+    {file = "wasabi-0.9.1-py3-none-any.whl", hash = "sha256:217edcb2850993c7931399e7419afccde13539d589e333bc85f9053cf0bb1772"},
+    {file = "wasabi-0.9.1.tar.gz", hash = "sha256:ada6f13e9b70ef26bf95fad0febdfdebe2005e29a08ad58f4bbae383a97298cf"},
+]
 wcwidth = [
     {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"},
     {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"},
diff --git a/pyproject.toml b/pyproject.toml
index f8cb2c1..be0a2a1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,6 +22,7 @@ Flask = "^2.0.1"
 Flask-HTTPAuth = "^4.4.0"
 minio = "^7.1.6"
 dvc = {extras = ["s3"], version = "^2.10.1"}
+spacy = "^3.2.4"
 
 [tool.poetry.dev-dependencies]
 pytest = "^5.2"
diff --git a/sziszapangma/__pycache__/__init__.cpython-38.pyc b/sziszapangma/__pycache__/__init__.cpython-38.pyc
index c5430061dec094f0029e62e5e40384fecf89ef8c..b243eacb517bf8af84e56012995bd94fed26eda4 100644
GIT binary patch
delta 52
zcmdnYw26r)l$V!_0SIiPGLt6qoRo3a&&bbB)z3{VO3utHPb?|QPfFAe2oH*P3=W$3
G$q@j3-w?O}

delta 53
zcmdnQw3&$~l$V!_0SKl~x}7kQ=cKHQerR!OQL%n*Vo`EtUU_0kQGQaQzDsIxc1eE0
H#E*^ux<eAc

diff --git a/sziszapangma/core/__pycache__/__init__.cpython-38.pyc b/sziszapangma/core/__pycache__/__init__.cpython-38.pyc
index c7505c2810e029559c1a80b76e0923bf278e236a..9ebedd210cd2a60a81f43e87d4db0e528114f9ee 100644
GIT binary patch
delta 67
zcmZ3^xR{YAl$V!_0SIiPGLt6qSZjOeXXNLm>gOgFC1>W9CzcfDCnf3!ga^et1_$XU
U78mIzrRF7P0Hw2wCzcrk0DEf{RR910

delta 71
zcmZ3?xSWwEl$V!_0SHbuHzrKvvDWv|4=qkDD%Q_UEK1JID^Dyb%1=tvcS$YIF3B&@
Z4-X9v(J!vbEUro{NX$#mO`KR{2mlaF7%~6=

diff --git a/sziszapangma/core/transformer/__pycache__/__init__.cpython-38.pyc b/sziszapangma/core/transformer/__pycache__/__init__.cpython-38.pyc
index 9ae71e4517916535584f24505c9625b24116a404..d0f705a00dfe1cccc11efba35e1a7a5499115881 100644
GIT binary patch
delta 67
zcmdnYxSo+Gl$V!_0SIiPGLt6qSZjyqXXNLm>gOgFC1>W9CzcfDCnf3!ga^et1_$XU
U78mIzrRF7P0Hw2wCzja)0E5pJg#Z8m

delta 71
zcmZ3_xS5eBl$V!_0SKy}?o61-W33;dA6lGRRIHzySd^TZSDsi>l%JHS?~+=aU6Nm*
ZA08SUqF-E<SzMJ^keHXAn>ew^762)x7_I;S

diff --git a/sziszapangma/core/transformer/__pycache__/embedding_transformer.cpython-38.pyc b/sziszapangma/core/transformer/__pycache__/embedding_transformer.cpython-38.pyc
index b1ed18db8c746888e658d359289cdbeeabaf6c9c..82cb3dee100c4689d154b8470bbb0e2bac422976 100644
GIT binary patch
delta 55
zcmdnYzKNYDl$V!_0SIiPGLts)<TA=6>SyHVrt0S=7A0rql_!=I<tHWT2ZRU3I|c`B
Jp2jH41OT9)5J~_5

delta 56
zcmdnQzL}jTl$V!_0SNXe3MOvk$z_yH(hn_8Eh^T}O)N^z%qveUDaubu)OSfO&MwI>
K*gTa{mI(m0eGxqX

diff --git a/sziszapangma/core/wer/__pycache__/__init__.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/__init__.cpython-38.pyc
index 8b598182573576ee0e3f62c5da88edbc96ef5202..fae011b1d830493ce889842f98b926d3a3c91e1b 100644
GIT binary patch
delta 67
zcmZ3@xSWwEl$V!_0SIiPGLt6qSZn*}XXNLm>gOgFC1>W9CzcfDCnf3!ga^et1_$XU
U78mIzrRF7P0Hw2wCzhE40Dd<WWdHyG

delta 71
zcmZ3^xSEkCl$V!_0SLZx>`Iu(W33;cA6lGRRIHzySd^TZSDsi>l%JHS?~+=aU6Nm*
ZA08SUqF-E<SzMJ^keHXAn>ew^6aWs%7%l(+

diff --git a/sziszapangma/core/wer/__pycache__/wer_calculator.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_calculator.cpython-38.pyc
index 2c81baf6b4a392f961031fa26b0c3170b95c3c3c..800b2cf0b964c1458fee0d3b9aa1ab514e39441d 100644
GIT binary patch
delta 55
zcmaDP_)w51l$V!_0SIiPGLts)v@pp8>1X8Urt0S=7A0rql_!=I<tHWT2ZRU3I|c`B
JUdv?41^~L65cvQA

delta 56
zcmaDT_(+f^l$V!_0SIPHx}C6*r-ey2SU<ElwWwG>H?b%=Gp{_cq$ocrQQsxCIJ+dj
KVDlO#Q#JtnqZ0xE

diff --git a/sziszapangma/integration/__pycache__/__init__.cpython-38.pyc b/sziszapangma/integration/__pycache__/__init__.cpython-38.pyc
index a18d1edc644ccabc99c7df659b505bf8aebead00..51a8113af5dac5b2eaeaf146816146332e5ad461 100644
GIT binary patch
delta 67
zcmZ3-xQdY{l$V!_0SIiPGLt6qSZn+1XXNLm>gOgFC1>W9CzcfDCnf3!ga^et1_$XU
U78mIzrRF7P0Hw2wCze?N0Dw>xaR2}S

delta 71
zcmZ3*xQ>x0l$V!_0SHbuHzrKvvDOdP4=qkDD%Q_UEK1JID^Dyb%1=tvcS$YIF3B&@
Z4-X9v(J!vbEUro{NX$#mO`KR{0RR<+7)}5H

diff --git a/sziszapangma/integration/__pycache__/asr_processor.cpython-38.pyc b/sziszapangma/integration/__pycache__/asr_processor.cpython-38.pyc
index 1351c5e0615d469566e90382919de84c575aadf8..26c7df6c9e9c9e96fc61e0704d394bc1e4d08456 100644
GIT binary patch
literal 2855
zcmai0&2Jk;6rY*hwbzdG(U69Q7BC<L7Oh=CAP!Mzno5XEs9H*^mC$y*Gf6hx4|irv
z70Wp}QaQkhV_PXX=3kmCC;kN_1n<qpA88{o(mcO6Upw!;-|vkd%+J>ev~PEl?naf6
zKhc<NE*Q6<tKUFy!f8qZ{ADT4SimwTa46bN>SkWxS=~)5Sv9C;e&A=dpk~KtS`X^5
z&P(UAM$oWhm2^H^2o@;$j_@k?j|le#8#%!u*fm}UyDk>NUIKfLH^6RK`x2-3No)Qu
zxDnZIG2?9B-ZBeOS7{l=Ium-Y;4Nz0yOG{&IWV0c823&RYvcdW(@Bv>DNHZj6LBH=
z_xyV;WTXoTW6R|jZoDgtUlJ~4j9-~;9vHWvt4~1)5>QS8CLG}k4|-*XtwEGtGS0Yj
z<c?@i<u2UTGmT9p;m}wpRmf9jYCB0P!XA9eRHV>-na2!H95>V{^eyNLvtXTOgWY=b
ztZntB6uC|hnpvTAGs${|)KLzaYVIT=<;}+skX*$w>FLMKXNlfxrU|SpcAC?P$#Ksh
z7^*!nw@(R#)>R_ikq!8~To{YLf9&iPndoGZjFbFXq-D{KI(Hx53pelI>qJW4=!!hv
zgXt%#qXvl@M7=2A&7w|{Yq1Nlh7{nsus04zyLVs~;wVi+2}#+_6D8)+ZyS)x;OcDX
z+UM78w|wJ=VIE~73{5=@vx4^_u%T&$;nRMUmNU|U^Na%!kri|VvnVg2nD|v{OE`v{
zK&NfseafgEc>@ZV7HsM#5MwAjAP5Fd*9ic^StFn`(4Era&P(>3>`?A<@2C+}V7v<5
zx3H-IY>)_UP<u{X64U`WH4B`FqPvwQkZt3JeVLlMFzV~QP!~@`jts!D`gABpZ^sSw
zaaZqwp^2tLGGfQ<1Ryp9J#>Z)q{|6>k!}N8J=O0CDOX`RhBwI*9ftC4Fy$2xEk`b*
zzJ}r*5U14C!Z1!FrC^<nsg2|{n8SI6Q73ePR_Nd|q;uj>dvf?3EI3C2+=6`zx_S(P
zQw`VUY{U|J2p8T5?mel=>l(PmU9?}2S1>oCxvSZT96H+B$8itbbGXMVzq^O-&^@l4
zkV8*bd3EUFjuC}0c;LFfZCs?KVX|xHHzCA@Oa>OPgX(P&Ng<obBS0mT6%Q9GQ{590
zlnGFDl8K_P2de<><{7k;e77lu>ZiI1ICYDj$Zh<L%WXZ9;%Q$ft&H0P4%_$|cV88`
z@p=;I(yEkkwha6bLLff^VH`L!n8$TM7}YCsC1!AJ1CQ8#AG%rw0fBe?e^;6N5A#;2
z4{5NRnkL~qr&@CocgeR<tf06I!Yqbil;?%E@xV00gFz%__%%+M`7<cQ0w{e32aAb!
zak0H0A`83nLDb<+4R*z|c#C1+T$|AB5e#2UGvOeCCRD44DiF<?A(~$jP}E){THO+D
z&Jt}I%M8@xttj3TXLPH_Xq08Tj>Q@{7bYC=-+{}RMXXnnhsoI+Vph%u>{0+h`G$-=
zK(|nz(D5c17tzt)9FTtG581JMLhg_e($upAb+%)C8O9BZB$hMGmO^qSWJrEqI;6jn
zW1!0k9nwG8Q+R|dMBtV4cZIedLFHiWn}#L&l*%t)=ajgnzK#fui>$ZtB_3RokrAH^
zI&a2l!s7ONS^${TMtiy+KztPsZ=*oCK>3;p>mpufT>Ie-k{FbhTLK@WEI&tK?`QGz
zHJTPgco?pptuFSlIG@GI?c^IZ1~Y-d+rm?>gMbPHb!0@3vA#G%x<+0=-Fd}MpyFuv
z*wd9EyzL{f0W;3_z-wE&QrW6XOs)I|gz@)_Bu5NbXJiY<+_Z?e{20`6^+Z+!^Kseq
z6|#;Yd%uEiA0R@n(ZL571pPJEHZEeVM5juABGY6Ih2<=|T|R*~P@CRhN>a=i?w^o!
ziLH7zkFDj>DJSa25)oKFuvK&>O}aJ|rlJpePyx%w76DXNj88x_pUwU=XwUpJXpbp1
zWtGI6n;ZYP;~eKoNAV%IMWqDa)JLmfsExl~{$tocPT6Yd(<R!VE6isN+HjWYORN6@
Da`TxY

delta 1111
zcmZuw%}x|S5bo~z*_mY*7Q__<f54!wC|uM7F(w8R^g=X*MCURYW_sC?-I=w!iII?S
zfP|AKo4J^v5#qrRU%^N4;>lz#CO(E<tm<_k5GU!Hs_v@l`o60ET>UuMzg#XC349Zu
zR#$%5o&Is!=uA&NYT2IMDtHA-A`;Q2<2ef?G9zn;L>6b;hUbE9M+LA8+y%P`wiCHv
zyV~xH=n|<FJ6|t$epRzc`sdCe%T39fb>YHwqOT`@FV8MZF63-05Mi9W3Y17!g4u^$
zuBo)0J!Oxno9(gZcP)%CQ4I8AkF$t=+$(@^U`cdyD5Ii~b?gh-g7IzXkS^A&+>+~U
zF2pdL=Z^2kNvwQdoCQ-<LDURUM*SR$5fFzocYQx>22z5bE?VC-24~f*ZeFEM_Qrfm
zy=>fiwuUKkGm1l%JA%Jl=TgbsY^PG`D{}LNOq1Mdi#Sm=yL**p<1z$_35+%bmtGnD
ze9+oy0^A&N)kTT$$FiT++*Sp{22l*5I0GUd@ckf3QWdBef}v;|kFZ{GlEJ+E4_L^X
z%^o;?)i^q9n8*{RmdI+6I<&+_EX`gB8`(R%x^){APCObBkL{8=jcCMn9M9k;w_w?I
zHcBGnJ=4=;dWBtL63>Czu=Ak@QalM%V?GEQe7+g;M1>U6^*+>%e@kK+iYU5Cw%I;A
zAUK&zM9huCKD6yT{KQQZ2(?DL<Z2`*yLJPPB?CdS@6M$y1Qu!&x8Oq^iJFHc5x*k)
zRIyFE2M^%`+olIN*lcm5G^2RfXr(jQEU4fb_~vFa4I*&`R8d0lpVr-i{cZt$8xz?V
zcXG=Wry8ldh21>WGBJHJ=W(MD`E^XJC!duJ%|)%$t;z^iLV>^p(wW=pbvsU0yN}5t
ti?FYUWC}g-4e1Vd**petL}TvUYo*b8li$Y%NyO>U3LR#SQ7Klce*r+9(T@NC

diff --git a/sziszapangma/integration/__pycache__/experiment_manager.cpython-38.pyc b/sziszapangma/integration/__pycache__/experiment_manager.cpython-38.pyc
index 8da8726202ed5dd42b5b88d2119dad72d4773efa..b999f14b73a56f40a5df21939781357eed62bf5f 100644
GIT binary patch
delta 60
zcmcb_b&-oZl$V!_0SF#MXKv)4z$6o?pOK%Ns-K%!l$@DYo>)?ppOmN{5FQlo7#uWt
P2h$J6TbnmBFJuG&Grbh5

delta 61
zcmcb}b%~2Rl$V!_0SFiurElb(z$6=`A6lGRRIHzySd^TZSDsi>l%JHS?~+=aU6Nlg
Qc{|e&#;DC3m=`hv05N?Ol>h($

diff --git a/sziszapangma/integration/__pycache__/gold_transcript_processor.cpython-38.pyc b/sziszapangma/integration/__pycache__/gold_transcript_processor.cpython-38.pyc
index 92bdb644e9c13dd935a97e0eceab9b945e57b691..d35f590b4949c4f5af78a9603bfb68b425d2abfa 100644
GIT binary patch
delta 55
zcmX@gc7%;5l$V!_0SIiPGLts)Y-5y3(9g)vP1Vm$EK1JID^Dyb%1=tv4+sy6cMJ~N
Je4jCm5dgEL5k&w1

delta 56
zcmX@Yc9e}Ll$V!_0SM+Q3MOvk*~TcFs2^IKT2!o`n^=^bnOB}zQk0*RsPB?moL!P%
Ku=yTi8Y2M1xDpuv

diff --git a/sziszapangma/integration/__pycache__/path_filter.cpython-38.pyc b/sziszapangma/integration/__pycache__/path_filter.cpython-38.pyc
index b70f37888ee88dc389a9aca5fdb47356cc822f56..fe1989427f54e3a4261b6c2ebc65b4bd8629ce56 100644
GIT binary patch
delta 52
zcmZ3-zm}gXl$V!_0SF#MZ{)hkC=;Zgk)NBYpPN{eoS9diSW=Xql&BvN9u)5w9JGmP
G9V-Bmg%E%M

delta 54
zcmZ3>zmA_Pl$V!_0SF3RHger$lnvGoElw>e*3V5WO3utHPb?|QPfFBxNiEJU$uHRa
IpK%>40H{t900000

diff --git a/sziszapangma/integration/__pycache__/record_id_iterator.cpython-38.pyc b/sziszapangma/integration/__pycache__/record_id_iterator.cpython-38.pyc
index 0763aca101d6f16378fe58009f6cf6ac3e2ccea0..548042b82a83071b71adf9677a36c9b66d74b97a 100644
GIT binary patch
delta 55
zcmbQiHJytml$V!_0SIiPGLts)I5Wyb>SyHVrt0S=7A0rql_!=I<tHWT2ZRU3I|c`B
J&S6}`3;>V-5KaI9

delta 56
zcmbQvHG_*Ml$V!_0SIPHx}C6*$C*(!N<XwXwWwG>H?b%=Gp{_cq$ocrQQsxCIJ+dj
KU~@L(8fE~|ED~1$

diff --git a/sziszapangma/integration/__pycache__/record_path_provider.cpython-38.pyc b/sziszapangma/integration/__pycache__/record_path_provider.cpython-38.pyc
index ab1556917ecaea9eca94ca0a6b30e7ecb68831dd..f4e94b708a9e85cc4f1ce0a42cba09613b165216 100644
GIT binary patch
delta 70
zcmX@ja-4-Hl$V!_0SIiPGLts)@Gxpe>u2QWrt0S=7A0rql_!=I<tHWT2ZRU3I|c{o
XCl(j!CZ*;jX8@(Mi#NM7vM>Swzl{}e

delta 74
zcmX@ka+-xFl$V!_0SF#9?oQaq!^5Z_uOC{RT2!o`n^=^bnOB}zQk0*RsPB?moL!P%
cpdTI@9HL)bm04VsSdf^Po}0MYnURGN0B=_qp8x;=

diff --git a/sziszapangma/integration/asr_processor.py b/sziszapangma/integration/asr_processor.py
index 9adc99c..416baf2 100644
--- a/sziszapangma/integration/asr_processor.py
+++ b/sziszapangma/integration/asr_processor.py
@@ -1,9 +1,13 @@
 import json
 from abc import ABC, abstractmethod
+from pathlib import Path
 from typing import Any, Dict, Optional
 
 import requests
 
+from sziszapangma.integration.record_id_iterator import RecordIdIterator
+from sziszapangma.integration.record_path_provider import RecordPathProvider
+
 
 class AsrProcessor(ABC):
     @abstractmethod
@@ -38,19 +42,14 @@ class AsrWebClient(AsrProcessor):
 
 
 class AsrPathCacheClient(AsrProcessor):
+    cache_path: str
+    path_to_id: Dict[str, str]
 
-    def __init__(self):
+    def __init__(self, cache_path: str, record_iterator: RecordIdIterator, record_path_provider: RecordPathProvider):
         super(AsrPathCacheClient, self).__init__()
+        self._cache_path = cache_path
+        self.path_to_id = {record_path_provider.get_path(it): it for it in record_iterator.get_all_records()}
 
     def call_recognise(self, file_path: str) -> Dict[str, Any]:
-        # files = {"file": open(file_path, "rb")}
-        # headers = (
-        #     dict({"Authorization": f"Bearer {self._auth_token}"})
-        #     if self._auth_token is not None
-        #     else dict()
-        # )
-        # res = requests.post(self._url, files=files, headers=headers, timeout=600)
-        # json_response = res.json()
-        # print(f'asr processing result {json_response}')
-        # return json_response
-        return json.load(open(f'{file_path}.techmo.json'))
+        path = Path(self._cache_path).joinpath(f'{self.path_to_id[file_path]}.json')
+        return json.load(open(path, 'r'))
diff --git a/sziszapangma/integration/repository/__pycache__/__init__.cpython-38.pyc b/sziszapangma/integration/repository/__pycache__/__init__.cpython-38.pyc
index d82948fab5ca19a68216ed42875796cfe0a80fe8..ba73bbb236ce40b3b8cbe8e87791231724d3d7d2 100644
GIT binary patch
delta 67
zcmdnVxRsG7l$V!_0SIiPGLt6qSZhb=XXNLm>gOgFC1>W9CzcfDCnf3!ga^et1_$XU
U78mIzrRF7P0Hw2wCzd$_0Eht<od5s;

delta 71
zcmdnWxRa45l$V!_0SJWO&rF!eW33;jA6lGRRIHzySd^TZSDsi>l%JHS?~+=aU6Nm*
ZA08SUqF-E<SzMJ^keHXAn>ew^82}N=7)SsB

diff --git a/sziszapangma/integration/repository/__pycache__/experiment_repository.cpython-38.pyc b/sziszapangma/integration/repository/__pycache__/experiment_repository.cpython-38.pyc
index b79c46b62439be55b5efe5a33cc28b01ffd877c8..b4625c3cce7b16f7bd86fcae9b8acfdf3cfcc9e9 100644
GIT binary patch
delta 54
zcmaDZ@JxU^l$V!_0SF#MXKv({WRgkO&&bbB)z3{VO3utHPb?|QPfFAe2oH*P3=Z1t
I$fU*w0JX9ZZ~y=R

delta 55
zcmaDR@LYg9l$V!_0SFq-rf=kyWRlI$4=qkDD%Q_UEK1JID^Dyb%1=tvcS$YIF3B(0
J?7*bP1^~}=5rzN&

diff --git a/sziszapangma/integration/task/__pycache__/__init__.cpython-38.pyc b/sziszapangma/integration/task/__pycache__/__init__.cpython-38.pyc
index f04226f780906728ded8b6dddea48b4094670f9e..f102c9699c7c54986230d5638536633b80c4d018 100644
GIT binary patch
delta 67
zcmdnYxSo+Gl$V!_0SIiPGLt6qSZjyqXXNLm>gOgFC1>W9CzcfDCnf3!ga^et1_$XU
U78mIzrRF7P0Hw2wCzja)0E5pJg#Z8m

delta 71
zcmZ3_xS5eBl$V!_0SIm_nVB$=$67x^KeRZts8~NYu_!q+uRO7&C_gDt-zBv;yClCr
ZKRh%zM8CKyv$!g;ATcjJH*sQ-EdVLA7_0yQ

diff --git a/sziszapangma/integration/task/__pycache__/asr_task.cpython-38.pyc b/sziszapangma/integration/task/__pycache__/asr_task.cpython-38.pyc
index d359f7e343c9c1b2e0cdcfeb83f3b0e5c1c3ae16..a34960b88f1c6e4bbe7fb65ec4b80fc16a9658e7 100644
GIT binary patch
delta 63
zcmaDV@I-(ol$V!_0SF#MXC`gr*~TOjqMwnUo2s9iSd^TZSDsi>l%JHS9}pfC?-(4k
S`99MLmdVEK(vuIdF9raBQ5Amx

delta 62
zcmaDN@Kk^&l$V!_0SL0bol4xuvyDkMR6n#hwWwG>H?b%=Gp{_cq$ocrQQsxCIJ+dj
RVDmku6D*8Tlb5sa1pth_7JL8z

diff --git a/sziszapangma/integration/task/__pycache__/classic_wer_metric_task.cpython-38.pyc b/sziszapangma/integration/task/__pycache__/classic_wer_metric_task.cpython-38.pyc
index 94827e74b006da359353ae07802d25af99e2a6f0..18e3b18b82f12a862268f952ce794f58ccd5a8e9 100644
GIT binary patch
delta 701
zcmZ9}Pixdb6aerhyV-0uiEaJU*45p$Zo4(bHMVGBp(uU<k&1|j9kbI}63r&d%!rDD
zCsD!MgP!~XdlbKccfY{kK|wDbL~r8Bd9l>BIs7uooA>fw=4<^+-MiuYwJCjlx}Egj
zdQS~Qy?bR0Oye;8C@nmj)e$~}SjZ8?MviI%Ta^V%2mBbmnM3B01>`tV*NhG25JJ;P
z2tvkFziAlfLlzDrPLu3mBzfMC!fQ9T$d&6`VI+9B&oUb8@O=@&%W}_X!29y9aRP2x
zCl|4mpu}5Pd<j`bPVdu(UF&M)Zv?(s-$M)qCa>z7@?=i#i991a36+7V%yZ|JnaKar
zZTnGW1<N(zx!n!-=U1~dO6h|%l8g+Q<OvmleXx%JKHKZ=WH-%fJhzVDKBOljl1G`K
zJQ+#efj8BSmU1{7Fs=t?sun*U<uYa>5xQag(+Yl8=Uc_BSc7jM6V>rCgcWDAasihe
zNSw^VgHiC&X|;<)j3|4r<Acd-(5$_<+rVT8IfMAfMWl>)NCgRz5@I528s+buEM;j*
z$VB~9xTTFugXQ*&X}Ih*3<Y=GOQu?dZMRbsl8uVW3O=~2I`+$bVyZ5Ty(WP7y1rRt
OBmZVDR&D6{Xa4{)fuhO)

delta 666
zcmZ9}J#W)M7zgm4<Ci$L0|mrOlr(Be1CB7R3gSgVomg1dDxnkTFz!WhaqZ~tP(d&v
zAtWXq!NM1yBjOXVB0hmx7+_#Qoj_vZ9#fT24?o}O@$>g{_Seq0j=vrRtvP*d{`{D{
z^Pdnx-#;f~gUrKj<K*bDu82d3g&aW~<ftZeDpT@&ERN|7MCPezhg2?OX`V>Q*v{PV
zjd)#hA>*w?q*<|@C{f-^;_F=ASLGD8%^Rc(AI#UJ1GlX1BEBbVhy~1FLQW%#`wZZf
zb<_S^f$!Fj7)zlcn)*h4RkBT47HlI+RcKVk`IXAZ)PL=^^Uxk*To0Z)QM`Y+YCcW!
z^g*5|&bGJ`St>*4;2dY+i?iy@PSa~nj<4vrs+~x|lt>DhifpRHGQ4SC>Qh(nF&Df@
zdDWUcnwDz9WhV8&_!%keHc#~Hvtow0h|HwpHiDsh&0fXjGECgU!mm!@v)f;)8?i#2
zy@?Mt$Ixp%yW7QP7de9j$OXhie8ff~q=6Vntf_*%UzzeeXKW_FkB<zHHkjU3(uOs!
zM=0F()(kp=JKnG*6`$706h3()UHj!dHfRKI{RMdGzvx`9yODoqu2%!N5}f-3tV5Q6

diff --git a/sziszapangma/integration/task/__pycache__/processing_task.cpython-38.pyc b/sziszapangma/integration/task/__pycache__/processing_task.cpython-38.pyc
index 04380c866974537282bb2280d7f4cbf65d66aa7b..e4edcb2de7edbf21f9af3f7c4ec0375d7bccdf94 100644
GIT binary patch
delta 66
zcmbO)I8Tr#l$V!_0SF#MXC`grQDKsa*3Zb#P1Vm$EK1JID^Dyb%1=tv4+sy6cMJ~N
V?8{Wn%5=+U@_%;e$q(800RVqI6`TM7

delta 67
zcmbOyIA4$_l$V!_0SL0bol4xuqrxN`qaRwFT2!o`n^=^bnOB}zQk0*RsPB?moL!P%
Wu-S*HoRulcX!1XH>B$e+_W=O6jTWT<

-- 
GitLab