From 0a9519f11de73bce2e1ba11b7d0b79bf1eacfc4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20W=C4=85troba?= <markowanga@gmail.com> Date: Tue, 15 Jun 2021 07:20:07 +0200 Subject: [PATCH] Add code from sziszpangma --- .editorconfig | 22 + .gitignore | 111 + AUTHORS.rst | 13 + CONTRIBUTING.rst | 128 + HISTORY.rst | 8 + LICENSE | 22 + MANIFEST.in | 11 + Makefile | 86 + README.rst | 37 + docs/Makefile | 20 + docs/authors.rst | 1 + docs/conf.py | 162 + docs/contributing.rst | 1 + docs/history.rst | 1 + docs/index.rst | 20 + docs/installation.rst | 51 + docs/make.bat | 36 + docs/readme.rst | 1 + docs/usage.rst | 7 + requirements.txt | 5 + requirements_dev.txt | 11 + setup.cfg | 26 + setup.py | 57 + sziszapangma/.DS_Store | Bin 0 -> 6148 bytes sziszapangma/__init__.py | 5 + .../__pycache__/__init__.cpython-38.pyc | Bin 0 -> 310 bytes .../__pycache__/__init__.cpython-39.pyc | Bin 0 -> 310 bytes .../__pycache__/wer_classic.cpython-38.pyc | Bin 0 -> 6759 bytes sziszapangma/core/__init__.py | 0 .../core/__pycache__/__init__.cpython-38.pyc | Bin 0 -> 167 bytes sziszapangma/core/sziszapangma.py | 1 + sziszapangma/core/transformer/__init__.py | 0 .../__pycache__/__init__.cpython-38.pyc | Bin 0 -> 179 bytes ...ached_embedding_transformer.cpython-38.pyc | Bin 0 -> 1923 bytes .../embedding_transformer.cpython-38.pyc | Bin 0 -> 891 bytes ...ttext_embedding_transformer.cpython-38.pyc | Bin 0 -> 1557 bytes .../cached_embedding_transformer.py | 32 + .../core/transformer/embedding_transformer.py | 15 + .../fasttext_embedding_transformer.py | 27 + sziszapangma/core/wer/__init__.py | 0 .../wer/__pycache__/__init__.cpython-38.pyc | Bin 0 -> 171 bytes .../classic_wer_calculator.cpython-38.pyc | Bin 0 -> 713 bytes .../distance_matrix_calculator.cpython-38.pyc | Bin 0 -> 4055 bytes .../wer/__pycache__/step_type.cpython-38.pyc | Bin 0 -> 948 bytes .../wer/__pycache__/step_words.cpython-38.pyc | Bin 0 -> 524 bytes .../__pycache__/wer_calculator.cpython-38.pyc | Bin 0 -> 7200 bytes 
.../wer_embedding_calculator.cpython-38.pyc | Bin 0 -> 2778 bytes .../wer_processing_step.cpython-38.pyc | Bin 0 -> 1860 bytes .../wer_soft_calculator.cpython-38.pyc | Bin 0 -> 843 bytes .../wer_span_question.cpython-38.pyc | Bin 0 -> 1053 bytes .../wer/__pycache__/wer_step.cpython-38.pyc | Bin 0 -> 621 bytes .../wer/__pycache__/wer_util.cpython-38.pyc | Bin 0 -> 1526 bytes .../core/wer/classic_wer_calculator.py | 9 + .../core/wer/distance_matrix_calculator.py | 89 + sziszapangma/core/wer/step_type.py | 17 + sziszapangma/core/wer/step_words.py | 8 + sziszapangma/core/wer/wer_calculator.py | 220 ++ .../core/wer/wer_embedding_calculator.py | 60 + sziszapangma/core/wer/wer_processing_step.py | 41 + sziszapangma/core/wer/wer_soft_calculator.py | 11 + sziszapangma/core/wer/wer_span_question.py | 16 + sziszapangma/core/wer/wer_step.py | 11 + sziszapangma/core/wer/wer_util.py | 33 + sziszapangma/integration/__init__.py | 0 .../__pycache__/__init__.cpython-38.pyc | Bin 0 -> 174 bytes .../__pycache__/asr_processor.cpython-38.pyc | Bin 0 -> 1471 bytes .../experiment_manager.cpython-38.pyc | Bin 0 -> 1211 bytes .../gold_transcript_processor.cpython-38.pyc | Bin 0 -> 826 bytes .../__pycache__/id_generator.cpython-38.pyc | Bin 0 -> 732 bytes .../metrics_calculator.cpython-38.pyc | Bin 0 -> 1615 bytes .../__pycache__/path_filter.cpython-38.pyc | Bin 0 -> 1959 bytes .../record_id_iterator.cpython-38.pyc | Bin 0 -> 1307 bytes .../record_path_provider.cpython-38.pyc | Bin 0 -> 587 bytes sziszapangma/integration/asr_processor.py | 30 + .../integration/experiment_manager.py | 30 + .../integration/gold_transcript_processor.py | 13 + sziszapangma/integration/id_generator.py | 12 + .../integration/metrics_calculator.py | 34 + sziszapangma/integration/path_filter.py | 47 + .../integration/record_id_iterator.py | 21 + .../integration/record_path_provider.py | 7 + .../integration/repository/__init__.py | 0 .../__pycache__/__init__.cpython-38.pyc | Bin 0 -> 185 bytes 
.../experiment_repository.cpython-38.pyc | Bin 0 -> 2350 bytes .../file_experiment_repository.cpython-38.pyc | Bin 0 -> 3874 bytes ...mongo_experiment_repository.cpython-38.pyc | Bin 0 -> 3189 bytes .../repository/experiment_repository.py | 52 + .../repository/file_experiment_repository.py | 100 + .../repository/mongo_experiment_repository.py | 72 + sziszapangma/integration/task/__init__.py | 0 .../task/__pycache__/__init__.cpython-38.pyc | Bin 0 -> 179 bytes .../task/__pycache__/asr_task.cpython-38.pyc | Bin 0 -> 1752 bytes .../classic_wer_metric_task.cpython-38.pyc | Bin 0 -> 2835 bytes .../embedding_wer_metrics_task.cpython-38.pyc | Bin 0 -> 3078 bytes .../gold_transcript_task.cpython-38.pyc | Bin 0 -> 1697 bytes .../processing_task.cpython-38.pyc | Bin 0 -> 1769 bytes sziszapangma/integration/task/asr_task.py | 40 + .../task/classic_wer_metric_task.py | 72 + .../task/embedding_wer_metrics_task.py | 81 + .../integration/task/gold_transcript_task.py | 36 + .../integration/task/processing_task.py | 51 + tests/__init__.py | 1 + tests/__pycache__/__init__.cpython-38.pyc | Bin 0 -> 202 bytes tests/__pycache__/__init__.cpython-39.pyc | Bin 0 -> 202 bytes ...tored_embedding_transformer.cpython-38.pyc | Bin 0 -> 1693 bytes .../test_classic_wer.cpython-38-PYTEST.pyc | Bin 0 -> 4165 bytes .../test_classic_wer.cpython-39-PYTEST.pyc | Bin 0 -> 4536 bytes .../test_embedding_wer.cpython-38-PYTEST.pyc | Bin 0 -> 2106 bytes .../test_soft_wer.cpython-38-PYTEST.pyc | Bin 0 -> 2086 bytes .../test_sziszapangma.cpython-38-PYTEST.pyc | Bin 0 -> 2679 bytes .../test_sziszapangma.cpython-39-PYTEST.pyc | Bin 0 -> 2582 bytes tests/embeddings_pl.json | 2720 +++++++++++++++++ tests/file_stored_embedding_transformer.py | 28 + tests/test_classic_wer.py | 42 + tests/test_embedding_wer.py | 27 + tests/test_soft_wer.py | 26 + tests/test_sziszapangma.py | 36 + tox.ini | 25 + 118 files changed, 5035 insertions(+) create mode 100644 .editorconfig create mode 100644 .gitignore create mode 100644 
AUTHORS.rst create mode 100644 CONTRIBUTING.rst create mode 100644 HISTORY.rst create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 Makefile create mode 100644 README.rst create mode 100644 docs/Makefile create mode 100644 docs/authors.rst create mode 100644 docs/conf.py create mode 100644 docs/contributing.rst create mode 100644 docs/history.rst create mode 100644 docs/index.rst create mode 100644 docs/installation.rst create mode 100644 docs/make.bat create mode 100644 docs/readme.rst create mode 100644 docs/usage.rst create mode 100644 requirements.txt create mode 100644 requirements_dev.txt create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 sziszapangma/.DS_Store create mode 100644 sziszapangma/__init__.py create mode 100644 sziszapangma/__pycache__/__init__.cpython-38.pyc create mode 100644 sziszapangma/__pycache__/__init__.cpython-39.pyc create mode 100644 sziszapangma/__pycache__/wer_classic.cpython-38.pyc create mode 100644 sziszapangma/core/__init__.py create mode 100644 sziszapangma/core/__pycache__/__init__.cpython-38.pyc create mode 100644 sziszapangma/core/sziszapangma.py create mode 100644 sziszapangma/core/transformer/__init__.py create mode 100644 sziszapangma/core/transformer/__pycache__/__init__.cpython-38.pyc create mode 100644 sziszapangma/core/transformer/__pycache__/cached_embedding_transformer.cpython-38.pyc create mode 100644 sziszapangma/core/transformer/__pycache__/embedding_transformer.cpython-38.pyc create mode 100644 sziszapangma/core/transformer/__pycache__/fasttext_embedding_transformer.cpython-38.pyc create mode 100644 sziszapangma/core/transformer/cached_embedding_transformer.py create mode 100644 sziszapangma/core/transformer/embedding_transformer.py create mode 100644 sziszapangma/core/transformer/fasttext_embedding_transformer.py create mode 100644 sziszapangma/core/wer/__init__.py create mode 100644 sziszapangma/core/wer/__pycache__/__init__.cpython-38.pyc create mode 100644 
sziszapangma/core/wer/__pycache__/classic_wer_calculator.cpython-38.pyc create mode 100644 sziszapangma/core/wer/__pycache__/distance_matrix_calculator.cpython-38.pyc create mode 100644 sziszapangma/core/wer/__pycache__/step_type.cpython-38.pyc create mode 100644 sziszapangma/core/wer/__pycache__/step_words.cpython-38.pyc create mode 100644 sziszapangma/core/wer/__pycache__/wer_calculator.cpython-38.pyc create mode 100644 sziszapangma/core/wer/__pycache__/wer_embedding_calculator.cpython-38.pyc create mode 100644 sziszapangma/core/wer/__pycache__/wer_processing_step.cpython-38.pyc create mode 100644 sziszapangma/core/wer/__pycache__/wer_soft_calculator.cpython-38.pyc create mode 100644 sziszapangma/core/wer/__pycache__/wer_span_question.cpython-38.pyc create mode 100644 sziszapangma/core/wer/__pycache__/wer_step.cpython-38.pyc create mode 100644 sziszapangma/core/wer/__pycache__/wer_util.cpython-38.pyc create mode 100644 sziszapangma/core/wer/classic_wer_calculator.py create mode 100644 sziszapangma/core/wer/distance_matrix_calculator.py create mode 100644 sziszapangma/core/wer/step_type.py create mode 100644 sziszapangma/core/wer/step_words.py create mode 100644 sziszapangma/core/wer/wer_calculator.py create mode 100644 sziszapangma/core/wer/wer_embedding_calculator.py create mode 100644 sziszapangma/core/wer/wer_processing_step.py create mode 100644 sziszapangma/core/wer/wer_soft_calculator.py create mode 100644 sziszapangma/core/wer/wer_span_question.py create mode 100644 sziszapangma/core/wer/wer_step.py create mode 100644 sziszapangma/core/wer/wer_util.py create mode 100644 sziszapangma/integration/__init__.py create mode 100644 sziszapangma/integration/__pycache__/__init__.cpython-38.pyc create mode 100644 sziszapangma/integration/__pycache__/asr_processor.cpython-38.pyc create mode 100644 sziszapangma/integration/__pycache__/experiment_manager.cpython-38.pyc create mode 100644 sziszapangma/integration/__pycache__/gold_transcript_processor.cpython-38.pyc 
create mode 100644 sziszapangma/integration/__pycache__/id_generator.cpython-38.pyc create mode 100644 sziszapangma/integration/__pycache__/metrics_calculator.cpython-38.pyc create mode 100644 sziszapangma/integration/__pycache__/path_filter.cpython-38.pyc create mode 100644 sziszapangma/integration/__pycache__/record_id_iterator.cpython-38.pyc create mode 100644 sziszapangma/integration/__pycache__/record_path_provider.cpython-38.pyc create mode 100644 sziszapangma/integration/asr_processor.py create mode 100644 sziszapangma/integration/experiment_manager.py create mode 100644 sziszapangma/integration/gold_transcript_processor.py create mode 100644 sziszapangma/integration/id_generator.py create mode 100644 sziszapangma/integration/metrics_calculator.py create mode 100644 sziszapangma/integration/path_filter.py create mode 100644 sziszapangma/integration/record_id_iterator.py create mode 100644 sziszapangma/integration/record_path_provider.py create mode 100644 sziszapangma/integration/repository/__init__.py create mode 100644 sziszapangma/integration/repository/__pycache__/__init__.cpython-38.pyc create mode 100644 sziszapangma/integration/repository/__pycache__/experiment_repository.cpython-38.pyc create mode 100644 sziszapangma/integration/repository/__pycache__/file_experiment_repository.cpython-38.pyc create mode 100644 sziszapangma/integration/repository/__pycache__/mongo_experiment_repository.cpython-38.pyc create mode 100644 sziszapangma/integration/repository/experiment_repository.py create mode 100644 sziszapangma/integration/repository/file_experiment_repository.py create mode 100644 sziszapangma/integration/repository/mongo_experiment_repository.py create mode 100644 sziszapangma/integration/task/__init__.py create mode 100644 sziszapangma/integration/task/__pycache__/__init__.cpython-38.pyc create mode 100644 sziszapangma/integration/task/__pycache__/asr_task.cpython-38.pyc create mode 100644 
sziszapangma/integration/task/__pycache__/classic_wer_metric_task.cpython-38.pyc create mode 100644 sziszapangma/integration/task/__pycache__/embedding_wer_metrics_task.cpython-38.pyc create mode 100644 sziszapangma/integration/task/__pycache__/gold_transcript_task.cpython-38.pyc create mode 100644 sziszapangma/integration/task/__pycache__/processing_task.cpython-38.pyc create mode 100644 sziszapangma/integration/task/asr_task.py create mode 100644 sziszapangma/integration/task/classic_wer_metric_task.py create mode 100644 sziszapangma/integration/task/embedding_wer_metrics_task.py create mode 100644 sziszapangma/integration/task/gold_transcript_task.py create mode 100644 sziszapangma/integration/task/processing_task.py create mode 100644 tests/__init__.py create mode 100644 tests/__pycache__/__init__.cpython-38.pyc create mode 100644 tests/__pycache__/__init__.cpython-39.pyc create mode 100644 tests/__pycache__/file_stored_embedding_transformer.cpython-38.pyc create mode 100644 tests/__pycache__/test_classic_wer.cpython-38-PYTEST.pyc create mode 100644 tests/__pycache__/test_classic_wer.cpython-39-PYTEST.pyc create mode 100644 tests/__pycache__/test_embedding_wer.cpython-38-PYTEST.pyc create mode 100644 tests/__pycache__/test_soft_wer.cpython-38-PYTEST.pyc create mode 100644 tests/__pycache__/test_sziszapangma.cpython-38-PYTEST.pyc create mode 100644 tests/__pycache__/test_sziszapangma.cpython-39-PYTEST.pyc create mode 100644 tests/embeddings_pl.json create mode 100644 tests/file_stored_embedding_transformer.py create mode 100644 tests/test_classic_wer.py create mode 100644 tests/test_embedding_wer.py create mode 100644 tests/test_soft_wer.py create mode 100644 tests/test_sziszapangma.py create mode 100644 tox.ini diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..0bfc6a6 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,22 @@ +# http://editorconfig.org + +root = true + +[*] +indent_style = space +indent_size = 4 +trim_trailing_whitespace = 
true +insert_final_newline = true +charset = utf-8 +end_of_line = lf + +[*.bat] +indent_style = tab +end_of_line = crlf + +[LICENSE] +insert_final_newline = false + +[Makefile] +indent_style = tab + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4686d77 --- /dev/null +++ b/.gitignore @@ -0,0 +1,111 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# IDE settings +.vscode/ + +# macOS +.DS_Store + +debug_run/ + diff --git a/AUTHORS.rst b/AUTHORS.rst new file mode 100644 index 0000000..dc387f8 --- /dev/null +++ b/AUTHORS.rst @@ -0,0 +1,13 @@ +======= +Credits +======= + +Development Lead +---------------- + +* Piotr Szymański <niedakh@gmail.com> + +Contributors +------------ + +None yet. Why not be the first? 
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst new file mode 100644 index 0000000..75058c1 --- /dev/null +++ b/CONTRIBUTING.rst @@ -0,0 +1,128 @@ +.. highlight:: shell + +============ +Contributing +============ + +Contributions are welcome, and they are greatly appreciated! Every little bit +helps, and credit will always be given. + +You can contribute in many ways: + +Types of Contributions +---------------------- + +Report Bugs +~~~~~~~~~~~ + +Report bugs at https://github.com/niedakh/sziszapangma/issues. + +If you are reporting a bug, please include: + +* Your operating system name and version. +* Any details about your local setup that might be helpful in troubleshooting. +* Detailed steps to reproduce the bug. + +Fix Bugs +~~~~~~~~ + +Look through the GitHub issues for bugs. Anything tagged with "bug" and "help +wanted" is open to whoever wants to implement it. + +Implement Features +~~~~~~~~~~~~~~~~~~ + +Look through the GitHub issues for features. Anything tagged with "enhancement" +and "help wanted" is open to whoever wants to implement it. + +Write Documentation +~~~~~~~~~~~~~~~~~~~ + +sziszapangma could always use more documentation, whether as part of the +official sziszapangma docs, in docstrings, or even on the web in blog posts, +articles, and such. + +Submit Feedback +~~~~~~~~~~~~~~~ + +The best way to send feedback is to file an issue at https://github.com/niedakh/sziszapangma/issues. + +If you are proposing a feature: + +* Explain in detail how it would work. +* Keep the scope as narrow as possible, to make it easier to implement. +* Remember that this is a volunteer-driven project, and that contributions + are welcome :) + +Get Started! +------------ + +Ready to contribute? Here's how to set up `sziszapangma` for local development. + +1. Fork the `sziszapangma` repo on GitHub. +2. Clone your fork locally:: + + $ git clone git@github.com:your_name_here/sziszapangma.git + +3. Install your local copy into a virtualenv. 
Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: + + $ mkvirtualenv sziszapangma + $ cd sziszapangma/ + $ python setup.py develop + +4. Create a branch for local development:: + + $ git checkout -b name-of-your-bugfix-or-feature + + Now you can make your changes locally. + +5. When you're done making changes, check that your changes pass flake8 and the + tests, including testing other Python versions with tox:: + + $ flake8 sziszapangma tests + $ python setup.py test or pytest + $ tox + + To get flake8 and tox, just pip install them into your virtualenv. + +6. Commit your changes and push your branch to GitHub:: + + $ git add . + $ git commit -m "Your detailed description of your changes." + $ git push origin name-of-your-bugfix-or-feature + +7. Submit a pull request through the GitHub website. + +Pull Request Guidelines +----------------------- + +Before you submit a pull request, check that it meets these guidelines: + +1. The pull request should include tests. +2. If the pull request adds functionality, the docs should be updated. Put + your new functionality into a function with a docstring, and add the + feature to the list in README.rst. +3. The pull request should work for Python 3.5, 3.6, 3.7 and 3.8, and for PyPy. Check + https://travis-ci.com/niedakh/sziszapangma/pull_requests + and make sure that the tests pass for all supported Python versions. + +Tips +---- + +To run a subset of tests:: + +$ pytest tests/test_sziszapangma.py + + +Deploying +--------- + +A reminder for the maintainers on how to deploy. +Make sure all your changes are committed (including an entry in HISTORY.rst). +Then run:: + +$ bump2version patch # possible: major / minor / patch +$ git push +$ git push --tags + +Travis will then deploy to PyPI if tests pass.
diff --git a/HISTORY.rst b/HISTORY.rst new file mode 100644 index 0000000..0da9879 --- /dev/null +++ b/HISTORY.rst @@ -0,0 +1,8 @@ +======= +History +======= + +0.1.0 (2021-03-08) +------------------ + +* First release on PyPI. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..988ad74 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2021, Piotr Szymański + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..965b2dd --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,11 @@ +include AUTHORS.rst +include CONTRIBUTING.rst +include HISTORY.rst +include LICENSE +include README.rst + +recursive-include tests * +recursive-exclude * __pycache__ +recursive-exclude * *.py[co] + +recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..72110b4 --- /dev/null +++ b/Makefile @@ -0,0 +1,86 @@ +# currently not used +.PHONY: clean clean-test clean-pyc clean-build docs help +.DEFAULT_GOAL := help + +define BROWSER_PYSCRIPT +import os, webbrowser, sys + +from urllib.request import pathname2url + +webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) +endef +export BROWSER_PYSCRIPT + +define PRINT_HELP_PYSCRIPT +import re, sys + +for line in sys.stdin: + match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) + if match: + target, help = match.groups() + print("%-20s %s" % (target, help)) +endef +export PRINT_HELP_PYSCRIPT + +BROWSER := python -c "$$BROWSER_PYSCRIPT" + +help: + @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) + +clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts + +clean-build: ## remove build artifacts + rm -fr build/ + rm -fr dist/ + rm -fr .eggs/ + find . -name '*.egg-info' -exec rm -fr {} + + find . -name '*.egg' -exec rm -f {} + + +clean-pyc: ## remove Python file artifacts + find . -name '*.pyc' -exec rm -f {} + + find . -name '*.pyo' -exec rm -f {} + + find . -name '*~' -exec rm -f {} + + find . 
-name '__pycache__' -exec rm -fr {} + + +clean-test: ## remove test and coverage artifacts + rm -fr .tox/ + rm -f .coverage + rm -fr htmlcov/ + rm -fr .pytest_cache + +lint: ## check style with flake8 + flake8 sziszapangma tests + +test: ## run tests quickly with the default Python + pytest + +test-all: ## run tests on every Python version with tox + tox + +coverage: ## check code coverage quickly with the default Python + coverage run --source sziszapangma -m pytest + coverage report -m + coverage html + $(BROWSER) htmlcov/index.html + +docs: ## generate Sphinx HTML documentation, including API docs + rm -f docs/sziszapangma.rst + rm -f docs/modules.rst + sphinx-apidoc -o docs/ sziszapangma + $(MAKE) -C docs clean + $(MAKE) -C docs html + $(BROWSER) docs/_build/html/index.html + +servedocs: docs ## compile the docs watching for changes + watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . + +release: dist ## package and upload a release + twine upload dist/* + +dist: clean ## builds source and wheel package + python setup.py sdist + python setup.py bdist_wheel + ls -l dist + +install: clean ## install the package to the active Python's site-packages + python setup.py install diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..e944b1a --- /dev/null +++ b/README.rst @@ -0,0 +1,37 @@ +=============================================================== +sziszapangma: evaluate the impact of your ASR on your NLP tasks +=============================================================== + + +.. image:: https://img.shields.io/pypi/v/sziszapangma.svg + :target: https://pypi.python.org/pypi/sziszapangma + +.. image:: https://img.shields.io/travis/niedakh/sziszapangma.svg + :target: https://travis-ci.com/niedakh/sziszapangma + +.. 
image:: https://readthedocs.org/projects/sziszapangma/badge/?version=latest + :target: https://sziszapangma.readthedocs.io/en/latest/?version=latest + :alt: Documentation Status + + + + +A library to measure ASR quality, dedicated especially to measuring how ASR errors impact NLP model performance. + + +* Free software: MIT license +* Documentation: https://sziszapangma.readthedocs.io. + + +Features +-------- + +* TODO + +Credits +------- + +This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. + +.. _Cookiecutter: https://github.com/audreyr/cookiecutter +.. _`audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..21df5f5 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = python -msphinx +SPHINXPROJ = sziszapangma +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/authors.rst b/docs/authors.rst new file mode 100644 index 0000000..e122f91 --- /dev/null +++ b/docs/authors.rst @@ -0,0 +1 @@ +.. include:: ../AUTHORS.rst diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..3abaefc --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python +# +# sziszapangma documentation build configuration file, created by +# sphinx-quickstart on Fri Jun 9 13:47:02 2017. +# +# This file is execfile()d with the current directory set to its +# containing dir. 
+# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another +# directory, add these directories to sys.path here. If the directory is +# relative to the documentation root, use os.path.abspath to make it +# absolute, like shown here. +# +import os +import sys +sys.path.insert(0, os.path.abspath('..')) + +import sziszapangma + +# -- General configuration --------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'sziszapangma' +copyright = "2021, Piotr Szymański" +author = "Piotr Szymański" + +# The version info for the project you're documenting, acts as replacement +# for |version| and |release|, also used in various other places throughout +# the built documents. +# +# The short X.Y version. +version = sziszapangma.__version__ +# The full version, including alpha/beta/rc tags. +release = sziszapangma.__version__ + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. 
+language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# These patterns also affect html_static_path and html_extra_path +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Theme options are theme-specific and customize the look and feel of a +# theme further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + + +# -- Options for HTMLHelp output --------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = 'sziszapangmadoc' + + +# -- Options for LaTeX output ------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass +# [howto, manual, or own class]).
+latex_documents = [ + (master_doc, 'sziszapangma.tex', + 'sziszapangma Documentation', + 'Piotr Szymański', 'manual'), +] + + +# -- Options for manual page output ------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'sziszapangma', + 'sziszapangma Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ---------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'sziszapangma', + 'sziszapangma Documentation', + author, + 'sziszapangma', + 'One line description of project.', + 'Miscellaneous'), +] + + + diff --git a/docs/contributing.rst b/docs/contributing.rst new file mode 100644 index 0000000..e582053 --- /dev/null +++ b/docs/contributing.rst @@ -0,0 +1 @@ +.. include:: ../CONTRIBUTING.rst diff --git a/docs/history.rst b/docs/history.rst new file mode 100644 index 0000000..2506499 --- /dev/null +++ b/docs/history.rst @@ -0,0 +1 @@ +.. include:: ../HISTORY.rst diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..87a2544 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,20 @@ +Welcome to sziszapangma's documentation! +======================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + readme + installation + usage + modules + contributing + authors + history + +Indices and tables +================== +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/installation.rst b/docs/installation.rst new file mode 100644 index 0000000..c950816 --- /dev/null +++ b/docs/installation.rst @@ -0,0 +1,51 @@ +.. highlight:: shell + +============ +Installation +============ + + +Stable release +-------------- + +To install sziszapangma, run this command in your terminal: + +..
code-block:: console + + $ pip install sziszapangma + +This is the preferred method to install sziszapangma, as it will always install the most recent stable release. + +If you don't have `pip`_ installed, this `Python installation guide`_ can guide +you through the process. + +.. _pip: https://pip.pypa.io +.. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/ + + +From sources +------------ + +The sources for sziszapangma can be downloaded from the `Github repo`_. + +You can either clone the public repository: + +.. code-block:: console + + $ git clone git://github.com/niedakh/sziszapangma + +Or download the `tarball`_: + +.. code-block:: console + + $ curl -OJL https://github.com/niedakh/sziszapangma/tarball/master + +Once you have a copy of the source, you can install it with: + +.. code-block:: console + + $ python setup.py install + + +.. _Github repo: https://github.com/niedakh/sziszapangma +.. _tarball: https://github.com/niedakh/sziszapangma/tarball/master diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..f55a107 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,36 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=python -msphinx +) +set SOURCEDIR=. +set BUILDDIR=_build +set SPHINXPROJ=sziszapangma + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The Sphinx module was not found. Make sure you have Sphinx installed, + echo.then set the SPHINXBUILD environment variable to point to the full + echo.path of the 'sphinx-build' executable. Alternatively you may add the + echo.Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/docs/readme.rst b/docs/readme.rst new file mode 100644 index 0000000..72a3355 --- /dev/null +++ b/docs/readme.rst @@ -0,0 +1 @@ +.. include:: ../README.rst diff --git a/docs/usage.rst b/docs/usage.rst new file mode 100644 index 0000000..99f0fff --- /dev/null +++ b/docs/usage.rst @@ -0,0 +1,7 @@ +===== +Usage +===== + +To use sziszapangma in a project:: + + import sziszapangma diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..6568fe0 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +numpy>=1.20.1 +requests>=2.25.1 +pandas>=1.2.4 +fasttext>=0.9.2 +pymongo>=3.11.4 diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 0000000..4211aa6 --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,11 @@ +pip==21.1.2 +bump2version==1.0.1 +wheel==0.36.2 +watchdog==2.1.2 +flake8==3.9.2 +tox==3.23.1 +coverage==5.5 +Sphinx==4.0.2 +twine==3.4.1 +pytest==6.2.4 +pytest-runner==5.3.1 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..a65cf7a --- /dev/null +++ b/setup.cfg @@ -0,0 +1,26 @@ +[bumpversion] +current_version = 0.1.0 +commit = True +tag = True + +[bumpversion:file:setup.py] +search = version='{current_version}' +replace = version='{new_version}' + +[bumpversion:file:sziszapangma/__init__.py] +search = __version__ = '{current_version}' +replace = __version__ = '{new_version}' + +[bdist_wheel] +universal = 1 + +[flake8] +exclude = docs + +[aliases] +# Define setup.py command aliases here +test = pytest + +[tool:pytest] +collect_ignore = ['setup.py'] + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..4830c9e --- /dev/null +++ b/setup.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python + +"""The setup script.""" + +from 
setuptools import setup, find_packages + +with open('README.rst') as readme_file: + readme = readme_file.read() + +with open('HISTORY.rst') as history_file: + history = history_file.read() + +with open("requirements.txt", "r") as fh: + requirements = fh.readlines() + +with open("requirements_dev.txt", "r") as fh: + requirements_dev = fh.readlines() + requirements + +setup_requirements = ['pytest-runner', ] + +test_requirements = ['pytest>=3', ] + +setup( + author="Piotr Szymański", + author_email='niedakh@gmail.com', + python_requires='>=3.5', + classifiers=[ + 'Development Status :: 2 - Pre-Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Natural Language :: English', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + ], + description="A n", + entry_points={ + 'console_scripts': [ + 'sziszapangma=sziszapangma.cli:main', + ], + }, + install_requires=requirements, + license="MIT license", + long_description=readme + '\n\n' + history, + include_package_data=True, + keywords='sziszapangma', + name='sziszapangma', + packages=find_packages(include=['sziszapangma', 'sziszapangma.*']), + setup_requires=requirements_dev, + test_suite='tests', + tests_require=requirements_dev, + url='https://github.com/niedakh/sziszapangma', + version='0.1.0', + zip_safe=False, +) diff --git a/sziszapangma/.DS_Store b/sziszapangma/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..2fff18096fcf72463ee45cf922f8040ac30b2e8a GIT binary patch literal 6148 zcmeHKJ5Iw;5S)b+5i}`NzAJEprzo5t7eFW|LQ0GTg4$Ki#nG7k2oX6cC}_~EwL9;v zcb?@byj}pddf(mw3jlMvBaR-X=J(wvc2yZ8(s{;~*E0s}@HVMFpK$ICo^Ze;;}8DF z=5^S=47)u0JXu*OAO)m=6p#W^;MWRx@1@Oe6BVU^6p#Yn3i$V-(H(o?m>8c9h8O{e zGp571j#+}(JVES*V<Iy&ODZv`RwITbo%vREy>LuSI;@5dt0!AcC>BrW`7O#}JyB5# 
zNP$xYu5-Ka{(n!uF#n&Dw37l-;9n_Vv-M`Z;wx2eoxGg)+D3n%d(9`^jq9K=L^~!% iJLbmQ@iL0CuKAkxd*PTEbmoIj)X#wHB9j7tt-u%eJQaNa literal 0 HcmV?d00001 diff --git a/sziszapangma/__init__.py b/sziszapangma/__init__.py new file mode 100644 index 0000000..00e971c --- /dev/null +++ b/sziszapangma/__init__.py @@ -0,0 +1,5 @@ +"""Top-level package for sziszapangma.""" + +__author__ = """Piotr Szymański""" +__email__ = 'niedakh@gmail.com' +__version__ = '0.1.0' diff --git a/sziszapangma/__pycache__/__init__.cpython-38.pyc b/sziszapangma/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b2d1b765c9654646bac7bac6fa6b91ae5f037e20 GIT binary patch literal 310 zcmY*V%}T>S5KhvqKb3-l4-oLwgPVF6=|S)&LK=iz#xc9LOOoBNn^d|7LDYxwL41i^ zeG5-cJk<~88@`!um|?Q%bP^C=-d-O*D1Q%P7a@rSg*zn@K^}-O4@HzmVw8^rOM}_i zpDj&uUdfHD;s%QnS2BJwHg;Y)j}7Wojl4Ui`8#D=8>fC-<JY?@l|Ru+3M|(PvZ#s| zruO4Gzu@!R8SD1}ged^}2>^EO+E{@65I_!IDjxxCq;<+@fX~1(TJT3dH%aEGk<@4l xrJu32<`I)?>B`nL$$geClil0?3jn263&5Lge{fkF(N*$l_=4LbVi5~j_zMQ`TN(fW literal 0 HcmV?d00001 diff --git a/sziszapangma/__pycache__/__init__.cpython-39.pyc b/sziszapangma/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0da856c208c3cdb6494296026133e07e14235d3c GIT binary patch literal 310 zcmY*V%}T>S5Khv>pGrZ&2MBoT!QFZn=|S)&N?L?m#xc9LOOoBNn^d~z`Y=9-FOjQf zU%-<SPjz6v`99_wW*D4I#sS6d`|<gm>enFlBP21Wbms&j$O94Pp@{NGjPj9)vtXL| zi-l<}E4h(X+F()QN~W*Irp_zpu|d76k#(mu|Da53)68#c{M@-x`7^Dgz;Zn&i>g>* zYM;#56`S2p<Ngppm;#_517O##jU|2zAO|ne6M&7hP8kjG5!i$l{K3y1Upi{=8f~HU w8@ATGAitHaY)!+Tmf3>u-}YYsD6Lum)@=Ku>)ME}k~hN_+yOxx#bF%&0;IoN=l}o! 
literal 0 HcmV?d00001 diff --git a/sziszapangma/__pycache__/wer_classic.cpython-38.pyc b/sziszapangma/__pycache__/wer_classic.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..52c628a2cee0dd4513aa2b26e43401ccda90bf67 GIT binary patch literal 6759 zcmds5OK==V8J_NW>}s`=*RrgK?Kl(iT*sCIq$(k094D~|4vrEm87f-|oAq|DcBIwr zs%JKO)T#=QQ&tX8=5dP;_JzV6I002$fTB2XfkU8*syPA|->BjiY>V&j-p5F`QVB<P zYkIo>?*9AregFUWTpAk6Sor;WzV`0gG0XY~6_URc3TN?ze_)m+EWv8lGAl4fd0yko zcEMhD3XU$@HMigjr{KM630HVmEa6GEWfxM?mgz~Wle&uoB6ZysX)$ocDP;E6p`4nu zMCQ6921OPv290D_J11GHaI_yn`(bea?X&l{7deb~P>i5<NMihCZ#gUuq3@_TjM@Xe zzB%X|yUxY9n1K9XM?S1&li~<uBfYkc%*h<hLa%WcJ&$6%W8xvSI@D_gImSAUnumpt zno)UJJhH^EG4(s~Iq|4?Ogt{05Kk_6g)x+=^pwbp6XNNGRAF3Z<d_`a;)Mx0NI5xy zeVvq9IjQCL+q`h3a8w?U6Q*1^CLfZA<fJ$$PKl`ndy8+;nb~tze)=|@Z=S`@Oue<7 z=dp7k2&34YZ#8RjHnz%Co&4D-oW&EKKq9RI6IOu>R<H#xIKnnZE>pt2&V(mYSXx@7 zG0;Hl%tf+!8^Ez{b4p0lZ3hWP_mqrUs$Qnr?vL`+<`q2QFp{>ls6qEWHWwQCtQvrn z=3Xx?OL~X0R=H&<YW(qQ)0aZ2!s+FbDhKt|QluJ}OVj6MxD+*-)8D)_H$NS21mQ-h zS*kBAmv$Fcr7D(dr7#T2Q_b~wctJ+Ru-Z^jk%l@zGom$EY~v8EYHD}=ELNK?>ZZkF zoGBKU8=_T{$Y+bil~$=nN>EC@^2WuBGq23Y*||$E&&{8ozchdTjoCPTZsx*_&JCQO zotwF+vvwFMmBWyFb%}%~qdX+E86ojE%^dURTAsJ#0VOL^$$DAFnd*A85mjXvglZ6t z%X%Hi&@YXLROu~0ivx(PMHcb4wZ#M*w~%8dH%c`)^6&8*&V%YJ{3hGvk=w7^V&|-P za+`MBZu7PcR@kSlm08`QqqEb(X*+E$(4y_2MKf|mVg_3rT7I%=cck5(B>n!T)3(~S zU=v_oo3$O2TjJ`UG-5~Ik(xQ^5q@@mEv<Fa?Y1#JwaiPl#TuwxD)QQH+iRz`Ix90; zSAN{)0I9RFlkxGJuLhxCY1PY7(5U<6Qmx#ol_DAXHTj;bht)_1bzgvbB^=UJM)!Fs zid1m9g;p=<GU@3Zq<`v+k7M*}=;Jpk{%S*s&~GXrtt^FqdEF12wIGUu`a-``PmLeS zdPI1b>WTWe_$;dXy?ToF=vL%3@(hL{T5n24tj?!m7uKe^UT`<0YJ{;}ll6kz3hL4G zg_M$EwbYcct3XT{+si>cpH`FTr;Z?r)7@=~SrD_uI1{!mhfxq=qhc?pgY;4C30aeo z+(VOif_R$P@nToiAt(+XLt+gbVa|@n$L=}yC>v(<jNw1Qj`InWMv+sGqg~!fOlkB% zIt{(Zf^~mHEi!nTUKcgJh+2R%0E@S?K(GT;0ooG;V0vF;E9X{5A>~W1x(2CvYZA2` zK%KzNHRw@CZ}hsRsbHi;g}&Uq0Xi+(HyI<cKn_ak`b`I;80{Z*p>MPk6^#bL$4UD~ zZKK68T6SsB^Mo6t`D3O!_X%?~8g(n9)7@ugrq<hCtLN+r8-M40m<$lkExm<})nMG0 
zb0JK2NTexO31XoS?nOXbSXMzZ`oCfy*$mMo^fH`au;yQG1+~autx9c($PST_AY&p$ z$6|DOL7P<dOLgHZV<|L$HKk{7@8FkDDFljE?3J2LSr^*js8LANI3<TE8AFoy`gBoG zKoGOFn5`EEqDIm9{=Hls(moX@RU{MlMRW;YMq(XEGdMV&V;(!gMs^&QVVNDi!|g2O z8PL<);f#aASvbKAJI=;*9(9nU(MvrIEqNz)H4GBd((W+HS8|6g9?jRkA|t?6JesHA zsux)iBL-{N_4CaXSch<x_=U%*@Fh=O23K6TBcQX3-RFpN9;BT3tEJ!yq|VB*o(2y* z`{Dk5qZIW^v`?e`@2S11*-w8oAnYynU51`;S{3c0T7SaQG-F<uQMDobh2TAqqErhO z>To6Rm1->%frP&E>6?8&YWR91JG8l|1G&(TRvX$kh5p~a{Ug5;D7dLgL-lo>gr6uk zfGm>s^%<DW1B^jk$Tc%C#Vf|ug?_oA;31mS6GIbfY4~pIy(rxnIbX*D0%2O|wd6DE zA*@&fHvw3U)u*U}02>d^y#D6=Tg7j@bm7vB26H?xvsRW(a^UbsC#hl2DHRU%_NJ)k zUC8y9WF+z7eiNE;(d=tHmS~uiXLLa;+e?ixqDCcHlcGqC!j7sUrWDcwtietI$a30& zrQyUfc!q)hY$u=L8K%B~wt3sEWg2<)43hF*3XlWW6!<AJfW3`JQ(zbTYoG%sI<&n> z>xO+3v_OebN$0r9ZP}oSeS>1PPCF0*U^X;>T=z)GT*!3azHDVeup3_6xye2RC6R;E z=;YwoKC~)yv*8=_NlY6f(*t$yEKVk{rwyInQSA}v3Pi9+<P8HE8fbmYeAv7meh{9N z)=XSZJ0H8X#TM>Q&SFDHEgB`goh=NS<I+c$gp3g+A)_U+Yp^toG4x3nR|64M_rl>I z!J)%e7|Xg0zk{iSWM!?iK|+pkeeRjNjynOnnB8$Wr5=Ku%#L~<EfLUAk3OdFk6#EF zz4*mP0dWgiK-fQ6J=VsxvUAvw93^keHl9r$LL%<P4U2-*&Lzv2xE2w!%qHU=>Q-tE zxGQolK<J*C(FM=!0^NS%p`P9%)Lw@u^pIHT2{;1v0&;n7K(0+Zg`nLZEvIMVPon_y z&VX;+d2pvV0YlAiAUM<Lv?I0%gfHR_1LJPPxM$mp0+>y<lGV|^J=^z>c~@y;LNhTQ zvF{8WJ)1aHZnO}Ft2ouMb-2NT&wJ1JVLIO)V2w4AVG+T!p+C`5Uyv=EPMf?SPL-)M zkl%Ns?w#LbsEx2(!LQ&H&uK4)GUB&>S)ZXZyGse(7fHz+o{$3a1$2LBWD_u;+v1X~ z9AW){uQ+qoE$pQJ{p-bB<}Ru?;ueqj)HCYKsM@>K%xQA=<;HUJ%r8-`kI}QZm9aLS z-gU@Dcn!1qsnfLvs=_nKbo>6V6IL%`(C$=q0Q1uQlT>62f3<sd`1oLl_<q6+%=X}O ze4nZRqN~JWBOs&t3d$XGjOYa4B_41^r|l;m(i0LpB5NCL`@p(tU1b%n&okY|WqXT( zfA;RPBjl4c*VmnAk<n*&@@@VB6hzilPD-xY6`Y`8SJDx#3fl-5D7>N6oNAumx4pz5 zt&{wR*7?*X+veL~&LqHMTT4v&+YG5w$JfCXXWOP4qpyzv>^y~d>hnktpdIY6qL1`A zO}kP=pov=_<#1nCQDV>a#}GQ7-{R39Lp+XX&>FU~_cos0eGH@k@y&zqlYkP=gN~(d z@o~m9NzC;;$;ra!xq6NAU!$Zqh2NkQrjdA__n#4-xwn!3)DyrU#9k=Y9^_S0tU+~w za@yz}exR~nqh1ecIq>x!|5&|FdW?^c5B9CdstgvY(LMkpo23EvE;0ZZFz^Aqc?5V} z=T~_JMnl+FvcQbpK@2Wn!UbZS$W0<9QSUvdUK^OyEY|rPMlv2JX`U%h6Vc!zw8P(6 
z+W_MdY%1V|+wL670$?JSKdYWZ8*Cmm#fy~UHH(+oKD~1pvZrSKe@2bjE}q%RaPQv6 ziQU_3nk7-v@4P<^o`70SgX2U4f_m39ZiB5<?vlw&>nu(1t4MCy>J7^Gpw*wYHamYo zr7?<t_3)>d?I+m89%(=8Y|nLPtKOuX#$^xnf7?An-1`W~v5jv#>PysohLT?V)J1sj zh!BSKcR))u1N6a-J@{<?NJEYw0p|Irn*-H6N2hnxMM&ve>~bln<BO|9-vnZJLCI1S zr<P0QD#E4ME|==`jkMfYs0SPPiWD}bELUT96|$w6mGFf*z_(NPvP2I|nQW*cY9mjt z+q9R+6I{(l>Ep2@@rg^3xsE-2`9cJ#2+4|kr#ec>DN2ZHY63}|s*93RrFH#zNncWR ze6E8tvvm~7sIOB(dur?w(Oa)l9Yii>DrVJ~iI{~kuc!jAR0}qOqS$RBh>gr)=@aW6 zTbw>^zE7W_>4)D!l4Uub=8l~k^l)Kz?q(m+Z|2>}o}!$`M($)E)w#RbV~&>aJJ}&f e?q##1o`+!5yPM5X3w}2{mVrK4KwLg@c>W8_8xE=f literal 0 HcmV?d00001 diff --git a/sziszapangma/core/__init__.py b/sziszapangma/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sziszapangma/core/__pycache__/__init__.cpython-38.pyc b/sziszapangma/core/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c7505c2810e029559c1a80b76e0923bf278e236a GIT binary patch literal 167 zcmWIL<>g`kf|JdS2_X70h(HF6K#l_t7qb9~6oz01O-8?!3`HPe1o6v9KeRZts8~NY zu_!q+uRO7&C_gDt-zBv;yClCrKRh%zM8CKyv$!g;ATcjJHxZSSoL`ixA0MBYmst`Y YuUAlci^C>2KczG$)edCKXCP((05=mV&;S4c literal 0 HcmV?d00001 diff --git a/sziszapangma/core/sziszapangma.py b/sziszapangma/core/sziszapangma.py new file mode 100644 index 0000000..dd0b80e --- /dev/null +++ b/sziszapangma/core/sziszapangma.py @@ -0,0 +1 @@ +"""Main module.""" diff --git a/sziszapangma/core/transformer/__init__.py b/sziszapangma/core/transformer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sziszapangma/core/transformer/__pycache__/__init__.cpython-38.pyc b/sziszapangma/core/transformer/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9ae71e4517916535584f24505c9625b24116a404 GIT binary patch literal 179 zcmWIL<>g`kg6gL`6F~H15P=LBfgA@QE@lA|DGb33nv8xc8Hzx{2;x_SerR!OQL%n* zVo`EtUU_0kQGQaQzDsIxc1eDLet2kbh<<TZW^q+wL1JEdZXzltIlm}XzoaNJuQ)Bg 
kC^xl8KR!M)FS8^*Uaz3?7Kcr4eoARhsvXG2&p^xo0Jc3a3IG5A literal 0 HcmV?d00001 diff --git a/sziszapangma/core/transformer/__pycache__/cached_embedding_transformer.cpython-38.pyc b/sziszapangma/core/transformer/__pycache__/cached_embedding_transformer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2655581e4ce34acb616f35c06fb8e4ec3d05ddb GIT binary patch literal 1923 zcmbVN&2J+$6t_LjWSS;vDGN&%i4Spr;gBG~fkUW}wulSrrCTA5gv{*N4cX2|YEM-) znx0Y#u88X%+GGC`{s$l7#EClx%6lG?OtfXS!je7qd-n71{rujWqh7B=p#AyVuit;j z3Hcot%?2{^C3N#S43bEil8i<)V-cgcW~mo(_<Ct8^CO><uZiTcbwy-L`ExI7OLj;? z{|`7#LT33l6H{8gKS|1iklF`dWlxoqlYIO{$GJH!bf$EIdz)>EAqS}<5tSrjM?`sw zOFAM7Zc^=M!7<w<AYmZ!r1zXU96Z{nwbFmiWLpMTJnE=T<-^{_Ik^Gaj@$%oS9WFZ zibOrx2jpA!(c?JzLCO1&+J{0d95A9>xnO;f;3;tN&HHyGtl=Tfu|I}xc3`MzMFc77 zoWPe!a!79JLFj28be2nCPq$%;gc_9|>i`zFM0eryS2{d0N}FL8>tvFj$E7Zw#>0JO zPRe3B{PyVZ$<WLuW)@H5e4NGWi=@zM*l=X%+>k3bn_dp47d8-Ll21w@dT_VFmm<`s zvk(58w87#8qCYRRw7yp5na*FJ!~<iFl5Ln&<Qixh(;HgRT_8knfDriYk5##XXF{eo zL7}&BKtAdL(b?YG*4IE}+$+%8M=yc9b_g)H5VO&UX$^4;1n57aHW3Uzl4B~VWLH5& z_sN`IvXWG6PNjFk^~XPvXY`s??1tvN^ycK!tJpsIZs(F$Ud49_=y}CQ<n%LV2XpFQ zJD5)g356RL4ZXmtUx#iU!7v88z!XpgX2?lf^NRk=PC3wqoZM0uKDW%W!6%L?kT}{L zV~(uOD1}r_ifp>~2Aajt^C3z*WcJ0YvSkLNv`FIA?7?JVO(XIb;3Inu{#!7aApGqG ze0=X;R^asm*r5@VeuzU8GG>N@j3m^M8M|Zu6Y|np*s@-#a+iJ@<3?lAtW&U66NlKN zja;1z7ama`r>oSDf`zKa>yRVQ&K!DV&~ZX{Sm!RF?2c1@$5=qOX@{}DXW=VTzQ!z4 zy(}z!SX2-W*Yt*9?Ox;%bB@?nlB!rYI9(E}--eU#LSLlIdoMxbtQ>@WtPFXaDIsh} zh^&xjDbBk>oSwyLt<e~?c1wsj&x<lHCq-_A(3nBiGo`jHpGNIm0<8E#BYut5QKL)s zJ0P-}QHOQ~8RH2ngh5e<20(0|?|AQQYSe^*^~;MX{C8?wcw^Mc&$8)-ejn$bHcR9H jtKy(h6$i_QZI(mljWd5AA{&fq687*>Tx@Q$fOYvlBsR?p literal 0 HcmV?d00001 diff --git a/sziszapangma/core/transformer/__pycache__/embedding_transformer.cpython-38.pyc b/sziszapangma/core/transformer/__pycache__/embedding_transformer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b8f5eb717f11eb752da710b33d6fba62417c5c4e GIT binary patch literal 891 
zcmah{y>8nu5ay30%TAnR$dau;lb7fPj3RAZbSb<zMgc7<Ez+)AQBsdG0((s!qv)Pj z=%aA$)K}=#I~h)GAV3Zv!TavW@4Mqo`~4n6`}y+M=OJh8H#z$jpmT-G4RO;<^MYBP zaw}57$rpvRK^oXF4O?FpQQG;ybfCj~rbEN;WEyKZVT0%ozQ6{;%eU{Y{fU~n22|cy z(|oBk!okO-YkV+T=FK>#)O`!lxx(c}xEYpm%~D~c2~2pwOk_gS`5Mo~1q%Ejw9xWC zxZ`OTGuns_5Dj(IqCMR~Ik7)~Z)Zm9Wx2QkRl0cv)&L3#Z!6HwM_`&2l(|60ozWS) z#IA8UYK>+S_Q1yvR9E0kF^3MKKs{}@zb2VF11_-&@@09e8mML}85#Gjsp{m@baIop z&C+dDt;&T}hfZFBNw7;haf#U}&i2sDdhJgZrpX?ou1oq_;bQapsWLAQ^GQ8b4KZGP zzG9&F+xi9qj_7={1Gc3fqwD(z9DRg#l3qP8X*@Xcah8?Jnk@6ZEVGqf6?8t%vhOQZ zY$G6vn0~@boF||;@uIAKr_>5St$}#ql(xrD`L7Wo4A74`Dm)f~zmS5r=%DMRn&m)U qY*~ENtZN*WmTgNEmMdGY;ha+FrKc!-JlsluLxbZe?Tnv^SpEZ|1JIlR literal 0 HcmV?d00001 diff --git a/sziszapangma/core/transformer/__pycache__/fasttext_embedding_transformer.cpython-38.pyc b/sziszapangma/core/transformer/__pycache__/fasttext_embedding_transformer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..98f0ba0759805530444137824e7fdf6c14ecc1c8 GIT binary patch literal 1557 zcmb7EJ8u**5VpOK+iY@4fCz{L5)H+HbcaM61th|wK-VNuq?M4_tbNgjeH^xrkke5m z5;Rn_DUy=kz&~+I#SfsN0A}`bmyHxf;FZ1hjCbanZ)SG0*$fGcldW&Be=<V8<DouW zI6Q`_U%(=Wpc%<&L~|A~if5KNkyE|h)3+D-aCWjr9z+2p&xvq_cSwXMg99g;5p12b z{2ySAw3u~YrrKC{Elo_fMeWkld|L{U7CRd%E_AO{xl{?>t&b_D4UkF_Q9&ZMNu(oP zL08}{*bx<ua1Y(c1F65;5Z)0JzG#3XkTcR*CNdmrdTg190JJkA1nsPt1$=Y%*0Wd} zBj1~g5Jj^*FNMr_5$6(MH?p|c;i*V~!Rc^dhe)5m)OTPpWS8!;5fxOh&+G#;<k%Up z6FMT?6$HFGUF)YiMXA7MG?VtYd=KpCmS=<BDH`hzOqxaWqTDO8G8SVTQFyku3w&~U ztZX^f)iUcTA237<y+M}M(0?Ur4&=Xz&Zd@1ck);zX|Wd@Rc^<fHL2g2vftU-T;J&E zVXB95A41LJ^M?c=cg$JFI#UF4l22Y!xBL4x;5;o-!}$Vu*HsNtpEAP!_-uIhGV<G# zFR^^~N+qnXq#3B<9Q&9F%s|CHMmQq7<d}}=2?eHUx5X4XjTUyK;b?`wlZh!6$Y(*W z5LU&^cxl4XHu3H>q9qXND&cf^?^3|$J_FPq`qOF=3|xpAaqH`_d?dZ9S`Gt4M)ZKL zkpo2UbQ22N$v_O4+iNiOO;}P0_SNDC2=)^>h7V8Z7xva&AIH<8>I&Qe7_9xE3iB%+ zhm0(#D$sC6Ktm>F-d{y~8m;?`{b6MI=-(sN?Uk%d;!Ll$&+I|BI9i$s%~#Q9<rwK4 zd!IkYm%cGUKxoMP-^2U=fuIYW43&m$%DD|W7XBc^{T%0S2XS_)akNp;I6N+j(!?e$ 
z3(dKj2hWxjecLERtW>;TF{hAkg*j3PtyUyc0+j~}EF&Gj60(5Jdsm%G+6t|2_WSUM ztJ%R@QL}g2<85ql>lK5%zprkC%08^OUK_iuU2mspN!FW8;lHPHc@aVztc<Bx#Y(FB MZ;=HQ9yaIx1(QgU+yDRo literal 0 HcmV?d00001 diff --git a/sziszapangma/core/transformer/cached_embedding_transformer.py b/sziszapangma/core/transformer/cached_embedding_transformer.py new file mode 100644 index 0000000..f58fe33 --- /dev/null +++ b/sziszapangma/core/transformer/cached_embedding_transformer.py @@ -0,0 +1,32 @@ +from typing import List, Dict + +import numpy as np + +from sziszapangma.core.transformer.embedding_transformer import \ + EmbeddingTransformer + + +class CachedEmbeddingTransformer(EmbeddingTransformer): + _embeddings_transformer: EmbeddingTransformer + _cache: Dict[str, np.ndarray] + + def __init__(self, embeddings_transformer: EmbeddingTransformer): + self._embeddings_transformer = embeddings_transformer + self._cache = dict() + + def get_embedding(self, word: str) -> np.ndarray: + return self.get_embeddings([word])[word] + + def get_embeddings(self, words: List[str]) -> Dict[str, np.ndarray]: + new_words = [word for word in words if word not in self._cache] + new_embeddings = self._embeddings_transformer\ + .get_embeddings(new_words) if len(new_words) > 0 else dict() + for new_word in new_words: + self._cache[new_word] = new_embeddings[new_word] + return { + word: self._cache[word] + for word in words + } + + def clear(self): + self._cache.clear() diff --git a/sziszapangma/core/transformer/embedding_transformer.py b/sziszapangma/core/transformer/embedding_transformer.py new file mode 100644 index 0000000..a953f44 --- /dev/null +++ b/sziszapangma/core/transformer/embedding_transformer.py @@ -0,0 +1,15 @@ +from abc import ABC, abstractmethod +from typing import List, Dict + +import numpy as np + + +class EmbeddingTransformer(ABC): + + @abstractmethod + def get_embeddings(self, words: List[str]) -> Dict[str, np.ndarray]: + pass + + @abstractmethod + def get_embedding(self, word: str) -> np.ndarray: + pass diff 
--git a/sziszapangma/core/transformer/fasttext_embedding_transformer.py b/sziszapangma/core/transformer/fasttext_embedding_transformer.py new file mode 100644 index 0000000..a74ac7e --- /dev/null +++ b/sziszapangma/core/transformer/fasttext_embedding_transformer.py @@ -0,0 +1,27 @@ +from typing import List, Dict + +import fasttext +import fasttext.util +import numpy as np + +from sziszapangma.core.transformer.embedding_transformer import \ + EmbeddingTransformer + + +class FasttextEmbeddingTransformer(EmbeddingTransformer): + + _model_name: str + + def __init__(self, lang_id: str): + full_model_name = fasttext.util.download_model( + lang_id, if_exists='ignore') + self._fasttext_model = fasttext.load_model(full_model_name) + + def get_embedding(self, word: str) -> np.ndarray: + return self._fasttext_model.get_word_vector(word) + + def get_embeddings(self, words: List[str]) -> Dict[str, np.ndarray]: + return { + word: self.get_embedding(word) + for word in set(words) + } diff --git a/sziszapangma/core/wer/__init__.py b/sziszapangma/core/wer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sziszapangma/core/wer/__pycache__/__init__.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b598182573576ee0e3f62c5da88edbc96ef5202 GIT binary patch literal 171 zcmWIL<>g`kg6|x=5<v805P=LBfgA@QE@lA|DGb33nv8xc8Hzx{2;x_OerR!OQL%n* zVo`EtUU_0kQGQaQzDsIxc1eDLet2kbh<<TZW^q+wL1JEdZXzltIlm}XzdW@_KR!M) cFS8^*Uaz3?7Kcr4eoARhsvXFt&p^xo0ONoxJpcdz literal 0 HcmV?d00001 diff --git a/sziszapangma/core/wer/__pycache__/classic_wer_calculator.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/classic_wer_calculator.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5114407c01a0c978a0d91a8e6465be84cc749485 GIT binary patch literal 713 zcmZuv!A{&T5FN)^6zJ}yswxiUfYi%MRniYYRaKC<aiJ?DA26<MDyvSi*jZ8bx=8#* 
z1xJ2?Psz2X{sJdvvRlYXjO5WwoO$!!j3=Yf7NGsveK>p10RB?aw-l4Fc;Y8?1`P91 zGQk+diBC!{ID<nl+@#lFQp*=yq-Jmo<Bfk<0b|x~e{+=zR|hUOs?v5}c|G&0sRQ1P zPHcGjpsAv7e3irx7+4U@fJkoOoEc`4>rC()NI|67jkoXiy^7K4bwt$kY!nkBo%j}A z0}HriO|pQyP7&g;BZL~n+0=$MlhReLkuoIsGqO#i_2>7j-IP*OGb!)k@wAvk8)8wa zpj~yTno$2x#et2#ntEECOpd=7aqi+=O;z=?RO^YZgDoy?C^|f+CB{-ePbi;Wg$?YT z5o00&Y`*iF&ip^s@qlPos<cwJTT+&_nR$wbQeMoIU*2?}?Pw)KTklaMJ%pvBcHs>< zT0ar!nrBIuAB_j%b6-+UDsp6=o7IkGiDPiTdrG<yijS|Ct&0w{pRQAnQb&#`wb|ZM O(-&H2B!a_a$e#dCtGWXK literal 0 HcmV?d00001 diff --git a/sziszapangma/core/wer/__pycache__/distance_matrix_calculator.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/distance_matrix_calculator.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2bf0c9c59168075c6523a4c441087072e2f1547f GIT binary patch literal 4055 zcmb_f&2JmW6`z^?;EJLsR&{JCPFgu_TC+{5)@?3@>N>WQ0!1Jok&OZbitUOsl9ncy z%*+aoSn5&>h;rzm_W+fSy7W)zA2QdTeCs()oc`Xd_@NKBHjpJ|_~z}rnK$q2H~V;D zq0I37{q2AL>DD}B|DwV4<703S%}8{TN#0>y-sD};6r9GQ<8}R}?`W?RG(+(GPN^F; zBhG%uWFW&sCPT%adCfVDOESVZQgaxWF`koUjLU93FTG7xpMQ>>SY24}z55SrHQ6$` zPFi_a<-0vu=hpu)HTedJYwvfrl$2?<^GGL|+3xAC(k&96egX{cp_%*W6l-$HnnHQX zSHTK9>%(?Q@-I9oq<82)<IM=~1N`Qs4=x>nxMf*_dXZgv2nCX?r5+@m)?O#cd-~L~ zb6RaHt-!YB-Ti(q-&H0xHq<KL(^<>I?xTljj`je}kbx!JWT$-Nlwlp;sLr;Iz}0iq zRYu*HTWgPv(q^rj=vJCNO>*7aO4c4K^Ca)}*Z%N$^U<0aq-K!xlWeD(oFBA$TCF`* zdQF}wA9t}p`f1!6AEMFUw=1KeiZ4t=a4;j#v2gItWeYTB3gS$`)1H>UDE{tf3b_Ao z4^11o)1P{}O7s$S-yLxa!z^NG-?(CLL_<uw8WJe<7K62y(|attQJ=R_9A`;a#j!2N zaknS;Iz(6E_>;Y)Q@qjSP)&}s9)i`1tZ#!v>ttWoh#72mdNB5DG;8e6iT*mo3o?qK zV@6zfArHC#Rkb9zd)ET{+%rzfUxPgr(}?Ox95(|MDi1MU^rdz*iRR|qSu8K#PqRet zU$%?>4wk;;CGx+ES~}N|55*IqE0S-b4j?tW4NYPFoVwz8*T$4PH+jpMVFg6t6h3%& zmI_ye)Yvt;ge5NNN6AjkFt4M_*)ji&ea5$i<Wd|)L;jFG<0sG-nzl&E(L*$ex>#8S zvBQS!kISDkhzp3L>>Wl&PW*4;6CVqRbMj;sa-x%89?91Z_z^#5M=;chDCC?~>fG7j zR9MmYd2?}6EL^~%@Kwj9#Bqw~9~;n?*E`7JR<GN?`xf?c6(r>1o54q~V#y-?8|$4O zvd7#7F<Nwrbo!ohy^4KKMNyC_NWr>=KkXb!O3=tq(F#5BqC(1i@a-s57n*#jZ&J5F 
zoioG_iE_JBB))-W$N?;<dE)EAomby`qh8k6v4Flo9r;jGkZM}G$XrUgDP3saLYGNO zJB8lF3z9dK(h*+@ifq=zzm&;ibn4*kD3c#E!Y9es@h`;}Tzb+!^j$uOsw4woQYcH{ z07g~0J?|QT6?}qI!m5f|P$6=^=rKpm<84JQK&mRMh~P!L{GeyjOkK`>dp%YoXpI5B zBWwlczINq{7A3&wA)1=26J!I_2^Kqyb2S~||6_3BSXb3=gPyV@nUHF3nx%Oh{}iK9 zowzki-NtD4)isOa!s$Y;9I|7yHa`|fO6cV%ae@ThWQBH~z1?zlnE>FH%AYEg6?if+ zjdAC&3p*;GF!ijG*Px?NhA{5)GB3XvygTzi{|nFRo?UcsI@z;e+?g;6h@Ex{ZA`J| z4Dk;TLv{cY9C$;~W<#&do_P9p&LtV<jB_HM@xP$Nw-JLV^B?AZJ2(!9FxG*8Mth)x z?4FzBVa|~FZOp+<%9p`ky<#Rfbhj!vK~}>)q1y-Yv`6kAmxcmbL?>eC4})v$CObfc z55r}4P#U^h*T&$cL)6tjAFP_)q_0-9UcQPV*4hP2e-B4`P6vL3`spX1;^q}SQK~P~ zt{J>pNP<?GNqd>0MMzwyJG=P~r11RvxAi$2xTS67<Gn1$?Dw_qX&chbGRdu<VFgzK zY}f&8c6PwRE9=Q#Zuz5nq~F0p`e*1YPb}YRhDl5_n&DRQKX1}*+DQ|g=KI!{sJItG zU;hNczd|#s=vd$*SWr%R^&4N5K~)htOZ=Ls@#{R|gZE~K(96c~l{iW{<DnT+<p=?B z%yW@@Ljc+hU?`#J=L~KO2wwq_-YX&OTUgW~?CU^Sf@lv56c{TAqGw_(>3RX9fw6)x z_K2@Iq+MYbv6d{VzmJYGXI#Dv(z}%xDCX*rtKwrk`xx_G0S^Q5m5=LK_+s$UY-69@ zp)XyUUj|<F53m>Rxa*{+yYf4I-lpMseH^}q;>&6v{xkUC+tH>T7FDczu8JA!({%vX zS(Q&=-UNStGuDo9@b_qj49Ea>2vu(hS`O~Kx`rF|#fvCRRm(2Magrfplbq5wj&&W} z3jl2^_$tfOR`H=YrL_wHhu$+W`$t%gfXEPVmw6<rbOb@!C%&%UI74unRj*ht*=lL3 zHSXST!+gJwZ_wiYA7p#o{(ey;emVU$)}Rlt#uS(~#-TV3v6_zSF8%JlplQpg5-|=^ Z8?6`LB6sQT>TF-*6%mPwU#r$?{{^-r(U|}M literal 0 HcmV?d00001 diff --git a/sziszapangma/core/wer/__pycache__/step_type.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/step_type.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d3ee780df43be6c35dd46f10f72bad0a42c35bb8 GIT binary patch literal 948 zcmZ`&!EVz)5Z$$%#7UC?1r>3EYc938qpBheC=yCV><G!1#r1AVn%EArj-o`pQT~D- z$T#pOzH-VhaAIanszlJScD!#rJNw@3c+_e&7~1ROpPyHpv0nuC%|UREL%%{NndCE; z@Q`!%o=HpEcTC!ff3rhNIzH=Ewiv}ayl{HyB8ds?n~mTchkl1nv5-rK8+m9;D|DnC zR-_YFRZUiYa9NeLJ12Bi-5^)i5jFmzhCD)aB%63-t8o2X&4blkZBYQ*TIA@YEpbOK zRZ#f?(wM&PjOG;6#i85iZrP2I_uYEIFsn6yDz1`}G)mM4-&)Pa8wO2W{{7J#X$9I# zB8VsHGRh&lio6S@r+GH_K8^gq)9Z;|NAoDXPNG8)XP~^L0#74vkt6Zb`KoANt6b<W 
z8RUXaq+8J>h4J<Coxsz>n1>ZT#(lReXEz*&HMXWzpKbV{V*`mUYTe7>u-6TWrayY? z2mN3a^e+cMXBvH_Yf)%xqD2gu)`Hm5(vH>1J*$I|e@KxCQ)WtElA78+gGUHER!OMd zANalDgCtJkEX|`yDnN}Ds5Dk$nSmsgJ2o%x+5A6t&}kN~Fr^iuXb6#HaxqglZwm2s z5zVORLICB2C+G_IqW7U^C_N=;>l}bOrEspYYzD^^_>3HN#Ga!X8CBGNrng+~l#kW6 i+vOGDRTPfGe*~yJCVVf2$!N;&zA1U0QNtRoP5Tc)=+P_y literal 0 HcmV?d00001 diff --git a/sziszapangma/core/wer/__pycache__/step_words.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/step_words.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4142240350aabea762e0c740887510d3e13d56cf GIT binary patch literal 524 zcmY*Wy-ve05Vn&vgwoQP7eK0n$ii5m4oGZ8qOB@rF{VD+h$Id^S5>=~l?P$r0eBRz zOuPaU=Tze7r2Bl|-TD04o_4!!MuunacW<<Qc=FdGB13Y#AW%&4f|Weu+{Zx?lwlTf z23&<Idg0ehwN(7fRJ;pzJd4!9nDvuS+R6Id9V%(%ypV`QlAGG*x{}3&Nc>>*7OJ^# z{A(eRAvvBB0L!>ySukY~LUh8QrTE}C4hBqxbX?>P#}?|DHVWObfd?=&8N`|ruD9IO z+AaZeY!ah>>=Gd=S%MI*Ekvo+s_=0~h^Lh-_7&rWxDc&zJt1UOX)C>Y6ylYcE)^U2 zx=p7D2GdfSd0wrhHTqtrSAdUJ*Xe9Jo}{?VaVu+CElT;H%(a1Z4JJjB->Ad$dShBN z-8=f9Xa^nMWJdj#JNkJ~Kod~a#dcHY)uNHxwEb$^N5{S^8SHO)$^F=vpu;<1!oLAt CUxl9l literal 0 HcmV?d00001 diff --git a/sziszapangma/core/wer/__pycache__/wer_calculator.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_calculator.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0e3f29381a1c6f19fd5f025c13ae15dd3a3baf30 GIT binary patch literal 7200 zcmb_hOKcm*8Qz&)E|(NVJuO?dV=J+pge@vLZBR6Y>ZFP7B1NjCksSkzq+5zJl4<j$ z*;Q;4OQ3~R*g53T-U|3&9(vHBEzo0|TaUf=wAY?;>7}Q}uYUgwUrWh(09|2*v-9|O z{(t`Oe~hk9PUbB<r$6}HFTEwp`WL;7J`P^qKnY`1!V;`z)tS#2wRw%#ZQrgtzEjWm z8Qo{sTtAC3r{>jjeopsgYWezvKcU-htx%uzCv`hpE7qs{Dc$yJ)AbpDratS>GV3Rn z$cg-(CGwK(+Wwrd*R9gT=h%T&Vu}6ZH{UARiStf1jFQZiomNdI-n*@++H90-YnU*5 z5hLYBMZQ(8Rd#CSsHxEFtw*wT<xWeYJ#$T}_f)eY!?4=eq5*U|>#cI5#FH!rsVm7* zeXXfPSfNjkJ~_O+ff9bkEK6EG6PC}fTGEz|V5|5Q{5})5aP}NOBVLrQ%r08e!*6cU zLd%u;MN7E%ZIKn;UdEq5PeJ!nJ$a)jau~~t39O&Om|m~PK}Qtsb1^B37@L+eGACzI zvSRAKBc{d7p0msRxp8`Oo~RBfZF10=#fmvG4?0IcXLJX{*&gkj{`3mk)e*6vzp-F; 
zh59IH9mSrGiAB&lh8}PybGq-iIDx*!6m`vw=J2F=4x`6Yqx0ZKQy6^llsJu9CsMOq zc~Z**@)S$p<9YD{<~%nx2czJ8l-?Q4JuA*(?kSOhTD_D^;=G5NlAH)cPkn-VIa1Zz zL1k2pjz4>}!benu+Em^mEg_7>EVcr`X5HFn&{8#tK4_a0%G#zX;$`1bGx+)9gO#hH zRN+d!tSZ&UE$pGWQC_(y!<$jFwQ}w1`jwTiT@Bl1sOwg}oPMb^m0Y<c6_t^VFSqU_ zUJz6p)hGz)E1}*iYcHgjU+zy?1Hh(R1B3)2kPs#_N^VLefvn(`tZrS8!o*cF+EI;) z4VwLj+GIEAH+&P-Jua-9o>~x0@IC7uyJ+pQbt|?WpxtGkSa+?v>@MH50T{mLMJ(d? z*e?FeJQ7*SS*rkPgNJtSqO7BYr%-Ky+(!!?7D3K87z#(RckSQuZD-vGcHdES4oQxf z2_jg6B(Iyd2;#l&N;8c1dBT^^<76;TABm}EX>oqF2JNUc>#YmN>D$Do%lXsx#k3$R z&Bn)4MZvfXgQyuO8Sd1g<<(jfniXC^qfhbxj~>X+(RaR&O7pl(Jl+F`i`EU+`{noe zJ@8E^?e*C6R*H9Z6zfY|oy3oCxUM#YyuEMxGrA$epaqT5=d9*2OC6z#R7DZviiG?; zN;r+m&GIie^YE8v-c$Y<+OiaE$q20V732WbB%`3OGO=s2QL+`08UN5}>eFI=;j86G zn+&P4G9JSbJaID)<4+x4%;x{wKL1UR11#hQyHE1h@3flHbs1JewSZ3?s^1#ggxuJ= zVLiZ+#JtO)Y*XlhtO?-BV&8bkakJ8w!C6dG=cv-c9!NZSo&-8>Z8l_dOUg!|b@tm> z6_Scr8JBSjTDX#C`ztBH02TC^_iUYER@(+5i-%A~O0Jpx9U6qNT_)HutHY@+;7NM# z_LkLgVlMbiuFl5*^GzP^kysY3*j}_cnb?7tGF^lZapne(>>Cbdeux#C+V)bfH-lb< z_0(&lx7Qk(=ODzeg#8;()yCm-G7gHW&!6Hcx#|V96Sv%I$%atJXf%t;_by&~=hBt8 z-(B-_Z?COidY_t@t+nxGTEcE8?2ez)FeS_87b-gnHZRiH7(b&~c{@_&iWvli%F*@j zfl5fW!Ey?0nmH&1z@%3=L8iTwKBj|2N=`D@Qu5=VqMBhi&;qqcTb%CisIMG{t7ryu z^{J7Dk?bJVg@_cDPK|XDRN;-&8x)s9EWV~i2+bs=^5<&uW7!C=N3z-o;8>KbME<eS z&TwZVjH=NN#a>vof>nBp>O8f^^!Y_?Ax4*v8ArqHWh@Qpz%0*wqSYFT?byIJ{3(aA zX}_A5-q#~ra?}86$y8r*DYYjx??1G6QW!13f1_d4zT@DR?=q<J0P!F4j@!vbJVxMk z(fa7Kj)!(exBt}1MK;9i0RTKkFtW=#`PkO)c<W5W?x0-&MJF!A`FNshcP69E1D8ex zI|{Q-YvX*EcZ%rG_E+`BsDCQ*;>oxePi+Bq!ba@ue9Al1@$`dS*BYfmYgVmzI?fJh zo4)N=w_nE0>?UkjDYK?Z;5y}y2wr3ww9_RGArIcgtFd@2DMrnxTpMUBVrpKE#?`=E zt(EH=qI{ta3jaZ|+OMY%l&sdMn#lTw6as;Iuu%@B7<HV65<X<#ly^#bgZeXCBPiCX zmTT3v6e(&Zs8i(2)C5(pP(|z{M@GL|Z6FFG#ZKl&yM?St-D}sY4Mkut%^34BP%rge zA~rejCir|KDGoh~elHAEu&k5`KEgmZu?~S6oO6=H8q*v>6@kp3G<IRDsjZS<9BkIS zPxg0dUUM5VJluYr{t8*YkB{jP&dQRv*C2fA*#($0I9)hfw0Iwy4q8P5Yya2IV~*>% zPiOft-u})vI^|R_O{YVS$foUZ=pj%Dr(+z@E+WW?!H~hk9kuQuM?GrITV3|R9_`uW 
zUDmPhTAOfKz=s!+qryxRWdheD?;l+TzB9mS2FZo9YDIVx&P_+{!B}R<EiGCN#I%C& z>_#pSse5C$3oFoHwf&38F}u;W;BqGm%poNNlCwlp;8#;kU?H+Ysy&*X=`%F@tG54w zT{+K0Xg@5TBRP==)FI>X4$@P>_8>Lvhxd6lp>fGhVVo$Clr_f5D{2ZJQRs!z6y0b{ z@0)NkVZj*0b*%VnOf%6jY)1RN^x1z6fR3WPA0CfH%Gx?1i_$PABfZbmkNP_sMy$UY z?1EzSOaGT$7$2E4wuU!aNv18~rIRe;`s-yx(L>WhdaHg&)oZ9q-p~c9RqDM!)$3Hf zK^1v-^<%2E>%2rQ3WwDXQ0<_EM9V;NffWI?JkLLIU5YR`TVVW&<2cL(1dH7Cp?BD4 zw_o|j0ziP6F-d_AgGoL555*VcGPR2&lP3U3M1XL3aDz*T1nmcKdpg>4ZUCTNOBDb* zC}4+g7h;MyY!h*q)3g-@v^rQz7LWi$WyBprD@q}6DPwHy+nAVSE6troB$c1-B^&BF zTAvO()DrrBj}lU(Zb3edPFwO6hb<o>kKqRoq%bL)L@1%6*?$?02m+w|me{B4m8_G_ zXa_wpKVibv1CS#*YRHY|vdCZ&WI$>Qg3Jx;0hViN87<@{wEZcTs}od>F;C?7QHFWy zSYkA|BcPOC9R|7sdnKmzUP(2zdB}Gt<L(;>+5(VCA@DQz^hXR1tOL34aHrl+i0Et$ zJkrc0#q?(XKr{$pmg6D)^PKj{vEv-(Uf?a2!jUr}{T&=0g)%x0=Fs0>XcyaE8^N8U zgFS?FU0hn2P{&p;VX=CXs%2FD3}O%h_zMH~p32zNMOs4IGImnQkrC!hMn=iRpE0wS z<Qm^gzSRW69&ev{Cci@pDYjFPrOh5-+lH5P&<j7-WyaHJmiKM7M!P;k)z47%kvL%4 zT&9>DNa-^iJHE-(ar_lDLOKR3L#a{wM4Fe>+*ItI332b$%gP;E-+QP{h14_r7OH!! 
zf25!A2ntGZ!r-Qfkm|*@xnY7CI-un_P?#f9FQU>~eyW4}7P_*+y$f!a9^lTU3%B?w zEL;zn3|T+jNa(v0s58k)OUyq(an{hX^ls0BsJu~=y=xiVH-Rz8S$hVZkiJLjwG_1% zATa*}MO!l0<qthreW=4z{-pidmxwReY~p5zq;?p-@p*l+6fQu}`om}G!{?}apDMkD zE7YQWBu7(+Go=1^4C<>+%cbzJ=<+Yx=f2jyQU{#vNylWhbfo=<)P>$PYUx}?)R&YN z3Jc8gAztGQg-Tk1)w`H~0H^8$^eQdq!&G)5&8gX#6#v8`qq|_Vz}jcOjLH6@5^jA@ z(<N-9T$e$R<b$Bz6gxF)7lL4Wr(825ir7r1f}q@JAfKXSBMbsX%BV=Wb*`ca%1LIk z*2MiVU2AGRgB^{Ox=cgahCnt^zGEyE87aEy0kuk9M%SBSB=;ea8e2wjMq*bRky@fY zt-o41eon6h33ZjKYg7&4Jyg*G28t+Q0~PKDdC|2VbM6A+?xVt@%UtF>D&+C@xG={# zhXUj8QE^g_u=$6DGc?;}#~v5Y;rDT&h%11{g#z_~f}U4=RG853%zapxGp*u7Mc*#X zCw6(Gq9{&DT-@;@YcxXBOd6pzcIvG=MoQ;LZ_=0P@_hLg+~B}?ANUV|J{<q_;mgSC z>WgJ_*Hk)u7Ctb3Eo|;vUV8R?LN8PVX&kgcAB2DEOJ>u>wN?t9;F4etJFUc9H5zz< UVTQigEwFj+@dBlF_?x!>2dQYnO8@`> literal 0 HcmV?d00001 diff --git a/sziszapangma/core/wer/__pycache__/wer_embedding_calculator.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_embedding_calculator.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..924ae83ebc3c912ad3acb439a5766bc626c0ac74 GIT binary patch literal 2778 zcmbVO-EZ7P5Z_(<e7=kCBO#QgDs3qRaUk7QR6>BF3eh5XfKVxoDp{(wuD!Wl>yPWM zLz?LNbdhqr!FwN)$NUZaC;Q41{{bM<FthjB=0YWGc|AM3p56KFZ)Utvsgwy^_wW4n z;Fl61zhh_ivZ3=8JSvAKA`y*ALVcPrpHb|y*h*~QHnJ5L632IvqF+p0-!(crE+u8Z zY~(_`ka)glWGAj9RljQFVq8n=ew~tUN#sVQJrb3K*SGw|sJu=Z3xB}Mq`|cPeMjZm z{jr;OvNVVrR4;#%JQPvXN!xd2kgCm0CPFqWeg5lC8pxg7Fh58`aVv<!tvJXtsh8&H z&fm&ZCl#khV1?3pF1q(J87b&g?g=@y1&T#b+>u!*6c}jZR8ST{iDHP`p1lgAj_QIS zh)*Npvkf9FVMlb8h(g3((0#H=BP+7^Jl_#T;YNiQEOMgap6!=J8Tu~tOHmp63sFsY zqP#*x1>Wuo0m&296%u(btf&%I_X>V(pV-8&1B`Wj4mLIcZ0d(z=Ftc!9!`(~N8T1= zba_67H)JzDp=^yFbrpo1^vMCuS)UwIN_w<MxRu*I+DEAnk#%xJ*BXv4sI9J$+U2~H zc5==of}_vzxl76-49W^L$O0weO<9G`5gZTaVH_yM`4Rd1L2E+^sai=O!%n&#<T860 zv~COaB+t67dmHO_TWYtXc7tw^wv*u1PMAs2+7=R@nO#f)X?AzyBCJ}6N12$RRq9Yi z*~#wZ`N%fM-oduZ3Mj$1kxAN-BHxnfDI_u0ZlU=lXnN!^Alajj89de@TPLyx>V|EA z&=<!$6VuZJ{*P9uO0g-V0PY)wu07i6X89wbI?5;-Ouhxi<aun)O`3N=((g@JFvm1U 
zH}C`HLPG!mo7yjTKR*kA|JbE51<2I8lL#^@xdMh?!Dh@bgUuBf1w_bm#DyNPLn1Hq z$br>khjblB>@iXW<DM=V<nx`b&=qh$lo{{^b@E*>H5v{xl{c&*D3?Gz7n#d5$Vj2K z;+*4m9C1g37vaMpHEf5uCmsW2K6^7V^GvNl7OTGojM9rU$0|DQN4Xk;g#e8&l#xjI zR^LH0U=exZ$_x7zWEL8+J$FEFlRkhounx$89@3x5KG~=H3})DndtgQmAv}Pf?T=<+ z;5O{Fkb=GOg|Uw$Kq8sgZv$uVUwBS)JO#tt0iFvi(6@eNPw`xkIY+i!fS0_CjUgZ= z4j=++UWe!g92<0a5QdnBljYSIQdF2F-J74lWJ5m3VlT_oJ=y)?42Z)l{Yl82zYZhM zGd{$;xf*A{P<0c8vHep%Vyqh<zXZYf_$m5001sElV>)`X0ULndn1lgVYp6Y>%j6Z` z<p;)9`60YUDWQ`_;@Od{7mXkkhrDRmq~!ZBOJ2jq1nUG2`4Q-Dz@wI-aa{Hnqb}TK z2uA0Gov>qI&lTPlIrurw1E<XE0wnde&{izcG4-b_n#DfGwao!seKVNon%NL3z*a1F z>^L?gN1N5%@}Ik3{O?0*K*3nVN|6SM;9QqEPqJt$M%m;1=~fUAN2CKg&^6A3G|ln= z{}P}uNDKy(xh1ky7dGPzN(08e#G{qSK8a+O7&1d{wm{OAQE>@H9D^N$h-naY1B>)P z!n&l8$3RB5<kVd#3CEsCju~^{NSuGnthL6Hb|4ACf5LPzePgyDHL(UYXG(2zel<jg z^!3wUZVTC*RPqFT>^z$!tW25;YalY^R9~5Iz$8mkriO4`n?DWKnzapojzl-kp88q{ p8sPP%v*(-iY_bF*JOlkC2DH06WWi0$Necf5>da%y%%kSE{slhP_jUjP literal 0 HcmV?d00001 diff --git a/sziszapangma/core/wer/__pycache__/wer_processing_step.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_processing_step.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e74e7f411d80ab89142f285eaaaf84f831cf9522 GIT binary patch literal 1860 zcmbtV&2Aev5GJ|1(n?y(acm=PQ6NEqCQ%R4t*4+U;uwX2LTN3j2DS%wv!vs7SGy`n z#jv29>}&5m<OTc+eU!cSlvn5_o#9IAN-^9+DR5@U<<ETI3`e7_ttNpM|M=%GKh+5N z3kTKa!r)uz8igQ&resV*O3`L18#|$6Z6|fdUg%LGsc?n&JN=G`ny6nBQNLnWG^`1K zKst^0a0=<rq9Gy^#c8B94E%wSli}q=;s>-(awYT-MzvFw&tw+ku4)5MORxR_Lz0jR z60#AIj&ucmMx+Otfv$me7TN_}7rQX$$(pFWb3|SE*B<!h!~9-lz9E`+z9H+C*_H^d zU0B^*o81y^JKKUYx8G55PwZT~VIb}U+WlhZM5-TD9!ss0>@`9w8X6=wfVOB_^8&#Z zI};_}B>7Zxk!TZTv0SW-b8X&JfZ1_W1C;VnV9;WW&9Zr5U_XGaKL=qTLUVRbuh?7H zFyoxNb9YYX&XqeLH?-gJ6mBoJ&n_pqIg>ik96eD;t#a^F<ETG4JRTms?B95W6Q_FP z{=&1a-w<^lmjC^8Z=|Kty>X=CB)f==%3npj7gGOf@=5PxG#K{uEYY)Q5@oN)(fT0H zmF!(eg`H1UjB#YBJGm_OQ~5?_`pn2A<4Fb=nIzBJaId!c>jc!NjIa;0Cm&JXy>;(@ zv4`9VavOUpfNPihwUs=T3Q|;V(&xW8{QeNxQ`@lKa#V_47#4d~r-V!;;@Q6i&*QuC 
zEVr(~!N{(X;T{YOG4!0x*)jo22u>x~!C>@!FgzNLY`k5CyEfvz3E`6}1U-GFO=6}9 z1Jh4qni6Lbl&|gvQQiE1fz%IP9v>eb4ArNI05h|~`fk-_$T=lr^93wf_x8)Xy<P0X z@}#7hsw~Fmxol`N==LFErp(DXcKUz}NXa|1&)2dGhqzj%r<;zpO>UxeRSTZNIT}wT zboS_D@75RhJ5;q`=~8VKKId69mYf$&&d0f!rf3J8|2B=%@=N*Tw>gip46ep7YR=0V z;hm;=WQvxp*JEkUa-sI%0`&mJXDBe_>LH4M8MhWDyG^q?82W1vP3FT}g&$*#d~ofn zZP(Up%F0;}^un@d^gH#iwfg6fS|KIH!)jUSVkPNbNY!1{Sqm+EbjK=N$1K+JMcjg+ U@Sl~$eS-nE`4X^z29V+Z0NskhuK)l5 literal 0 HcmV?d00001 diff --git a/sziszapangma/core/wer/__pycache__/wer_soft_calculator.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_soft_calculator.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e66f737228a10bcea2f3c886a1ccb85c904c6d0c GIT binary patch literal 843 zcmZuwJ#W-N5S{hM<%oO*QjicOqH%$ZL_>j4qy*8Wxtt=IV%d22B8y%7*!3ylM0b($ zCn%syN6k;!mWsbX#mrs;J|!c~XvXWEH}8!-n@n~AZU6bt4?h^dFDizXV6l(xUSW_R znFhlu#wd?;Wa26gd8`wYRw;vbAQPEhf=pGsj;oOzO(Ea;jkh6Z{_#QQnpPb)t`}{s z4ur0kTJ)X8v&o6FTMzc$no}iZ)1Dn$(YkqOjj}a$8X8SV=AL3uP%#Nrq+-dwvMZP~ z8OiuEtCA~7ph|I;#6Lu!X*ch;z#gevFy0T)qR@L2C?~WS-R)uMVGZ9{AAMnK_C08V z{n=3-`_wHL%K9<qP22RGTe_P^K5<IV?Ffr&<{xlgYvCN{*YM|kIdjUo(g<5O?YZb} zcPh$5<v#Y^qCA;Rk4v{|+)6A&duGITQFm6A=gN|EhtzoePuXJe*^UvA_Ur;3wt8|$ zf#P=3d_+^X!YI!DF6X9`OHFyk`KP7On-fbqgtmTi!=hGNBhicwLLfjpp^##r5d!=; z3iVOG<6jIJ6-1&y0)@Q!I=Dd<nu`IKAftbF|25+NpSGnf`ImQJ9c;I_cjp^!H1mK= XU_@m6`1MATH~71Nqg7^6#<JuuKO5#O literal 0 HcmV?d00001 diff --git a/sziszapangma/core/wer/__pycache__/wer_span_question.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_span_question.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..61e5fac5c3866d97cb474694ebc1fa60aba75bd9 GIT binary patch literal 1053 zcmah|KW`H;6!-I8E|+U7ihwL2#K<iP17j2_QHd=}8X+Pf<D8vv^m3Qlt_r6zr4lR* z-8;b8&%ihF%G7Uw7=UMoRskWf^nQNt{QUm>o^Q0Zl@MHCAANpt3WR)d#`+4;*g?_v z5d;xXkU5M2hy+1J@By9>5sJnM5sf8X!kCI^NYds7dXp4vOK`)pf@{5pRy0*rEz8Uu zuCD-%9TeR|kYo&kjOmETK!yT(tDXA2A%ZjN6NckR1|m8G(G>AXIBtp-CQfWPT=3Fv 
z<fV|WnKoP*+gcg26e+b0Z`eplbEwLUV#dqmbXUq_6nzKbn7ks#5aP#usM=`3J{b~9 z05OF@N>u~BRD^($LoJJ`YNByYlw<YVv+hVsrMq*kvb;RvMpYBu-IaRZREzG@(QvP; z>s;3yOP<a7??zTB**%iV3DfAo4i2R@c~y26Z|pY9HCy#HkwsO`T9{D#ZlJ#9IOzQ8 zfdfcPWq84KUdz9B?z-1)gm+{L0w;VD8)RS!yJQL2b1=y8kz6g8&M|NUMc+l3A!X0o zZ{a=ph}S-WY70_Q0jKmJ#Ow6J-Bu1#P@8U}mUbRknwxV_t_4q=+Ca@Fy&~5ptLBUT z7TVf9#fZ`$^+Vs@Od6)-R4RN>k{!u>_R?r(_@t1XUQuPd(0x=cLtys}g)~c?nos0n zuCiTsnooKAA3<HY+Krk(LZk2Xz5i7)NMjo_R`R)I%qEP@D{)vjeUsrVdGYHc$V+3d zF~-Xhr{e~!(I=c#RpB{OH(h=1L9Kx+S2;$U=olfPF~oE`@LvaMU^lM*J*kx&l|NaF U;Y0eq<KC*az8j4vdNYXOC!st0zW@LL literal 0 HcmV?d00001 diff --git a/sziszapangma/core/wer/__pycache__/wer_step.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_step.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..15b375efa80ab3c0292468ee00bb3e178407600e GIT binary patch literal 621 zcmaJ-y-or_5Z=9i5Q1-@l?e^^0>n_5*clTbB-bqK?P7>`x99E>0xh;agSF4#n`~?4 zE7<7FVGtW9nVIj~x%=j4+U>Rp*vtHF{ss9R$*~1MM&Jg6Ac7X8q#31%y}~Q~%x|z? z1Z9|olt?N95kAvvA|eq#5fSga9nB(<j7h)s0V<?VT}yDw^MV@#BpF*-Pd2s00^*s{ z!c2fg3$3=Y%JHvb^MQ=O%>@KWGAc;sO^NhnAn1_D5V8k3g6t282%vZ9;+fPa?^*^T zv(VAC8@R@XKhL=`_NE`Zgt3a3k}=n2tW;uEAnq{sxZ=gWqHzw_V~kgovfM6JWf;?V zGZ!uj#qBH6Jz!1vyiKP@YLk{+=gVr%tyXtDy^`j^sydxb$CK1-muAarUfq}cFUggb z=~`-ZhC>hPjgCM&1~&-WUP2Svyy5&OhZ9SqJ*d00WAy<F4nS;hIL3aR^MAf-?$qqp bwE5j_k4~{mGTa~QlHy<qgpSvt9Y3Mp2Og#v literal 0 HcmV?d00001 diff --git a/sziszapangma/core/wer/__pycache__/wer_util.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_util.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5caa38f0edc34f2f56e7ed8b79fd36da809fc616 GIT binary patch literal 1526 zcmZ`(&2HQ_5GJYLmDigfNP;HmADWYL*v$hNMt~UYrEm*uod5y~Ru!4G97$A2Zj4=Q zPWH8j-g}6TJ@o<lCcO5PS15|KGu(LN)Q*H14Tr<|`DUnt?d^=fQ&oTd^hZF*U%0sV zfQ~!@GP}S85wsyKEojRMMsdxWpa}gpDq{EsP12@CO361wgd#d6B06EkK(Hyv<2T@g z<jjTN*2cQ@yWZBSE1UhCy5y<UQ!D!lO)fqmEFJ-wC%_~rs2~NCfedAIhsa3K-<V(` zI1Nu|5raOFsT|ya;GmC0?DZMwQ<;GM*3nQTu#<{`-`N%!B(~+lAK1Eq<f4~lGnX#b 
z(#~~PQ3!kfpv-_C1DWf<Rsg~CfgY~N6rcf5Y~PXJNltYPyLxcW{{DV)V5Byaw$xSK z9hX+CpUcUfG|#Q-Cr=NiKTOQBHp{XvyIEU)xTuttlVhoo_#Cnv_X~HEt9S5mV>P!5 zKU<g>W=PobQ$Q3;8ee6IO)vnrPQdn3jhMWzaSXzW9Ff=bj9B(M069F0&ge#tK~6xM z3VOn3p-12i&P+aV0}R0JqL*$ola`x9r7iEuR_X{We6lX{M#oAE<3?IOlUjC_<fw4h z4i~+$htkw$t%$UmcjBGnN*N1457E^IXpPhC?r}tF9bR#ORFsBmb<CUEUIPQ;XFf>j z5Z>&c<^2s*A3ukafga6e$Pn-iWDj6r0Wd)YI|cCe$O%28uK=>6U`0iMw?f3TP$a+L zok;gTKp$NpI8z{V8`un7{5XQ+;Bp0yeq}EJWnXa4);Qh*GsupOA6^MYw$s3Et5n<X z-bJr?p<>4VU5?MI_GRG&iPDzOG&E?u)2OO!%q|G$hxG^!y1}MLE_>e|7p}YEl1er6 zwljI?Sl7F#)Vf?a)<e_w!1>qc9P-A>TmYN@@7#^cxqO02V{F1Q#@?o3NEyxEF7JLk z{r!C8Qtn5MbC+@6Dlu;)$V1Lw%*zIc5Ld>Qwys)f50%h3wl08M<R?=jP8$0GiFON( zDC;kfeTnQVWben$OE^`&CAcy;m?>lQb^w$z>UZ+Y#c(6)ZsuLDV@BP)?HBqpI8emv tY4wrZ=NJD8WBd(_VL*}maIaJJcj<~uT)ML^dl!FOCIxn#r8H&3;6ImOdsF}b literal 0 HcmV?d00001 diff --git a/sziszapangma/core/wer/classic_wer_calculator.py b/sziszapangma/core/wer/classic_wer_calculator.py new file mode 100644 index 0000000..db54d33 --- /dev/null +++ b/sziszapangma/core/wer/classic_wer_calculator.py @@ -0,0 +1,9 @@ +from sziszapangma.core.wer.distance_matrix_calculator import \ + BinaryDistanceCalculator +from sziszapangma.core.wer.wer_calculator import WerCalculator + + +class ClassicWerCalculator(WerCalculator): + + def __init__(self): + super().__init__(BinaryDistanceCalculator()) diff --git a/sziszapangma/core/wer/distance_matrix_calculator.py b/sziszapangma/core/wer/distance_matrix_calculator.py new file mode 100644 index 0000000..50e359a --- /dev/null +++ b/sziszapangma/core/wer/distance_matrix_calculator.py @@ -0,0 +1,89 @@ +from abc import ABC, abstractmethod +from typing import List + +import numpy as np + +from sziszapangma.core.transformer.embedding_transformer import \ + EmbeddingTransformer + + +class DistanceCalculator(ABC): + @abstractmethod + def calculate_distance_matrix( + self, + reference: List[str], + hypothesis: List[str] + ) -> np.array: + pass + + @abstractmethod + def calculate_distance_for_words(self, word1: 
str, word2: str) -> float: + pass + + +class BinaryDistanceCalculator(DistanceCalculator): + def calculate_distance_for_words(self, word1: str, word2: str) -> float: + return 0 if word1 == word2 else 1 + + def calculate_distance_matrix( + self, + reference: List[str], + hypothesis: List[str] + ) -> np.array: + return np.array([ + [self.calculate_distance_for_words(reference_word, hypothesis_word) + for hypothesis_word in hypothesis] + for reference_word in reference + ]) + + +class CosineDistanceCalculator(DistanceCalculator): + _embedding_transformer: EmbeddingTransformer + + def __init__(self, embedding_transformer: EmbeddingTransformer): + self._embedding_transformer = embedding_transformer + + def calculate_distance_for_words(self, word1: str, word2: str) -> float: + return self.cosine_distance_between_words_embeddings( + self._embedding_transformer.get_embedding(word1), + self._embedding_transformer.get_embedding(word2) + ) + + @staticmethod + def cosine_distance_between_words_embeddings( + word1_embedding: np.array, + word2_embedding: np.array + ) -> float: + a = word1_embedding + b = word2_embedding + if a.shape != b.shape: + raise RuntimeError( + "array {} shape not match {}".format(a.shape, b.shape)) + if a.ndim == 1: + a_norm = np.linalg.norm(a) + b_norm = np.linalg.norm(b) + elif a.ndim == 2: + a_norm = np.linalg.norm(a, axis=1, keepdims=True) + b_norm = np.linalg.norm(b, axis=1, keepdims=True) + else: + raise RuntimeError("array dimensions {} not right".format(a.ndim)) + similiarity = np.dot(a, b.T) / (a_norm * b_norm) + dist = 1. 
- similiarity + return dist + + def calculate_distance_matrix( + self, + reference: List[str], + hypothesis: List[str] + ) -> np.array: + embeddings_dict = self._embedding_transformer.get_embeddings( + list(set(reference + hypothesis)) + ) + return np.array([[ + self.cosine_distance_between_words_embeddings( + embeddings_dict[reference_word], + embeddings_dict[hypothesis_word], + ) + for hypothesis_word in hypothesis] + for reference_word in reference + ]) diff --git a/sziszapangma/core/wer/step_type.py b/sziszapangma/core/wer/step_type.py new file mode 100644 index 0000000..974e02a --- /dev/null +++ b/sziszapangma/core/wer/step_type.py @@ -0,0 +1,17 @@ +from enum import Enum + + +class StepType(Enum): + CORRECT = 1 + SUBSTITUTION = 2 + DELETION = 3 + INSERTION = 4 + + def get_short_name(self) -> str: + return self.name[:3] + + def is_cross_step(self) -> bool: + return self in [StepType.CORRECT, StepType.SUBSTITUTION] + + def contain_reference_word(self) -> bool: + return self != StepType.INSERTION diff --git a/sziszapangma/core/wer/step_words.py b/sziszapangma/core/wer/step_words.py new file mode 100644 index 0000000..889c2fc --- /dev/null +++ b/sziszapangma/core/wer/step_words.py @@ -0,0 +1,8 @@ +from dataclasses import dataclass +from typing import Optional + + +@dataclass(frozen=True) +class StepWords: + reference_word: Optional[str] + hypothesis_word: Optional[str] diff --git a/sziszapangma/core/wer/wer_calculator.py b/sziszapangma/core/wer/wer_calculator.py new file mode 100644 index 0000000..724fb94 --- /dev/null +++ b/sziszapangma/core/wer/wer_calculator.py @@ -0,0 +1,220 @@ +from abc import ABC +from typing import List, Tuple, Optional + +import numpy as np + +from sziszapangma.core.wer.distance_matrix_calculator import \ + DistanceCalculator +from sziszapangma.core.wer.step_type import StepType +from sziszapangma.core.wer.wer_processing_step import WerProcessingStep +from sziszapangma.core.wer.wer_span_question import Span +from 
sziszapangma.core.wer.wer_step import WerStep, StepWords + + +class WerCalculator(ABC): + _distance_matrix_calculator: DistanceCalculator + + def __init__(self, distance_matrix_calculator: DistanceCalculator): + self._distance_matrix_calculator = distance_matrix_calculator + + def convert_processing_steps_to_result( + self, + processing_steps: List[WerProcessingStep], + reference_weights: Optional[List[float]] = None + ) -> List[WerStep]: + if reference_weights is None: + return [ + WerStep(step.step_type, step.step_words, step.step_cost) + for step in processing_steps + ] + else: + indexes_per_steps = self._get_reference_indexes_per_steps( + processing_steps) + return [ + WerStep( + processing_steps[step_index].step_type, + processing_steps[step_index].step_words, + reference_weights[indexes_per_steps[step_index]] * + processing_steps[step_index].step_cost + ) + for step_index in range(len(processing_steps)) + ] + + def get_distance_matrix_between_words( + self, + reference: List[str], + hypothesis: List[str] + ) -> np.ndarray: + return self._distance_matrix_calculator.calculate_distance_matrix( + reference, hypothesis) + + def extract_steps_path( + self, + steps_matrix: List[List[WerProcessingStep]] + ) -> List[WerProcessingStep]: + x = len(steps_matrix) - 1 + y = len(steps_matrix[0]) - 1 + to_return = [] + while not (x == 0 and y == 0): + current_step = steps_matrix[x][y] + to_return.append(current_step) + if current_step.step_type == StepType.DELETION: + x = x - 1 + elif current_step.step_type == StepType.INSERTION: + y = y - 1 + else: # creation and substitution + y = y - 1 + x = x - 1 + return to_return[::-1] + + @staticmethod + def _get_levenshtein_processing_step_cross( + prev_cross_distance: float, + step_words: StepWords, + current_distance: float + ) -> WerProcessingStep: + return WerProcessingStep.levenshtein_correct( + prev_cross_distance, step_words, 0) \ + if current_distance == 0 \ + else WerProcessingStep.levenshtein_substitution( + 
prev_cross_distance, step_words, current_distance) + + def get_levenshtein_embedding_based( + self, + reference: List[str], + hypothesis: List[str], + distance_matrix: np.ndarray + ) -> Tuple[np.ndarray, List[List[WerProcessingStep]]]: + + reference_len = len(reference) + hypothesis_len = len(hypothesis) + distance_arr, steps_arr = self._get_initialized_levenshtein_matrix( + reference, hypothesis) + + for ref_index in range(reference_len): + for hyp_index in range(hypothesis_len): + step_words = StepWords(reference[ref_index], + hypothesis[hyp_index]) + current_distance = distance_matrix[ref_index][hyp_index] + prev_cross_distance = distance_arr[ref_index][hyp_index] + + cross_go_step = self._get_levenshtein_processing_step_cross( + prev_cross_distance, step_words, current_distance) + insertion_step = WerProcessingStep.levenshtein_insertion( + distance_arr[ref_index + 1][hyp_index], step_words) + deletion_step = WerProcessingStep.levenshtein_deletion( + distance_arr[ref_index][hyp_index + 1], step_words) + + best_step = min([cross_go_step, insertion_step, deletion_step], + key=lambda it: it.total_distance()) + + distance_arr[ref_index + 1][hyp_index + 1] = \ + best_step.total_distance() + steps_arr[ref_index + 1][hyp_index + 1] = best_step + + return distance_arr, steps_arr + + @staticmethod + def _get_initialized_levenshtein_matrix( + reference: List[str], + hypothesis: List[str] + ) -> Tuple[np.ndarray, List[List[Optional[WerProcessingStep]]]]: + + # TODO: consider about remove distance_arr replaced by steps_arr + reference_len = len(reference) + hypothesis_len = len(hypothesis) + distance_arr = np.zeros((reference_len + 1) * (hypothesis_len + 1)) \ + .reshape((reference_len + 1, hypothesis_len + 1)) + steps_arr = [ + [None for _ in range(hypothesis_len + 1)] + for _ in range(reference_len + 1) + ] + + # levenshtein initial + for ref_index in range(reference_len + 1): + distance_arr[ref_index][0] = ref_index + step_words = StepWords( + reference[ref_index - 1] if 
ref_index > 0 else None, + None + ) + steps_arr[ref_index][0] = WerProcessingStep.levenshtein_deletion( + ref_index - 1, step_words) + for hyp_index in range(hypothesis_len + 1): + distance_arr[0][hyp_index] = hyp_index + step_words = StepWords( + None, + hypothesis[hyp_index - 1] if hyp_index > 0 else None + ) + steps_arr[0][hyp_index] = WerProcessingStep.levenshtein_insertion( + hyp_index - 1, step_words) + + return distance_arr, steps_arr + + def _get_reference_indexes_per_steps( + self, + steps: List[WerProcessingStep] + ) -> List[int]: + counter = 0 + indexes = [] + for step in steps: + indexes.append(counter) + if step.step_type.contain_reference_word(): + counter = counter + 1 + return indexes + + def _calculate_wer( + self, + steps: List[WerStep], + ) -> float: + reference_len = sum([ + 1 if step.step_type.contain_reference_word() else 0 + for step in steps + ]) + return sum([step.step_cost for step in steps]) / reference_len + + def _calculate_steps_path( + self, + reference: List[str], + hypothesis: List[str] + ) -> List[WerProcessingStep]: + distance_between_words = self.get_distance_matrix_between_words( + reference, hypothesis) + _, steps_matrix = self.get_levenshtein_embedding_based( + reference, hypothesis, distance_between_words) + return self.extract_steps_path(steps_matrix) + + def calculate_wer( + self, + reference: List[str], + hypothesis: List[str] + ) -> Tuple[float, List[WerStep]]: + steps_path = self._calculate_steps_path(reference, hypothesis) + steps = self.convert_processing_steps_to_result(steps_path) + return self._calculate_wer(steps), steps + + def calculate_wer_for_spans( + self, + reference: List[str], + hypothesis: List[str], + spans: List[Span] + ) -> List[float]: + steps_path = self._calculate_steps_path(reference, hypothesis) + reference_len = len(reference) + return [ + self._calculate_wer(self.convert_processing_steps_to_result( + processing_steps=steps_path, + reference_weights=span.get_reference_weights_table( + 
reference_len) + )) + for span in spans + ] + + def calculate_wer_weighted( + self, + reference: List[str], + hypothesis: List[str], + weights: List[float] + ) -> Tuple[float, List[WerStep]]: + steps_path = self._calculate_steps_path(reference, hypothesis) + steps = self.convert_processing_steps_to_result(steps_path, weights) + return self._calculate_wer(steps), steps diff --git a/sziszapangma/core/wer/wer_embedding_calculator.py b/sziszapangma/core/wer/wer_embedding_calculator.py new file mode 100644 index 0000000..30dc82a --- /dev/null +++ b/sziszapangma/core/wer/wer_embedding_calculator.py @@ -0,0 +1,60 @@ +from typing import List, Optional + +from sziszapangma.core.transformer.embedding_transformer import \ + EmbeddingTransformer +from sziszapangma.core.wer.distance_matrix_calculator import \ + BinaryDistanceCalculator, DistanceCalculator, CosineDistanceCalculator +from sziszapangma.core.wer.step_words import StepWords +from sziszapangma.core.wer.wer_calculator import WerCalculator +from sziszapangma.core.wer.wer_processing_step import WerProcessingStep +from sziszapangma.core.wer.wer_step import WerStep + + +class WerEmbeddingCalculator(WerCalculator): + _distance_calculator: DistanceCalculator + + def __init__(self, embedding_transformer: EmbeddingTransformer): + super().__init__(BinaryDistanceCalculator()) + self._embedding_transformer = embedding_transformer + self._distance_calculator = CosineDistanceCalculator( + embedding_transformer) + + def _calculate_distance_for_word_step(self, step_words: StepWords) -> float: + return self._distance_calculator.calculate_distance_for_words( + step_words.reference_word, + step_words.hypothesis_word + ) + + def _calculate_result_cost_for_step( + self, + processing_step: WerProcessingStep + ) -> float: + step_words = processing_step.step_words + return self._calculate_distance_for_word_step(step_words) \ + if processing_step.step_type.is_cross_step() \ + else processing_step.step_cost + + def 
convert_processing_steps_to_result( + self, + processing_steps: List[WerProcessingStep], + reference_weights: Optional[List[float]] = None + ) -> List[WerStep]: + if reference_weights is None: + return [ + WerStep(step.step_type, step.step_words, + self._calculate_result_cost_for_step(step)) + for step in processing_steps + ] + else: + indexes_per_steps = self._get_reference_indexes_per_steps( + processing_steps) + return [ + WerStep( + processing_steps[step_index].step_type, + processing_steps[step_index].step_words, + reference_weights[indexes_per_steps[step_index]] * + self._calculate_result_cost_for_step( + processing_steps[step_index]) + ) + for step_index in range(len(processing_steps)) + ] diff --git a/sziszapangma/core/wer/wer_processing_step.py b/sziszapangma/core/wer/wer_processing_step.py new file mode 100644 index 0000000..15d92dd --- /dev/null +++ b/sziszapangma/core/wer/wer_processing_step.py @@ -0,0 +1,41 @@ +from dataclasses import dataclass + +from sziszapangma.core.wer.step_type import StepType +from sziszapangma.core.wer.step_words import StepWords + + +@dataclass(frozen=True) +class WerProcessingStep: + step_type: StepType + step_words: StepWords + previous_distance: float + step_cost: float + + @classmethod + def levenshtein_insertion(cls, previous_distance: float, + step_words: StepWords, step_cost: float = 1): + words = StepWords(None, step_words.hypothesis_word) + return WerProcessingStep(StepType.INSERTION, words, + previous_distance, step_cost) + + @classmethod + def levenshtein_deletion(cls, previous_distance: float, + step_words: StepWords, step_cost: float = 1): + words = StepWords(step_words.reference_word, None) + return WerProcessingStep(StepType.DELETION, words, + previous_distance, step_cost) + + @classmethod + def levenshtein_substitution(cls, previous_distance: float, + step_words: StepWords, step_cost: float): + return WerProcessingStep(StepType.SUBSTITUTION, step_words, + previous_distance, step_cost) + + @classmethod + def 
levenshtein_correct(cls, previous_distance: float, + step_words: StepWords, step_cost: float): + return WerProcessingStep(StepType.CORRECT, step_words, + previous_distance, step_cost) + + def total_distance(self) -> float: + return self.step_cost + self.previous_distance diff --git a/sziszapangma/core/wer/wer_soft_calculator.py b/sziszapangma/core/wer/wer_soft_calculator.py new file mode 100644 index 0000000..e177282 --- /dev/null +++ b/sziszapangma/core/wer/wer_soft_calculator.py @@ -0,0 +1,11 @@ +from sziszapangma.core.wer.distance_matrix_calculator import \ + CosineDistanceCalculator +from sziszapangma.core.wer.wer_calculator import WerCalculator +from sziszapangma.core.transformer.embedding_transformer import \ + EmbeddingTransformer + + +class WerSoftCalculator(WerCalculator): + + def __init__(self, embedding_transformer: EmbeddingTransformer): + super().__init__(CosineDistanceCalculator(embedding_transformer)) diff --git a/sziszapangma/core/wer/wer_span_question.py b/sziszapangma/core/wer/wer_span_question.py new file mode 100644 index 0000000..bfdf434 --- /dev/null +++ b/sziszapangma/core/wer/wer_span_question.py @@ -0,0 +1,16 @@ +from dataclasses import dataclass + + +@dataclass(frozen=True) +class Span: + index_start: int + index_end: int + + def _is_index_belong(self, index: int) -> bool: + return self.index_start <= index < self.index_end + + def get_reference_weights_table(self, total_size: int): + return [ + 1 if self._is_index_belong(it) else 0 + for it in range(total_size) + ] diff --git a/sziszapangma/core/wer/wer_step.py b/sziszapangma/core/wer/wer_step.py new file mode 100644 index 0000000..5e1efa0 --- /dev/null +++ b/sziszapangma/core/wer/wer_step.py @@ -0,0 +1,11 @@ +from dataclasses import dataclass + +from sziszapangma.core.wer.step_type import StepType +from sziszapangma.core.wer.step_words import StepWords + + +@dataclass(frozen=True) +class WerStep: + step_type: StepType + step_words: StepWords + step_cost: float diff --git 
a/sziszapangma/core/wer/wer_util.py b/sziszapangma/core/wer/wer_util.py new file mode 100644 index 0000000..a6ef666 --- /dev/null +++ b/sziszapangma/core/wer/wer_util.py @@ -0,0 +1,33 @@ +from typing import List, Optional + +import numpy as np +import pandas as pd + +from sziszapangma.core.wer.wer_step import WerStep + + +class WerUtil: + + @staticmethod + def _optional_str_to_str(value: Optional[str]) -> str: + return value if value is not None else '' + + @staticmethod + def _wer_step_to_pandas_row_lit(step: WerStep) -> List[any]: + return [ + step.step_type.get_short_name(), + WerUtil._optional_str_to_str(step.step_words.reference_word), + WerUtil._optional_str_to_str(step.step_words.hypothesis_word), + round(step.step_cost, 3) + ] + + @staticmethod + def steps_to_dataframe(steps: List[WerStep]) -> pd.DataFrame: + arr = np.array([ + WerUtil._wer_step_to_pandas_row_lit(step) + for step in steps + ]) + return pd.DataFrame( + arr, + columns=['step_type', 'reference', 'hypothesis', 'cost'] + ) diff --git a/sziszapangma/integration/__init__.py b/sziszapangma/integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sziszapangma/integration/__pycache__/__init__.cpython-38.pyc b/sziszapangma/integration/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a18d1edc644ccabc99c7df659b505bf8aebead00 GIT binary patch literal 174 zcmWIL<>g`kf|JdS2_X70h(HF6K#l_t7qb9~6oz01O-8?!3`HPe1o10aKeRZts8~NY zu_!q+uRO7&C_gDt-zBv;yClCrKRh%zM8CKyv$!g;ATcjJHxZSSnOBmUUX)mpnV+X0 fAD@|*SrQ+wS5SG2!zMRBr8Fni4rJM9AZ7pnjQ1_l literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/__pycache__/asr_processor.cpython-38.pyc b/sziszapangma/integration/__pycache__/asr_processor.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b70aca5ff61fb49ab62aded80d52d31f3990f1a8 GIT binary patch literal 1471 zcmZWp&2Aev5GJ|Y)k?M;H9u&L7KN??3JWND42q-*&>RAxKx(Ae3r#I4dENaBNx`rn 
zpKKtfyg-pda`Y?l+EXumfSx)-IkJpU;4Ftj4rjjkM#uerkHDDy^w-J1jF7)k*&Q5| zXE5z65Q->Tl8R=uVi}{Tvofl9#;Z7sgC3Phmi$T-SMfDbv8Gp1)&V_H9nd?v1A3~W z*JPCb10KnU`RMsC6MrCQ))|?*O1slW!8U(YSm*hRB6rgqkM52HlxHyQ4<Izjs3IBD zk>)zq3CPZz9g;%`_XAT*Mc4d_!m}Knjs5;}Yko6LuB~m%4fEZ3QEJgjcj^<P-O|)K zAh8{|rZ6WkEyAHFxM8bBZ+JT<OJj8H%8OyuST`)HwlPlDu(HE>p-VM9fz)c7o1%3m z!}G$O4ogUG*vyA_hON8v#wdHVRi`%y61TdXn-rcmSg;`e`(u1;wXx$$n!Kpbr8CV; zj$df|r)%2ro8#BNkL{|kE7{6=QOWyCQ9Hdb(iKfTmez>&GbBgtg+It;Sqh``W>FVb zcQN+>O0xQP7rvwW=cc2sPlc#urG@Z4A*x0#0bJqtg?PJ^<;G$n@aH2axIsWo98_6f zm_Dl81UIXF+{1!*V1^2$jM1?31uxJO4*L?sXI>l=WQL+o;j7pO`i{)0iWI-z&tkA7 zx&t!3WlRi=L9+6!`;Ksu^^mH8GjH^4QWj8X&&AS|BkCjge8CmW4X6DEgd>iw$rby^ zJ^_i=uojUb;F3;9+{bp=YGWS3L4>WS3nzqm4C;uPJ@}X>Aa1Dd3Xzx6TCfIi0wiC9 z4ebFELJ#Qb36y3V<<Zt5;&v}gL<xbP;O!qlDC!_M#jaREFPWofY)#IhHFVB<b{Vas zkNgw4<St%AA3++}M||ozHjZaz22jMfwxh)Jrq#7ijefh-*4d1=&=Vmi&(CaA`?xhg zy-~bLFfjT%2*F22+pLdAfOocSYM}NV6GFzwjzf$Y_c7xEP2m?Pd6)NnN{;r-0US0D zPz+Ff1;RfNLe_QT0{;<LMDvb~<{_&1p6}mcpKIudTl_;@SYH$3!UEz2i!ANYls)93 zsL&0OoaF{PXT$l#U9?5L*m>xCwBfyQV<+Qnp?&&v(?&nz-;hPb26VvEXt4J`;ZIH` literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/__pycache__/experiment_manager.cpython-38.pyc b/sziszapangma/integration/__pycache__/experiment_manager.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..878ec62a813cd95936980836e5a04d91728a0606 GIT binary patch literal 1211 zcmZuw&2H2%5FRJlBpbHv_J@k{2H~)KMygN+MNkPU)OLjwsWN75R%!EBI~8=boahs9 zMxs6T8F-Ui5r@74LWr4ce?nV3Gak>x&Ntso1~W4O;Q0Fe$GcxPz)#vV2l`+Q#XLnN zL9zr=7BNP)l~}1A*=XC+Nu0D5wQ9SSxTzO;3|@eArFRU{`(j1D^tT~wpW{6cvdVcC zn-bTh=bs9t<5XqkmMU@+m$^PVXVaIuTWXkVxgj@7r3FUt!0a2H50x=-Hrf$pe@OQ= zhl86n6!Q|5f{00otN|!nIg+h{YT;_(>MCyq(6-UG(Od7O^Uanm=^i_X*OUG+L~Yqd z+(9*e4fM6h#7OCCnX4=CTwkHD7I8zkd5q5m@@k$JSHO9R$W3Th_wT-7;g0j&o5zQg z>E$5XLV1Q_mQhXE1UO7A)NEp-wj^xB8QTnP?PFZGNm21br*e!+KInVo>5!U1bq2bN z%kSO(z$k6{snEkX`zT7CzZd;=W%kRw=)WCo@AS<$He*qUY?O-sH*r>~5mqkFvwkDh zbV^{aII4Wk<18*YpT&<E7ZtcJbJ_UeExvn~uMVj_E(c}ZLa8%<gvJE+;Dnt5Bg<Nc 
z-KEcL!cLHbJ%?ha7K4K>v7zakVXNjN3J69lk{GikS}N~?@S3U6(sSsH=5AD>=4h(s zF|HR;JwP$UBd`FcL-X%=<#s+7cWq*KAq|;G73WpJd78_^gzOII2Zth=Ml}74)g0#{ z%kr}3)NrmTcTFT{B2O=nA`}f|R7YLI^U$Kg8*&BA$Juc#W-<3qC(sn#4BM4k9u@d| z)wfoUo7L?#3fjBw_+F#7*WMcNw4n%Ov_xIsyGdY;n!bD&;L2Z}lJSH}Zm4lP7Eac& F{{a#KG`0W$ literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/__pycache__/gold_transcript_processor.cpython-38.pyc b/sziszapangma/integration/__pycache__/gold_transcript_processor.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..035e15e52b368db6f519a04ed2b9bbb79855db88 GIT binary patch literal 826 zcmZ`%&1&2*5SC=G_lIl=^iXp1bt$ZS=&_Wv8wiE66q2Rji&kqXj)*O}BQ3#uDx`1F zYhR#mQ0SZJ*q-_dJyr5<Hfc!*W|&VN&3yVii}84bybZs7U%tTz{S4r~#U31cH_v_G zh~o;?IL9@~2@aT4Q61-T-OGCzogyCb_!Hu>z&BBz@ZK4kB)|MkG$Af}^?K$Wv6ZpF z3R??%skuLn-%4X$d?E{5gv$GtcyR39Jo7=I9CMTtj`K*wq9+p3_i=DRj?j^3@`ZHM z;~Sh0x0BtIQ(f_MV9FFA8~YA)A&k-B0-qs>LIbCgy99(?gDQw0b88{@ilCWy6J+4n zzlN+!pN`U<&DH=y*{V(LrATXSY%1$UgJsI5+&?M35^X9?y1e$Img$d=pHmI8l!{fU z*I`F&-!D{6_h|k#@QjV9E@0rtLikr*_|3D05n!^KK_S&Ov!GWjI}zr}>Lz=?I6KeG zMw$(4m?~@bH;~GT5}1`*Wu;e$-Z7!gtwz(Pb&pG7=|2^Li0OnPo2UQLclyA+`DEk< zlqyyWO5KRkTJv=k@&iggtXZ}D(x;T`f>Mb5AQ$;<fuKJ0I>=ND;{f+kY_o%K7S<({ wnUD-{ObG7I=99jQ*s6d)d)qg$Z6j5A*SafwsH+>i*fM<?wxQdELwp$h0lI9{asU7T literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/__pycache__/id_generator.cpython-38.pyc b/sziszapangma/integration/__pycache__/id_generator.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..051a0798d43d79433b153ea102202bd9f1158001 GIT binary patch literal 732 zcmY*X&2H2%5VoD4uC^2^p1=nV>}7M}h7hnwRVBKYvZ|B=#>vd?MkH}+J3`%Sc?qPx z08hZX_{xb_=m|U9En%b?d1jQE@B1dJland&_DlWv@|hCyJ0xR^ee%}3dFca?oB#z@ z(}v}&iSme&GXfFB-wDLXZlgSg<dV$N`&Wxv8pGF0^HSDjUrUR;0=~f(wY0um+;EMh z(!keMjl7d~JOAUCkQsB)yY~zCR2If+S=t8eRzVqhV|#@5!dH>x)Wc&y^9W;1FadVQ z259ju&C@+BcYF?K2L#&l4a-jVFhqbM0t}aGw7qUiHd+_n;e&;D6IKKDMA`1+#ej}m zH72?a^ae`PDSxnY;AX4(8hC-cZ>#Gbxop9!(4n-lsBymh834rw>$Oh(4m~-9`>Ska 
zP@Algx~$r7(rQ)6>;sywR(09c>he=&c9q%5PPUsy{-0E>#f_J}Qf*d&cs#<qyK(6T zt*9VuG6P|@dp=TWzHcvQM{XiSD;pHTO@(L_^j<IFo(OT>%le^nBm}5Z2puGHkw2h5 zK5+QQMu(qHCq4u>f$D_DjM1UCoF)31FS$q-rH=e-m^SoAGX`zKAUAovNAo5eXgE$x GPouxh(6@E~ literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/__pycache__/metrics_calculator.cpython-38.pyc b/sziszapangma/integration/__pycache__/metrics_calculator.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64abcc4563ec492321106b4c30b36abb903f4342 GIT binary patch literal 1615 zcma)6OK%%D5GJ_~t=F<0w@KTyJqRd}U@ubd6^s;eQWQvBAgW<tfq-R}l)UXe6p}(< zK{?si9&+!YIp&x2+$*m=^)Cc%)0x%7uH{1saF)x*@cX`*;qlH+OrSmb{kI?9dxZRf zKUW*fkq6MtJus4_L{iBLTC$XtUg}Zu4UwMoFNpNdS?bGRNRsf~XHP_4buv+8(?Zx< zcj{56R95Gi>HZ0hNWz@|I5*b$hk0f*oVwZq_;~=`XfTST6mDk6aChn{Uj>r&m$#;& zYN<$VsaS1~*j;j$z+ViuErVY?8Oqj$e@@e#8~3bFHG3nub>XMGG6n|QZvV*w?qJP9 zQ^4Xr;wx5)XN52(&-hEFGX{&RhGXml=mzDT(>dXw9(bll3DqIYp#nw~qx8;=4&M?T z!LE1DddEg-(<_C}^6I6qx_&Nthsyk9>q+nAc=&zK%yKgm6H$#z@!y}kvTCe_&FiYS zlz$Ds*4@dOi#X4#+;WaEO+$jtKHcEnU2RJ|w>Pc}$*mTZ$#g!kE*3^}t<1ErF4W3S zb+sXf5AGW|Ixw(J5?zovIV9&4(ucvAHe>&11|kJV8H&AX2n!typ_|)a#&Gmm`xTv2 z3lV?KUiia>TnSABe)8SZ!?(<_?w5|`wvlmRw&19X`vv4(R+p25578ew4k28#uWn$i zrxiwbEgA@Ey8WVtkeLG*tUPVN#j<JO-hx2`H$u2qpe`Wo!tynQdLQPH<r-ST(k-n< zN~<bUX?%J%sqLvUxdFPjO<qn4$i-!zD#V*Gy^W6Bo8H|WeiNx3yblwO1dO(6`=8nU z|0zd?R96kfycX(n^2(c&AF2M_vR^%@1ono_J?r<$+{1d9-5ac+cSKRFo{7!~UJWU% z=oo<nyGh`D32#MHJQ^Pe{Q;P?%?FRa9uB`9@RNs66JNJ*Wz9ZqE%W}G$Zr2qu;q@A z;h@2IkT9en^(iCledNu)*c4*p>`}6ZrBsPhaqeQy%UVt$D%`a>e=!xsa;3$&tTUMQ zpVxKa_Ba<+RokXQ4d)uG(6tRtWB}{5mKx>NsHyXe)y@;unF~fm4KD%m*O)0+$YBgM zCVC?g%49wP6H}i>Eb<uJ1#ax`-EI8Fe>J;~l0f4o7uvH)UX3sBT<F~RyfjT2Mg0ZE N11!`gQ+;~N`xiBNhTi}H literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/__pycache__/path_filter.cpython-38.pyc b/sziszapangma/integration/__pycache__/path_filter.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12476b326cff42cc21f1c1d17c3d78b9ecbb967c GIT binary patch literal 1959 
zcma)6&2Ah;5bo~j`Puc_aR><*<uI2ZjSsuIMzX9J2M`i6QH&Eb5*lWw+q>h;&!)Q< z<JIP5^8g$XK>~5gE6srmCtd+X2~|C@y|#nINL^FiU0q%E)z`JRyxb-5{Zan>-JhJ0 zzi_hn!5X;_Lq7x|h@b^2X-rENGm3LoI3<s{)t$mEz1S=L*r()kBDio*h;Su6cH#i$ zp73Gr%K+vb5$uwv^EY@U5o?^SPqv$tbf}F=GgC@4u0=!}{xCIT*ydm6+BCse6O-3f zT4Z=?@jHw1V_20Wrh>#w(Abe&y3$*NIA2iV2!6tkY3vIZ?)I84djBjhj8wD3ZV2Dj z+W^9>NVN{@{qWqR-?b*KeK=9|_qmW^S`=YA6?q-*=Y`avsbNdPZYIIO@F?8ceH5mZ z2r~$KTBJsXke$ji-M8*u(#BKLOjVW9g^W<gu?<6Gc7j64Y<ByRwbfw(fM9$JclUQ* z62wf)VqalZt@!8hU{6b>2W6_VygE#cs)y;|fz(e;JsCXN+x>c=XStrGle8L@>BUK2 z897oZ!akS)S_xzcaP=og&GnHq35Jo>`?xIiG6u^)5D0VuxGRP`QP2cQQl+I#lBSy^ zWi6%!>b)d+HcgASJARUgI!hAeU^Xs@#sP*Xyy!f`R_o!K55j|?Q3RBGjM}v;gh5~T z(eN7(r-a!9lv~OOwn30O!hydh{WStQ-yos~tGrzi-U~<gA~<p5Kz1Ytl68P&U9oh+ z<F2LHa&zO;AB?Otz;}+vY|mEyi*i!PQdY*Idy#A5;W*F6Kq(*&GOXp+VQ$8up2#fU z&!q_Esq2<WfaBRpb1hMIZ4!|ynVDK00kGBw6s*k6PUJKzmkc`RhnE&iGbG>XhdS2R zvho%vb2cXlnL8lqoPvZ$XL)*ayRM*8Q@k8gv+t=7_p0~dn8LH_CWwfuCDcDa@ga&e z6ld&JSXSgpUgai9K7yI8E%80-(b?^Hz}r7HRsmRFcvToXR^G<H35uYCoe)Fj<bWR2 z-<W_*4xC@euk3gFfE?53kkbJd{Ar->&LOio6&zQ7WNQTSbZ6VPl-N41mQzc3apSxi zEtc;L<+f;e@=AyZ^DYdHSdAdKZ+p)v1PLMj#Gdip7N3Z!_h28oBkSL7SZ*$(*4@C@ znbqZFGeU3Z)?@6=?B2UjyYfi;8$}IGOK*a5>SyMzuMgItj;=y)wuB8I74`7Et`V~( zjXOZxmUNc_v(NVqMzjKgz$3l&+H-?99_ziH-T7a!MRX@xQg~clL$QkDItqjY*q@|T zRktroL;t{b&^XY{6f($mlK*ZZH^2nlXk=-Zawnhx?Rn%CV~jHDz3MF~wAqQejgt;D zg|xCQs`0T$7WvSk(|G1+0_AV7!w_sQvS$|{v0QIyzR~u<P3-&{*}F=6bk$ky{R=D( B-Ddy* literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/__pycache__/record_id_iterator.cpython-38.pyc b/sziszapangma/integration/__pycache__/record_id_iterator.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e2264bf3624c27c85f232c56c84110e81319cb1 GIT binary patch literal 1307 zcmaJ=&2AGh5VpO4X-V5q+DdRja0-XyhJ;k1LP4k!>Y*V*TB$Pb+JuGu3AQU})C+1K zg99A<47`c2oOlIJ%y>zgv;|8Zdpx`2Z|3_vTv}QrFn)gd@n)aFiq7l^!P$aoUV@QC z(t?yUr6o%lMV}Qx8K$A%2SrrIX-vrrB10LS5*aCa5~K<Eu}r{ER04iWwg#lT@C!1M 
zE_1<?r`v8>92l!bW=mz?)v`-nFi;kjwdbG5O6R4j?4BCeCbzXd%JA3OaSrnyoGOx1 zNm8Z)6{_eqfc{D)lfh|tLep4=aCzi9dn&87+>tw0X#q*ksEf6-hq}t>OlsWY+=glJ z8c7D^jP9O+aiLMgTa6v`=J$SoXp}bnQs^wNJ_)Pq1JQq`%m-VK`>%(CSA8?d%|whv zH7dotlUG)a07_n0eeJ>VT=M)cyx#c8t&EiAq9}M1HVcReSR;f@?q05^cj@EZZsJ;; zSE5v$yG72+S{@d-Z*%_fP!vr>2Uraj>H=UyFQa>2VY6GpvlvVZrf*7%GU`vRFoWr~ z@Uw?trpycpawF*`tdapcfecT>G*YojfUgKnP%z<CD(=n&_m}H*8(f`B1UfU-2xw>h z+Cb(z254~L9;c@l%RHvXgnN=Twzo_58ZJz?VbSYoF1mq@z^3L^ZaKdRUPA|+-2XS= z-t>E&zz<i%Hjjq9C*SCIkX(a?c{FZyo?Fm~(KoI_^W-7?DXItmK{cP!>~<IQ3a0F! z!P0aD#;tHJs;ahLr-pOAfg#k3UPbdyz1)H*1`P^lk+#@!pz#bYa)CI=G#2jF=VE&_ y&Z|+Q!bh_&rZ@9QdKX;xt|i(7_Nw$3&G6<KY{=Vep21^$;|BHIp>5U<JMkZr*);$F literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/__pycache__/record_path_provider.cpython-38.pyc b/sziszapangma/integration/__pycache__/record_path_provider.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab1556917ecaea9eca94ca0a6b30e7ecb68831dd GIT binary patch literal 587 zcmZuuJ5B>J5cOvhHW5LHmWG06o9qP;0)j*rMT7`x8{@1^vSNRX9T4eIxB?v);0)Zv zEfrUw!ro0F2u2!v=Gl7hc|7WL+Q`QH<J;A<+g-CBo^6h7s(rf%Byfr{9N~;a1fw$~ zh@ekMP>B~bqQV=Zpz&dEqJS8Be9|*pe5^F^SZ7jC3lY1s_4wxBv#3N77APW;N>6$M zzmSUGTi~csbDN!W85bZ1Tu%p3+$BPSm1;qSvP76BNIio*CTl9L#x{F4l~WYR8pZur zXVX(My@rNGVPmy?vT&p%s4(LYC;2_sP>gwaD%FiH%J6bDya?4iQFC7Me3J3sO_FOl z0j`rG50}PD>z9>F!`<@1_!Fs_L!Gdy%#!);-^X=V_xeH8_>AQ|lZ=@*V_6|)sjD{` zyPfg0J^^a^47D={_pv`us;id6+yq=!qwq1o719qpL-{xcXQra5;Qttme^9F*x~W#~ Gx9Jxn>5H`h literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/asr_processor.py b/sziszapangma/integration/asr_processor.py new file mode 100644 index 0000000..580695a --- /dev/null +++ b/sziszapangma/integration/asr_processor.py @@ -0,0 +1,30 @@ +from abc import ABC, abstractmethod +from typing import List, Dict + +import requests + + +class AsrProcessor(ABC): + + @abstractmethod + def call_recognise(self, file_path: str) -> List[Dict[str, any]]: + """ + Currently most important is field `transcript` with list of 
transcript + words. + """ + pass + + +class AsrWebClient(AsrProcessor): + _url: str + + def __init__(self, url: str): + super(AsrWebClient, self).__init__() + self._url = url + + def call_recognise(self, file_path: str) -> List[Dict[str, any]]: + files = {'file': open(file_path, 'rb')} + res = requests.post(self._url, files=files) + json_response = res.json() + print(json_response) + return json_response diff --git a/sziszapangma/integration/experiment_manager.py b/sziszapangma/integration/experiment_manager.py new file mode 100644 index 0000000..b28ad39 --- /dev/null +++ b/sziszapangma/integration/experiment_manager.py @@ -0,0 +1,30 @@ +from typing import List + +from sziszapangma.integration.repository.experiment_repository import \ + ExperimentRepository +from .record_id_iterator import RecordIdIterator +from .task.processing_task import ProcessingTask + + +class ExperimentManager: + _experiment_repository: ExperimentRepository + _record_id_iterator: RecordIdIterator + _processing_tasks: List[ProcessingTask] + + def __init__( + self, + experiment_repository: ExperimentRepository, + record_id_iterator: RecordIdIterator, + processing_tasks: List[ProcessingTask] + ): + self._experiment_repository = experiment_repository + self._record_id_iterator = record_id_iterator + self._processing_tasks = processing_tasks + + def process(self): + self._experiment_repository.initialise() + for processing_task in self._processing_tasks: + processing_task.process( + self._record_id_iterator, + self._experiment_repository + ) diff --git a/sziszapangma/integration/gold_transcript_processor.py b/sziszapangma/integration/gold_transcript_processor.py new file mode 100644 index 0000000..a632254 --- /dev/null +++ b/sziszapangma/integration/gold_transcript_processor.py @@ -0,0 +1,13 @@ +from abc import ABC, abstractmethod +from typing import List, Dict + + +class GoldTranscriptProcessor(ABC): + """""" + + @abstractmethod + def get_gold_transcript(self, record_id: str) -> List[Dict[str, 
any]]: + """ + Currently the most important dict key is `word` – original transcript + word. + """ diff --git a/sziszapangma/integration/id_generator.py b/sziszapangma/integration/id_generator.py new file mode 100644 index 0000000..a73b695 --- /dev/null +++ b/sziszapangma/integration/id_generator.py @@ -0,0 +1,12 @@ +"""Class for calculate id generator by record file path.""" +from abc import ABC, abstractmethod + + +class IdGenerator(ABC): + """Class for calculate id generator by record file path.""" + + @abstractmethod + def get_id(self, record_file_path: str) -> str: + """Method to calculate if by path. + + Id should be unique and repeatable.""" diff --git a/sziszapangma/integration/metrics_calculator.py b/sziszapangma/integration/metrics_calculator.py new file mode 100644 index 0000000..0d145c0 --- /dev/null +++ b/sziszapangma/integration/metrics_calculator.py @@ -0,0 +1,34 @@ +"""Main calculator of metrics.""" +from typing import List, Dict + + +class MetricsCalculator: + """Main calculator of metrics.""" + + _calculate_classic_wer: bool + + def __init__(self, calculate_classic_wer): + self._calculate_classic_wer = calculate_classic_wer + + @staticmethod + def _run_wer_calculations( + gold_transcript: List[Dict[str, any]], + asr_result: List[str] + ) -> float: + return simple_calculate_wer( + reference=[it[_WORD] for it in gold_transcript], + hypothesis=[it for it in asr_result], + ) + + def calculate_metrics( + self, + gold_transcript: List[Dict[str, any]], + asr_result: List[str] + ) -> Dict[str, any]: + """Calculate all metrics for data sample.""" + metrics = dict() + if self._calculate_classic_wer: + metrics[_CLASSIC_WER] = MetricsCalculator._run_wer_calculations( + gold_transcript, asr_result) + # TODO: put additional metrics here + return metrics diff --git a/sziszapangma/integration/path_filter.py b/sziszapangma/integration/path_filter.py new file mode 100644 index 0000000..aa9db3a --- /dev/null +++ b/sziszapangma/integration/path_filter.py @@ -0,0 
+1,47 @@ +from abc import ABC, abstractmethod +from pathlib import Path +from typing import List, Optional + + +class PathFilter(ABC): + """ + Abstract class of PathFilter. + + Filter provide all audio files to preprocess by ASR and calculate metrics. + """ + + @abstractmethod + def get_list_of_files(self) -> List[str]: + """Returns list of audio files.""" + + +class ExtensionPathFilter(PathFilter): + """ + Implementation of PathFilter which find all files with specified extension. + """ + _extension: str + _root_directory: str + _files_limit: Optional[int] + + def __init__( + self, + root_directory: str, + extension: str, + files_limit: Optional[int] = None + ): + """Constructor of class.""" + self._extension = extension + self._files_limit = files_limit + self._root_directory = root_directory + + def get_list_of_files(self) -> List[str]: + """ + Implementation of searching files with extension. + """ + all_files = [ + str(it) + for it in Path(self._root_directory).glob(f'**/*.{self._extension}') + ] + return all_files \ + if self._files_limit is None \ + else all_files[:self._files_limit] diff --git a/sziszapangma/integration/record_id_iterator.py b/sziszapangma/integration/record_id_iterator.py new file mode 100644 index 0000000..f62ad1e --- /dev/null +++ b/sziszapangma/integration/record_id_iterator.py @@ -0,0 +1,21 @@ +from abc import ABC, abstractmethod +from typing import Set + +from sziszapangma.integration.repository.experiment_repository import \ + ExperimentRepository + + +class RecordIdIterator(ABC): + @abstractmethod + def get_all_records(self) -> Set[str]: + pass + + +class RepositoryRecordIdIterator(RecordIdIterator): + _experiment_repository: ExperimentRepository + + def __init__(self, experiment_repository: ExperimentRepository): + self._experiment_repository = experiment_repository + + def get_all_records(self) -> Set[str]: + return self._experiment_repository.get_all_record_ids() diff --git a/sziszapangma/integration/record_path_provider.py 
b/sziszapangma/integration/record_path_provider.py new file mode 100644 index 0000000..fd3bca1 --- /dev/null +++ b/sziszapangma/integration/record_path_provider.py @@ -0,0 +1,7 @@ +from abc import ABC, abstractmethod + + +class RecordPathProvider(ABC): + @abstractmethod + def get_path(self, record_id: str) -> str: + pass diff --git a/sziszapangma/integration/repository/__init__.py b/sziszapangma/integration/repository/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sziszapangma/integration/repository/__pycache__/__init__.cpython-38.pyc b/sziszapangma/integration/repository/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d82948fab5ca19a68216ed42875796cfe0a80fe8 GIT binary patch literal 185 zcmWIL<>g`k0^#>F6F~H15P=LBfgA@QE@lA|DGb33nv8xc8Hzx{2;x_qerR!OQL%n* zVo`EtUU_0kQGQaQzDsIxc1eDLet2kbh<<TZW^q+wL1JEdZXzltGp{5yy(qCHGe1wi qD77HJII|?bs8T;ZJ~J<~BtBlRpz;=nO>TZlX-=vg$llLD%m4tVs4_YL literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/repository/__pycache__/experiment_repository.cpython-38.pyc b/sziszapangma/integration/repository/__pycache__/experiment_repository.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d7e187bae3886fd9ea14a4b3f7d734667994e846 GIT binary patch literal 2350 zcmb_eUvJ|?5ck@S^QTRk_Hx%231J;bV04#AJRyV-a3FZ0NR$dGFJ|N2rdItYx4S7M zuh);!FVJ`1_$GekiLY=vBxcrjoaP*q2R8C(cRaKE`_0UbbJ6W~3>c^1|M}}p!!Z8B z#rm*e@g+R+QxMD;8;meBBC$C(V{2>?Tw9SHH^z<F89T)I&R{lc+#0OG%~#gA$((b; zYrg*MCq7GMs8X?TRqDnxp;PV(F6WVwZhGlbDO~>hjEgYliE?Km4Y-tHG9CU6CmWub zSzmqqBJ0tKRDuR7=ISbCpl<y*Q(>CWD6`JF3eb3c0I8h7A28v@m;ecrk+H>XW-{y6 zctyqyZgU5IO=jO&tihaH8?>!L+h$GBwpd$hJGqv1?o77F_O))Wu-xbEQ=kvlUDnfY zy7-12+!5Aihg#DE%>j47?g+Gx*g$Lh544Zj6RkaD#}K@eZ1C+WFjW`@0YU+aa^M`F z7w||-93$XvF5Mq=tT66j5~`3!;SytCCS-VaZ`_m2mOQ!?E%4Lg_4VjNav?`C6+xK% zMwLh>bo33E*D9Tjez`dRc_f#iT+$g$rZIiE2ouGpf?{Sy)k&i&!+xb5&K6mxL?(F~ z;hP`~bNOB!^I>J{Sy@}~AQj9HS=NQBfeKO!KcO+t9KqFGB!Pu{6$9sWF?wFaD7fOm 
zwRF`L)itPhgQ!8Y@DQlbP%1e*(-FG21YRD|MbSgC3ZB@5Wp=Q_ll!4z$?St#W*ZKk zRc)U2RxfX8H0J?kxEwgIqxH0)HJ>r6xWsF<L8;*4*_&LBV9QNH$N4^fxs89B3jdlf z4#74j)?7ZTCAP)UGb@5S<*6hjc%0t+7a`HF!_Roc6@Lr=J1hLo7W`Wrcf$OBk%YV) z*OCCyD+;T~LM`aQl&iN8KV3uY)keI|%OhDo=1PQtT<sX>s|MTQ!1=>EGN6T!yxt*M zy566F(Pm|w;Bn>Fd0Y2Y>a)v_!MLpX=Uai+d3r<~fb&i*?xQ@!62ggBUb-}jT<C8y zoJ82_^yLj%d2<E!m`w^tNCEtyGZhU*4%fhcxE1_EpZ@`GalJym8{W$Nlb!KaK4;!h z*79}7^!=>k`*F(V5r_G{@4uYW2zx24n!eA{!1o3A8nF)|v*1&l*^@Ml1a^STqRB!$ z!q)>7_~{joQ5>UiP@JGRMN#!iJ;R16njp|jB9RucO_OvilME~qB>dj=T6*?RubFF{ zH!vgp+=giXGp{Ay1$RI*2?X|gjW~1EVg`RFa#R`%o>m7VYki)_;0wG85^ZhLhg1Fq DwhvTC literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/repository/__pycache__/file_experiment_repository.cpython-38.pyc b/sziszapangma/integration/repository/__pycache__/file_experiment_repository.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dcc82e13461b3f7816892ee5636b8d7f5dbff9ef GIT binary patch literal 3874 zcmai0U2hx56`h&=;D;zmp=DZ9oJ6+MG(=meX^}Q<g(7VgH4lZ_#x_#0Xtx{A$XS`> zu4h(tBv4N>kf)+QATB^X`Zx5S%xj+V-lrl!bM7p)BxNyOV(;$E&J5?CIrrTAaCy1T zaBY0}@4sy}8T*nNvzL#?`?$^TU?h{g&$`8vcRAXkF9u%M8~9y+5Of1xd<(l3eDnI% zL9JWk?6*w%GI+*hplat{w=Vg8)(OADitM1{+3N4JJV}Rf-}-w=FSp)(m3O#Z`{1)o z>13dW`9~^CO_Hbjq(_rxuVB(J|G|r5w5hIm$dspi$#<Cw@DzB4czU}`-o=+d2G7Ok z?2yY)R-P?mvsGDp#=2E}qp$Vno?McRXMVTldP2F3o~B$uPu=w_sc?h2ajWtb^fuHo z`olwk-qv$2Z^$+DHfMToqPHz?p?3u%SC3wi>(7PUkeldf(VFsAv~SDT(7vJ8ls8%F zb@>K*ZqD><$vfz2f6jc?y#*le+4cRTudYMO-iQV&k7b<4(ILh}S)6}rn^C4!o}WaA zsUF0+ZA87e_o<5h9`}#cen;5WHItU-=JdRoGoW{Yo1SJ1CTHht%r6*cBR*o07ztQU zGFV@-`|Jfj=ma(}$2ebGjiO|j<WZz6n5wH}YA_&%Z&d$K6G8P7%nNR7QPk_l#zfHz z_TOyhp;6lG3}W3&hEL;Mr=P?-d&)e{(`@JChxZ@snA60Z##uakG>GRL$uL)sbWFtC z(Ivfh97?JrUNrr*ot<bz2wTN%XqSxFd5GWXokj3(U%J-gG=6dsOyuBiaGUpGa(2YW zfau7(;AS`X=}q!;@z2|5JP*#<nHceLm{-Qtk+|R&oU@PFyX=qbBlahR8yFxCEPvXm zS&<s+yCV)0L!LNDt)FIUXnjes6+G2RuB?A#(xDCH@gOrDPp@DXmLG!LgABZ&k-CMx z!rY**bvmRZ?k7gI(VigC<V`~0-a-VXvv8g@t$L}JQ6eqqgk5R;*)Sd`8-kF>dN@Zb 
zk{-g!;q-mHjM$i6urm*&@*Y?88p#N#h4uzR`%P*~ftxV#4)z~FL~cfpRDQ{QZIq12 z^B2BzS-*i*^%jh6P<PZz!HVHazOR&<Ri7m$H@7jvG+-F7;^*@Q_r>Yk3qiOt9e_{& zxLJO5;nyRtFE7%z5`mwfF+sow0zN#xo%4j9<9!6B0}gm@01>=1@X05p5Eh!Yia))J zhu(kzD#sZ_LCvc}C%A%&ev4+jN#+V0ZcXNwn}?J{kJZU{FxI$?5)v{E{7!$k5SImu zyu<>+#IfLQyp$|}!~h3zA@o+kff3ZVIQ^;DqVSsbrSFls;>a4&<Ej_$U|>P;!1*9f zf3VPp>*ja3zJn#^qMih@jQaP`K$PQj9#{2`a){)Sa1q}|#Cv0$<2blr7bKoDu}(84 z(XKn86G^>}l?vf}6>m1^r?G?xXcfzjD30?r8Zz&;17#QerI0oGCO_R?=;gIh1UHEr zJ#y{xqN6I^+(W}<0CJY=Rmq1!Le6q0c2Yp%HWAKnI1i3OI!x*P0}}U=!1)JX(ZYHS z20<MrLy0i$R0<4S%9sMNNh1jjO>x?pggWh1Z<DzWkMH8cuW?WC2qA0DZ^h%<_z`v$ znNue{Q&OrR!Yo3R<>+>V)_9?835j<Z)1e_Q*dF`KAI|*T!<PVI9Z)Gl0gmuSh>wdA zchACU-v|ZCC>ZfCAc3Rs3-&J*J5Syl36LouM4z<tU>s5capGtNM3`@PD%OXj=Z+B8 zM+BQfA;LIA(vVhU($@A+KkRGhG!Um$p?6@q%QP`c4_&%2wh{OHrJi&v$T9;6eYfUb zD5G?j3UEj#%oRe~b&8`)@U$|={XELkNFIKOzcO_042g!Kk?fji{;wsVE$=L3?4tD$ z6{YyTBs8Vi76Ce!ZZ5?Z>BgOfI|srORSp}2#q)CO&&aKprI{v*T`NJir0OeZ@l_Ri zK%P4-g~#MM7`a$W7c4TTK~#m5E>azn)7DX_5uwgOj7g@jo&k;T(uRKwgE)7mKCdCR zMG(`Wms#EX8KaFOJ3sy2w;s*>yt!Rc)}z}62#J&n5NILo6oDcp^xs0TAp4i$&^#mE zM36YTzxAX&Dn0V`FR;$kBkRRdPSTyNPk>L9(Lcp7>w%!Cqfnq+Woha1bjn{bV^Rwg z{9L7T`v1;PSLpEl&N`LBR0&0qtw+%ymB*;oBb4URljFEQ`Qjl^?Mf8I!(p1c(iMfU z^*>3|K0F#0^{IZHTy+VL(xs;3&>b>#^aTcvi|<mCstQeP*FPuYRF*K;zk;#LlXAPK zaLmyR8blsVb)gPns5m$DXLZNas%VGRu(c#OeqT45^bEglEVaXS*lMg+TArKLSt?Ze zCMMc2Kgp8eqhhyiqqLQ7h|*ogM$=xqKbx)Qw`U6Y?U^e5+S*+cpWL6$_~7X5YTN2= T0r)Q|KwQXCz^#d@X!zd%{86w$ literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/repository/__pycache__/mongo_experiment_repository.cpython-38.pyc b/sziszapangma/integration/repository/__pycache__/mongo_experiment_repository.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..32f957c9503ef7ba409a19f25cb4578e649b51e8 GIT binary patch literal 3189 zcma)8-EJF26rP#gwbzdAq;AqQ{X>LOSSX3g1yHG=qNWt2QY6xVv=SP&o=LQ1z3a?u zps}2*M(RuCBDu^n@B%!;+;YV$AVfK5*8hzw*4pFw+cW2U=gfCzt6r}Xcvimt{pU|B zg#3w}@yCVEeR%a#5P}GrkTwlzn}w|Hgbu|qmbh&%^laNnO6_u3wrw}@+m)~aZ7-~f 
zQaJaBh_djHi120Qg%j39Ws?Nezd$ny9OHl2$>S`I65~FOb#9zZnFrL&eVe5_*@GmO zDR%vhD36{+T0(pA;q#7EaT{j7mz_+<d8Q7G!((B*U!!DSwovu>gYMv?{{SILNCgSm z7Lks01-(b42mcKI9q9?@n7t(1RJg)Bs)Z#{66GTjmSK%2ec>NFq9UqCZdkD+RWS!6 zHBpC=IUEs<V<zUs0`zNkMO{``z;#%4K{R2s0c$QEQ?V$Ppf@iU#3kr0i_6fvAe+*I z)i|;uuE5B}v5~9d8jLKyBrXY;0K)5L#e#G$9ChAC0C~$IXc}Df0pw}amH{)hiTV1( zbjz`D6$0A~Ui{ZrK;+~FIiaV7k{<05&Uy@_BgiH>qfY|YczPe=V|>ozG|o9!bMTR> zfq-zkS|;17jw1~aXVg?U7#V5J`5F0pZ+%NkrPte$YQ^bqQLeIQ(fWqeyLr}G|8Z;c z>ALR5x*K((bf+CncH%UbJ1W9-T35r&ST7uv!=&M(xwX!Lx&UV2t4kmVt<n<huFeE@ zZQxal;`083ZG0cVt1;UU4K)u9wFKgfs^*|=2h=6#nkwQGM@g(D_BEnRX!qvq%|{;q zlocggnG%@y^^VE_^ZdX@*OZjZ_f<L(Mh|qOW!A(`pwS~I<dhsbJqLq%=t3CX-LkqS z=qBkoJ+?wlSnl*_Oi$sw?82c%rvmomD~NHO@5p?ZQ)UT#$ayPE64^rdtr^<*+i@y* zmP*6E*$BJ>gcaCZVICQOkaXdElq6%H>jET%u0U#j=%0KJJ89IR5mG{Z`*WF3ORpGZ zue!l(_@=Z3jv5)$7)}-(GT;o{1lyP$7P}9if!IOvvr`6#z#fJXUb)+IVs=WA4EsqC zcxGA1MCOtYciGMq-<1dI8mPlefV9sMD)kl)ct3pkc<W){PC=$tVR9Or<^2u-`MPRc ztMQN#R--k_2*5+wzuh~t0h*yHU?ZR=k_tItm{hl*GXh4fP6LC9G!BZZ-i9yw0QBJ* z0C46^dv23qm=Wq7_!J{yPa6UE-sIK{>}R{OHtEW(^IXZ@6^Jj!cr4B%m(&e(4uh%Q zMe#n0Nhi!A9-`@tz`cvGc^I}Lt6itNHal$RsewYd(3ZK1TY9)f%Q6h;#|PVQ;4Fx_ z<tzkw+V0D^;Xt{PdOyi|o^i2#7Zw*Z#=0NB+1~R$52(5d8eXGTG}WFxEFwMf3%(T5 zw-gJO4l9gWh80uh+6?;~#s>uZ-fYVzXl=rd2@d!Sv|$-MgaU}{xj}xqc?f0jgq;#( z0_Zn^S?LL|E9MC%{@#tfMi02Pu-P+?mbqynl1P62EQqBp4ulg4VY8>t0b#jddu`^Y zXcpNX>%f%PBh|w)q3<II_T~pgu0|JVlXh><24i5`NWlamm^?@THZlv)$opJ~D7+YY z>Zsc=XT4I_v2|XEK7xry@D4)sKdc%d7F@#12HtBpH&qTdAF-ccyT<w6ev}k53M-(Q z=R8W&EVs7=xN|@la%JxT>N3u|&$2ABr`R}AdZ3WhEZHbbLKUF+5CztQ!YRxQbq57{ z0j06H&$J|jm5)W`A}+x~uP_971=6b^s#w6-qUSr!iVFp-<}Z|hscGq-+FZkJIL)$M z|0JlJQhv~h)1AT;gkIs^rrbGb<7Ll!7%udCYlG@)O%3mk-<#IPu1afT*P(MSNve!H gjhGCrA9HKS$;Q7|ApZqshQ_nHh?R%c9GAMze=Xh<9RL6T literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/repository/experiment_repository.py b/sziszapangma/integration/repository/experiment_repository.py new file mode 100644 index 0000000..6f6d148 --- /dev/null +++ 
b/sziszapangma/integration/repository/experiment_repository.py @@ -0,0 +1,52 @@ +"""Repository to manage results of asr experiment processing.""" +from abc import ABC, abstractmethod +from typing import Optional, Set + + +class ExperimentRepository(ABC): + """Repository to manage results of asr experiment processing.""" + + @abstractmethod + def initialise(self): + """Method to initialize repository.""" + + @abstractmethod + def property_exists(self, record_id: str, property_name: str) -> bool: + """Method checks that property in record exists.""" + + @abstractmethod + def update_property_for_key( + self, + record_id: str, + property_name: str, + property_value: any + ): + """Method updates property in record.""" + + @abstractmethod + def delete_property_for_key( + self, + record_id: str, + property_name: str + ): + """Method removes property in record.""" + + @abstractmethod + def get_property_for_key( + self, + record_id: str, + property_name: str + ) -> Optional[any]: + """Method returns property for record.""" + + @abstractmethod + def get_metrics_result_to_df(self, metrics_property: str): + pass + + @abstractmethod + def get_all_record_ids(self) -> Set[str]: + """Methods returns all possible record ids.""" + + @abstractmethod + def get_all_properties(self) -> Set[str]: + """Methods returns all possible properties.""" diff --git a/sziszapangma/integration/repository/file_experiment_repository.py b/sziszapangma/integration/repository/file_experiment_repository.py new file mode 100644 index 0000000..520390e --- /dev/null +++ b/sziszapangma/integration/repository/file_experiment_repository.py @@ -0,0 +1,100 @@ +import json +import os +from typing import Optional, Dict, Set + +import pandas as pd + +from sziszapangma.integration.repository.experiment_repository import \ + ExperimentRepository + + +class FileExperimentRepository(ExperimentRepository): + _metadata_file_path: str + _pretty_format: bool + _cache_value: any + + def __init__(self, metadata_file_path: 
str, pretty_format: bool = False): + super().__init__() + self._metadata_file_path = metadata_file_path + self._pretty_format = pretty_format + self._cache_value = None + + def initialise(self): + if not os.path.isfile(self._metadata_file_path): + with open(self._metadata_file_path, 'w') as f: + empty_dict = dict() + f.write(json.dumps(empty_dict)) + + def property_exists(self, record_id: str, property_name: str) -> bool: + json_content = self._get_file_parsed_json() + if record_id not in json_content: + return False + elif property_name not in json_content[record_id]: + return False + else: + return True + + def update_property_for_key( + self, + record_id: str, + property_name: str, + property_value: any + ): + json_content = self._get_file_parsed_json() + if record_id not in json_content: + json_content[record_id] = dict({}) + json_content[record_id][property_name] = property_value + self._update_metadata_file(json_content) + + def get_property_for_key( + self, + record_id: str, + property_name: str + ) -> Optional[any]: + json_content = self._get_file_parsed_json() + if self.property_exists(record_id, property_name): + return json_content[record_id][property_name] + else: + return None + + def _get_file_parsed_json(self) -> Dict[str, any]: + if self._cache_value is None: + with open(self._metadata_file_path, 'r') as f: + self._cache_value = json.loads(f.read()) + return self._cache_value + + def _update_metadata_file(self, json_content: Dict[str, any]): + self._cache_value = json_content + indent = 4 if self._pretty_format else None + with open(self._metadata_file_path, 'w') as f: + f.write(json.dumps(json_content, indent=indent)) + + def get_metrics_result_to_df(self, metrics_property: str) -> pd.DataFrame: + list_of_dicts = list() + all_metadata = self._get_file_parsed_json() + for item_id in all_metadata.keys(): + item_dict = dict() + item_dict['id'] = item_id + for metric_keys in all_metadata[item_id][metrics_property].keys(): + 
item_dict[f'{metrics_property}.{metric_keys}'] = \ + all_metadata[item_id][metrics_property][metric_keys] + list_of_dicts.append(item_dict) + return pd.DataFrame(list_of_dicts) + + def delete_property_for_key(self, record_id: str, property_name: str): + json_content = self._get_file_parsed_json() + if record_id not in json_content: + json_content[record_id] = dict({}) + json_content[record_id].pop(property_name) + self._update_metadata_file(json_content) + + def get_all_record_ids(self) -> Set[str]: + return set(self._get_file_parsed_json().keys()) + + def get_all_properties(self) -> Set[str]: + properties = set() + all_metadata = self._get_file_parsed_json() + for key in all_metadata.keys(): + for prop in all_metadata[key].keys(): + properties.add(prop) + return properties diff --git a/sziszapangma/integration/repository/mongo_experiment_repository.py b/sziszapangma/integration/repository/mongo_experiment_repository.py new file mode 100644 index 0000000..73f3f6f --- /dev/null +++ b/sziszapangma/integration/repository/mongo_experiment_repository.py @@ -0,0 +1,72 @@ +from typing import Optional, List, Set + +from pymongo import MongoClient +from pymongo.database import Database + +from sziszapangma.integration.repository.experiment_repository import \ + ExperimentRepository + +ID = '_id' +VALUE = 'value' + + +class MongoExperimentRepository(ExperimentRepository): + _mongo_client: MongoClient + _database_name: str + + def __init__(self, mongo_client: MongoClient, database_name: str): + super().__init__() + self._mongo_client = mongo_client + self._database_name = database_name + + def initialise(self): + return + + def property_exists(self, record_id: str, property_name: str) -> bool: + database = self._get_database() + all_collections = database.list_collection_names() + if property_name not in all_collections: + return False + else: + return database[property_name].find_one( + {ID: record_id}) is not None + + def update_property_for_key(self, record_id: str, 
property_name: str, + property_value: any): + self.delete_property_for_key(record_id, property_name) + self._get_database()[property_name].insert_one({ + ID: record_id, + VALUE: property_value + }) + + def delete_property_for_key(self, record_id: str, property_name: str): + if self.property_exists(record_id, property_name): + self._get_database()[property_name].delete_one({ID: record_id}) + + def get_property_for_key( + self, + record_id: str, + property_name: str + ) -> Optional[any]: + if self.property_exists(record_id, property_name): + return self._get_database()[property_name].find_one( + {ID: record_id})[VALUE] + else: + return None + + def get_metrics_result_to_df(self, metrics_property: str): + # TODO: implement + pass + + def _get_database(self) -> Database: + return self._mongo_client[self._database_name] + + def get_all_record_ids(self) -> Set[str]: + records = set() + for collection_name in self.get_all_properties(): + for record in self._get_database()[collection_name]: + records.add(record[ID]) + return records + + def get_all_properties(self) -> Set[str]: + return set(self._get_database().list_collection_names()) diff --git a/sziszapangma/integration/task/__init__.py b/sziszapangma/integration/task/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sziszapangma/integration/task/__pycache__/__init__.cpython-38.pyc b/sziszapangma/integration/task/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f04226f780906728ded8b6dddea48b4094670f9e GIT binary patch literal 179 zcmWIL<>g`kf?G>wCV=S2AOaaM0yz#qT+9L_QW%06G#UL?G8BP?5yY<u{m|mnqGJ8r z#G>TPyz<17qWq*peV5eY?2`Nf{qWG>5dGq+%;KuVg2cS^+(cAPW?o5ZdQoCYW`3T2 kNn&xfetdjpUS>&ryk0@&Ee@O9{FKt1R6CH3pMjVG0Jdr{0{{R3 literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/task/__pycache__/asr_task.cpython-38.pyc b/sziszapangma/integration/task/__pycache__/asr_task.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..c984f35f66feeb3525f85bbbeffc0f0f683a234b GIT binary patch literal 1752 zcmaJ>&u`l{6eg)3mL1tivt${HV#r|_ro-&>fMQtD4!s3M(_n)xoGQ|;YFScAxmyhN z)B*+eH{=i>^EdQA;dQ6ob}lev?@@N+Ht8tvk?)DWzK`GINx$DCQ2zPz-ETvmkiXE` zEG{@dLDdhy2qI`sW;CW0`z&WOCw6S#$=#V3d$#Z9{;U&sD0xVPC;T@=__Fibi38C& zCQ<M=tVAMe!h2dhQe`T&E*1E@k7Zga@hGXELHK1RBu4js{-csAo5`YnEUQvyb*WAv zK3Gl7ipi5iPg7iUvj7-d^cWgRVk$_?PKb1*E9hM!J!l!UKD3T)9f-j}g?r|`C1Yxj z9>xL0x)AN0IU*3<H*Va+i0GX$u_Z$Ax84$$#34ZIn+{;Y<CuM}EE-+`9A2&Pm;q0; zLbpY2cGo^&tWSB7%w*)5ZVgLV$L!s>O#kCmZmry_Ma)3S7xPR>KCeVl%XCc#a)N!l zR|l&8224#}lXKe8##=H<8rBfb8V9V?xL{pDj>!c*f|GfA4x5;O^Q_2f&Q%Ads{l<G zOynC^%Y3X*RB8)Nh^CKb0Oo?4F6U{UXwCTrdH?I^L`$VdvqYs?@q1FM@@X=9Aoa8^ ztI;nf$4^Fjk?BQJCB<ZxeC%XJEhj3ev$7Z=V<V&iJHzT!-Gbe4TaAY!!GQYkEq3m; z_cL4rh#b>hQJe|0FF%M4?r*I&<11OuRgofL>w*Mgy$@A?4W?n|1SllW>5{#28b^?6 zx4a{0<4k?f20JqSS|x=}RaW7VBWCs{vPLnq_cksSpUTsgNLTH`n(6?}S772Uf_s_d zbE)<*Xw8?<GuwKaRUBuxKyE`&+pK0TWyHO7+2UZ0`;)N|yV~L!JiJ>A?>p!KFsqRz z1#s+~S*W0q=kB@JI7_;;@4zup+b}rttuRbr0YV0tgI0$(qqPDugEYx=i(yh^T1M9> z*coTJ+{~*_nb=kHf@^pYVAv|h4&gL<2Ml2$1v;SmlwGoa7q{1xT%QDq3<}3&<lOW) zpOs<`OL7x({$ifw?H`45Fx#9bMN!sPh9GLiFzV=98TV;f=2nObm7?yTImF-nRSu*Q z5c3TfU*HhDRfS24($$AxdNjb#WdOi_?|1sp5_TE-&|ccnM^V>&yO}M+jRYB@d_KH8 z?IFMa?BJUoofhs7|1)QuFT)$N$Z}vlzh|4c?aK&PXcJj^0N#Sz;OI*E!QJ*|exyhg LvK3PJSm^u*<`mxZ literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/task/__pycache__/classic_wer_metric_task.cpython-38.pyc b/sziszapangma/integration/task/__pycache__/classic_wer_metric_task.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..afb1c1a779039fb604f524ec832bd68c10553e17 GIT binary patch literal 2835 zcma)8NpBoQ6t1pbJTo3|Nt|FHVSo^rD2#LAfUwMB2ZR(%6vwjE5-K{~WvAnw=}A?O zf=82+E#;I0_Z;G*FC6$2-1q@?<-}izvhclb&*I5|=vKXcwY`4ttGCqF%uI#h`R&g? 
ze!dUxFY1gw4mw|<>02NXi>!n-E#GQ#pIg-DiQRO3C+F?NZF;_!^G-6=Ecqqyu3wHk zzw#Z6rlQgzi%PO|VEfZid6U&Ee?uy(bK~5Mb!MFPILzvnIk%PsTF2qGRBJ&Jc9I}V z6^-Bcr7cz5l&$QxY^OTL@IJ<8Z>cnt*wNa(6X-pEIxO~!hf*2mVXC5#{xkY`CmsDe z7|DDqVm{wu(w0tStrTC5uZzB&_ifDC(u<s@?qjxNMQ-FBR{bfAl`uN>)Q(C~`Oxvp zG!j*w@@P7$qF<5IvWl^p9lpet7<xA7Y&3%%v(X$@%uQS|*psiEj~1|U{xNfyzX0UU zjDqxo%v2nb>E^s><{d5CDg{{CzGwwaX)cM~G>Jr}f|d?d+|I`4FNhJ$L^wp<HRTL~ z3n>@>TgXD7)tKDH$?D81`JfXkDLU;a$Yj_*4muU$$?a@3eGMdI2ke>EvwH3koUX@v zOz@r!YWEyaCvy)NzFx#O*|D`zpE9oQz;;s-B5uW*5Q?1KoDpy6r^=X6^fxsPQumCb zWwN8HG%`bEmdG5Dd5~kvltlrR5XbEK?(&wFN-sA96~?WHL8j8X!ScG)ds*6E-rm~0 zv#h(Z?gs6kwc89%cj8ticU6$ZX=|A<ULHMy0yjb9jrP7ei=Qu`={#IZvzC{?-HXFe z8G1pZZ+G1`WhFxdfQTcrC<oyq)F6J0r)1Vqt&s5UKg1I%YiODv=<#O^C&2DoM|_hh zQUzgqZdZ;t(oU+xp4`tdt=no5Yt-9B-XZeVfGmMCb9ygs3))gB_#Osz350RS;>>y7 zy*$yhv2Are{KY8?Tu35uxdq>X8MWY%J?QZx{s<x8bM`#-4My$UO8C`%7e0oWB_z2g z{K%%XhQPBo%*-%~aCCrSZY(TJg&a*u-AS^#r!L~}xd*Cq;LL@>=ZAqZikn&J$DZs* za@`$=vPJ^mCo%@=8P#bCN`r|c@stxUW10Q{B*)cqUO9Gwuft0gI5eW(E9YhR<B5pB zDHDfVgTO+Ccfp-P{*>>0f>~e*zlp4fAHs*$*?|?=InQ(6dBP)iiLmuHLJRBr4+ld| zxQV6-x?PC+srm@1AVVhol|OLciUa{_fGf6tyuE(R4R2gGJkG|UsjMU@F=5(luYN)D zNU+B^>#j~TT9DN!AA?kglh%!uB*pRQRWN;FZ)i;B=NePK$TcpJ#v|Gms$IeKn3AfF zu@$s|c6II}fOn*ltx)=vd;9G)yC-$56#)h3r>y1A41ESF_d~CSsfk{)IMKMzuv}B# zW3FdattuRazwBQ9|LB}@0)m%X9)6SrD2XHNibRf}>)9FVYtP29Tiq*bCk02=f+VS( z)QlPeqlWSj)O65nC$fQb=0vz6sH+fYs$%Wt*PEN)uZis&x9d(3BsI+BM*CAIQpPd< z5G%e!D{S>Va(~8OmpHT-TsuZ`=@8o!fvCMGEL2s6Fcl%1Y1Bcy2`D5Ubb{n$#@3lK zb3z2IR+{BWUZW5ZCdRp&rb(XC6{RV4iO6Ll<noF<$k;(^-?%$Tikd>^D$?7`oD`o> zCJCes!~Z-jp~R{v>CrLj>mU`YM6bgU;`#f0eimQYTXpe$T|G;cgukd(>05b`@7}0a zjhF4Wkw=CK`PW7ZSpzjiHsEZHaCp@k#7e3PJ3l|QWu)9ShUKR*u_)!Zm{0$8e^C|( sx4yhgD!Mo*x-h7Zh!7AX2<2GirKhw~9N8+xQ0@lv91hQ`s63AS59k=%PXGV_ literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/task/__pycache__/embedding_wer_metrics_task.cpython-38.pyc b/sziszapangma/integration/task/__pycache__/embedding_wer_metrics_task.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..fb121b6163c23215d419fc36220f227d69e09cb4 GIT binary patch literal 3078 zcma)8OK;mo5MC}RijpPy5yyekHg($|0sD}P=G4Lsio{M(Aa#N`h5-V_mf@~l+N7xL zQb;VwCmYDAhu(9rd&@8Btv_IIMNj<+E!w0rD@&r~C@2Ze&dxsP^$j=c^(uj<{rj&! z{8l6659|~l2Rh%tr=9`fgj)$ot-wlYKrQUk#7>>SF}$6WQa5l7?<D2a3p~S@l1f?) zs)l!ysk9c<3|~&_X(MO=?*-Gm63p;wFngEqDPB7wye1llb}+~58^mw?38xdEYUf_8 za_y|fQSMv%^6fDCQSi0&vEV#z?L3lUOKoQ|6%t10C&(AJgd7gt4wI;pgn5Q5W?^I_ z+s@Aj=hvRKg^W|t${&h$rec`850e`YWEKgf@Z3=C1)hSH?5U7JT?{USUF|&0B#%&m z;!{5Ds9%5)B(OLM=q3@iaJaS7Zz;4T=-be`&^pkTp)Hwl8|K==6BX{haG#THi<h}~ zR12ygof00eys&wdPaQcy4JEwxg7P|VK))^;Vj5&K+jNmE66o1LhY~&wD`)sD%$$8r z91_fd3+D8tq0zuh--%qtkwVS&0!#Zn)|MIAD&J?VFcta=+sP8na`Z|h<90qazQ9I? zGL!nQ(~D@f!dNu)k)Ah0#?m9H@9Ihp7wMn+-rEnI4V4_b$Hg~azahnwPAmoMw0W2d zeesRAjRuZHe<F_=W5cKJ0LjTAIktLMuXF<W(xW}XXwL>}_Z*;3Uh27r7I1D}?vdR- z@8y*tU*%+jytLN+DP2+>Fs}9(i(7Hdn5@GAdXAk_NJ1hLiT98VBrcL^BzQkL1H><D zM~P%x&Z1-v$ps|yNG>8-0P@n(6~>|@REn{e<nP_(O(mpSPD2^Rt*2owv&Z4`s!)4* z)?VJ)+<3ICy0Pkp?Xb0zhG#o*D;GO5%;T)JjNV!vnG@YT@Bt1r+xv16&c6(w!Yh$V z!*cPZ?rX>P%-3C-5Rc7)fqYw6q=+E<Sj@qF!#rkkn8@0dBJapngf1IA$fNmg!>7=M zJ$g*QiDcJ0p&LYA0ug}G{EjGuGz2f(6Z^*azAfK@IdTcf2S~0A$@{?RX|)%(87}Ex zun9ut0U^|}C~;nQuTRo$Y^6^}r<_ru1e@WGMu`W&6qPuzhdp{i4*+s{&Yml8fz&pN zLUityAW~su5m5dodSXNTLWCZdPY9wnK!Uxl>!aibzk*Gs$FfgXF+8PEoh0`?`4Qa1 z#H~c%>P!998s%frh<bLA;$jdTCZ2UEN<=9A(zuPU;pW$oj9Gh5c3KR9IuUpjqV^4# zrmg}p2DhA7j@>uBLp=*P_>j7<oPQm!`|%`aoZlE+Kf1?<u>U!y3?pk5MiC$mY;JLS z1QE7M4y^+;y>(0hAiy;okhy2)Pys;dj7gym^lWg1)4et*Sz;+nlBLsRS^{$}0d|E; zDoopnXhs%leELvm@h%(inJn9ZGrq4K09@rz6@I2q9Ef!^B~-f-+q(a76&y^PpKBWD z<0h`IBv6^7EN$Pshx_5iE}^9R^(51sb8d4b$v~)X0yEf4;)Ma<9S#-WAeXL>7Gia+ zu#Nk79<i;phX&$wgROmcYi)J)+x5FB97M^`C%SxU9K?ogAw??^W6DS$4qZbnP<i6I z7NQR&^fk5UYmXNBuK&OF^DiKFV`XGaR~bt)-hqH*y2jX(PMDmI*eaKLnz69e%5npj z0KR=RH$aXUuARqOmPkx-1C$aIU0y}<2@-UzL=@9@*xEPx==v#=M<PjJ!%>R7i<1$R z<P?w#@F|3&s^tw{2lO7if6dGT54Nb4&>FRQv`zINvvwU+bo2c-WLn`0UlnUY6W~}h z3)Q|EnSZ!Kxi1VFwe^j+Zy<($Tg8=Vo1l4qCB#f~((i?-lz~5a(b$iNsi*qOGxG~& 
zt~n|*%{LYw<2L=-#P$7JIQ-3I*q0ShOn!j`F-(4n1RYoKsPV0-Gu}!+%5GvFnIxZ~ Nwf<{iizw&V{{aLtPy7G? literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/task/__pycache__/gold_transcript_task.cpython-38.pyc b/sziszapangma/integration/task/__pycache__/gold_transcript_task.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c45e39c7669d1eb90145199e976c60239df6b92 GIT binary patch literal 1697 zcmaJ>&2Aev5GJ{+)k>D_)Nz`hHaQgNL5E5}Zv}#)KvVb@1hI_*ivsLMq?N7rFGH%W z1>IsGFX4lI^cxiDOZ3cZPkn_J`I+HbwyenIg2UmEGs|!0Blp+Wy9`_Bk3WC@tHs#g zG+AsRPQJr#o}f@n@thSr;hc1liy}w@uLpTpw33$B!#pb5Nt?4BrdldGW-8L{*Fh4i z_Acwk|6nB6=kD%Km8(5SOEZG3w$Gp%X=5tDrLE_BRDpVy+LvhlHB*|b+duqXYsd;+ z+UL5iOlB)MLi@&rYgUf;Qgb+>_=^p3`DT8?L9>J_mWX|(105><nCTXd!jA#!kqS=2 zH|&6`P_>SGNgHi3TBDOdwN-o^CLOY<&WTW6wTAQV8y2!;4YBpy)^$*H9``_w5vjD7 zP-Q*G>K?6HLzMPNmT95;q3c+T;V16?&2b*CSh@=1HbB3cWT54wR;ksac}gfRq*Iv# z?B)>)%U-inKI60SOmH?6GbY6>Kp7~u%g*^zJXOm~FuaSU%*xD432k(Q7)8J3LZkBo z=#Yij1#2XFD9*X-NIA+=W28K1|El4>(O`x}3Ztz2Ewxa+NQY0fIkZ(h{BeJGZ)m2O znWlAGj*ImABrB~RLu#|C98!*k%PAn^pgw|4{PQ|?6QN-34Ic67XFJ!(M|B#^vGfDi z0Z%}gDR)O-vQEM!?OX)fPM{o7*5-|L%H;Rh%?65@IAxee_6t7~yA0?Mec`sp+7bnt z`W#ds5A~7H-FX1Fa1Y!e@hOShmtq?=w`mTuT9Qk1z<X#klyxRTE_nEE`o#)hOHO@p z^@ESjPj&R?{~VR8^CJdv8qD|^Kl2xUjwW<>8%nJP<Z0n<6X2!lz<rFr3~vi2r8Ibt zb3MN!L`dV`s0JbLzMX!#0^`S?cpH(Pbc>{PT`7x7O)#!>Jt<#J(!5y#fj@AYQl@2D zS<fcqrVES(7rv;f+|viBtnh%uL)yQ7!Qzq5b0q&&l|G>@R4O2{(G@V1$S#k0911KE ze*fKCL&?PZUWDU&zw>Fo<Gx<x`e1?HgQeWfb5lWc`mb(uyr6N74=FkMmlTMm?wkMd ynZK=rmDKmSa5w+6xpvJ9hozJ>2YO^Y(1l$2ta)0;kDKCr%PC>R=bq?s?7;_u{ohmo literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/task/__pycache__/processing_task.cpython-38.pyc b/sziszapangma/integration/task/__pycache__/processing_task.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..46a1b9f7e3a486498a384e05811cafe91ee187cf GIT binary patch literal 1769 zcmaJ>OK;mo5Z(u0vSnFuQ`@}+%8@8yoeLO6+PElkF^V`w0RqLA)^2RZ6iM%{kVK$* 
zkxvDB=`{(E9Q_;mPxjhVZaoKm%}|mbN)Q5^<;)Iuzxn2IxVE-J;Q8y<KYrc|2>BD0 z>yHP@eR$0-2uUQ($&e;=$Pz|TXE`5wiPwDd+#d!>K*{$+deT29(pPlKlTZc+Bo5!f z2_$BYfAj4F*GZ3z)#<<vl|3nCOkMYp8kAZ-ln<@asVz0wZh!Z@QaT%|!ah<}X)-Xs z7~nbAA0L$a@R}14iX>E$gdGyal_%*gQ9i7!S-Iq|ScA<=0u{neBm=O7GCKFBG-=Ij zip$n3E?4AgV_i|J;MWGfHQ71$lD1rjc)G6hgDwZkn5;N{oSIX&F0?uuWm<_*B~z>1 zim+f8MLJXvck5DdotusW4!{==elvI<z-zi7YE~0LY6=n}J0KTyKV~|F9UXy)x$}(5 zpXe4^Fq;=dufg(O^bd{Fraw&eAS<4wR+mR<e@~fHTUPxa4-X#q%{VjTv`UNPVR}=^ z3agGG4_R6C5qQ7447Q1Cr+VQcA+jQ~LbTyJgQY@fK*w7*dD&TbT>uBVNwUbK`&?b- z7I?c%?lKh=4sEEw<1u`_3Li~?7hL1T`WA|fg|*qz9Z=nd9u)%9lPfWYkAxbemjvUx zOE^FDj``K$F3`%3bn!v5K7mkw1{ci$VMb=C*3D^Fi6^Cou(9H|VP^s9&Jv_0SE!<m zO9n+YZ<HqX8?dkxpqppJ{SF@%>gNr8OQv+fEwjAlQ#z%4<R?4vYUq+J@)CM^W`9Xu z^NC;c+WUe`f|@+_>Y!$l|HdXEXg+v)HA=q@z}KHdb*N8ZC(ysxu#TpblA64U@UOQp zCC;|d3jV=shI?n*n8rBVe}~MCd9UBon3i$7H%}5|toNSw#;d)1_j*V{rDWW4UY;52 z){m7HX`av7GtSpgzRK}j6?zlSX}Y5659e1J>R8_fYuuU@%NBZIu_UWACJhv$N!klq z3#pzvuS&ITQV+ca_b1U&YLv*!<K<+AbI3h7(cA$+{5I{<O&U>;GRpo9yiK|ST0q%< z0UvKJC3yb(e!Pa=gp)!Dm>XhP$`MqpaBU&ZMrl6Z@s7$ehe0FKq9|?Cod%pZ%u>A$ zmuqYvjRUa>dd3HKj=qbcDZ(p(O`@?*aE>(S5oyG_9^-)hpHA4Ul>Oalq2BoJ$C2ad z(LiGtHTStMuaA+PYm;MV!K%C}Rz%~z`otfLQLyvT0Wk{>L4Pb77wyiN`x<M@pj(%< HY1{iB7#Y6% literal 0 HcmV?d00001 diff --git a/sziszapangma/integration/task/asr_task.py b/sziszapangma/integration/task/asr_task.py new file mode 100644 index 0000000..3393121 --- /dev/null +++ b/sziszapangma/integration/task/asr_task.py @@ -0,0 +1,40 @@ +from sziszapangma.integration.asr_processor import AsrProcessor +from sziszapangma.integration.record_path_provider import RecordPathProvider +from sziszapangma.integration.repository.experiment_repository import \ + ExperimentRepository +from sziszapangma.integration.task.processing_task import ProcessingTask + + +class AsrTask(ProcessingTask): + _record_path_provider: RecordPathProvider + _asr_processor: AsrProcessor + _asr_property_name: str + + def __init__( + self, + task_name: str, + record_path_provider: RecordPathProvider, 
+ asr_processor: AsrProcessor, + asr_property_name: str, + require_update: bool + ): + super().__init__(task_name, require_update) + self._record_path_provider = record_path_provider + self._asr_processor = asr_processor + self._asr_property_name = asr_property_name + + def skip_for_record(self, record_id: str, + experiment_repository: ExperimentRepository) -> bool: + asr_value = experiment_repository \ + .get_property_for_key(record_id, self._asr_property_name) + return asr_value is not None and 'transcription' in asr_value + + def run_single_process(self, record_id: str, + experiment_repository: ExperimentRepository): + file_record_path = self._record_path_provider.get_path(record_id) + experiment_repository.update_property_for_key( + record_id, + self._asr_property_name, + self._asr_processor + .call_recognise(file_record_path) + ) diff --git a/sziszapangma/integration/task/classic_wer_metric_task.py b/sziszapangma/integration/task/classic_wer_metric_task.py new file mode 100644 index 0000000..dfd8d26 --- /dev/null +++ b/sziszapangma/integration/task/classic_wer_metric_task.py @@ -0,0 +1,72 @@ +from typing import List, Dict + +from sziszapangma.core.wer.classic_wer_calculator import ClassicWerCalculator +from sziszapangma.integration.repository.experiment_repository import \ + ExperimentRepository +from sziszapangma.integration.task.processing_task import ProcessingTask + +_CLASSIC_WER = 'classic_wer' +_WORD = 'word' + + +class ClassicWerMetricTask(ProcessingTask): + _metrics_property_name: str + _gold_transcript_property_name: str + _classic_wer_calculator: ClassicWerCalculator + + def __init__( + self, + task_name: str, + gold_transcript_property_name: str, + asr_property_name: str, + metrics_property_name: str, + require_update: bool + ): + super().__init__(task_name, require_update) + self._gold_transcript_property_name = gold_transcript_property_name + self._asr_property_name = asr_property_name + self._metrics_property_name = metrics_property_name + 
self._classic_wer_calculator = ClassicWerCalculator() + + def skip_for_record(self, record_id: str, + experiment_repository: ExperimentRepository) -> bool: + return experiment_repository \ + .get_property_for_key(record_id, self._metrics_property_name) + + def run_single_process(self, record_id: str, + experiment_repository: ExperimentRepository): + gold_transcript = experiment_repository \ + .get_property_for_key(record_id, + self._gold_transcript_property_name) + asr_result = experiment_repository \ + .get_property_for_key(record_id, self._asr_property_name) + if 'transcription' in asr_result: + experiment_repository.update_property_for_key( + record_id, + self._metrics_property_name, + self.calculate_metrics( + gold_transcript=gold_transcript, + asr_result=asr_result['transcription'] + ) + ) + + def _run_wer_calculations( + self, + gold_transcript: List[Dict[str, any]], + asr_result: List[str] + ) -> float: + return self._classic_wer_calculator.calculate_wer( + reference=[it[_WORD] for it in gold_transcript], + hypothesis=[it for it in asr_result], + )[0] + + def calculate_metrics( + self, + gold_transcript: List[Dict[str, any]], + asr_result: List[str] + ) -> Dict[str, any]: + """Calculate all metrics for data sample.""" + metrics = dict() + metrics[_CLASSIC_WER] = self._run_wer_calculations( + gold_transcript, asr_result) + return metrics diff --git a/sziszapangma/integration/task/embedding_wer_metrics_task.py b/sziszapangma/integration/task/embedding_wer_metrics_task.py new file mode 100644 index 0000000..67f8b94 --- /dev/null +++ b/sziszapangma/integration/task/embedding_wer_metrics_task.py @@ -0,0 +1,81 @@ +from typing import List, Dict + +from sziszapangma.core.transformer.cached_embedding_transformer import \ + CachedEmbeddingTransformer +from sziszapangma.core.transformer.embedding_transformer import \ + EmbeddingTransformer +from sziszapangma.core.wer.wer_embedding_calculator import \ + WerEmbeddingCalculator +from 
sziszapangma.core.wer.wer_soft_calculator import WerSoftCalculator +from sziszapangma.integration.repository.experiment_repository import \ + ExperimentRepository +from sziszapangma.integration.task.processing_task import ProcessingTask + +_SOFT_WER = 'soft_wer' +_EMBEDDING_WER = 'embedding_wer' +_WORD = 'word' + + +class EmbeddingWerMetricsTask(ProcessingTask): + _metrics_property_name: str + _gold_transcript_property_name: str + _embedding_transformer: CachedEmbeddingTransformer + _wer_embedding_calculator: WerEmbeddingCalculator + _wer_soft_calculator: WerSoftCalculator + + def __init__( + self, + task_name: str, + gold_transcript_property_name: str, + asr_property_name: str, + metrics_property_name: str, + require_update: bool, + embedding_transformer: EmbeddingTransformer + ): + super().__init__(task_name, require_update) + self._gold_transcript_property_name = gold_transcript_property_name + self._asr_property_name = asr_property_name + self._metrics_property_name = metrics_property_name + self._embedding_transformer = \ + CachedEmbeddingTransformer(embedding_transformer) + self._wer_embedding_calculator = \ + WerEmbeddingCalculator(self._embedding_transformer) + self._wer_soft_calculator = \ + WerSoftCalculator(self._embedding_transformer) + + def skip_for_record(self, record_id: str, + experiment_repository: ExperimentRepository) -> bool: + return experiment_repository \ + .get_property_for_key(record_id, self._metrics_property_name) + + def run_single_process(self, record_id: str, + experiment_repository: ExperimentRepository): + gold_transcript = experiment_repository \ + .get_property_for_key(record_id, + self._gold_transcript_property_name) + asr_result = experiment_repository \ + .get_property_for_key(record_id, self._asr_property_name) + if 'transcription' in asr_result: + experiment_repository.update_property_for_key( + record_id, + self._metrics_property_name, + self.calculate_metrics( + gold_transcript=gold_transcript, + 
asr_result=asr_result['transcription'] + ) + ) + self._embedding_transformer.clear() + + def calculate_metrics( + self, + gold_transcript: List[Dict[str, any]], + asr_result: List[str] + ) -> Dict[str, any]: + """Calculate all metrics for data sample.""" + metrics = dict() + reference = [it[_WORD] for it in gold_transcript] + metrics[_SOFT_WER] = self._wer_soft_calculator.calculate_wer( + reference, asr_result)[0] + metrics[_EMBEDDING_WER] = self._wer_embedding_calculator.calculate_wer( + reference, asr_result)[0] + return metrics diff --git a/sziszapangma/integration/task/gold_transcript_task.py b/sziszapangma/integration/task/gold_transcript_task.py new file mode 100644 index 0000000..291eb80 --- /dev/null +++ b/sziszapangma/integration/task/gold_transcript_task.py @@ -0,0 +1,36 @@ +from sziszapangma.integration.gold_transcript_processor import \ + GoldTranscriptProcessor +from sziszapangma.integration.record_path_provider import RecordPathProvider +from sziszapangma.integration.repository.experiment_repository \ + import ExperimentRepository +from sziszapangma.integration.task.processing_task import ProcessingTask + + +class GoldTranscriptTask(ProcessingTask): + _gold_transcript_processor: GoldTranscriptProcessor + _gold_transcript_property_name: str + + def __init__( + self, + task_name: str, + gold_transcript_processor: GoldTranscriptProcessor, + gold_transcript_property_name: str, + require_update: bool + ): + super().__init__(task_name, require_update) + self._gold_transcript_processor = gold_transcript_processor + self._gold_transcript_property_name = gold_transcript_property_name + + def skip_for_record(self, record_id: str, + experiment_repository: ExperimentRepository) -> bool: + return experiment_repository \ + .get_property_for_key(record_id, + self._gold_transcript_property_name) + + def run_single_process(self, record_id: str, + experiment_repository: ExperimentRepository): + experiment_repository.update_property_for_key( + record_id, + 
self._gold_transcript_property_name, + self._gold_transcript_processor.get_gold_transcript(record_id) + ) diff --git a/sziszapangma/integration/task/processing_task.py b/sziszapangma/integration/task/processing_task.py new file mode 100644 index 0000000..c494851 --- /dev/null +++ b/sziszapangma/integration/task/processing_task.py @@ -0,0 +1,51 @@ +from abc import ABC, abstractmethod + +from sziszapangma.integration.record_id_iterator import RecordIdIterator +from sziszapangma.integration.repository.experiment_repository import \ + ExperimentRepository + + +class ProcessingTask(ABC): + _require_update: bool + _task_name: str + + def __init__(self, task_name: str, require_update: bool): + self._require_update = require_update + self._task_name = task_name + + @abstractmethod + def run_single_process( + self, + record_id: str, + experiment_repository: ExperimentRepository + ): + pass + + @abstractmethod + def skip_for_record( + self, + record_id: str, + experiment_repository: ExperimentRepository + ) -> bool: + pass + + def process( + self, + record_id_iterator: RecordIdIterator, + experiment_repository: ExperimentRepository + ): + records_ids = list(record_id_iterator.get_all_records()) + for record_index in range(len(records_ids)): + part = f'{record_index + 1} / {len(records_ids)}' + record_id = records_ids[record_index] + skip = self.skip_for_record( + record_id=record_id, + experiment_repository=experiment_repository + ) + base_log = f'{self._task_name} processing {part} {record_id}' + if not skip or self._require_update: + print(base_log) + self.run_single_process(record_id, + experiment_repository) + else: + print(f'{base_log} -- skipped') diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..cea9628 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Unit test package for sziszapangma.""" diff --git a/tests/__pycache__/__init__.cpython-38.pyc b/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..9522d1467896f2392233d07d8e9dfe361a05efef GIT binary patch literal 202 zcmWIL<>g`kf|j>|30gq<F^Gc<7=auIATH(r5-AK(3@MDk44O<;%At9gB?={}#U%;_ ziOJcC>8T27`9%uFRhh+Ai3N#y>A8t|ewvK8*yH0<@{{A^S27ea0ZjlCzZ~>Ki&Kk= z^>Y)8k~8zl6HAKnlM?k^Qj4=o@(c9CLxV%~kqy!ZSyQYZAD;=dGd^Cgpz;=nO>TZl OX-=vg$bFxIm;nIW%{UAI literal 0 HcmV?d00001 diff --git a/tests/__pycache__/__init__.cpython-39.pyc b/tests/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..07dfd7553f26f87c81fb445cc479eb5395efb4b2 GIT binary patch literal 202 zcmYe~<>g`kf|j>|30gq<F^Gc<7=auIATH(r5-AK(3@MDk44O<;%At9gB?={}#U%;_ ziOJcC>8T27`9%uFRhh+Ai3N#y>A8t|ewvK8*yH0<@{{A^S27ea0ZjlCzZ~>Ki&Kk= z^>Y)8k~8zl6HAKnlM?k^Qj4=o@(c9CLxV%~kqy!ZSyQYZAD;=dGd^Cgpz;=nO>TZl OX-=vg$bFxIm;nIgMK~A$ literal 0 HcmV?d00001 diff --git a/tests/__pycache__/file_stored_embedding_transformer.cpython-38.pyc b/tests/__pycache__/file_stored_embedding_transformer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3568677ba9b42bd52e710fcacef96f71d48c43fd GIT binary patch literal 1693 zcmb7E&u<(x6t+E{+1btRHfjqc{0Q98N_4jp;zSftHNXX}1Zh+liJIxyn@&1Ewzi{c zSKCvf5>6br_K+O&H}Hob`O1lZfkWXv4{0`P1k{q>j-S2neee68z0>P;2#jaH{rW>c zAmn#kEDsKg&tMufl1N&R1b<mUOO~)QNCMjUc@ly@D5A2Jv?%$KNG`)eB16?W2$ERR zZ8C`dfGA`<V2*#48|(O1p4kC)*FP^GC@J%5_pVMWGpTi{bcSb_$4e~t!K+9@B}v#0 zQGwzr+>0h`ji{ES8$@x*o>Lh}e#nzpwH1e*=2Qny$?!Rok!->4ivMp%#^7no4tTn9 z1(0^#`WJbjwr#DIybPU-M3!a`6`*fV;G~$Q_F=}B@B*Er4HJLO78pwwRMG?X^R1`M z(qpz@$H56Xp+6DJe<TwsnG6o&BXS&`kS%gR59or3=pOlmd`-R~-vZ45jE^1F8HI3X z19P@uUV~}g1(TBn`TqJ-x}cUE(<8{b23dY#kNGw^B|tu_rY=miPG`6n-g3bsH9MtA zchA(7$m+_f%4)>^=XawWqqG^7sm}81NosZdARTQf^T^iI(Y>ARyCXBt%{-l^)oz)N ztTNV&JR!z&C)Am=!k%k8oX%WlLxTEQT~0Sw5qbnBV(c$Q=AZsM_d_A_Dz`!mH;Ot- z3$r;qk2PpHUQbo!I9B7rqE4l8T&q+%ju{<?YK_KpgTQg4iit-49G&=@bsrY`3VPzR zL_Js;e^Y3e_Gy<zFXkWqPmsWI_@veVBGSt4>*_+6pnBNYzDwSO4@-P6KsOx53)=Jn z=pKw;5j}*{Ui7!%!;8L(q92hxFL@)lFX@yv<ucucqmCmS@ZEq#Q!-YoQ3XV>ti<jA 
zC~{ZXbA`>NdQkla1Rby@-P@Q6TT8E_@ig}@M#1AA0|BDhi}}sh!T^ch!^rP|d4*|Y z`woUjEf@a~1?<wX#+!2n(g*M$z40J>7eHck<8M2ifAl)|#)BTVRh5=X2-gv!tmQsD zAHsEoc)Xt$&5p)?aDlN}BUHVD%PT^pRaM*6=5=L+&}+DbC|s+O{zB{b(e(kEbNnxh z(AY8<&&2paud;r;9(nJet*?U5MRqoYci2a5K!nwPIh}bJ?&IalKFr{894_DLOYgeI Z_Vzh0gXH3khJj6d(>$)2uCkbQ`QPS+pxFQb literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_classic_wer.cpython-38-PYTEST.pyc b/tests/__pycache__/test_classic_wer.cpython-38-PYTEST.pyc new file mode 100644 index 0000000000000000000000000000000000000000..13735b1575cd3e0689a77fd04b3ced38c1052de3 GIT binary patch literal 4165 zcmb_fOOG5$5zeevS69Dgdd83OKHPZmu6u3f;kUsyUTgz4)-3XjWvK<*lsjvB+OB?( zS+lb<R7;4l5E4Q%oH$s>b3w9y16OVwIM)>kY4*g~2#GJUdfGK3uVo(H%E-v5sH}*{ zs4t@Kl}cXi^FRIM*A8QUWBYy<*4Hr9uRsJ7JY)^7XQOKxjI*xQusK~ZLZ_Q+<hpLd z)oUK+8+pjh(CZc&1ue6}Vz<;NY2FUY-AbdvS>Oh_U}A=)bj6rq!g<O?PPmV)#-zv# z?=fpsML`rXPYJ%n>ZKi6!xrmCV%_PeD6yB<24PU=$@KNmSE|#xAIR%|*jfwys4pRR zm!e>>yfGl1Jn{GYQYb7O8ANNc*P{J)e+HNw#&rz!F^J&85T>w%Eu6M9q&2NzAz9bX z4SCFBzGA?#!eU}AuXa>ow`Jf*Nv`X+Ry(~QF<X8w$;+VCZ}&QzLE@-E5VTekdmXYz z3HKAL*J%Zb{UE>{&l!A!_DUdw9;Dvt#-Jaq2CAcE6&7jo^80i5R3Oz{*O#qMZ{3e% z|A9YuBT$c`{$TF@y`|+jwb@ad{=o0GyZ&4hs7PtrY^8B(t_N~<u#r^SLDW=!7g1~q zKk_|t9r?f*pWsz44}#khqap+@hK`ZJ8H?B!`;jqZLmu%h_ROG&Y#T!(G9Ow)Gd2XD zVM8l6V{3~KZD9<Z*p8h?#xNJ>g!#yoJw(+C?XEMV{Nch5kxRooXK_Aug|kI5*Af4X z1zApg8RegO!i#e&Toh=p6=T~H#i;PCxXm6p>M5?5B6FL|KY}Y0*E4V(d<<UC)@Zgz zv$M@b>8X8?4Lwm7mDuZW@P()X?TNUs&A0e3V28zqG%O4oBR6z}iL;-0d2~sKuttHd zt@LHB)t5o7HOj<bRw7WWedDt~|Ng~`FTcI`oF_+qxIxOFC>&&TH&~aQD7f#-UZ>Z_ z)zUiHX7I%z?6f-3%X7$WsNq@-xlsGu57&a)V!s!hs^Q>#71Y)+j-^_)j?&kT{r}Qr zXBy@|HxswzM?t$UHxhTH69zrMi+m1|<-PuM{?fQ|rOr3+96vGGIA2%Z&qhq`N{zVF zyCN_n(R^La`h$V&f8h;Jkj~k<s(ZuQmGq8Fb(J^+9j3aSn1hXZBBzO*C32p~rOnF7 zZQ5XAp{^R1o+owgO2V<EG(HK+dwHB>PWt5JKr?;BEQK5qZbm>0C1*&-9m$k@hnC*8 zRJG~LWU`4p!=?-dveoYnd>JG~Ty8#C>x4KcSRVGFp_0Q*wc1|`g;s60L%gYBQ%5e3 z7mwcaflSJ29(4M>n^N{=GSx&9b^S=EfZy{YT&Wl36s(YR7KsNxu%lWFqohF3O@pwI 
zU7X1-&NVV>7qg2C+Sk-W?LzwKh51Hyb@~<j$OB}7&h;^z><MsFC+GyUR1Jg~9xrnT zu;DUG(;_RfY4D$6E;D%<I(tBd{3ImOAoB<FO&|ln;9CG20%|>5!qT%XY(1NR5V{T! z(KDc1&z>l38EtoHM%F_c!x_!Q=SIWD>_W!^j1>W6c9f58z?dUSfG;P`0mfW_6v5bE z2{@?a5p;o{pprNA09js~7n2&ss$xn^YZ!B%aCJK>KdXoXu?rwONV{nuI}}Ykn<OBE zh6Rmkl;7r0xGaO8A|7CNi1?Vmi)MhAo^t@P!%r;&E-@pH#03pv#b_E=55&a`h!J=> z81679-VyIUW~qNS7<?<mrPzXB<+x0K;mI%F%Cq-Io|T^%ik?WGmEqa@v>$n<Z7jjY zqZtqX4rw(89OF6v&>G<3;Zae0YQU3<_?kEtSF|S+BTptWo|J}V@xgu%U~dtFsQ%*w z9aup{XV)u3{`{@H`g+Ezzht~RPWxrO(za6>`B2+EDLxV>;z@0Hb!2xnYd4}_MjqAo z+m3zBjGd-D@m6+zJhszRzsuN3(KoYpij(8$n@T{lN=5OBDSO(3v{;_}528;6@RU4+ zw^dSKAoC!LlHOifAW|k$AwsQ#Bvg<kB9kCHR2O&jeZ<MbM2--7m&kh{J0B5ul*k7l zs4(-RQjt~|)Qnf<L{||Mm`oM9fGT35R@E)}A#I|!I{~hN>TzMSoT(ltIFj1F7TL1< zqoxoD#Fj7=wP0-)vmbkC{rE%nTjQtJQreW&c_XjTVCwqu+(?Ys`NZf%`(e|&9HMch zl5v%cP*k5BX0Xk()DC<Fp3=T*_HtTLu7cU!Vu$uXNUFN{Q6$pRc?M2<V`~(8N;tbf z9-rCc@i~-B2+#d*9=G}`lApkmea^}oWF5xX%N<(1#$GD?hWs9FK_2a~5xs<@+x}Y{ z(>SbC-~W|H(y6~Pl2%D&+=Msvz4e2y8VY$58j{*%X@fL!*FU{`_vZEGhI`}Yotw+I zK3#0&Z!Ip}ysO#b(!K92E#F$cM=N=a?)nB1YBA(25$Z1F9Fch<3q(#6IYZ<uk#j`O zgVd*9wE^-1NpBPRE|H5wE`en0`+50I66>eYgVJkhk-sRvHI}3=!hH7D(^=iK*~R(n zV$@rtTVB|g1aE8<bYc^Bs9%EUM!`gzz{i8T|Fm)zS_S<1i2lp6_qP%bYv@)2-z;b_ zJn+<DIPA8Tx{yxqk)P7?;%l|#4`4}sI&pVfIei|<%x<5U#t9ACtW4YXBtI4<xsgnt zX5yfiL`x|NLnMKSZao{PvL9!&^r<jQpJSO%GiokJAF;ZFZ5)05E#p@_&BNLow9*%V xayBvXx~9oYotO4Eb7=ZAQ@42T<!)cBg~3(ALydhdtFj5hHHsX6^L@w7{TsTpAl3i? 
literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_classic_wer.cpython-39-PYTEST.pyc b/tests/__pycache__/test_classic_wer.cpython-39-PYTEST.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7c0332e512a88a58748684080e2cef3a093cbc93 GIT binary patch literal 4536 zcmb_f&2QY+5huAMm&^T<WLdG3)@5BJ^#-ow$Y1@4Qzy2A7)|BGl97T4u%-7=E81M{ z;yuc?R4!0N>8%BPD0(P-kaIH-6gd<{kW>GO0tHV68Wa_J>9O#o{mpQ-S}Ad3qum9E z!+DR7!{P5YGcTyu-Jkr$|7p$Dw7+TxekQIj;!wYa5Sq|q&DZ&C^h$=NYdzDqbQ&>Y zyI1zhJ;!(WT8}G!1@=<x_NsohSMzJVx?k@#{6??oH+xh5l&*zNSPrL;YK2@ij%vbu zpbJabADaG*C=2I9%|9e6!o_)3R7DNv!=f%4IL`@vS!*?S;F-46GII0nNTs>8vOb8z zmYvr-LEKr7gEVa44P}0$8>a0zyb~sBEe)fjEigMsI$>U?5tW96c1#P*(dBA%FBEK4 z?K?rd9;$qr>_HrL6O6=B0+YFnyIOi~%P?J+Nryby{TWc`xP(I;f#@Q7AL&Cn8#wEk zmg%bo-k`JuZ_SmpNaa>nhC!N_dqHOnPsmH1AjvB-?DV@yv>E2M8iZkItyPv)tSIXw z8W6d47n5%1dXSq*gm<!ThFIc}hOprF)li1?rtaFtpr5XVDpK+=-Am%}TMIW-DAhtQ zkew*G8>F&-Gg!DBs@rLQu<+iE<&_1s8L7=+5G37Ru#ko-RV>?`I8Z9;P=MzL8+nr> z8T9Z0+9F7U3YJ$kgr+<A%cIct1qqPT92^{^e?dvLE$w|{s15a$1GHrfjnv2t3f5L> zSV~J-X-n6&)VyU4O`#91%*;xeg?V;n3u9fA15pwtfpJ&{5KF>}onB=KP-ae67WS4d z3<8#@{K|yQR(}_+sHX0A^`Ul0Q@>1W+x66ZsLS6#Ymk;07>Axg7q#)sKhVsqgqeTj zQD~T1W8HeF^V!B7^#|6RHmsygSf;YdLw!sC06v)7(8OU4;Wa?e*gUq(f%7Q5-fCZZ z2&y;6@O<|d|9JP)V}Dw?xFmH1>XDv5|IP-@eO=+}rPsr|GD^eu0+~cf7o+vNP}||X zK^%3W^zm7IC``kc2LO1CX>X~YgeN_Gtw4p|I*#MTEH6?#cKrW`jh#7ix|BN|5VG5s z8@aO@#bFZkfNEk>(%*b{^z;``3^vZUl>4)h;$84aTii848JSMC)O;`)$o@Tdc!K7f zZmE+z`D{zI++l7HI7WE|!7PZ}vhvbk<CG7vNZM%<XX)~MYnl-%Ex0Nv^X1k+MoB7P zB;76^lpk&vYtB=E5zltIF$k4>f#x_Po04Cr%i6`kM*HS^6eI2mui3}vRJjW)eYkdj ztkhO({q<P1WjK)SZVcwdZ4OGMtpeihK<16&6G#2zij;ksA8G?0y&&bU8ze!Bk(Mhd z!tyzK?(CQ4Ir$O|Rmr6y&ae0r7iT6f&hgvuI~3;PdEebj8j><vKEtmu#wewMChUr6 ztr@Q2>NTSMd0ai2_W#7_6KD_8XGTH$TV`6iW#OPR9roxb<6Q1KLrMeYZ379x5*pIL z&jm=v)VGla^r8+?O32Dc6eiMuMs-G%ISp9qbI`LcoXlF)MTOQ{H6EJ6O&i-yP7J?e z`V!qgg*HvZ2i;xPDKS(Z7@)Z$YNDPwk<OH#!RVne<u#_eDKw&c!D9J5US58U#4!@z zfY@Q}o6sKVkBti#9_gDi1Z`m5yHGFzD9<gbSn81>zfCuufOy=P;1BRGTQIg}CooN^ 
zSW;5<%a^WQzkcP?itk*$^7fULtJjwN+VYJzmRGK>+~BLq)urVt*GZKp=`Mn|{0<31 zwmb!qPq*>uV}@?0-x~xHRe%N;NW4N~k;G{dXCNjqD$kPjRftJQF3NLc=7%wZ7@-CF z&J+`ii*pB|c!Z%y^=A)`93iSXifW9e7l4#hZ1%yi216MKSD^>Uu3rsD0E8<NYLz*( zMy1_GEwTZ@GU^OLFh!*)#b(c<>I1!~N)<q`k~zX+2m*k@VF;GhpHpYM0`QdqL6_EI z2=3LUk1$@NDkMLKRws>X6X{fGY79Y_+M~>62v*YuW;V0xsOk^|%R?LfHqnCEnKkL} zeO<ovR31<5dVF73_xJgX_1Nd>-71M0K6{Odi1h{g4IO?@v;IteGj(Q`T29ga(0AMG zfjITZDcEvNUZx?clV8fPmuY|)Ccj7G2N3z;ahp{1f<@OSFVKY7NHCj5Pv+&v-ItuE z@fi|_NX$a)5Nq<ey@qRY)z;KOQWE^0NHqCF8vXCos_mu!J!;7}>BhH6TqLodT=I3Y zy+LB!3>JC)g)y1F*av?L^A$yTmrsUn|JyM>G2~U)_VJ2p!I)P*n&D^VRmw>n-CgPZ z($EHS9dvw^%+4xcnVY%HvJV)uR?i+*M=Zl8WLN{sYFYIQEOXW0(#p0AmbuI_tTkd; zb-Tui{u7KdpX6uI8pJGS8R_WWMl7o{%j(Rs270>+db`F1%iLi_G~sJ`(pNCx)Khu8 z%P<%G`RzV`vEGPbBY!bNu*YR)Si(L-XNEny&ol7ZIpFi>Pv!HWQC4v88)|8v&sdLr zo=tasHq;UJ*@4f7JkCBthtIRDKU<uspdMb@T%;|O$2%x*w1GnZoOt8ylUMAXyuII( zQ^fay-GeD&dM`zc_JIe|!^nkV{Ye*icgE(R7^F>_$9uFLu7NWXiDI<&<|1me=b=91 z7FI`R)ahaoZ=|7Admf7&%2aus3*YWubzfJ@9RMdADvAG7#%X><#`zIF;KwAck+?|W zX%o=gI05b3aG$~&yqD(OW7PB__mFgrkB#3O@*T`*J(oMXO*d!F{OE4WUNlH(m*-`% zU&<?EQ(hj~isr^fTaWHu(jH!xNYF08KQg%$o}bwN@`j<<IvfZxHC%Z~rLsu+)D3eF o!U>Z-p2)wPiMhFUUhVb8dK|t+WlYg7|7F85JoA|CxcJxq0|%CGod5s; literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_embedding_wer.cpython-38-PYTEST.pyc b/tests/__pycache__/test_embedding_wer.cpython-38-PYTEST.pyc new file mode 100644 index 0000000000000000000000000000000000000000..17c78ef3ac84f9cc3228f776808069de4f6fc5bc GIT binary patch literal 2106 zcmb_d%WvF77@zTLz1~NXLZb$xVpRfa32mCt7Ag@aD&+uGgh;DO7Q#$+W;bs9(#$OB zF1C=GUilAt=$?QpXZ{l33a1{pa_9xV8E>{*&g^Qw`Cj&Xulf02ujl<S`Te_ziO`?u zKR*lPo3O+qAQVxYpb+cbNKFG_lv<&UmBmP$v=z2eH*~d(lXln!y_tAvC+ujQm2}fy z*u#jqti}3UsG%cc3sL(CrVedAw!$^m(UMEsAgxo6b{?Z}gW>}ebe}^UXg@G&>rO0W zZQq>~2@7z&d7tsy=^>*u&c?S$GMpqt<{T^+KaUf306e2hhr66)VwCfgDG!G;IpNt* zz548D0OP>A1xvgGgkow?lUmfK&e*9CZBYnNOLm1zRN|2V0ebtPNfJ^UDG6INKjdtj zhwYP?CC9O1%|ejS<wWEsO0>Z@BcY|;++#i&9)bE-1PQh|8!^tZA*;QkX_3n#CSt)| 
zh((hZUu@qKjEn7*@L`;t5XtjHvi%to$1*Rr@83JPyDet1n2~~H<CJVmCZy0bVrP|# zP8c5)(|T>pWF$xm#f&JCq@&VS`6IlJ`<Qoud!;_gWWv(*QC5j0ruY>8Pz7UucA+_L zd6I~VWbi=bSzvHARoqr&nN7o~W9z<(Ww>nN2Iv-{7})uT_dtFn6m`0Kj}E1eRVb(E z8>2!MmiQDsHB@uvMrBB|G$`Ierx+t?J+P~VTv?@ATBo?OsZlwlT{_1`)hb)mJa+jE z8gHrDuGCy(YA0^muG$!t?b4;rDTblYqwtdjx|aA`dQUslD_bK>JL+8{V{Xx|>^|+y z(Xk_b0efFsbIgAOZcTBS0e0X^bB^^6I=$PA-I-(Bdtz^*%A<X{R(df8z9ZMcYoqMU zq3Dkw4hvOkS+Hzat;(S$G-G}C)`9M<uUg{6*!x4c4gOh)*?>dQ(D=*0evE#8_Uw~= zxDs!CHC4`^3YakYH9O(4WcLZr;BtYrcLHq09u`SFjOFYa(QaV#VMzV&Nit#nex9-S ze3&O9m_LDasqyk-p>3CD|Kt2Ve-jc2%-S6i3HOIjYj*^<J|iisog~g!mIwHSv9}lC z*`2p97t`y3@P1e@{+_S6-7^(13)PiC3`kM%{GnG}Rz6n)5qOoq*F13}5Vce2LIrkh z7Sk(=>?(3qk?V@wn5``mS1;Jv2}Ee={hYp3dYtRtvKQ(?)^@?;O!D)PRP8hY)aRq- zq=BkElsb~rf(hPNKJG#%_?i;En~iHkc)g)+QW9}i@L`@7gtNK}=IC$|Co;|;fg}eH zQD2C}Q9enic8$gfU`Z0`0=5Ad6cQD@?l%Jw=h<z}b6&p|DG()6&jJ9M1WVB6ZHR(< zIuu9;-Y6!CtUK!5rWQM^!uzYjwQ$Al!&S=%I@cycokDZ;&XsUg+FgMZjV5~NqhSIE z$HOJ8FjePBPwFPbDJ}s)h6hmB#SWl}i!4pMsEb|){#)4lh!4OkI9I!8)#K{9dh4t; zO>$6!0dKBM-CnBd)<Va9^^61UgD!IQo#6^db+^O8Dq;`RH618~R>3ZjbeTlXHIjz! 
zYN#EkiI0T-(5*Pjm!CQfxwR=Ie>r+OrIng#K-PtGKT2~tN!Z5<9K<>h7o9`vhHG?< Ku65Dr+y4Sw6kCY^ literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_soft_wer.cpython-38-PYTEST.pyc b/tests/__pycache__/test_soft_wer.cpython-38-PYTEST.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f093aad44d8601d2d603909a1a0cfbd2856109fb GIT binary patch literal 2086 zcmb_dOOG2x5bmCr$K%I7@*o?DV66mX1oCD>5{Se?Pza}Jh1f)q3}WbYx4n~{m%7{8 z*qOOxbLAi85Z}m^Gyj5HpOD~^KfoausP3^h8_wA3>Z<CR>3V*1ztaiU9{u*y_clU* zqW}CHkZ-{fPk~TGae@Ze>{e=92&2>)xLBW9iI+A8jnp6bM#f2V&;-4m1Ziu~GCC(| zr=39uBj&RP>u#W$j;swt-RGEkwDHUttgx1meA)zQl?Jr+3=P&O-a}#g75IU6LrXb# zV<DA$Z&D;I#A@vU<9qo?-X_U#k`S45P+s~bPS_ssjNVE28KrSHzQ;)>MmbNJw(vNU z6P^wATbDlz7!THMSmFa96jO`Z)S)i*#$JVJgMx2Haw}Y-5|1qKE!b5yNr<viq8c>c z=WLv-=26U&!&tL+A&Bxh5&4l8U9ioFa*UaO$S1=CP`^YFTa&X9<18C86&y^9TplnH z3%&}z81nkg)_uXa*h&c>#@P{(Jl`i<Uo&wi^J444{k?lzVit=TDM&U>$(Ce73PU3S zNs5jb?-x_GGG;OoB!v`4lt>cjh;`@)ui`G|XMuYwHTuYgWm3}yMk0wRK88Oez}lT% zsBdD+Y>TK!`VU2(g%;Pn#9d8J0;#z(pi^D#%kZRn+GOF1g}s0HIgsCSMc?kjGOl#7 zcI6oTU{$EX5+9=%mafa(sw`=j7R4Lr7-J-zhi<iyE2p$e=NMNmwJNW4OYhLC8fAmp zhd!S{%^ls*mF{Lt-Na9uRTHDKS^CsF#?TL@3x0M$*ARcp;6;lDWn+YCOTTMm%^lj7 z?H8RnI`qUZaNd>99P?j+ThUx*!8!1yJ;!DTo!Q;R?#(gnJa^BdDxh7uQU);wz9m<| zYOQR|A?Z)S4+m9xIj~$<jmo1o6k~Pv-kzy!PnV<z9p4+mAoP|gW_=DxL*w87dh+t_ z${%0u!Zcj`cB+lP63|@oJ9fll$sQ1%!PvlQ=Lpz{JuZ@X7|YppV$9&I2mRT5M3M>X z?dBQ#xCi}21nW&;ZPr%3SeUcT+5gkJuP%b`p{@KOkuWuUs{9emdPY*Fyd=(8mWTMY zwX+lA+1>Xq7t<S|2%apM-cC<*SC=Ya7OE?u=#!%0`QxCvtZlA^A`GhDPW{BqP$;i3 zISO5v=jjzqu4-~klN*}coUJTk)-TxJhG!hJubX)?ovb=1Wze&z+=9oM<nKaI%Bvkv z=c4+izD_%2Ig-<Y34UJN_zRuj?`Sc&RhveHt2I4DNyJ&fhk05M&Qu$Yqy0&o$T))l zk{m3Ax)6zje3DRO8jTY`kR&n*Yyu`|{3*EV*1Ztt*&WVvuFganI*BxG0PrQ@By96F z@Pe-!S0EgCqnIR8we-DpF1Ak#pPUx1pB6qlEqrQXts^uc)K_m`sRLR_S5Kjd0TR>i z(J+CF<KYRSFrDYfG^vIXu?YlO0iawPdjKRpatv*wHaZLZkFa+UzX?|1y7HIV;~#-S zZ7fSu2M0Om^LkoT^F*Z@3mp&jGai%=s>t=%g=-8o)gFYWak{T3w6DQgJ8O`7*(W(S z_^CcL0NdBi9trbhi%!5cl5f6k256NnB!AO&CYn>}GB9j%=6{~%bds<yG$aVE_t839 OwS23M+s-Ag>;4N{+gXtS 
literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_sziszapangma.cpython-38-PYTEST.pyc b/tests/__pycache__/test_sziszapangma.cpython-38-PYTEST.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d8769579dc75ba7b9c6e85830fe6732e2d30786c GIT binary patch literal 2679 zcmb_e&2QX96rZuZUfa8yueKrOL%1y<R%&;fmbR!Os3>r#gd)_WDzXI5bUnN4uD>+n zZQ8{?Kq?MMNcjUebdTKm6S(mo_`-oxE{Jnaycs*`CJj<R*z)-Ky*KmTZ{G8JbHCNH ze*fy*?cY>{{y@hr;GsM4@NF18<RA}wN`M_jzSU4u5Jq9m(QzrMJ9Ut#ff1UHDS298 zg$<{H5v@OJdNogfhIX;j^6H-P3^{Y2=~?ie_moZ4ZcHw0QJ!+UA2WNKPkcTh35f<F z*|rnX8<7F+?1Lj{qm2u5J@D_1qlhy2P<w&j6O!2r!^b*2T!X>GU_hBTn<PvEY9~)q zuw?iBC+V2cj%C?!ZBlB7F{3s~Z`;E(O>TF)Ufk=H%AJ@Ex-{wrL`>}-OdY0SAZKs< zjF_<Wa(HpKd!JLryCGpcKiVNFiyxBir<9M<IO#sPzq!?&McEY#b2+-Rd~l#J80ASE zaoQA!6c~`gbQpUA=D{WClVGnOg<WDFN4?aKBYVeBhjuzVz9}~0Hi>M)2IG)MX=g(M zYh!8kB}i5Sn?+MB>Gt9%1=VHXlrCac2l_~4g#UsmBXd(lC`G#{M;UsKQ{|C5#W~LK zBQ1v%%hgQD)RB^FndT`YgMBHJs}|~ctxB$XXylN5nV!|W+Abugr;F73Q3IaFe@)Hj zmS<*KAA6QqtFKHouaP#MH>YT%^Y204%J3AkJ)q_U6)7MWv@*r=4IX*TpVVcP*S(fE zm(_g?w2{t(bRjdQco%;Mb~TiXN6S@jJ~siuB6x3t_j+z+mhgTLvz1f%zfk#a?P2}{ zSh2jt%mM^UVm*OC`elM&he$wk3%C*s*!fy&%%Tw}&YTK`2`DV^y^4o-c7Uf!-pCp! 
z#{{hXc`8)r4x+NB@NJ3ecwCkb;sP2MC7Q+*?<wrEL<2O$W?9nzJvOgFY#JxTMq$64 z3dPD{Y*hZ9MDb6tnML#044N0Op!wIak;U@-q_rhVnD}wmKfl{y;=dP#@%1lcZTo~n z=}GU=9p<O>0b!9J4M5u30n(*U5-6B{I=Mll8pz~Ak4Z45_C_4h%Qlor!l^xm=VGa4 z`&`N{PX52bwr_*mZM86ZP*w*qgG$%;0~(Q#7JA@EG>Y5!g|fER#*@`|-%p-iYjf+H zim=ygL0xV04t?T_`r}b6x7SMcjW#cID4XLT1p_MRiQ$?UZcOGXNMimiPz2_!$+;s{ z7kWx3B!0AiLAZzu#gD*=j+n99=C&V6<6x!q2Z+j_>)=m3PLpwJ<!#l0rB}uJ*Vz&* zG|{hI8PXuxzXTseLy&Ii<m6H*fkrfpcOV|H-67>1+8z{Q&lV6n`|k*6mYC=Oj_yZ~ z;}K<0MNy#%Umf#sBZ_lw)wo2+A;NC@G@+asLfNQzoSEX&x>KsUgcXZ!j|2hul(1eL zCWO(V31au**bh=af;|ml(BQ?2%ZKqe@T96c2%xtGt_+;%a?+<_g<`JEL+CS~F&4Ap zj0?>@B&ke65)sjX3ccK|b`#<yTRNK;UNwZ-GES?rA0Id~lIs#s2}82W^>3CF%8w6c zCtRIf>Ka=COUJv}8&x+8L)dp$WTERrXJdWRqi=#f@56u;9XGL#G-SZtg!?Se8}Ob( zO{^guxK_p2(Y=blP|bK%a`Fsz87#FIin{B<QN(o%;~);~ZIBe_4}x6o6YOS(mH&KU z9<qvh#ba-YIeHmvrzK8JqmBRz=~1TC;?Z9KPD5pO)vNPP+5JTWm!8e+oILx;!+`No d7<=P@ek}5mi=@{S11%v#X(~-s!<K32e*@P^*(Lx0 literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_sziszapangma.cpython-39-PYTEST.pyc b/tests/__pycache__/test_sziszapangma.cpython-39-PYTEST.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2b0c02ed667d9f69f0f4cf6568730a02cd63d617 GIT binary patch literal 2582 zcmb_e&2QX96rZuZANKAhX-h-Ohj3Fstkmu{Ep1UnP*LDe2}P($Rqz34x-+}$#$TG5 zP1?ogKq?MMNcjUebdTIQb3*+iEOFwL3*y`pZ^oN+8;TSVmOOrb@6Ej5Z{GMllUB?5 z=C?1mo|*{#j*eZRL$}}-TQE@MA&RLHV$aY|GqMbXQB?7&xU|%~8d%J*9@(C)byny^ z4X=R_t37Da3avgyd)RBy8m&J@-W;{51J8MCY@l{ye14M&DcpX--7PT=#F(Tc9z<lz zO-XN?3|Qv?fS~m@F6^~1xHF1l#^J^6g+WhQW-kmcYj6t-28tnoG4?h{l!nYrA4!Ph z_JfCV#97C2T(~wEbEAYam&lv$P|EaXw@Z^=r*!Tle9&caHzaCm_i*Y^Mxma){uAOV z((~cj?e1N{xadZN_kwtrNS@p$-A|a<mPy*ZcXwm6JIk`G78ZK+XY~M}sB<RLBo?fx z7#T1igI8hfDVm3ul3zx9?M2!p_fg!FK@z*WfgHMWczjb_B3u%?gbzj$i)CkB6Ki7~ z^*KpiftW>8E$R1?Sc2;!XiA8f*Fb(DGYY>_>dag>5t3*R<tRf>urwZ+6P)7=Kd^E* zV!4?anYnG`R%TISg!mWwa8;seUYQ=QT2#;B@MYDkLMwZ4IB8WKtsgAVS>jh|KXIs? 
zS$#|$wN~GlShOJ<Pnr|7gT!}WZ)JFb`94T<N{S7zE7_P}{RBj``J=gr@)~W?xvUmo zkn3_DtP5Fvg7@&Z5Z6Mvx~<%#^SKQSP62v5Gj~w#WR60=kNMJx@Gng9JNsCC4^bR? zDszCrX|<kWpmEuN>xc<>Zh}^GfjD1jk6AW~#hDYauz`gG*r)lRuMF_S$QxPX<vD?9 zf1HTb*~6^t8)8edI-Zxs!@PjUvS!nm;C+K%)NDY8+$?JOzvt#v$W7xVxiR?9Ct|U5 zlp9mLt6BV0Zf4p1Im71J-`M=?+~{I?Zrs{bB}{#|YoFcjaP{4*!uZ<fy0(2Rp!CQ) zY?lX;-6K2>;sID&yCC}PVG0E^kmGAayMawFv_rxXbJvrYU38&L62aUN+{>kx8whP% z9{+!Z?Z5@tZL_HNpsWrO4wbGSge)czE2?1-vp8wvXU6Jk8;@7sc`to*r7fJVr-Hlc zD(P}tblAf{)gQ{7++8iz*V>|}LfIUJ5(1dur-mzPxHg`fQd0A8fFmexjL*JMb)lzp zA`-+0=M_X;7(ommI%>vBTev~2gF}=O4+v8{R{>8llIch~dE4}0=@qs9Rel;48don} z8nQ4wxBxFjLy3L~a(tn*KqDF^yO0mq?uZEiZ4U}Dbrr?V!P^SVX|6heR}JEwWSeoQ zqNuPGR?j}#h~lh2ZConlkYQgwN}1qw<y@cY1h>_zbE|aq2`^6hJrah1DdoK+N(pC0 z6U_eoQ4q=?hCK}v@DRn4FNVn|q}tUVgwWeUUnkD?1?e-jLNQmKL+CS~a-Q(wj1SE{ zB2phg5);*difXxA?Iz?)w{$+QP&HK8GEXbBFYkLZmaCdjNkgm4^{<x`%9jsjCtRLg z>Iz?iNXNU`>(g!)MX>L_I)#1!osIWNkG%o@Vh#pmRB;pkf}i3AxcNoUv=@q+@53*H z?-%vM{o-$dr8sw(jFJzq?(p*4EbJpyQJd=cn`%zAOp4c1e>dA51q#xrQ(rV^k7{`U tYSUkmvO%jJt3l0>)4wOWU)Mj35;_XmN9yQkI+n3)G>xWdVaKjl{{lQ3*4O|5 literal 0 HcmV?d00001 diff --git a/tests/embeddings_pl.json b/tests/embeddings_pl.json new file mode 100644 index 0000000..70e7d6b --- /dev/null +++ b/tests/embeddings_pl.json @@ -0,0 +1,2720 @@ +{ + "rego": [ + 0.025360366329550743, + -0.04195535182952881, + 0.019350925460457802, + 0.13248692452907562, + -0.1212526187300682, + -0.01891663670539856, + -0.03484756499528885, + -0.02140629291534424, + 0.07373380661010742, + 0.09841269254684448, + 0.00857617985457182, + 0.03878685086965561, + 0.1382981538772583, + 0.141482412815094, + 0.0943804532289505, + -0.04145222157239914, + -0.11814873665571213, + 0.17937591671943665, + 0.06660573929548264, + 0.0669114887714386, + -0.06707686185836792, + 0.03332226723432541, + 0.019138574600219727, + 0.038645848631858826, + 0.12267481535673141, + -0.04557936638593674, + -0.03389456868171692, + 0.10589240491390228, + 0.15420544147491455, + -0.004094157833606005, + 0.07477718591690063, 
+ -0.019102349877357483, + -0.015680953860282898, + 0.00466578733175993, + 0.036196645349264145, + -0.019614320248365402, + -0.07911614328622818, + -0.11238190531730652, + 0.12062164396047592, + 0.08306576311588287, + -0.04920080304145813, + -0.028720583766698837, + 0.04233407601714134, + -0.13503096997737885, + 0.0017086525913327932, + 0.0012746453285217285, + 0.10003215074539185, + -0.050891172140836716, + -0.005998636595904827, + 0.017386065796017647, + 0.005148351192474365, + 0.033908747136592865, + 0.010332940146327019, + -0.0023164614103734493, + 0.11763325333595276, + 0.07122141867876053, + 0.029413599520921707, + -0.02747083455324173, + 0.02197670005261898, + 0.0697161927819252, + -0.1450590342283249, + -0.027237500995397568, + -0.053309470415115356, + 0.03383652865886688, + -0.008408180437982082, + 0.06446542590856552, + -0.0007105203112587333, + -0.08366017788648605, + 0.0050771646201610565, + 0.04338033124804497, + 0.029303759336471558, + 0.02494737133383751, + 0.030596241354942322, + -0.0253981351852417, + 0.032596975564956665, + 0.03707187995314598, + -0.018734438344836235, + 0.006434278097003698, + -0.06769295036792755, + -0.12360601872205734, + 0.0838269293308258, + -0.031027745455503464, + 0.2682488262653351, + -0.030780944973230362, + 0.04645909368991852, + -0.017756134271621704, + -0.08343406766653061, + -0.18675582110881805, + -0.05608057975769043, + 0.03880838304758072, + 0.06651925295591354, + 0.12371407449245453, + -0.07567261904478073, + 0.0946085974574089, + -0.02659708820283413, + -0.0827547237277031, + -0.04034993052482605, + 0.01702699065208435, + 0.06940530240535736, + 0.013810629025101662, + 0.11997637152671814, + -0.14746129512786865, + 0.131831094622612, + -0.014702586457133293, + -0.13849946856498718, + -0.006234161555767059, + 0.04045787826180458, + 0.05735635757446289, + 0.0066235922276973724, + 0.012200994417071342, + -0.006246550008654594, + 0.10443231463432312, + 0.038930173963308334, + 0.012603623792529106, + 
0.0642445981502533, + 0.012551533989608288, + -0.06847232580184937, + 0.0003183564986102283, + 0.09870034456253052, + -0.006378100719302893, + 0.11994011700153351, + 0.07558205723762512, + 0.013828049413859844, + 0.11783914268016815, + -0.05701836943626404, + -0.025578824803233147, + 0.08175185322761536, + -0.009828973561525345, + 0.04556669294834137, + -0.0019460500916466117, + -0.10199606418609619, + 0.007795512676239014, + -0.0326656699180603, + 0.12185566127300262, + -0.04753127694129944, + -0.037251636385917664, + -0.10819704830646515, + 0.06959820538759232, + -0.016401033848524094, + -0.03345759958028793, + 0.032997798174619675, + -0.13399460911750793, + -0.05238836631178856, + -0.22104597091674805, + -0.03195151314139366, + -0.05830279737710953, + -0.07016734778881073, + 0.12436967343091965, + -0.11996293067932129, + -0.04278447479009628, + 0.05688241869211197, + 0.03518470376729965, + 0.035848721861839294, + -0.0497179739177227, + 0.1173618733882904, + -0.026286771520972252, + 0.11869470775127411, + -0.06522578001022339, + -0.02036689594388008, + -0.049065422266721725, + 0.09066169708967209, + 0.10371477901935577, + 0.07177156209945679, + -0.01686999574303627, + 0.004568031057715416, + 0.016954246908426285, + 0.024920357391238213, + 0.20036350190639496, + 0.07762596011161804, + 0.042032480239868164, + -0.06874687969684601, + -0.08206215500831604, + -0.059366341680288315, + 0.21933695673942566, + 0.07867085933685303, + -0.041550636291503906, + 0.045646168291568756, + -0.0058541386388242245, + -0.030714213848114014, + 0.01864239200949669, + -0.07514034956693649, + -0.04487421736121178, + -0.0725104957818985, + -0.09751518815755844, + -0.15162283182144165, + 0.06641946732997894, + 0.012081718072295189, + 0.02505236491560936, + 0.019052943214774132, + -0.011952199041843414, + 0.02710006758570671, + 0.07528761029243469, + 0.0053316703997552395, + -0.05752786621451378, + -0.13719043135643005, + 0.10667595267295837, + 0.07605913281440735, + 0.20441934466362, + 
0.011313196271657944, + -0.04499569162726402, + -0.010336518287658691, + -0.04517510533332825, + -0.10358977317810059, + -0.08371572196483612, + 0.07000277936458588, + -0.12187262624502182, + 0.03666067123413086, + 0.03201401233673096, + 0.027945850044488907, + 0.06616507470607758, + 0.047266267240047455, + -0.11192768812179565, + 0.02800518088042736, + 0.04106767103075981, + 0.044681113213300705, + -0.03224359452724457, + -0.05946209281682968, + -0.06543562561273575, + -0.003850643988698721, + 0.09258010238409042, + 0.11185108125209808, + -0.04267556592822075, + -0.03479834645986557, + -0.09976062178611755, + -0.01968393474817276, + 0.04705026373267174, + -0.03983002156019211, + 0.20730488002300262, + 0.014524552039802074, + 0.07460898905992508, + -0.07390899956226349, + 0.032891854643821716, + -0.01906980760395527, + 0.006029362790286541, + 0.045801129192113876, + -0.002989241387695074, + -0.022413700819015503, + 0.054536692798137665, + -0.022095564752817154, + -0.16709403693675995, + -0.07985913008451462, + 0.07989532500505447, + -0.02780805341899395, + -0.1545652449131012, + -0.05080298334360123, + -0.13523483276367188, + 0.030402880162000656, + 0.024189475923776627, + 0.1913859099149704, + -0.037512216717004776, + -0.04817260056734085, + -0.03770771622657776, + 0.06344256550073624, + -0.0014704714994877577, + -0.06589218974113464, + 0.01659468002617359, + 0.01861189678311348, + -0.003923402633517981, + -0.02344740927219391, + -0.0045809210278093815, + 0.02147747576236725, + 0.08546024560928345, + 0.05269842594861984, + -0.15124723315238953, + 0.056306660175323486, + -0.07883468270301819, + -0.029240280389785767, + -0.18876883387565613, + 0.001567062921822071, + -0.04881870746612549, + -0.0008857144857756793, + -0.08416055142879486, + -0.0807884931564331, + 0.0021422021090984344, + 0.028993740677833557, + 0.027406230568885803, + -0.06310993432998657, + 0.0981152206659317, + -0.07681175321340561, + 0.060740552842617035, + -0.30095866322517395, + 
-0.00856530200690031, + -0.07610736787319183, + 0.012243842706084251, + 0.08163291215896606, + -0.05355709791183472, + -0.09321880340576172, + -0.01624113880097866, + 0.08702488988637924, + 0.08788813650608063, + -0.12482321262359619, + 0.21521306037902832, + 0.12818582355976105, + 0.028399482369422913, + 0.07849821448326111, + 0.0042789168655872345, + 0.0586482435464859, + -0.07221205532550812, + 0.08500781655311584, + 0.04169676452875137 + ], + "kruchego": [ + 0.016303671523928642, + -0.019872045144438744, + -0.0092224245890975, + 0.014146615751087666, + 0.015383906662464142, + 0.002108193002641201, + -0.062140874564647675, + -0.025106163695454597, + 0.08381172269582748, + 0.05710531026124954, + -0.01848313771188259, + -0.04641922935843468, + -0.10143981128931046, + -0.0652991458773613, + 0.018572727218270302, + -0.047973163425922394, + 0.014040370471775532, + 0.005996278487145901, + -0.023732611909508705, + -0.02009929157793522, + 0.0007243369473144412, + -0.08037777245044708, + 0.08096491545438766, + 0.032353661954402924, + 0.013201138935983181, + 0.029858581721782684, + -0.08444873243570328, + -0.035845350474119186, + 0.041100140661001205, + 0.04591462388634682, + 0.006408806424587965, + 0.08591236174106598, + -0.015455985441803932, + -0.08906127512454987, + -0.08432091027498245, + 0.05640733242034912, + 0.012656928040087223, + -0.08276824653148651, + 0.034724295139312744, + -0.0023717572912573814, + -0.03359077125787735, + -0.0729396641254425, + 0.009350333362817764, + -0.0018471118528395891, + 0.01208903081715107, + 0.021758658811450005, + -0.03382786363363266, + -0.04432319104671478, + -0.008544672280550003, + 0.002590786200016737, + -0.12001265585422516, + 0.007330841384828091, + 0.01335948333144188, + -0.006848776713013649, + -0.01573527418076992, + 0.03964998200535774, + 0.04370586946606636, + -0.07679811120033264, + -0.028589816763997078, + -0.03548365831375122, + -0.018836772069334984, + -0.036120202392339706, + -0.15691232681274414, + 
-0.03119748830795288, + -0.03921635448932648, + 0.03989606723189354, + -0.011425910517573357, + -0.05037058889865875, + 0.020870450884103775, + -0.010966272093355656, + 0.0342303030192852, + -0.014046432450413704, + 0.043870583176612854, + 0.001676222076639533, + -0.0018626556266099215, + 0.0322086326777935, + -0.04106687009334564, + 0.023960450664162636, + 0.0004788232909049839, + -0.014335878193378448, + 0.03212282061576843, + -0.03531443700194359, + 0.11197002977132797, + 0.0059305643662810326, + -0.051486533135175705, + -0.06372729688882828, + -0.012052038684487343, + -0.036066845059394836, + 0.025950072333216667, + 0.007260511163622141, + 0.011519390158355236, + -0.0005208541406318545, + -0.07554544508457184, + -0.056650351732969284, + -0.03829614445567131, + 0.04423166438937187, + 0.04932614788413048, + -0.046451959758996964, + 0.07947622984647751, + 0.009505164809525013, + 0.02357821725308895, + -0.02505599893629551, + 0.029041992500424385, + 0.003937460482120514, + -0.03490827977657318, + 0.019778598099946976, + -0.0485965870320797, + 0.031887635588645935, + -0.06730393320322037, + -0.0526353195309639, + 0.026736879721283913, + -0.00929854717105627, + 0.02515203133225441, + 0.03929869458079338, + -0.04754827171564102, + 0.03447788581252098, + 0.026109604164958, + -0.009087104350328445, + 0.017221178859472275, + -0.020497629418969154, + -0.07402393966913223, + 0.002154354704543948, + -0.048016391694545746, + 0.017812075093388557, + 0.04352061450481415, + 0.038450323045253754, + 0.09475485235452652, + -0.0038469440769404173, + 0.015702111646533012, + -0.015994977205991745, + -0.01263596210628748, + 0.023923177272081375, + -0.013737129047513008, + 0.052358273416757584, + -0.10664000362157822, + 0.028132114559412003, + 0.009037270210683346, + 0.03250018507242203, + -0.0019254302605986595, + 0.03156875818967819, + -0.035373505204916, + -0.014041352085769176, + -0.05016426369547844, + -0.09106452763080597, + -0.00925979483872652, + -0.060555923730134964, + 
-0.017428983002901077, + -0.04451008141040802, + -0.06614529341459274, + -0.02807685174047947, + -0.04897867888212204, + 0.021360717713832855, + 0.06190156936645508, + -0.03151264786720276, + 0.09433288127183914, + -0.036462247371673584, + 0.025535333901643753, + -0.020453132688999176, + 0.01571831665933132, + 0.018754001706838608, + 0.06105025112628937, + -0.08530690521001816, + -0.03154398500919342, + 0.02956015057861805, + -0.01883108541369438, + -0.03418896347284317, + -0.07070766389369965, + -0.04285828769207001, + 0.03157927095890045, + -0.018144255504012108, + -0.03566468507051468, + 0.017696401104331017, + -0.1211676299571991, + 0.06651831418275833, + -0.028725869953632355, + 0.03908716142177582, + -0.025371583178639412, + -0.09087547659873962, + -0.0053370059467852116, + 0.02943328209221363, + -0.01232836302369833, + 0.014140562154352665, + -0.132462739944458, + -0.033594027161598206, + -0.06298306584358215, + -0.06971868872642517, + 0.031759992241859436, + -0.05055658891797066, + 0.021255437284708023, + 0.0643974095582962, + 0.03741704672574997, + -0.042381517589092255, + 0.02649880386888981, + 0.051538918167352676, + 0.015897568315267563, + -0.019174762070178986, + 0.019789621233940125, + -0.007294032722711563, + 0.007013384252786636, + 0.017515065148472786, + 0.012397066690027714, + -0.013402302749454975, + -0.003292143577709794, + -0.01289359200745821, + -0.050516702234745026, + -0.020899120718240738, + 0.010199878364801407, + 0.05411527678370476, + -0.09620281308889389, + -0.012224164791405201, + 0.03825684264302254, + 0.007434458006173372, + -0.04569121077656746, + 0.058289967477321625, + -0.00944305770099163, + -0.022278716787695885, + -0.14312857389450073, + 0.05767802894115448, + 0.06804315745830536, + 0.012865548022091389, + 0.05509680509567261, + 0.038668449968099594, + -0.004185462836176157, + -0.07543434947729111, + 0.05272316187620163, + -0.04099275916814804, + 0.11773957312107086, + 0.049500640481710434, + -0.029802843928337097, + 
-0.03579108417034149, + -0.006666944362223148, + -0.0681883841753006, + 0.05225653946399689, + 0.021549444645643234, + -0.012632218189537525, + 0.02295316383242607, + -0.007066467311233282, + 0.03605067729949951, + 0.02144845947623253, + 0.046653252094984055, + 0.022743625566363335, + -0.005440773908048868, + -0.086735300719738, + -0.008057722821831703, + -0.03225211426615715, + 0.05366796255111694, + 0.006222546566277742, + 0.0432293564081192, + 0.05440787971019745, + -0.05515147000551224, + -0.011478396132588387, + 0.022528432309627533, + -0.04767437279224396, + -0.042326103895902634, + -0.010052939876914024, + 0.015939511358737946, + -0.00036999586154706776, + -0.023475253954529762, + 0.055372823029756546, + -0.0001733503449941054, + -0.005122533068060875, + -0.0006684677209705114, + 0.06594069302082062, + 0.006065706256777048, + 0.029512591660022736, + 0.05839845910668373, + -0.03526012971997261, + 0.025412848219275475, + -0.02851499617099762, + -0.05593691021203995, + 0.0002525225281715393, + -0.0645996704697609, + -0.01837906241416931, + 0.013739209622144699, + 0.08159346878528595, + 0.07416143268346786, + -0.019867463037371635, + 0.021767443045973778, + 0.050454869866371155, + -0.05333651974797249, + -0.06338602304458618, + -0.004230514168739319, + 0.028763094916939735, + 0.09021307528018951, + 0.062178753316402435, + -0.05397014319896698, + 0.019923459738492966, + -0.02907441556453705, + -0.05785417929291725, + -0.009038555435836315, + 0.0461433045566082, + -0.012566862627863884, + -0.06420175731182098, + 0.03078165091574192, + -0.04840996488928795, + -0.001747101778164506, + 0.03705829381942749, + -0.0348169319331646, + -0.019208354875445366, + -0.01984819397330284 + ], + "dobrego": [ + -0.05503441020846367, + 0.050614506006240845, + 0.014939900487661362, + 0.022841446101665497, + -0.035710658878088, + -0.00022126563999336213, + -0.055314503610134125, + -0.011580642312765121, + 0.09758414328098297, + -0.0014309933176264167, + -0.00970902107656002, + 
0.009510613046586514, + -0.060983806848526, + -0.049659788608551025, + -0.00969758816063404, + 0.001697653322480619, + 0.06200318783521652, + 0.014445019885897636, + -0.007899831049144268, + 0.015542875975370407, + 0.056988172233104706, + -0.01890401542186737, + 0.016180939972400665, + -0.03481048345565796, + -0.07366231083869934, + -0.020606830716133118, + -0.07077424228191376, + 0.02347145974636078, + 0.04351171478629112, + 0.04190950095653534, + 0.08247499167919159, + 0.07471105456352234, + 0.016334928572177887, + -0.047066256403923035, + -0.037381626665592194, + -0.007403416093438864, + 0.07530312985181808, + -0.1086779460310936, + 0.03151707351207733, + -0.007788960821926594, + -0.013699382543563843, + -0.026787307113409042, + 0.003590720472857356, + -0.029651783406734467, + -0.029520414769649506, + 0.09409046173095703, + 0.010464581660926342, + -0.0015392971690744162, + 0.011096972040832043, + 0.0061229499988257885, + -0.03382788598537445, + -0.01570059172809124, + -0.0022417332511395216, + 0.005053428001701832, + 0.006914355792105198, + -0.005147556774318218, + -0.0021074186079204082, + -0.1275118887424469, + -0.0010959183564409614, + -0.05029448866844177, + 0.003498578444123268, + -0.03031982108950615, + -0.1730177402496338, + 0.058089450001716614, + 0.02865409106016159, + 0.014864973723888397, + -0.07246039807796478, + -0.02818896993994713, + -0.008460788056254387, + 0.03579428791999817, + 0.0058321887627244, + 0.0035161348059773445, + -0.007880325429141521, + 0.009851514361798763, + -0.0072380900382995605, + 0.0053444793447852135, + 0.03675371780991554, + -0.007921168580651283, + -0.036690182983875275, + 0.006854891311377287, + -0.058318451046943665, + 0.0952877625823021, + 0.09722201526165009, + 0.04418746754527092, + -0.011816742829978466, + -0.07397593557834625, + 0.021046597510576248, + 0.028026368468999863, + -0.018990855664014816, + 0.03698477894067764, + 0.013694602064788342, + 0.023896997794508934, + -0.034639257937669754, + -0.016944263130426407, 
+ 0.03237055242061615, + 0.041516393423080444, + 0.03431608900427818, + 0.016778334975242615, + 0.02164391800761223, + -0.016220778226852417, + 0.005468351300805807, + 0.07054583728313446, + 0.033825766295194626, + 0.012653696350753307, + 0.009620989672839642, + -0.013323506340384483, + 0.0011839298531413078, + 0.0032863817177712917, + -0.026157012209296227, + 0.005652490071952343, + 0.00202672160230577, + 0.019793830811977386, + -0.047925181686878204, + 0.023398518562316895, + -0.006825267802923918, + -0.010128451511263847, + 0.0003894492983818054, + -0.008031472563743591, + 0.0025933757424354553, + -0.01935240998864174, + -0.003638872876763344, + 0.03990951180458069, + -0.04441092163324356, + 0.024207061156630516, + 0.07817339897155762, + 0.05504067242145538, + 0.08335605263710022, + 0.011402687057852745, + -0.015040148049592972, + -0.05686502158641815, + 0.005943832453340292, + -0.0012180309277027845, + -0.02516915462911129, + -0.01568392664194107, + -0.08357439935207367, + 0.02690807729959488, + -0.04646637290716171, + -0.003297194605693221, + -0.05312460660934448, + 0.023275455459952354, + 0.008113538846373558, + -0.00679142028093338, + -0.07799074798822403, + -0.13153618574142456, + -0.02928849682211876, + -0.007434172090142965, + 0.01668117195367813, + -0.00548437237739563, + -0.14418922364711761, + -0.024840187281370163, + 0.05023394152522087, + -0.047024182975292206, + -0.017895188182592392, + -0.016912518069148064, + 0.11875057220458984, + -0.059332262724637985, + 0.013615688309073448, + -0.0350460410118103, + -0.033755674958229065, + -0.010012625716626644, + 0.00852448120713234, + -0.09327387064695358, + -0.09281349182128906, + 0.002719936426728964, + 0.023487087339162827, + -0.009898770600557327, + -0.005996163003146648, + 0.07511038333177567, + 0.023885391652584076, + -0.008119173347949982, + 0.015617724508047104, + -0.019267044961452484, + -0.14018207788467407, + 0.03772170469164848, + 0.008187171071767807, + 0.028302377089858055, + 
-0.07401987165212631, + -0.13756874203681946, + -0.012730449438095093, + 0.007814953103661537, + -0.008676527999341488, + -0.007358209695667028, + -0.14923608303070068, + 0.022795915603637695, + -0.038868680596351624, + -0.007667593192309141, + -0.009204352274537086, + -0.014817837625741959, + 0.03745538741350174, + 0.014328669756650925, + -0.011396514251828194, + 0.007435772102326155, + 0.03275608643889427, + 0.051575761288404465, + 0.039839815348386765, + -0.03059149906039238, + 0.03545346111059189, + -0.00953246932476759, + 0.014869826845824718, + 0.039299629628658295, + 0.005853123962879181, + 0.039247822016477585, + -0.00855233334004879, + -0.04278772324323654, + -0.03592199087142944, + 0.016183575615286827, + 0.037463702261447906, + -0.021429400891065598, + -0.008685288950800896, + 0.01981208845973015, + -0.04635133594274521, + -0.01600891351699829, + -0.023495368659496307, + -0.021313704550266266, + -0.007663375698029995, + 0.001660804613493383, + -0.07794070988893509, + -0.03147993981838226, + 0.081428162753582, + 0.022639550268650055, + -0.021628186106681824, + 0.018755175173282623, + 0.05396069586277008, + -0.015229545533657074, + 0.093647301197052, + 0.018851300701498985, + 0.07163792848587036, + 0.04553893208503723, + -0.012650231830775738, + 0.008197706192731857, + -0.036312516778707504, + -0.05427968502044678, + -0.07333257794380188, + -0.03381696715950966, + -0.024884497746825218, + -0.010352661833167076, + -0.03323546424508095, + -0.03927123174071312, + 0.04288269206881523, + -0.025867480784654617, + 0.0055990070104599, + -0.012100070714950562, + -0.06686313450336456, + -0.012311631813645363, + -0.011405255645513535, + 0.0791744664311409, + -0.025842085480690002, + 0.031465329229831696, + 0.0641825944185257, + -0.03286121040582657, + -0.03579680621623993, + -0.007758041378110647, + 0.05622842162847519, + -0.003918825648725033, + 0.08444945514202118, + 0.024446159601211548, + 0.031444065272808075, + -0.05884227901697159, + 0.061347395181655884, + 
0.016597749665379524, + -8.372985757887363e-05, + -0.061184417456388474, + 0.02964228205382824, + 0.011809336952865124, + 0.01711207628250122, + -0.06209190934896469, + -0.05188027769327164, + -0.03551027923822403, + 0.02497977390885353, + 0.01093998458236456, + 0.04215599223971367, + -0.07007014006376266, + -0.007190425880253315, + -0.0012528281658887863, + 0.0788157731294632, + -0.001301859854720533, + -0.01195349358022213, + -0.06861717253923416, + -0.008431742899119854, + 0.003524206578731537, + -0.014077512547373772, + 0.012319128029048443, + -0.005923453718423843, + -0.009170498698949814, + 0.011832969263195992, + 0.007556426804512739, + 0.026151878759264946, + -0.005054730921983719, + -0.02938242256641388, + 0.007560659199953079, + 0.0022403658367693424, + 0.01216946728527546, + 0.010623137466609478, + 0.049052171409130096, + -0.0782318189740181, + -0.07438185811042786, + -0.005524127744138241, + 0.027553638443350792, + -0.012044156901538372, + 0.00031448499066755176 + ], + "psa": [ + 0.057362064719200134, + -0.0021233586594462395, + 0.0852590873837471, + -0.00017283856868743896, + -0.07565198838710785, + -0.08121095597743988, + -0.03883061558008194, + -0.24827690422534943, + -0.04554160684347153, + -0.06741677224636078, + 0.01622174307703972, + 0.04753381758928299, + -0.034107424318790436, + -0.1321285367012024, + 0.2126137912273407, + 0.004417058080434799, + 0.24809572100639343, + -0.013338938355445862, + 0.02788003347814083, + -0.022250644862651825, + 0.05647706240415573, + -0.04903966560959816, + 0.08530472218990326, + 0.057897310703992844, + -0.05830305069684982, + 0.07067838311195374, + 0.012787431478500366, + 0.03585117682814598, + -0.029449574649333954, + -0.10926508903503418, + 0.14043037593364716, + -0.08161585032939911, + 0.06856999546289444, + 0.046775929629802704, + 0.00400913879275322, + -0.123019739985466, + -0.10679252445697784, + -0.3075258135795593, + -0.027423249557614326, + 0.11304585635662079, + 0.004162512719631195, + 
-0.05730903893709183, + 0.028299182653427124, + -0.2490377575159073, + -0.10130404680967331, + 0.035720933228731155, + 0.1140424832701683, + -0.12318414449691772, + 0.07150633633136749, + 0.07565151154994965, + 0.3346860408782959, + -0.41780269145965576, + 0.1797449290752411, + -0.19060444831848145, + -0.03451978415250778, + 0.05773979797959328, + -0.013813059777021408, + 0.06421604752540588, + -0.022001102566719055, + -0.07702115178108215, + 0.039187997579574585, + -0.1147729754447937, + -0.18180298805236816, + 0.030310610309243202, + 0.2371119111776352, + 0.06150751933455467, + 0.06664823740720749, + -0.11622954905033112, + -0.2445794939994812, + 0.1371057778596878, + 0.1067652553319931, + -0.22976189851760864, + -0.07021855562925339, + 0.07665050774812698, + 0.12312096357345581, + -0.15748558938503265, + -0.0540604330599308, + -0.3322606384754181, + 0.025297746062278748, + -0.05489290505647659, + -0.31037935614585876, + 0.052722714841365814, + 0.3149702548980713, + -0.013982502743601799, + -0.19472122192382812, + -0.4911949932575226, + -0.12228991091251373, + -0.09965498745441437, + -0.007575744763016701, + 0.22067439556121826, + -0.18881841003894806, + 0.14516963064670563, + -0.09653589129447937, + -0.07203446328639984, + 0.10860505700111389, + 0.06878969073295593, + -0.08648698031902313, + 0.0617685467004776, + 0.0033463016152381897, + -0.007826440036296844, + 0.03804641589522362, + -0.10928153991699219, + -0.012081462889909744, + 0.050878673791885376, + -0.01474258117377758, + -0.003034410998225212, + 0.0524611696600914, + -0.12404404580593109, + 0.07047775387763977, + -0.16520074009895325, + 0.09477685391902924, + 0.09919562190771103, + 0.17180292308330536, + 0.04514208436012268, + -0.022381702437996864, + 0.016167882829904556, + -0.1328677535057068, + -0.028757061809301376, + -0.12459208071231842, + 0.16779808700084686, + -0.0913533866405487, + -0.04869356378912926, + -0.08780577033758163, + 0.011766023933887482, + 0.1935703605413437, + -0.1136653944849968, 
+ 0.062263745814561844, + -0.028202399611473083, + 0.02112056314945221, + -0.12952084839344025, + 0.18272802233695984, + -0.14360137283802032, + -0.13668759167194366, + 0.04907786846160889, + -0.0533972904086113, + -0.05436975508928299, + 0.0009280666708946228, + -0.08174237608909607, + -0.0355672761797905, + 0.004176635295152664, + 0.07150948792695999, + 0.22621557116508484, + 0.007818220183253288, + -0.35068655014038086, + 0.0303746797144413, + -0.10332699120044708, + -0.021822044625878334, + -0.06092840060591698, + -0.020471841096878052, + -0.21362267434597015, + -0.08224484324455261, + 0.03248654678463936, + -0.134386345744133, + 0.09233617782592773, + 0.1559479683637619, + -0.25665464997291565, + -0.05508199706673622, + -0.02319049835205078, + 0.024425826966762543, + -0.006014466285705566, + 0.16134054958820343, + 0.11818048357963562, + -0.06406328082084656, + 0.06898031383752823, + 0.12925899028778076, + -0.06003619357943535, + -0.1294277012348175, + -0.004379419609904289, + 0.11942613869905472, + 0.1586723029613495, + 0.11098378896713257, + 0.19619882106781006, + -0.21351541578769684, + 0.06364531069993973, + -0.18440209329128265, + -0.0919143483042717, + -0.01907193847000599, + -0.2686134874820709, + -0.22919818758964539, + 0.1355423927307129, + 0.1894533634185791, + -0.07859388738870621, + -0.285798579454422, + 0.10207819193601608, + 0.047448981553316116, + -0.10605946183204651, + -0.1191759929060936, + -0.04839218407869339, + 0.029088757932186127, + 0.3339240550994873, + 0.04115835577249527, + -0.2934126853942871, + 0.17244583368301392, + 0.06257472932338715, + 0.027948465198278427, + 0.0893276184797287, + 0.061908647418022156, + 0.2384718954563141, + -0.04881766065955162, + 0.07925871759653091, + 0.058146096765995026, + -0.19848506152629852, + -0.11947030574083328, + -0.025473620742559433, + -0.07998187839984894, + 0.07768119126558304, + 0.1560533046722412, + -0.11095115542411804, + 0.016813859343528748, + 0.06978719681501389, + -0.007182464003562927, + 
-0.21029652655124664, + 0.19573143124580383, + -0.07578675448894501, + -0.0630551353096962, + 0.07033172249794006, + -0.09898793697357178, + -0.20700423419475555, + 0.24050123989582062, + 0.019243769347667694, + -0.012027114629745483, + 0.14534005522727966, + -0.05904839187860489, + -0.10008929669857025, + 0.1328340619802475, + -0.005067941267043352, + 0.30113252997398376, + 0.14709892868995667, + 0.10914036631584167, + -0.2452794313430786, + 0.1597498059272766, + -0.19580554962158203, + 0.05337795615196228, + 0.009804232977330685, + -0.0010682251304388046, + -0.14123985171318054, + -0.14881402254104614, + -0.061318617314100266, + -0.01960357278585434, + -0.09682243317365646, + 0.001654595136642456, + -0.05271979421377182, + -0.04621410369873047, + -0.16775497794151306, + -0.041242245584726334, + 0.09836067259311676, + -0.21611469984054565, + 0.20738446712493896, + -0.11346103996038437, + -0.08157006651163101, + -0.13330580294132233, + 0.08540618419647217, + 0.0015641041100025177, + 0.027379745617508888, + 0.0014384165406227112, + -0.03790559247136116, + 0.025344736874103546, + -0.13137786090373993, + 0.11160910874605179, + -0.053189780563116074, + 0.11196362972259521, + 0.015298347920179367, + 0.052557751536369324, + 0.01666770875453949, + 0.05463384836912155, + -0.1502702683210373, + 0.06611311435699463, + -0.0667155385017395, + 0.14284445345401764, + 0.08641599118709564, + -0.19731296598911285, + -0.08339843899011612, + 0.09820728003978729, + 0.03302033245563507, + 0.08921126276254654, + 0.03478021174669266, + -0.15989071130752563, + -0.013648039661347866, + -0.16263335943222046, + -0.0749879777431488, + -0.3541197180747986, + 0.12606799602508545, + 0.2353769838809967, + -0.014140097424387932, + -0.09331642091274261, + 0.2911835312843323, + 0.07249271869659424, + 0.019088830798864365, + -0.22341814637184143, + 0.06633023172616959, + -0.0937628448009491, + 0.038561124354600906, + 0.07998501509428024, + 0.03847867250442505, + 0.23800534009933472, + 
-0.02517944574356079, + -0.05979456380009651, + 0.14871375262737274, + -0.11308622360229492, + -0.08662764728069305 + ], + "wielkiego": [ + -0.007335918955504894, + 0.045563600957393646, + -0.009011227637529373, + -0.006715674884617329, + -0.026933681219816208, + 0.012662549503147602, + 0.012494136579334736, + 0.008883165195584297, + 0.012246189638972282, + -0.03626121208071709, + 0.007100577466189861, + -0.005960305221378803, + -0.07582269608974457, + -0.07219238579273224, + 0.03428555279970169, + -0.010050034150481224, + 0.04782329127192497, + -0.00587445218116045, + 0.01832985319197178, + 0.0006436360999941826, + -0.007253450341522694, + -0.05597827956080437, + -0.004176432266831398, + 0.00509627815335989, + -0.02963927760720253, + -0.007906301878392696, + -0.07323390990495682, + 0.010382292792201042, + 0.021376198157668114, + -0.015944762155413628, + 0.08404891192913055, + 0.04819642752408981, + -0.01974131353199482, + -0.017839740961790085, + -0.021567292511463165, + -0.011944983154535294, + -0.03820865973830223, + -0.06094135344028473, + 0.015175022184848785, + 0.0021336509380489588, + -0.011225801892578602, + -0.011345837265253067, + 0.022221948951482773, + -0.03339093178510666, + 0.010368917137384415, + 0.08850111067295074, + -0.028319694101810455, + -0.056911665946245193, + 0.013130288571119308, + -0.057576969265937805, + -0.03817208856344223, + -0.02077348157763481, + -0.002910812385380268, + 0.01794436387717724, + 0.0090854587033391, + 0.026645468547940254, + 0.020759597420692444, + -0.0730491429567337, + -0.004025932401418686, + -0.008608357980847359, + 0.006705998908728361, + -0.062277913093566895, + -0.09697772562503815, + -0.0045755901373922825, + 0.022000394761562347, + 0.04762493073940277, + -0.0174410417675972, + -0.01452673226594925, + -0.03763464838266373, + -0.007314097136259079, + -0.02416522428393364, + -0.009630979038774967, + -0.008832407183945179, + -0.009594069793820381, + 0.0010206708684563637, + -0.001869378611445427, + 
0.0013620385434478521, + 0.019671745598316193, + -0.0298677496612072, + 0.037589557468891144, + 0.006166642531752586, + -0.01747388020157814, + 0.07862847298383713, + -0.004796740598976612, + -0.001299227587878704, + -0.06612580269575119, + 0.010320048779249191, + -0.057449087500572205, + 0.017147401347756386, + 0.018646012991666794, + 0.027461230754852295, + 0.029924925416707993, + -0.04038987308740616, + 0.019173771142959595, + -0.019169077277183533, + 0.01183535810559988, + 0.004596712999045849, + 0.013401293195784092, + 0.015031768009066582, + -0.006155865266919136, + 0.0016141459345817566, + 0.0016486234962940216, + -0.0016265441663563251, + 0.011905558407306671, + -0.024959390982985497, + -0.002869216725230217, + 0.003961748443543911, + 0.005381135735660791, + -0.010750019922852516, + -0.0092091616243124, + 0.0033775819465517998, + -0.009264200925827026, + 0.002375534502789378, + 0.014500413089990616, + 0.017437733709812164, + -0.016412558034062386, + -0.02643391117453575, + -0.010554300621151924, + -0.01729186624288559, + -0.011880900710821152, + -0.02654194086790085, + 0.08797746151685715, + -0.004345945548266172, + 0.018173690885305405, + 0.05139186978340149, + -0.021036317571997643, + 0.0019254737999290228, + 0.03657185286283493, + 0.05179467052221298, + -0.03216397762298584, + 0.02832106500864029, + -0.00732059171423316, + 0.001953238621354103, + 0.0552280955016613, + -0.0816313698887825, + -0.04635390639305115, + -0.0010044174268841743, + -0.01145216915756464, + -0.024835683405399323, + 0.004363479092717171, + -0.023021824657917023, + -0.028819985687732697, + -0.012252752669155598, + -0.05512222647666931, + 0.0018695993348956108, + -0.003732760902494192, + -0.0033529000356793404, + 0.0055102757178246975, + -0.0353131964802742, + 0.009309625253081322, + 0.017049534246325493, + -0.01661543920636177, + -0.037630483508110046, + 0.04413406550884247, + 0.10759931057691574, + 0.006216149777173996, + 0.01743883453309536, + -0.0321224182844162, + 
-0.04338386654853821, + -0.010352222248911858, + 0.019557559862732887, + -0.03616500273346901, + -0.031152736395597458, + -0.03747161850333214, + 0.03515058010816574, + 0.021793408319354057, + -0.006729810032993555, + -0.005254229065030813, + 0.015773463994264603, + 0.023700086399912834, + -0.0074120257049798965, + -0.02505037561058998, + -0.10634505748748779, + 0.044078417122364044, + 0.017648646607995033, + 0.024428721517324448, + -0.03220108523964882, + -0.10613435506820679, + 0.009063808247447014, + -0.004706214182078838, + 0.015496071428060532, + 0.007620756048709154, + -0.10322338342666626, + 0.004127911292016506, + -0.042836785316467285, + -0.005709556862711906, + 0.014780810102820396, + -0.013590950518846512, + 0.0009020622237585485, + -0.011628083884716034, + 0.0026952442713081837, + 0.02772394008934498, + -0.013394390232861042, + 0.08837705105543137, + 0.034877389669418335, + 0.00796973891556263, + 0.0036066118627786636, + -0.007537400349974632, + 0.009657973423600197, + 0.015483388677239418, + 0.009112080559134483, + 0.016993626952171326, + -0.009587174281477928, + -0.01089375652372837, + 0.020153310149908066, + -0.03212655708193779, + -0.009104751981794834, + -0.017992831766605377, + 0.027265973389148712, + 0.016177978366613388, + -0.00933571346104145, + -0.001989271491765976, + -0.011062455363571644, + 0.02645493671298027, + -0.02389197051525116, + -0.014967194758355618, + 0.04017965495586395, + 0.013360623270273209, + 0.025009073317050934, + 0.03212787210941315, + -0.018112191930413246, + 0.029857879504561424, + 0.0026419730857014656, + 0.0054023731499910355, + 0.038368940353393555, + -0.006983892060816288, + 0.11624155193567276, + 0.04358835518360138, + 0.01519742701202631, + 0.0025886106304824352, + -0.009856360964477062, + -0.034163057804107666, + -0.02339169755578041, + 0.007131385151296854, + 0.012214010581374168, + 0.009165623225271702, + 0.01322084292769432, + -0.001932912040501833, + -0.03742665797472, + -0.0019902572967112064, + 
-0.03884515166282654, + -0.0014395690523087978, + -0.08395906537771225, + -0.000144916120916605, + 0.011286070570349693, + 0.02790246345102787, + 0.02004914917051792, + 0.027476679533720016, + -0.014214974828064442, + -0.055873312056064606, + -0.033963315188884735, + 0.0310048870742321, + 0.026784010231494904, + -0.01679147407412529, + -0.01151774637401104, + 0.037868570536375046, + 0.00873725488781929, + 0.0177710372954607, + 0.025565484538674355, + 0.01120650302618742, + -0.01566700078547001, + 0.010263899341225624, + 0.029856469482183456, + -0.04190037399530411, + 0.03027251549065113, + -0.016120538115501404, + 0.011049199849367142, + -0.007124597672373056, + -0.021132146939635277, + 0.004725765436887741, + -0.006855126470327377, + 0.016022494062781334, + -0.0004426361992955208, + 0.009785481728613377, + 0.03574788570404053, + -0.0002664937637746334, + -0.024519827216863632, + -0.017815103754401207, + 0.0001179906539618969, + 0.001410734374076128, + -0.060682058334350586, + -0.030002214014530182, + 0.030191028490662575, + 0.003258659504354, + 0.03131566196680069, + -0.010314303450286388, + 0.01821562834084034, + 0.0028082067146897316, + -0.02450772374868393, + 0.014137959107756615, + -0.015743648633360863, + -0.0011878833174705505, + 0.027442604303359985, + 0.062496740370988846, + -0.0329519584774971, + -0.022016093134880066, + 0.0022447926457971334, + 0.010238151997327805, + 0.011039968580007553, + -0.016544777899980545 + ], + "rasowego": [ + -0.021374976262450218, + 0.022061966359615326, + -0.05104579031467438, + 0.02448401413857937, + -0.08397653698921204, + -0.04359633848071098, + -0.04844851791858673, + -0.014722022227942944, + -0.014786135405302048, + -0.0075484081171453, + -0.024928996339440346, + 0.020970119163393974, + -0.07415851205587387, + -0.05593620985746384, + -0.02552773244678974, + -0.017214473336935043, + 0.005021178629249334, + -0.019948236644268036, + -0.030592218041419983, + -0.0011016674106940627, + -0.05149977654218674, + 
-0.07006055116653442, + -0.02875533327460289, + -0.005115842446684837, + -0.00615012226626277, + -0.022595401853322983, + -0.009116356261074543, + 0.0655575841665268, + 0.02221864089369774, + 0.007018675096333027, + 0.012067667208611965, + 0.06786805391311646, + 0.052019406110048294, + -0.05035649240016937, + 0.03981306031346321, + 0.00491845328360796, + -0.023388657718896866, + -0.14638745784759521, + 0.007331090047955513, + 0.018253570422530174, + 0.0010674468940123916, + -0.10486278682947159, + -0.016898659989237785, + -0.0384083166718483, + -0.015604713000357151, + 0.0340690091252327, + 0.06729978322982788, + -0.001415115431882441, + 0.012272392399609089, + 0.007575922645628452, + 0.02495703287422657, + -0.07444107532501221, + 0.04819320887327194, + -0.024856077507138252, + -0.020392099395394325, + -0.011424148455262184, + 0.006205352023243904, + -0.03662575036287308, + 0.0034616547636687756, + 0.005699946079403162, + 0.03262940049171448, + -0.011213726364076138, + -0.06626541912555695, + -0.005661826115101576, + 0.0671444833278656, + -0.031498223543167114, + 0.07749772071838379, + -0.05587504804134369, + -0.014321302995085716, + -0.05381925776600838, + 0.016615932807326317, + -0.00021696757175959647, + 0.006970888003706932, + 0.029460981488227844, + 0.01869632676243782, + -0.0015935662668198347, + 0.052388809621334076, + 0.0024564608465880156, + -0.0073577179573476315, + 0.03273550420999527, + -0.025524629279971123, + 0.04064859822392464, + 0.09277748316526413, + -0.015083663165569305, + -0.04081587493419647, + -0.08818261325359344, + -0.04710976779460907, + 0.03098716214299202, + -0.00924242939800024, + 0.017826063558459282, + -0.011905795894563198, + 0.03991704434156418, + -0.06569720804691315, + -0.05167306214570999, + 0.08313693106174469, + 0.09779570996761322, + 0.022606493905186653, + 0.042619917541742325, + 0.039925672113895416, + -0.001008810824714601, + 0.017232539132237434, + -0.03307091444730759, + -0.046558354049921036, + -0.009505311958491802, + 
-0.03368028625845909, + -0.04864661023020744, + -0.027219058945775032, + 0.024790547788143158, + 0.016621660441160202, + 0.024113919585943222, + -0.005963386036455631, + 0.01810196414589882, + 0.054781120270490646, + 0.03975182771682739, + 0.03511524945497513, + 0.0995873436331749, + -0.022380894050002098, + -0.0230866726487875, + 0.018141157925128937, + 0.02894613891839981, + -0.04706616327166557, + -8.305055962409824e-05, + 0.017096150666475296, + 0.0054153925739228725, + 0.05484890937805176, + 0.01858840510249138, + 0.04709519073367119, + 0.023030517622828484, + 0.002663001650944352, + -0.00626006256788969, + 0.04791156202554703, + -0.0452740304172039, + -0.01642681285738945, + 0.0029099690727889538, + -0.13237300515174866, + -0.03394050896167755, + -0.09977129846811295, + 0.06643113493919373, + -0.05048809200525284, + 0.005331167485564947, + -0.039210133254528046, + 0.019543815404176712, + 0.011891878210008144, + -0.10855355113744736, + -0.024832867085933685, + 0.018708225339651108, + 0.0241832472383976, + 0.02966644987463951, + -0.03818413242697716, + -0.06066809594631195, + -0.06038204953074455, + 0.019552376121282578, + 0.03553164377808571, + 0.008572856895625591, + 0.09808126837015152, + -0.060862522572278976, + 0.02406022883951664, + -0.02335485629737377, + -0.03715915605425835, + 0.02603251300752163, + -0.017873579636216164, + -0.03878195583820343, + 0.03942133113741875, + 0.01384984515607357, + 0.029413016512989998, + 0.03399429842829704, + -0.04369866102933884, + 0.0016028174431994557, + 0.010400917381048203, + 0.0596524178981781, + 0.04103970527648926, + 0.02264336682856083, + -0.05215459689497948, + -0.0068299793638288975, + 0.008646439760923386, + -0.06566585600376129, + -0.016624554991722107, + -0.10221347212791443, + 0.036093540489673615, + -0.0018918951973319054, + 0.062389664351940155, + -0.06332951784133911, + -0.126982182264328, + 0.013618029654026031, + 0.032118864357471466, + -0.020803697407245636, + 0.01726769097149372, + 
-0.015036938712000847, + 0.005806658882647753, + 0.13138039410114288, + -0.024694399908185005, + -0.020180627703666687, + 0.02100687474012375, + 0.07018229365348816, + -0.0117644676938653, + -0.015375608578324318, + 0.004260370973497629, + 0.025789979845285416, + 0.04272473603487015, + -0.008279502391815186, + 0.05301598832011223, + -0.00556846521794796, + -0.0681459829211235, + 0.021686162799596786, + 0.008348362520337105, + -0.009950746782124043, + 0.07436788082122803, + 0.039569661021232605, + -0.00981104839593172, + 0.018843822181224823, + -0.0018322996329516172, + -0.0871618464589119, + -0.029130319133400917, + -0.05660361051559448, + -0.03280451148748398, + 0.009062877856194973, + -0.0653960108757019, + -0.04036419838666916, + 0.10359936207532883, + 0.06099063530564308, + 0.014591563493013382, + 0.10982383042573929, + -0.014729869551956654, + 0.011956637725234032, + 0.06778937578201294, + -0.03720470145344734, + 0.11712870746850967, + 0.10046470910310745, + 0.0196642205119133, + -0.004155087284743786, + -0.045008398592472076, + -0.05250081792473793, + -0.01716792583465576, + -0.002933334093540907, + -0.00195779325440526, + -0.03827226907014847, + -0.014527697116136551, + -0.0639154240489006, + 0.028759459033608437, + -0.028576435521245003, + 0.08100293576717377, + -0.04702320322394371, + -0.016875263303518295, + -0.024568067863583565, + -0.016138112172484398, + 0.04338507354259491, + -0.01013384759426117, + 0.01467440277338028, + 0.013792979530990124, + -0.02351970039308071, + -0.09970521181821823, + 0.04032248258590698, + 0.008005659095942974, + -0.017492681741714478, + 0.010736191645264626, + 0.0822356790304184, + -0.039843007922172546, + -0.04999406635761261, + 0.04765477403998375, + -0.043019380420446396, + -0.017399491742253304, + -0.0477265864610672, + 0.008344064466655254, + -0.027777623385190964, + 0.03964628279209137, + -0.051099471747875214, + -0.005542307160794735, + 0.031032240018248558, + 0.015952764078974724, + 0.02505410648882389, + 
-0.01774345338344574, + 0.030676454305648804, + -0.015608660876750946, + 0.010213768109679222, + 0.014368381351232529, + 0.025099189952015877, + -0.0523533970117569, + -0.013378938660025597, + 0.03179111331701279, + 0.009646384045481682, + -0.11224357783794403, + 0.054288893938064575, + -0.015104515478014946, + -0.045272890478372574, + -0.016325663775205612, + 0.02102903090417385, + -0.0095363212749362, + 0.049041442573070526, + -0.01453242264688015, + 0.05265913903713226, + -0.0029614169616252184, + -0.01707213744521141, + 0.038969986140728, + 0.06106139346957207, + -0.015846004709601402, + -0.01887490227818489, + 0.031026743352413177, + 0.03736288845539093, + -0.0058111087419092655, + 0.014338910579681396 + ], + "ala": [ + -0.08510930836200714, + 0.08026311546564102, + 0.005172867327928543, + -0.23018845915794373, + -0.1327226310968399, + 0.0936470776796341, + -0.06244175136089325, + -0.061952557414770126, + 0.05288635939359665, + 0.24452939629554749, + -0.08917640149593353, + -0.027700629085302353, + -0.05412629246711731, + 0.09416685998439789, + 0.05794994905591011, + -0.07576432079076767, + 0.06472751498222351, + -0.07342499494552612, + 0.09097124636173248, + -0.060087792575359344, + -0.003248483408242464, + 0.10523930191993713, + 0.008373997174203396, + -0.017618943005800247, + 0.07374531030654907, + 0.024025272578001022, + -0.018744762986898422, + 0.02191735990345478, + -0.09690085053443909, + -0.03857237845659256, + -0.03770364448428154, + 0.2185291349887848, + 0.006812140345573425, + 0.03166177496314049, + -0.04394921660423279, + -0.13524998724460602, + -0.006302122492343187, + -0.09569491446018219, + 0.06988120079040527, + 0.10575003921985626, + 0.11440394818782806, + -0.062137097120285034, + -0.045692794024944305, + -0.12013816088438034, + -0.041717927902936935, + -0.11439419537782669, + -0.02021404169499874, + -0.02632732130587101, + 0.04471046105027199, + -0.10949230194091797, + -0.012956470251083374, + 0.029622625559568405, + -0.1134611964225769, + 
0.11538143455982208, + 0.2252194583415985, + -0.12420423328876495, + 0.062214866280555725, + -0.045071493834257126, + -0.3804093897342682, + -0.11720050871372223, + -0.006554341875016689, + -0.07014153897762299, + 0.011116482317447662, + -0.022332951426506042, + 0.1854228675365448, + -0.06904476881027222, + -0.01595909520983696, + -0.17870397865772247, + -0.014215500093996525, + -0.03437959775328636, + -0.04773184284567833, + 0.011287234723567963, + 0.052162572741508484, + -0.10293835401535034, + -0.05555473640561104, + 0.1461411565542221, + -0.06653688848018646, + -0.05456464737653732, + 0.0390891507267952, + -0.008416191674768925, + -0.037790387868881226, + -0.11679981648921967, + 0.3891960680484772, + -0.007345590740442276, + -0.19290880858898163, + 0.053135018795728683, + -0.1099163144826889, + 0.09811556339263916, + 0.01629701629281044, + 0.03220151364803314, + 0.01977045275270939, + 0.03380807489156723, + -0.1566382199525833, + -0.06573638319969177, + 0.029347993433475494, + 0.08862161636352539, + 0.004392128437757492, + -0.16510900855064392, + -0.0026791300624608994, + -0.07369282841682434, + -0.1060524582862854, + 0.10011787712574005, + -0.016776975244283676, + 0.07388751208782196, + 0.04492664337158203, + -0.11080620437860489, + 0.06120843067765236, + 0.023568084463477135, + 0.17439115047454834, + -0.18897901475429535, + 0.0991172194480896, + 0.025369100272655487, + -0.09956826269626617, + 0.010338149964809418, + 0.16792134940624237, + 0.10722075402736664, + -0.04778091609477997, + 0.004353707656264305, + -0.10673243552446365, + 0.040953852236270905, + -0.0960560292005539, + 0.09371501207351685, + 0.10394444316625595, + -0.0056099677458405495, + 0.06729613244533539, + -0.06795096397399902, + 0.17284581065177917, + 0.0033453311771154404, + 0.018823932856321335, + -0.1607680469751358, + 0.116116002202034, + -0.10165654122829437, + 0.19662612676620483, + 0.06528636068105698, + -0.11422064155340195, + -0.06504455953836441, + 0.029768574982881546, + 
-0.0484953373670578, + -0.11648310720920563, + -0.12250812351703644, + -0.01510370522737503, + 0.15246586501598358, + -0.07638084888458252, + 0.07381592690944672, + -0.05145636945962906, + 0.11254043877124786, + -0.035568609833717346, + 0.007457010447978973, + 0.09195952862501144, + 0.08615238219499588, + 0.007009610533714294, + -0.004218161106109619, + -0.01877135969698429, + 0.006081234663724899, + -0.15653260052204132, + -0.10768484324216843, + 0.24579471349716187, + -0.08761277794837952, + -0.10177511721849442, + 0.04498010873794556, + -0.09588050842285156, + -0.13153505325317383, + -0.02207024022936821, + 0.07547876238822937, + 0.032599080353975296, + -0.08592616021633148, + -0.13776612281799316, + 0.03250812739133835, + 0.26500555872917175, + 0.11078671365976334, + -0.02925412729382515, + 0.13246124982833862, + -0.22584369778633118, + -0.08997879922389984, + 0.053682196885347366, + -0.14366936683654785, + 0.2318786084651947, + -0.11107944697141647, + 0.16802093386650085, + 0.1674017310142517, + -0.08454294502735138, + -0.02038326859474182, + -0.18258585035800934, + 0.023696796968579292, + 0.07218381017446518, + 0.03743196278810501, + 0.08329722285270691, + -0.2355189323425293, + -0.05182601884007454, + 0.005523643456399441, + -0.042271655052900314, + 0.07470633089542389, + 0.016380978748202324, + 0.12326782941818237, + -0.0034765824675559998, + 0.02539176121354103, + 0.034960951656103134, + 0.07186581194400787, + -0.00383613258600235, + 0.03699350357055664, + -0.04867836833000183, + 0.09089645743370056, + -0.1530541181564331, + -0.14013057947158813, + 0.14007122814655304, + 0.14388898015022278, + 0.1655329465866089, + 0.06484488397836685, + -0.05194415524601936, + -0.14215832948684692, + 0.029214072972536087, + -0.006806789897382259, + 0.20004573464393616, + -0.07185720652341843, + 0.002342197112739086, + -0.11681094765663147, + 0.07678108662366867, + 0.20371383428573608, + -0.14282578229904175, + -0.10786132514476776, + -0.12049733847379684, + 
-0.05071673542261124, + -0.02978406846523285, + 0.039153002202510834, + 0.03860694169998169, + 0.025827309116721153, + -0.04815760254859924, + 0.1684526652097702, + -0.06348452717065811, + -0.036018870770931244, + 0.11284859478473663, + -0.18286296725273132, + -0.018906600773334503, + 0.09744884073734283, + 0.13633206486701965, + 0.07883652299642563, + 0.04424794763326645, + -0.14852368831634521, + -0.046556584537029266, + -0.14331218600273132, + 0.12250048667192459, + 0.09036845713853836, + 0.003670506179332733, + 0.06500013172626495, + 0.048821426928043365, + -0.07175587117671967, + -0.030644508078694344, + 0.05143702030181885, + 0.062334153801202774, + 0.027922067791223526, + -0.10170811414718628, + 0.02496853470802307, + -0.05066241696476936, + -0.08822708576917648, + 0.015883728861808777, + -0.013464706018567085, + -0.19596795737743378, + -0.1251690536737442, + -0.034124307334423065, + -0.09212999045848846, + -0.048308201134204865, + 0.07198561728000641, + 0.051106300204992294, + -0.08067712932825089, + 0.18825247883796692, + 0.04958396404981613, + -0.11096809804439545, + 0.2280646711587906, + -0.013503428548574448, + 0.08193613588809967, + -0.057304851710796356, + 0.08633160591125488, + -0.14459016919136047, + 0.05562886223196983, + 0.16470640897750854, + -0.23238077759742737, + -0.09258431196212769, + 0.05243774875998497, + 0.11136229336261749, + 0.09222377836704254, + -0.3185659348964691, + -0.1533215492963791, + -0.09634050726890564, + 0.14749886095523834, + -0.03518515080213547, + -0.03340547904372215, + -0.031760625541210175, + -0.007977711036801338, + -0.1727142333984375, + 0.044638026505708694, + 0.03248946741223335, + 0.03752497583627701, + 0.13379395008087158, + -0.06517000496387482, + -0.09402096271514893, + -0.01689091883599758, + 0.1753440946340561, + 0.149289071559906, + -0.03721372038125992, + -0.02596273459494114 + ], + "alana": [ + -0.008905170485377312, + 0.00797797366976738, + -0.020857801660895348, + -0.05502761900424957, + 
-0.026971876621246338, + 0.021845532581210136, + 0.0031013954430818558, + 0.04111144319176674, + 0.007476105820387602, + 0.04155835136771202, + -0.040180355310440063, + 0.04202931374311447, + 0.002554595470428467, + 0.010071192868053913, + -0.02424287237226963, + -0.05443716421723366, + 0.1433088332414627, + 0.049010977149009705, + -0.012187544256448746, + 0.01602715253829956, + 0.05048424378037453, + 0.02727050520479679, + -0.04620783403515816, + -0.0734170526266098, + 0.050709083676338196, + -0.026002367958426476, + -0.04151768237352371, + -0.0624970942735672, + -0.09327693283557892, + -0.03458327054977417, + -0.03242221102118492, + -0.01252170279622078, + 0.015669796615839005, + 0.027511216700077057, + 0.018900066614151, + 0.01364364568144083, + 0.04275044798851013, + -0.0047136880457401276, + -0.012517403811216354, + -0.01921612210571766, + 0.09476295113563538, + -0.06644897162914276, + 0.006958749145269394, + 0.041043996810913086, + 0.030241474509239197, + -0.03629227355122566, + -0.019569141790270805, + -0.06644894182682037, + 0.01731383055448532, + 0.0049531301483511925, + 0.05581922084093094, + -0.020732585340738297, + 0.020846780389547348, + -0.0034689255990087986, + -0.0023536141961812973, + 0.043540552258491516, + -0.00607583113014698, + 0.13842181861400604, + -0.08284986019134521, + 0.01645781844854355, + 0.10372365266084671, + 0.020972564816474915, + -0.014481345191597939, + 0.008332233875989914, + 0.11382568627595901, + -0.04178755730390549, + -0.00029972195625305176, + -0.10192164778709412, + -0.010269921272993088, + -0.04217970371246338, + 0.002580307424068451, + -0.014096757397055626, + 0.015790149569511414, + -0.0542927086353302, + -0.03320926055312157, + -0.01713576726615429, + -0.004578539170324802, + -0.049160730093717575, + 0.027759740129113197, + -0.087232306599617, + 0.000209444435313344, + -0.027234351262450218, + 0.11440762877464294, + -0.008088681846857071, + -0.0016661311965435743, + -0.041061677038669586, + -0.03332071006298065, + 
0.043460845947265625, + 0.024811111390590668, + -0.011206693015992641, + 0.042661525309085846, + 0.05522497370839119, + -0.15101587772369385, + 0.0034809946082532406, + 0.042527489364147186, + -0.09491441398859024, + -0.025948340073227882, + -0.028411749750375748, + -0.10226449370384216, + 0.0067985886707901955, + -0.014013301581144333, + 0.05011998862028122, + 0.02444552071392536, + -0.039902541786432266, + -4.013441503047943e-05, + -0.056684911251068115, + -0.004129653796553612, + -0.06479571759700775, + 0.040350291877985, + -0.029520612210035324, + -0.04164367541670799, + 0.000608866335824132, + 0.05645133554935455, + -0.014965702779591084, + 0.02611115388572216, + -0.05806443467736244, + 0.0641874447464943, + -0.05745403841137886, + -0.06339256465435028, + 0.029419532045722008, + 0.0014499658718705177, + 0.026731204241514206, + 0.035704415291547775, + -0.0008268561214208603, + -0.0939759686589241, + 0.0077078393660485744, + 0.1284271776676178, + 0.047485750168561935, + -0.0011111609637737274, + -0.09236142784357071, + 0.029201526194810867, + -0.06830106675624847, + 0.042804352939128876, + 0.045423250645399094, + -0.07132693380117416, + -0.0472111701965332, + -0.02110600657761097, + -0.000526216346770525, + -0.010649396106600761, + -0.0013150530867278576, + -0.05174721032381058, + 0.01884961687028408, + 0.012327498756349087, + -0.11739764362573624, + 0.009444170631468296, + 0.1098916232585907, + -0.05842617154121399, + 0.04192979261279106, + 0.07221657037734985, + 0.022518323734402657, + -0.07432927191257477, + 0.023226700723171234, + 0.020223218947649002, + 0.012946680188179016, + 0.01574738323688507, + -0.06886599957942963, + -0.0070308903232216835, + 0.025227678939700127, + 0.07279892265796661, + 0.059939946979284286, + 0.03627409785985947, + 0.029419073835015297, + 0.004540672525763512, + -0.012865041382610798, + -0.04613533988595009, + -0.032980334013700485, + -0.06465771794319153, + 0.0077165658585727215, + 0.18178483843803406, + -0.020831923931837082, + 
-0.07532940059900284, + 0.03866453468799591, + -0.07253919541835785, + -0.034813571721315384, + 0.020511168986558914, + -0.06999587267637253, + -0.031060680747032166, + -0.00856416393071413, + 0.02916804328560829, + -0.019706588238477707, + 0.09040533006191254, + -0.05908718332648277, + -0.04164492338895798, + -0.027276139706373215, + -0.004476554226130247, + -0.03351961821317673, + 0.05742602050304413, + -0.07617287337779999, + 0.016344115138053894, + -0.020810041576623917, + -0.024502446874976158, + -0.06571627408266068, + -0.010821627452969551, + -0.023639731109142303, + -0.01641128398478031, + 0.01913810521364212, + -0.048936039209365845, + -0.0062243156135082245, + -0.038358692079782486, + 0.002915005199611187, + -0.06888172030448914, + 0.11447683721780777, + -0.04947744682431221, + 0.08107712864875793, + -0.038916926831007004, + 0.047509852796792984, + -0.001883181743323803, + -0.02029532380402088, + 0.014639850705862045, + -0.02925298735499382, + 0.0033461658749729395, + -0.06095549091696739, + 0.008958661928772926, + 0.01875258982181549, + -0.026076897978782654, + 0.009152323007583618, + -0.021915778517723083, + 0.016577403992414474, + 0.07379716634750366, + -0.042609456926584244, + 0.010794570669531822, + 0.03872985765337944, + -0.024744680151343346, + -0.017851850017905235, + 0.02018165774643421, + 0.026857100427150726, + -0.009717192500829697, + 0.07921796292066574, + 0.008214155212044716, + 0.033190082758665085, + -0.05482260882854462, + -0.059472158551216125, + 0.09949895739555359, + -0.06345643103122711, + -0.01975860260426998, + -0.067245252430439, + 0.020705673843622208, + -0.030896490439772606, + 0.0023532509803771973, + -0.04635317996144295, + 0.01883975975215435, + 0.039829663932323456, + -0.011591881513595581, + 0.003323711920529604, + 0.05427715927362442, + 0.01540394313633442, + -0.017894219607114792, + -0.038341909646987915, + 0.06111633777618408, + -0.07981638610363007, + 0.003194071352481842, + -0.0013840049505233765, + 
-0.015772690996527672, + 0.011809502728283405, + -0.03507591784000397, + -0.019812079146504402, + 0.05649995803833008, + -0.01170569472014904, + -0.010453758761286736, + 0.038305509835481644, + -0.004376763012260199, + 0.0023491024039685726, + -0.007492097560316324, + -0.014653166756033897, + 0.031125254929065704, + -0.007065157406032085, + -0.017799459397792816, + -0.018134284764528275, + 0.03267719969153404, + -0.002434193156659603, + 0.03548000752925873, + -0.00011319480836391449, + -0.04457104206085205, + -0.017937686294317245, + -0.02655956894159317, + -0.04748003929853439, + -0.018020927906036377, + 0.011457541026175022, + 0.08198951929807663, + -0.01670980453491211, + -0.037620238959789276, + -0.015428856015205383, + 0.07995229214429855, + 0.01623024418950081, + -0.026009880006313324, + 0.007154380902647972, + 0.026373393833637238, + -0.027543775737285614, + -0.0497322604060173, + -0.004626482725143433, + 0.04496750980615616, + -0.0015953592956066132, + 0.02821824885904789, + -0.0577494315803051, + -0.012989328242838383, + -0.014337323606014252, + 0.050722211599349976, + -0.012495487928390503, + -0.00028792815282940865, + 0.06026756018400192 + ], + "ma": [ + -0.017319366335868835, + 0.19939786195755005, + -0.002433155430480838, + -0.08989187330007553, + -0.6585466265678406, + -0.029773512855172157, + -0.06268081814050674, + 0.059397727251052856, + -0.23301823437213898, + -0.06189410760998726, + -0.14514213800430298, + -0.12059704959392548, + 0.28610071539878845, + 0.11204845458269119, + 0.017116647213697433, + 0.19462355971336365, + -0.014472516253590584, + -0.1369037926197052, + -0.1411769837141037, + -0.07382507622241974, + 0.05684584006667137, + 0.30498403310775757, + 0.07963626086711884, + 0.04957783594727516, + -0.7672253251075745, + -0.03167421743273735, + -0.049892496317625046, + 0.12684543430805206, + -0.07745488733053207, + 0.0686381608247757, + -0.04825098440051079, + -0.062087059020996094, + -0.1282641738653183, + -0.032829031348228455, + 
-0.00793223362416029, + 0.008400922641158104, + 0.1427750289440155, + -0.09181278198957443, + 0.13264483213424683, + 0.08397654443979263, + 0.02419828064739704, + 0.27262043952941895, + -0.04234355688095093, + 0.12569856643676758, + -0.2034372240304947, + -0.05482612922787666, + -0.044704314321279526, + -0.23985600471496582, + 0.01257017720490694, + 0.05313592031598091, + -0.0772976502776146, + -0.13264617323875427, + -0.10442794859409332, + -0.03915044665336609, + 0.9156799912452698, + -0.1419679969549179, + -0.06024566665291786, + -0.1681678742170334, + 0.033169180154800415, + 0.045470189303159714, + -0.23528966307640076, + -0.16084609925746918, + 0.09364213794469833, + -0.01774413324892521, + 0.08702721446752548, + 0.23189006745815277, + 0.051224786788225174, + -0.0024700334761291742, + -0.061074648052453995, + 0.10324984043836594, + 0.0466703362762928, + 0.024757828563451767, + 0.13603328168392181, + -0.10486587136983871, + 0.1308596432209015, + -0.08184386789798737, + 0.013749654404819012, + -0.03720531985163689, + -0.005877527873963118, + 0.008133389987051487, + -0.1295010894536972, + 0.01219901442527771, + 0.9368226528167725, + 0.030340071767568588, + -0.004245404619723558, + -0.05012226477265358, + 0.01501045748591423, + 0.1663505882024765, + 0.03797484561800957, + -0.03615853562951088, + 0.030388187617063522, + 0.10608846694231033, + 0.3501804769039154, + -0.14999425411224365, + 0.19569289684295654, + 0.02038898691534996, + 0.08674237132072449, + 0.00725436769425869, + 0.03604792058467865, + -0.043699733912944794, + -0.0020810714922845364, + -0.1282309740781784, + 0.0036961385048925877, + 0.0073354230262339115, + -0.008381481282413006, + 0.08217158168554306, + 0.12295476347208023, + 0.07235073298215866, + 0.0834241732954979, + -0.08394422382116318, + -0.1825396716594696, + 0.3363122045993805, + 0.1044616624712944, + -0.14969971776008606, + -0.07193519175052643, + 0.17450383305549622, + -0.02342291921377182, + 0.03796623647212982, + -0.1543733775615692, + 
0.0026052214670926332, + 0.13685671985149384, + 0.05383211374282837, + 0.0909600704908371, + -0.024072518572211266, + 0.14813996851444244, + -0.10074421018362045, + -0.03563304618000984, + 0.11980004608631134, + 0.015190711244940758, + 0.028492972254753113, + -0.13054578006267548, + 0.0598987452685833, + -0.0018208019901067019, + -0.26574867963790894, + -0.1046622022986412, + 0.025759147480130196, + 0.22038379311561584, + 0.0029779020696878433, + -0.32297682762145996, + -0.13471390306949615, + 0.01832430809736252, + -0.11024904251098633, + -0.05563639476895332, + -0.10372592508792877, + 0.12006429582834244, + -0.1740075647830963, + -0.38642027974128723, + 0.1258246898651123, + -0.14513635635375977, + 0.08748295158147812, + 0.046368662267923355, + -0.1185491606593132, + 0.1896124631166458, + 0.08767387270927429, + 0.10766582190990448, + 0.11399984359741211, + -0.014304372482001781, + -0.017331356182694435, + -0.12349628657102585, + 0.27972283959388733, + -0.03475717082619667, + -0.3587721586227417, + 0.16097621619701385, + -0.026457762345671654, + -0.013791014440357685, + -0.09797482937574387, + -0.19777368009090424, + -0.2188035100698471, + 0.11841845512390137, + -0.14907479286193848, + 0.1518198847770691, + 0.03868028521537781, + 0.03809288889169693, + 0.4041370451450348, + 0.00430183345451951, + -0.03444720059633255, + -0.18718919157981873, + -0.23583096265792847, + -0.016519270837306976, + 0.18206454813480377, + -0.027213064953684807, + -0.252016544342041, + 0.0491454154253006, + 0.014860464259982109, + -0.06894470751285553, + 0.17181752622127533, + 0.04482664912939072, + -0.09661887586116791, + -0.1286282241344452, + -0.058976612985134125, + 0.09160139411687851, + 0.051311444491147995, + -0.14805659651756287, + -0.3604443073272705, + -0.09560105949640274, + 0.06101277098059654, + 0.011444945819675922, + 0.027357032522559166, + 0.12012814730405807, + -0.07233128696680069, + -0.34351375699043274, + 0.08263317495584488, + 0.10284171998500824, + 
-0.09838875383138657, + 0.12973551452159882, + 0.021962497383356094, + 0.038289088755846024, + -0.15210531651973724, + 0.01791217364370823, + -0.07782623916864395, + 0.05756104364991188, + -0.002721688710153103, + 0.24015699326992035, + -0.0803576409816742, + -0.11913346499204636, + -0.003947827965021133, + 0.34993964433670044, + -0.09963877499103546, + -0.2036137729883194, + -0.09120816737413406, + 0.010198036208748817, + 0.039048973470926285, + 0.11832015961408615, + -0.2459758073091507, + -0.1269475370645523, + 0.08283139020204544, + 0.036106932908296585, + -0.13724209368228912, + 0.027216419577598572, + -0.034134261310100555, + -0.05846599116921425, + -0.046584442257881165, + -0.1039048284292221, + 0.19857743382453918, + 0.018287429586052895, + 0.06368196755647659, + -0.03516785800457001, + -0.06189749762415886, + 0.18062718212604523, + 0.039525266736745834, + -0.43550774455070496, + 0.0037974969018250704, + 0.17117415368556976, + 0.07821366935968399, + 0.5407046675682068, + -0.2062196284532547, + 0.08034863322973251, + 0.015322472900152206, + 0.029957646504044533, + -0.20152480900287628, + -0.09223990142345428, + -0.06302131712436676, + 0.0019804774783551693, + 0.025030547752976418, + 0.14647270739078522, + -0.09102798998355865, + 0.04946552962064743, + 0.0033420773688703775, + -0.18276983499526978, + 0.07320235669612885, + 0.07692569494247437, + 0.15848369896411896, + -0.11740195751190186, + -0.18770445883274078, + 0.40553897619247437, + 0.07385458052158356, + -0.05059506744146347, + 0.06969370692968369, + 0.007246922701597214, + 0.023643385618925095, + -0.20888538658618927, + -0.14561277627944946, + 0.048460979014635086, + 0.11156976222991943, + 0.40917372703552246, + -0.01046369131654501, + -0.053318917751312256, + -0.06229471042752266, + -0.07372928410768509, + 0.058629900217056274, + -0.2429196834564209, + -0.05559220165014267, + 0.19617997109889984, + 0.007329883985221386, + -0.028705155476927757, + -0.09250165522098541, + 0.3471665680408478, + 
-0.016368677839636803, + -0.27010607719421387, + -0.05045217275619507, + -0.026646358892321587, + 0.15299373865127563, + 0.3539401888847351, + 0.1516050547361374, + -0.15655280649662018, + 0.08041416108608246, + -0.08694977313280106, + -0.010244608856737614, + 0.03272118791937828, + -0.15399935841560364 + ] +} diff --git a/tests/file_stored_embedding_transformer.py b/tests/file_stored_embedding_transformer.py new file mode 100644 index 0000000..2e329fa --- /dev/null +++ b/tests/file_stored_embedding_transformer.py @@ -0,0 +1,28 @@ +import json +from typing import List, Dict + +import numpy as np + +from sziszapangma.core.transformer.embedding_transformer import \ + EmbeddingTransformer + + +class FileStoredEmbeddingTransformer(EmbeddingTransformer): + _cache: Dict[str, np.array] + + def __init__(self, file_path: str): + with open(file_path, 'r') as f: + json_content = json.loads(f.read()) + self._cache = dict({ + key: np.array(json_content[key]) + for key in json_content.keys() + }) + + def get_embeddings(self, words: List[str]) -> Dict[str, np.ndarray]: + return dict({ + word: self._cache[word] + for word in words + }) + + def get_embedding(self, word: str) -> np.ndarray: + return self._cache[word] diff --git a/tests/test_classic_wer.py b/tests/test_classic_wer.py new file mode 100644 index 0000000..90a29bc --- /dev/null +++ b/tests/test_classic_wer.py @@ -0,0 +1,42 @@ +from typing import List, Tuple + +import pytest + +from sziszapangma.core.wer.classic_wer_calculator import ClassicWerCalculator +from sziszapangma.core.wer.step_type import StepType +from sziszapangma.core.wer.step_words import StepWords + + +def get_sample_data() -> Tuple[List[str], List[str]]: + reference = ['This', 'great', 'machine', 'can', 'recognize', 'speech'] + hypothesis = ['This', 'machine', 'can', 'wreck', 'a', 'nice', 'beach'] + return reference, hypothesis + + +def test_classic_calculate_wer_value(): + """Sample test for core calculate.""" + reference, hypothesis = get_sample_data() + 
wer_result = ClassicWerCalculator().calculate_wer(reference, hypothesis) + assert pytest.approx(wer_result[0]) == 0.8333333 + + +def test_classic_calculate_wer_steps(): + """Sample test for core calculate.""" + reference, hypothesis = get_sample_data() + wer_result = ClassicWerCalculator().calculate_wer(reference, hypothesis) + + reference_words = [ + StepWords('This', 'This'), StepWords('great', None), + StepWords('machine', 'machine'), StepWords('can', 'can'), + StepWords(None, 'wreck'), StepWords(None, 'a'), + StepWords('recognize', 'nice'), + StepWords('speech', 'beach')] + step_types = [ + StepType.CORRECT, StepType.DELETION, StepType.CORRECT, StepType.CORRECT, + StepType.INSERTION, StepType.INSERTION, StepType.SUBSTITUTION, + StepType.SUBSTITUTION] + + assert len(wer_result[1]) == 8 + assert [it.step_type for it in wer_result[1]] == step_types + assert [it.step_cost for it in wer_result[1]] == [0, 1, 0, 0, 1, 1, 1, 1] + assert [it.step_words for it in wer_result[1]] == reference_words diff --git a/tests/test_embedding_wer.py b/tests/test_embedding_wer.py new file mode 100644 index 0000000..876af94 --- /dev/null +++ b/tests/test_embedding_wer.py @@ -0,0 +1,27 @@ +from typing import List, Tuple + +import pytest + +from sziszapangma.core.wer.wer_embedding_calculator import \ + WerEmbeddingCalculator +from tests.file_stored_embedding_transformer import \ + FileStoredEmbeddingTransformer + + +def get_sample_data() -> Tuple[List[str], List[str]]: + reference = ['ala', 'ma', 'dobrego', 'wielkiego', 'psa', 'rasowego'] + hypothesis = ['alana', 'rego', 'kruchego', 'psa', 'rasowego'] + return reference, hypothesis + + +def get_calculator() -> WerEmbeddingCalculator: + return WerEmbeddingCalculator( + FileStoredEmbeddingTransformer('tests/embeddings_pl.json')) + + +def test_classic_calculate_wer_value(): + """Sample test for core calculate.""" + reference, hypothesis = get_sample_data() + wer_result = get_calculator().calculate_wer(reference, hypothesis) + 
print(wer_result[0]) + assert pytest.approx(wer_result[0]) == 0.55879563 diff --git a/tests/test_soft_wer.py b/tests/test_soft_wer.py new file mode 100644 index 0000000..c72b97f --- /dev/null +++ b/tests/test_soft_wer.py @@ -0,0 +1,26 @@ +from typing import List, Tuple + +import pytest + +from sziszapangma.core.wer.wer_soft_calculator import WerSoftCalculator +from tests.file_stored_embedding_transformer import \ + FileStoredEmbeddingTransformer + + +def get_sample_data() -> Tuple[List[str], List[str]]: + reference = ['ala', 'ma', 'dobrego', 'wielkiego', 'psa', 'rasowego'] + hypothesis = ['alana', 'rego', 'kruchego', 'psa', 'rasowego'] + return reference, hypothesis + + +def get_calculator() -> WerSoftCalculator: + return WerSoftCalculator( + FileStoredEmbeddingTransformer('tests/embeddings_pl.json')) + + +def test_classic_calculate_wer_value(): + """Sample test for core calculate.""" + reference, hypothesis = get_sample_data() + wer_result = get_calculator().calculate_wer(reference, hypothesis) + print(wer_result[0]) + assert pytest.approx(wer_result[0]) == 0.50186761 diff --git a/tests/test_sziszapangma.py b/tests/test_sziszapangma.py new file mode 100644 index 0000000..838dc13 --- /dev/null +++ b/tests/test_sziszapangma.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python + +"""Tests for `sziszapangma` package.""" + +import pytest + +from click.testing import CliRunner + +from sziszapangma.core import cli + + +@pytest.fixture +def response(): + """Sample pytest fixture. 
+ + See more at: http://doc.pytest.org/en/latest/fixture.html + """ + # import requests + # return requests.get('https://github.com/audreyr/cookiecutter-pypackage') + + +def test_content(response): + """Sample pytest test function with the pytest fixture as an argument.""" + # from bs4 import BeautifulSoup + # assert 'GitHub' in BeautifulSoup(response.content).title.string + + +def test_command_line_interface(): + """Test the CLI.""" + runner = CliRunner() + result = runner.invoke(cli.main) + assert result.exit_code == 0 + assert 'sziszapangma.cli.main' in result.output + help_result = runner.invoke(cli.main, ['--help']) + assert help_result.exit_code == 0 + assert '--help Show this message and exit.' in help_result.output diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..c5655d8 --- /dev/null +++ b/tox.ini @@ -0,0 +1,25 @@ +[tox] +envlist = py38, py39, flake8 + +;[travis] +;python = +; 3.9: py39 +; 3.8: py38 + +[testenv:flake8] +basepython = python +deps = flake8 +commands = flake8 sziszapangma tests + +[testenv] +setenv = + PYTHONPATH = {toxinidir} +deps = + -r{toxinidir}/requirements_dev.txt +; If you want to make tox run the tests with the same versions, create a +; requirements.txt with the pinned versions and uncomment the following line: +; -r{toxinidir}/requirements.txt +commands = + pip install -U pip + pytest --basetemp={envtmpdir} + -- GitLab