diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000000000000000000000000000000000000..0bfc6a66c63170b5c2d620b0e9ae2c364865a4dc --- /dev/null +++ b/.editorconfig @@ -0,0 +1,22 @@ +# http://editorconfig.org + +root = true + +[*] +indent_style = space +indent_size = 4 +trim_trailing_whitespace = true +insert_final_newline = true +charset = utf-8 +end_of_line = lf + +[*.bat] +indent_style = tab +end_of_line = crlf + +[LICENSE] +insert_final_newline = false + +[Makefile] +indent_style = tab + diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 0000000000000000000000000000000000000000..5d317839d1f4f2da03534748f8af31d1ca02572a --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,15 @@ +* sziszapangma version: +* Python version: +* Operating System: + +### Description + +Describe what you were trying to get done. +Tell us what happened, what went wrong, and what you expected to happen. + +### What I Did + +``` +Paste the command(s) you ran and the output. +If there was a crash, please include the traceback here. 
+``` diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 0000000000000000000000000000000000000000..a4e456ad2a36708a04722acb51f8441880e9d443 --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,28 @@ +name: Python package + +on: + pull_request: + branches: [ master, develop ] + +jobs: + build: + + runs-on: ubuntu-18.04 + strategy: + matrix: + python-version: [ '3.8', '3.9' ] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox + pip install --upgrade -r requirements.txt + pip install --upgrade -r requirements_dev.txt + - name: Run tox + run: tox -v diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..4686d7796c69f27e9e952f79a354d1acfcfc01cc --- /dev/null +++ b/.gitignore @@ -0,0 +1,111 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# IDE settings +.vscode/ + +# macOS +.DS_Store + +debug_run/ + diff --git a/AUTHORS.rst b/AUTHORS.rst new file mode 100644 index 0000000000000000000000000000000000000000..dc387f83ad79b4f8a350d76cb86f22f4fbddbed1 --- /dev/null +++ b/AUTHORS.rst @@ -0,0 +1,13 @@ +======= +Credits +======= + +Development Lead +---------------- + +* Piotr Szymański <niedakh@gmail.com> + +Contributors +------------ + +None yet. Why not be the first? diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst new file mode 100644 index 0000000000000000000000000000000000000000..75058c1db8dd95b93aebb241c50faca4223b2855 --- /dev/null +++ b/CONTRIBUTING.rst @@ -0,0 +1,128 @@ +.. highlight:: shell + +============ +Contributing +============ + +Contributions are welcome, and they are greatly appreciated! Every little bit +helps, and credit will always be given. + +You can contribute in many ways: + +Types of Contributions +---------------------- + +Report Bugs +~~~~~~~~~~~ + +Report bugs at https://github.com/niedakh/sziszapangma/issues. + +If you are reporting a bug, please include: + +* Your operating system name and version. 
+* Any details about your local setup that might be helpful in troubleshooting. +* Detailed steps to reproduce the bug. + +Fix Bugs +~~~~~~~~ + +Look through the GitHub issues for bugs. Anything tagged with "bug" and "help +wanted" is open to whoever wants to implement it. + +Implement Features +~~~~~~~~~~~~~~~~~~ + +Look through the GitHub issues for features. Anything tagged with "enhancement" +and "help wanted" is open to whoever wants to implement it. + +Write Documentation +~~~~~~~~~~~~~~~~~~~ + +sziszapangma could always use more documentation, whether as part of the +official sziszapangma docs, in docstrings, or even on the web in blog posts, +articles, and such. + +Submit Feedback +~~~~~~~~~~~~~~~ + +The best way to send feedback is to file an issue at https://github.com/niedakh/sziszapangma/issues. + +If you are proposing a feature: + +* Explain in detail how it would work. +* Keep the scope as narrow as possible, to make it easier to implement. +* Remember that this is a volunteer-driven project, and that contributions + are welcome :) + +Get Started! +------------ + +Ready to contribute? Here's how to set up `sziszapangma` for local development. + +1. Fork the `sziszapangma` repo on GitHub. +2. Clone your fork locally:: + + $ git clone git@github.com:your_name_here/sziszapangma.git + +3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: + + $ mkvirtualenv sziszapangma + $ cd sziszapangma/ + $ python setup.py develop + +4. Create a branch for local development:: + + $ git checkout -b name-of-your-bugfix-or-feature + + Now you can make your changes locally. + +5. When you're done making changes, check that your changes pass flake8 and the + tests, including testing other Python versions with tox:: + + $ flake8 sziszapangma tests + $ python setup.py test or pytest + $ tox + + To get flake8 and tox, just pip install them into your virtualenv. + +6. 
Commit your changes and push your branch to GitHub:: + + $ git add . + $ git commit -m "Your detailed description of your changes." + $ git push origin name-of-your-bugfix-or-feature + +7. Submit a pull request through the GitHub website. + +Pull Request Guidelines +----------------------- + +Before you submit a pull request, check that it meets these guidelines: + +1. The pull request should include tests. +2. If the pull request adds functionality, the docs should be updated. Put + your new functionality into a function with a docstring, and add the + feature to the list in README.rst. +3. The pull request should work for Python 3.5, 3.6, 3.7 and 3.8, and for PyPy. Check + https://travis-ci.com/niedakh/sziszapangma/pull_requests + and make sure that the tests pass for all supported Python versions. + +Tips +---- + +To run a subset of tests:: + +    $ pytest tests.test_sziszapangma + + +Deploying +--------- + +A reminder for the maintainers on how to deploy. +Make sure all your changes are committed (including an entry in HISTORY.rst). +Then run:: + +    $ bump2version patch # possible: major / minor / patch +    $ git push +    $ git push --tags + +Travis will then deploy to PyPI if tests pass. diff --git a/HISTORY.rst b/HISTORY.rst new file mode 100644 index 0000000000000000000000000000000000000000..0da9879aed04c0fa3e639376d27c7070ee335ce7 --- /dev/null +++ b/HISTORY.rst @@ -0,0 +1,8 @@ +======= +History +======= + +0.1.0 (2021-03-08) +------------------ + +* First release on PyPI. 
diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..988ad74e6049c247b3fda52ab782fe5b0d6a83b8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2021, Piotr Szymański + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..965b2dda7db7c49f68857dc3aea9af37e30a745e --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,11 @@ +include AUTHORS.rst +include CONTRIBUTING.rst +include HISTORY.rst +include LICENSE +include README.rst + +recursive-include tests * +recursive-exclude * __pycache__ +recursive-exclude * *.py[co] + +recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..72110b4ba25269c24cd3fff454bcb63209a094f0 --- /dev/null +++ b/Makefile @@ -0,0 +1,86 @@ +# currently not used +.PHONY: clean clean-test clean-pyc clean-build docs help +.DEFAULT_GOAL := help + +define BROWSER_PYSCRIPT +import os, webbrowser, sys + +from urllib.request import pathname2url + +webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) +endef +export BROWSER_PYSCRIPT + +define PRINT_HELP_PYSCRIPT +import re, sys + +for line in sys.stdin: + match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) + if match: + target, help = match.groups() + print("%-20s %s" % (target, help)) +endef +export PRINT_HELP_PYSCRIPT + +BROWSER := python -c "$$BROWSER_PYSCRIPT" + +help: + @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) + +clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts + +clean-build: ## remove build artifacts + rm -fr build/ + rm -fr dist/ + rm -fr .eggs/ + find . -name '*.egg-info' -exec rm -fr {} + + find . -name '*.egg' -exec rm -f {} + + +clean-pyc: ## remove Python file artifacts + find . -name '*.pyc' -exec rm -f {} + + find . -name '*.pyo' -exec rm -f {} + + find . -name '*~' -exec rm -f {} + + find . 
-name '__pycache__' -exec rm -fr {} + + +clean-test: ## remove test and coverage artifacts + rm -fr .tox/ + rm -f .coverage + rm -fr htmlcov/ + rm -fr .pytest_cache + +lint: ## check style with flake8 + flake8 sziszapangma tests + +test: ## run tests quickly with the default Python + pytest + +test-all: ## run tests on every Python version with tox + tox + +coverage: ## check code coverage quickly with the default Python + coverage run --source sziszapangma -m pytest + coverage report -m + coverage html + $(BROWSER) htmlcov/index.html + +docs: ## generate Sphinx HTML documentation, including API docs + rm -f docs/sziszapangma.rst + rm -f docs/modules.rst + sphinx-apidoc -o docs/ sziszapangma + $(MAKE) -C docs clean + $(MAKE) -C docs html + $(BROWSER) docs/_build/html/index.html + +servedocs: docs ## compile the docs watching for changes + watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . + +release: dist ## package and upload a release + twine upload dist/* + +dist: clean ## builds source and wheel package + python setup.py sdist + python setup.py bdist_wheel + ls -l dist + +install: clean ## install the package to the active Python's site-packages + python setup.py install diff --git a/README.rst b/README.rst new file mode 100644 index 0000000000000000000000000000000000000000..e944b1aed6dcaec9b9b41e755d65ebb9c0f3f380 --- /dev/null +++ b/README.rst @@ -0,0 +1,37 @@ +=============================================================== +sziszapangma: evaluate the impact of your ASR on your NLP tasks +=============================================================== + + +.. image:: https://img.shields.io/pypi/v/sziszapangma.svg + :target: https://pypi.python.org/pypi/sziszapangma + +.. image:: https://img.shields.io/travis/niedakh/sziszapangma.svg + :target: https://travis-ci.com/niedakh/sziszapangma + +.. 
image:: https://readthedocs.org/projects/sziszapangma/badge/?version=latest + :target: https://sziszapangma.readthedocs.io/en/latest/?version=latest + :alt: Documentation Status + + + + +A library to measure ASR quality, dedicated especially to measuring how ASR errors impact NLP model performance. + + +* Free software: MIT license +* Documentation: https://sziszapangma.readthedocs.io. + + +Features +-------- + +* TODO + +Credits +------- + +This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. + +.. _Cookiecutter: https://github.com/audreyr/cookiecutter +.. _`audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..21df5f5d6a50fd491ce181b895361a7b356a1dc7 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = python -msphinx +SPHINXPROJ = sziszapangma +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/authors.rst b/docs/authors.rst new file mode 100644 index 0000000000000000000000000000000000000000..e122f914a87b277e565fc9567af1a7545ec9872b --- /dev/null +++ b/docs/authors.rst @@ -0,0 +1 @@ +.. 
include:: ../AUTHORS.rst diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..3abaefc3b8e75e64c6611f87c9884d41685b4f64 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python +# +# sziszapangma documentation build configuration file, created by +# sphinx-quickstart on Fri Jun 9 13:47:02 2017. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another +# directory, add these directories to sys.path here. If the directory is +# relative to the documentation root, use os.path.abspath to make it +# absolute, like shown here. +# +import os +import sys +sys.path.insert(0, os.path.abspath('..')) + +import sziszapangma + +# -- General configuration --------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. 
+project = 'sziszapangma' +copyright = "2021, Piotr Szymański" +author = "Piotr Szymański" + +# The version info for the project you're documenting, acts as replacement +# for |version| and |release|, also used in various other places throughout +# the built documents. +# +# The short X.Y version. +version = sziszapangma.__version__ +# The full version, including alpha/beta/rc tags. +release = sziszapangma.__version__ + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Theme options are theme-specific and customize the look and feel of a +# theme further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + + +# -- Options for HTMLHelp output --------------------------------------- + +# Output file base name for HTML help builder. 
+htmlhelp_basename = 'sziszapangmadoc' + + +# -- Options for LaTeX output ------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass +# [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'sziszapangma.tex', + 'sziszapangma Documentation', + 'Piotr Szymański', 'manual'), +] + + +# -- Options for manual page output ------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'sziszapangma', + 'sziszapangma Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ---------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'sziszapangma', + 'sziszapangma Documentation', + author, + 'sziszapangma', + 'One line description of project.', + 'Miscellaneous'), +] + + + diff --git a/docs/contributing.rst b/docs/contributing.rst new file mode 100644 index 0000000000000000000000000000000000000000..e582053ea018c369be05aae96cf730744f1dc616 --- /dev/null +++ b/docs/contributing.rst @@ -0,0 +1 @@ +.. include:: ../CONTRIBUTING.rst diff --git a/docs/history.rst b/docs/history.rst new file mode 100644 index 0000000000000000000000000000000000000000..250649964bbc36f4bec2942f69238aa6f7c02c1a --- /dev/null +++ b/docs/history.rst @@ -0,0 +1 @@ +.. 
include:: ../HISTORY.rst diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..87a2544b213b4474e62b71f4fa0bb972ff339267 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,20 @@ +Welcome to sziszapangma's documentation! +======================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + readme + installation + usage + modules + contributing + authors + history + +Indices and tables +================== +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/installation.rst b/docs/installation.rst new file mode 100644 index 0000000000000000000000000000000000000000..c950816f3bcd8d8ac09ed0bee2642a14a5e40ce1 --- /dev/null +++ b/docs/installation.rst @@ -0,0 +1,51 @@ +.. highlight:: shell + +============ +Installation +============ + + +Stable release +-------------- + +To install sziszapangma, run this command in your terminal: + +.. code-block:: console + + $ pip install sziszapangma + +This is the preferred method to install sziszapangma, as it will always install the most recent stable release. + +If you don't have `pip`_ installed, this `Python installation guide`_ can guide +you through the process. + +.. _pip: https://pip.pypa.io +.. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/ + + +From sources +------------ + +The sources for sziszapangma can be downloaded from the `Github repo`_. + +You can either clone the public repository: + +.. code-block:: console + + $ git clone https://github.com/niedakh/sziszapangma + +Or download the `tarball`_: + +.. code-block:: console + + $ curl -OJL https://github.com/niedakh/sziszapangma/tarball/master + +Once you have a copy of the source, you can install it with: + +.. code-block:: console + + $ python setup.py install + + +.. _Github repo: https://github.com/niedakh/sziszapangma +.. 
_tarball: https://github.com/niedakh/sziszapangma/tarball/master diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000000000000000000000000000000000000..f55a107c9e21600bcb77ad26fff96eacd7db5bff --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,36 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=python -msphinx +) +set SOURCEDIR=. +set BUILDDIR=_build +set SPHINXPROJ=sziszapangma + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The Sphinx module was not found. Make sure you have Sphinx installed, + echo.then set the SPHINXBUILD environment variable to point to the full + echo.path of the 'sphinx-build' executable. Alternatively you may add the + echo.Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/docs/readme.rst b/docs/readme.rst new file mode 100644 index 0000000000000000000000000000000000000000..72a33558153fb57def85612b021ec596ef2a51b9 --- /dev/null +++ b/docs/readme.rst @@ -0,0 +1 @@ +.. 
include:: ../README.rst diff --git a/docs/usage.rst b/docs/usage.rst new file mode 100644 index 0000000000000000000000000000000000000000..99f0fff2b58379cddc374883e1766e9fd5a936df --- /dev/null +++ b/docs/usage.rst @@ -0,0 +1,7 @@ +===== +Usage +===== + +To use sziszapangma in a project:: + + import sziszapangma diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..6568fe0f0f68a6b0e274833a184acf33ae99408f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +numpy>=1.20.1 +requests>=2.25.1 +pandas>=1.2.4 +fasttext>=0.9.2 +pymongo>=3.11.4 diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 0000000000000000000000000000000000000000..4211aa637ce1f8e33135540beed543f026c9886a --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,11 @@ +pip==21.1.2 +bump2version==1.0.1 +wheel==0.36.2 +watchdog==2.1.2 +flake8==3.9.2 +tox==3.23.1 +coverage==5.5 +Sphinx==4.0.2 +twine==3.4.1 +pytest==6.2.4 +pytest-runner==5.3.1 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a65cf7a3f90c161536575830569515ffb564a0e6 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,26 @@ +[bumpversion] +current_version = 0.1.0 +commit = True +tag = True + +[bumpversion:file:setup.py] +search = version='{current_version}' +replace = version='{new_version}' + +[bumpversion:file:sziszapangma/__init__.py] +search = __version__ = '{current_version}' +replace = __version__ = '{new_version}' + +[bdist_wheel] +universal = 1 + +[flake8] +exclude = docs + +[aliases] +# Define setup.py command aliases here +test = pytest + +[tool:pytest] +collect_ignore = ['setup.py'] + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..4830c9e700cf3a614e22d12ddd8846380984a1fd --- /dev/null +++ b/setup.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python + +"""The setup script.""" + +from setuptools import setup, find_packages + +with open('README.rst') as 
readme_file: + readme = readme_file.read() + +with open('HISTORY.rst') as history_file: + history = history_file.read() + +with open("requirements.txt", "r") as fh: + requirements = fh.readlines() + +with open("requirements_dev.txt", "r") as fh: + requirements_dev = fh.readlines() + requirements + +setup_requirements = ['pytest-runner', ] + +test_requirements = ['pytest>=3', ] + +setup( + author="Piotr Szymański", + author_email='niedakh@gmail.com', + python_requires='>=3.5', + classifiers=[ + 'Development Status :: 2 - Pre-Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Natural Language :: English', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + ], + description="A n", + entry_points={ + 'console_scripts': [ + 'sziszapangma=sziszapangma.cli:main', + ], + }, + install_requires=requirements, + license="MIT license", + long_description=readme + '\n\n' + history, + include_package_data=True, + keywords='sziszapangma', + name='sziszapangma', + packages=find_packages(include=['sziszapangma', 'sziszapangma.*']), + setup_requires=requirements_dev, + test_suite='tests', + tests_require=requirements_dev, + url='https://github.com/niedakh/sziszapangma', + version='0.1.0', + zip_safe=False, +) diff --git a/sziszapangma/.DS_Store b/sziszapangma/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..2fff18096fcf72463ee45cf922f8040ac30b2e8a Binary files /dev/null and b/sziszapangma/.DS_Store differ diff --git a/sziszapangma/__init__.py b/sziszapangma/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..00e971cf296054b0d437ed7a068ac6856f6eb140 --- /dev/null +++ b/sziszapangma/__init__.py @@ -0,0 +1,5 @@ +"""Top-level package for sziszapangma.""" + +__author__ = """Piotr Szymański""" +__email__ = 'niedakh@gmail.com' 
+__version__ = '0.1.0' diff --git a/sziszapangma/__pycache__/__init__.cpython-38.pyc b/sziszapangma/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b2d1b765c9654646bac7bac6fa6b91ae5f037e20 Binary files /dev/null and b/sziszapangma/__pycache__/__init__.cpython-38.pyc differ diff --git a/sziszapangma/__pycache__/__init__.cpython-39.pyc b/sziszapangma/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0da856c208c3cdb6494296026133e07e14235d3c Binary files /dev/null and b/sziszapangma/__pycache__/__init__.cpython-39.pyc differ diff --git a/sziszapangma/__pycache__/wer_classic.cpython-38.pyc b/sziszapangma/__pycache__/wer_classic.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..52c628a2cee0dd4513aa2b26e43401ccda90bf67 Binary files /dev/null and b/sziszapangma/__pycache__/wer_classic.cpython-38.pyc differ diff --git a/sziszapangma/core/__init__.py b/sziszapangma/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/sziszapangma/core/__pycache__/__init__.cpython-38.pyc b/sziszapangma/core/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c7505c2810e029559c1a80b76e0923bf278e236a Binary files /dev/null and b/sziszapangma/core/__pycache__/__init__.cpython-38.pyc differ diff --git a/sziszapangma/core/sziszapangma.py b/sziszapangma/core/sziszapangma.py new file mode 100644 index 0000000000000000000000000000000000000000..dd0b80edeaff5ea8f8a83dbe18e0f7725aaf7067 --- /dev/null +++ b/sziszapangma/core/sziszapangma.py @@ -0,0 +1 @@ +"""Main module.""" diff --git a/sziszapangma/core/transformer/__init__.py b/sziszapangma/core/transformer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/sziszapangma/core/transformer/__pycache__/__init__.cpython-38.pyc b/sziszapangma/core/transformer/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9ae71e4517916535584f24505c9625b24116a404 Binary files /dev/null and b/sziszapangma/core/transformer/__pycache__/__init__.cpython-38.pyc differ diff --git a/sziszapangma/core/transformer/__pycache__/cached_embedding_transformer.cpython-38.pyc b/sziszapangma/core/transformer/__pycache__/cached_embedding_transformer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2655581e4ce34acb616f35c06fb8e4ec3d05ddb Binary files /dev/null and b/sziszapangma/core/transformer/__pycache__/cached_embedding_transformer.cpython-38.pyc differ diff --git a/sziszapangma/core/transformer/__pycache__/embedding_transformer.cpython-38.pyc b/sziszapangma/core/transformer/__pycache__/embedding_transformer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b8f5eb717f11eb752da710b33d6fba62417c5c4e Binary files /dev/null and b/sziszapangma/core/transformer/__pycache__/embedding_transformer.cpython-38.pyc differ diff --git a/sziszapangma/core/transformer/__pycache__/fasttext_embedding_transformer.cpython-38.pyc b/sziszapangma/core/transformer/__pycache__/fasttext_embedding_transformer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..98f0ba0759805530444137824e7fdf6c14ecc1c8 Binary files /dev/null and b/sziszapangma/core/transformer/__pycache__/fasttext_embedding_transformer.cpython-38.pyc differ diff --git a/sziszapangma/core/transformer/cached_embedding_transformer.py b/sziszapangma/core/transformer/cached_embedding_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..f58fe33f260fd6c6ced5c9a0f867f15dc9ecb66a --- /dev/null +++ b/sziszapangma/core/transformer/cached_embedding_transformer.py @@ -0,0 +1,32 @@ +from typing import List, Dict + +import numpy as np + +from 
# Reconstructed from the collapsed patch text: the three modules under
# sziszapangma/core/transformer/, base class first so the block loads
# on its own.
from abc import ABC, abstractmethod
from typing import Dict, List

import numpy as np


# --- sziszapangma/core/transformer/embedding_transformer.py ---

class EmbeddingTransformer(ABC):
    """Interface of components that map words to embedding vectors."""

    @abstractmethod
    def get_embeddings(self, words: List[str]) -> Dict[str, np.ndarray]:
        """Return a dict mapping words to their embeddings."""

    @abstractmethod
    def get_embedding(self, word: str) -> np.ndarray:
        """Return the embedding of a single word."""


# --- sziszapangma/core/transformer/cached_embedding_transformer.py ---

class CachedEmbeddingTransformer(EmbeddingTransformer):
    """Decorator that memoises the results of another transformer.

    Every word is forwarded to the wrapped transformer at most once
    until :meth:`clear` is called; later requests are served from an
    in-memory dict.
    """

    _embeddings_transformer: EmbeddingTransformer
    _cache: Dict[str, np.ndarray]

    def __init__(self, embeddings_transformer: EmbeddingTransformer):
        self._embeddings_transformer = embeddings_transformer
        self._cache = {}

    def get_embedding(self, word: str) -> np.ndarray:
        """Return the (possibly cached) embedding of ``word``."""
        return self.get_embeddings([word])[word]

    def get_embeddings(self, words: List[str]) -> Dict[str, np.ndarray]:
        """Return embeddings for ``words``, computing only uncached ones.

        The wrapped transformer must return an entry for every word it
        is asked for; a missing word surfaces as ``KeyError``.
        """
        # dict.fromkeys removes repeated words while keeping first-seen
        # order, so the wrapped transformer never gets a word twice.
        new_words = list(dict.fromkeys(
            word for word in words if word not in self._cache))
        if new_words:
            new_embeddings = self._embeddings_transformer.get_embeddings(
                new_words)
            for new_word in new_words:
                self._cache[new_word] = new_embeddings[new_word]
        return {word: self._cache[word] for word in words}

    def clear(self):
        """Drop every cached embedding."""
        self._cache.clear()


# --- sziszapangma/core/transformer/fasttext_embedding_transformer.py ---

class FasttextEmbeddingTransformer(EmbeddingTransformer):
    """Embedding transformer backed by a fastText model."""

    def __init__(self, lang_id: str):
        """Download (unless already present) and load the model.

        ``lang_id`` is handed to ``fasttext.util.download_model``.
        """
        # Imported here so that importing this module does not require
        # fasttext to be installed; it is needed only on construction.
        import fasttext
        import fasttext.util

        full_model_name = fasttext.util.download_model(
            lang_id, if_exists='ignore')
        self._fasttext_model = fasttext.load_model(full_model_name)

    def get_embedding(self, word: str) -> np.ndarray:
        """Return the model's word vector for ``word``."""
        return self._fasttext_model.get_word_vector(word)

    def get_embeddings(self, words: List[str]) -> Dict[str, np.ndarray]:
        """Return one vector per distinct word in ``words``."""
        return {
            word: self.get_embedding(word)
            for word in set(words)
        }
b/sziszapangma/core/wer/__pycache__/step_type.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d3ee780df43be6c35dd46f10f72bad0a42c35bb8 Binary files /dev/null and b/sziszapangma/core/wer/__pycache__/step_type.cpython-38.pyc differ diff --git a/sziszapangma/core/wer/__pycache__/step_words.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/step_words.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4142240350aabea762e0c740887510d3e13d56cf Binary files /dev/null and b/sziszapangma/core/wer/__pycache__/step_words.cpython-38.pyc differ diff --git a/sziszapangma/core/wer/__pycache__/wer_calculator.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_calculator.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0e3f29381a1c6f19fd5f025c13ae15dd3a3baf30 Binary files /dev/null and b/sziszapangma/core/wer/__pycache__/wer_calculator.cpython-38.pyc differ diff --git a/sziszapangma/core/wer/__pycache__/wer_embedding_calculator.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_embedding_calculator.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..924ae83ebc3c912ad3acb439a5766bc626c0ac74 Binary files /dev/null and b/sziszapangma/core/wer/__pycache__/wer_embedding_calculator.cpython-38.pyc differ diff --git a/sziszapangma/core/wer/__pycache__/wer_processing_step.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_processing_step.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e74e7f411d80ab89142f285eaaaf84f831cf9522 Binary files /dev/null and b/sziszapangma/core/wer/__pycache__/wer_processing_step.cpython-38.pyc differ diff --git a/sziszapangma/core/wer/__pycache__/wer_soft_calculator.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_soft_calculator.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e66f737228a10bcea2f3c886a1ccb85c904c6d0c Binary files /dev/null 
# Reconstructed from the collapsed patch text: the modules under
# sziszapangma/core/wer/, laid out in dependency order so the block
# loads on its own.
from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from typing import TYPE_CHECKING, Any, List, Optional, Tuple

import numpy as np
import pandas as pd

if TYPE_CHECKING:
    # Used in annotations only.
    from sziszapangma.core.transformer.embedding_transformer import \
        EmbeddingTransformer


# --- sziszapangma/core/wer/distance_matrix_calculator.py ---

class DistanceCalculator(ABC):
    """Interface of word-to-word distance functions."""

    @abstractmethod
    def calculate_distance_matrix(
        self,
        reference: List[str],
        hypothesis: List[str]
    ) -> np.ndarray:
        """Return distances indexed as [reference_word][hypothesis_word]."""

    @abstractmethod
    def calculate_distance_for_words(self, word1: str, word2: str) -> float:
        """Return the distance between two words."""


class BinaryDistanceCalculator(DistanceCalculator):
    """Distance that is 0 for equal words and 1 otherwise."""

    def calculate_distance_for_words(self, word1: str, word2: str) -> float:
        return 0 if word1 == word2 else 1

    def calculate_distance_matrix(
        self,
        reference: List[str],
        hypothesis: List[str]
    ) -> np.ndarray:
        return np.array([
            [self.calculate_distance_for_words(reference_word, hypothesis_word)
             for hypothesis_word in hypothesis]
            for reference_word in reference
        ])


class CosineDistanceCalculator(DistanceCalculator):
    """Cosine distance between the embeddings of two words."""

    _embedding_transformer: "EmbeddingTransformer"

    def __init__(self, embedding_transformer: "EmbeddingTransformer"):
        self._embedding_transformer = embedding_transformer

    def calculate_distance_for_words(self, word1: str, word2: str) -> float:
        return self.cosine_distance_between_words_embeddings(
            self._embedding_transformer.get_embedding(word1),
            self._embedding_transformer.get_embedding(word2)
        )

    @staticmethod
    def cosine_distance_between_words_embeddings(
        word1_embedding: np.ndarray,
        word2_embedding: np.ndarray
    ) -> float:
        """Return ``1 - cosine_similarity`` of the two embeddings.

        For 1-D inputs the result is a scalar. For 2-D inputs (one
        vector per row) the result is the matrix of distances between
        every row of the first and every row of the second argument.
        A zero-length vector has no direction; its similarity to
        anything is taken as 0 (distance 1) instead of producing NaN.

        Raises:
            RuntimeError: if the shapes differ or the arrays are neither
                1-D nor 2-D.
        """
        a = word1_embedding
        b = word2_embedding
        if a.shape != b.shape:
            raise RuntimeError(
                "array {} shape not match {}".format(a.shape, b.shape))
        if a.ndim == 1:
            norm_product = np.linalg.norm(a) * np.linalg.norm(b)
            if norm_product == 0:
                return 1.0
            return 1. - np.dot(a, b) / norm_product
        if a.ndim == 2:
            a_norm = np.linalg.norm(a, axis=1, keepdims=True)
            b_norm = np.linalg.norm(b, axis=1, keepdims=True)
            # Entry [i, j] of the dot product must be scaled by
            # |a_i| * |b_j|, hence the transposed second factor.
            norm_product = a_norm * b_norm.T
            non_zero = norm_product > 0
            safe_product = np.where(non_zero, norm_product, 1.0)
            similarity = np.where(
                non_zero, np.dot(a, b.T) / safe_product, 0.0)
            return 1. - similarity
        raise RuntimeError("array dimensions {} not right".format(a.ndim))

    def calculate_distance_matrix(
        self,
        reference: List[str],
        hypothesis: List[str]
    ) -> np.ndarray:
        # One request for all distinct words of both sequences.
        embeddings_dict = self._embedding_transformer.get_embeddings(
            list(set(reference + hypothesis))
        )
        return np.array([[
            self.cosine_distance_between_words_embeddings(
                embeddings_dict[reference_word],
                embeddings_dict[hypothesis_word],
            )
            for hypothesis_word in hypothesis]
            for reference_word in reference
        ])


# --- sziszapangma/core/wer/step_type.py ---

class StepType(Enum):
    """Kind of a single edit step of an alignment."""

    CORRECT = 1
    SUBSTITUTION = 2
    DELETION = 3
    INSERTION = 4

    def get_short_name(self) -> str:
        """Return the first three letters of the member name."""
        return self.name[:3]

    def is_cross_step(self) -> bool:
        """Tell whether the step pairs a reference and a hypothesis word."""
        return self in [StepType.CORRECT, StepType.SUBSTITUTION]

    def contain_reference_word(self) -> bool:
        """Tell whether the step consumes a reference word."""
        return self != StepType.INSERTION


# --- sziszapangma/core/wer/step_words.py ---

@dataclass(frozen=True)
class StepWords:
    """Words taking part in a step; ``None`` marks an absent side."""

    reference_word: Optional[str]
    hypothesis_word: Optional[str]


# --- sziszapangma/core/wer/wer_processing_step.py ---

@dataclass(frozen=True)
class WerProcessingStep:
    """A cell of the Levenshtein table: the step taken and its costs."""

    step_type: StepType
    step_words: StepWords
    previous_distance: float
    step_cost: float

    @classmethod
    def levenshtein_insertion(cls, previous_distance: float,
                              step_words: StepWords,
                              step_cost: float = 1) -> "WerProcessingStep":
        """Build an insertion step; only the hypothesis word is kept."""
        words = StepWords(None, step_words.hypothesis_word)
        return cls(StepType.INSERTION, words, previous_distance, step_cost)

    @classmethod
    def levenshtein_deletion(cls, previous_distance: float,
                             step_words: StepWords,
                             step_cost: float = 1) -> "WerProcessingStep":
        """Build a deletion step; only the reference word is kept."""
        words = StepWords(step_words.reference_word, None)
        return cls(StepType.DELETION, words, previous_distance, step_cost)

    @classmethod
    def levenshtein_substitution(cls, previous_distance: float,
                                 step_words: StepWords,
                                 step_cost: float) -> "WerProcessingStep":
        """Build a substitution step."""
        return cls(StepType.SUBSTITUTION, step_words,
                   previous_distance, step_cost)

    @classmethod
    def levenshtein_correct(cls, previous_distance: float,
                            step_words: StepWords,
                            step_cost: float) -> "WerProcessingStep":
        """Build a step for matching words."""
        return cls(StepType.CORRECT, step_words,
                   previous_distance, step_cost)

    def total_distance(self) -> float:
        """Return the accumulated distance after taking this step."""
        return self.step_cost + self.previous_distance


# --- sziszapangma/core/wer/wer_step.py ---

@dataclass(frozen=True)
class WerStep:
    """A step of the final alignment together with its reported cost."""

    step_type: StepType
    step_words: StepWords
    step_cost: float


# --- sziszapangma/core/wer/wer_span_question.py ---

@dataclass(frozen=True)
class Span:
    """Half-open range ``[index_start, index_end)`` of reference indexes."""

    index_start: int
    index_end: int

    def _is_index_belong(self, index: int) -> bool:
        return self.index_start <= index < self.index_end

    def get_reference_weights_table(self, total_size: int) -> List[int]:
        """Return ``total_size`` weights: 1 inside the span, 0 outside."""
        return [
            1 if self._is_index_belong(it) else 0
            for it in range(total_size)
        ]


# --- sziszapangma/core/wer/wer_calculator.py ---

class WerCalculator(ABC):
    """Word error rate based on a pluggable word distance."""

    _distance_matrix_calculator: DistanceCalculator

    def __init__(self, distance_matrix_calculator: DistanceCalculator):
        self._distance_matrix_calculator = distance_matrix_calculator

    def _calculate_result_cost_for_step(
        self,
        processing_step: WerProcessingStep
    ) -> float:
        """Return the cost reported for a step (hook for subclasses)."""
        return processing_step.step_cost

    def _get_weights_per_steps(
        self,
        processing_steps: List[WerProcessingStep],
        reference_weights: List[float]
    ) -> List[float]:
        """Return the reference weight that applies to every step.

        An insertion placed after the last reference word has no
        reference index of its own, so it takes the weight of the last
        reference word instead of indexing past the end of the list.
        """
        last_index = len(reference_weights) - 1
        return [
            reference_weights[min(index, last_index)]
            for index in self._get_reference_indexes_per_steps(
                processing_steps)
        ]

    def convert_processing_steps_to_result(
        self,
        processing_steps: List[WerProcessingStep],
        reference_weights: Optional[List[float]] = None
    ) -> List[WerStep]:
        """Turn processing steps into result steps.

        When ``reference_weights`` is given, the cost of every step is
        multiplied by the weight of the reference word it refers to.
        """
        costs = [
            self._calculate_result_cost_for_step(step)
            for step in processing_steps
        ]
        if reference_weights is not None:
            weights = self._get_weights_per_steps(
                processing_steps, reference_weights)
            costs = [weight * cost for weight, cost in zip(weights, costs)]
        return [
            WerStep(step.step_type, step.step_words, cost)
            for step, cost in zip(processing_steps, costs)
        ]

    def get_distance_matrix_between_words(
        self,
        reference: List[str],
        hypothesis: List[str]
    ) -> np.ndarray:
        """Return the word distance matrix from the configured calculator."""
        return self._distance_matrix_calculator.calculate_distance_matrix(
            reference, hypothesis)

    def extract_steps_path(
        self,
        steps_matrix: List[List[WerProcessingStep]]
    ) -> List[WerProcessingStep]:
        """Walk back from the last cell to (0, 0); return steps in order."""
        x = len(steps_matrix) - 1
        y = len(steps_matrix[0]) - 1
        to_return = []
        while not (x == 0 and y == 0):
            current_step = steps_matrix[x][y]
            to_return.append(current_step)
            if current_step.step_type == StepType.DELETION:
                x = x - 1
            elif current_step.step_type == StepType.INSERTION:
                y = y - 1
            else:  # correct and substitution
                y = y - 1
                x = x - 1
        return to_return[::-1]

    @staticmethod
    def _get_levenshtein_processing_step_cross(
        prev_cross_distance: float,
        step_words: StepWords,
        current_distance: float
    ) -> WerProcessingStep:
        """Build the diagonal step: correct at distance 0, else substitution."""
        if current_distance == 0:
            return WerProcessingStep.levenshtein_correct(
                prev_cross_distance, step_words, 0)
        return WerProcessingStep.levenshtein_substitution(
            prev_cross_distance, step_words, current_distance)

    def get_levenshtein_embedding_based(
        self,
        reference: List[str],
        hypothesis: List[str],
        distance_matrix: np.ndarray
    ) -> Tuple[np.ndarray, List[List[WerProcessingStep]]]:
        """Fill the Levenshtein table using ``distance_matrix`` as the
        substitution cost; insertions and deletions cost 1.

        Returns the table of accumulated distances and the table of the
        steps chosen for every cell.
        """
        reference_len = len(reference)
        hypothesis_len = len(hypothesis)
        distance_arr, steps_arr = self._get_initialized_levenshtein_matrix(
            reference, hypothesis)

        for ref_index in range(reference_len):
            for hyp_index in range(hypothesis_len):
                step_words = StepWords(reference[ref_index],
                                       hypothesis[hyp_index])
                current_distance = distance_matrix[ref_index][hyp_index]
                prev_cross_distance = distance_arr[ref_index][hyp_index]

                cross_go_step = self._get_levenshtein_processing_step_cross(
                    prev_cross_distance, step_words, current_distance)
                insertion_step = WerProcessingStep.levenshtein_insertion(
                    distance_arr[ref_index + 1][hyp_index], step_words)
                deletion_step = WerProcessingStep.levenshtein_deletion(
                    distance_arr[ref_index][hyp_index + 1], step_words)

                # On ties min() keeps the earliest candidate, so the
                # diagonal step wins over insertion, then deletion.
                best_step = min([cross_go_step, insertion_step, deletion_step],
                                key=lambda it: it.total_distance())

                distance_arr[ref_index + 1][hyp_index + 1] = \
                    best_step.total_distance()
                steps_arr[ref_index + 1][hyp_index + 1] = best_step

        return distance_arr, steps_arr

    @staticmethod
    def _get_initialized_levenshtein_matrix(
        reference: List[str],
        hypothesis: List[str]
    ) -> Tuple[np.ndarray, List[List[Optional[WerProcessingStep]]]]:
        """Create both tables with the first row and column filled in."""
        # TODO: consider about remove distance_arr replaced by steps_arr
        reference_len = len(reference)
        hypothesis_len = len(hypothesis)
        distance_arr = np.zeros((reference_len + 1, hypothesis_len + 1))
        steps_arr = [
            [None for _ in range(hypothesis_len + 1)]
            for _ in range(reference_len + 1)
        ]

        # levenshtein initial
        for ref_index in range(reference_len + 1):
            distance_arr[ref_index][0] = ref_index
            step_words = StepWords(
                reference[ref_index - 1] if ref_index > 0 else None,
                None
            )
            steps_arr[ref_index][0] = WerProcessingStep.levenshtein_deletion(
                ref_index - 1, step_words)
        for hyp_index in range(hypothesis_len + 1):
            distance_arr[0][hyp_index] = hyp_index
            step_words = StepWords(
                None,
                hypothesis[hyp_index - 1] if hyp_index > 0 else None
            )
            steps_arr[0][hyp_index] = WerProcessingStep.levenshtein_insertion(
                hyp_index - 1, step_words)

        return distance_arr, steps_arr

    def _get_reference_indexes_per_steps(
        self,
        steps: List[WerProcessingStep]
    ) -> List[int]:
        """Return, per step, how many reference words precede it.

        For an insertion after the last reference word the value equals
        the reference length.
        """
        counter = 0
        indexes = []
        for step in steps:
            indexes.append(counter)
            if step.step_type.contain_reference_word():
                counter = counter + 1
        return indexes

    def _calculate_wer(
        self,
        steps: List[WerStep],
    ) -> float:
        """Return the summed step cost divided by the reference length.

        With no reference words the ratio is undefined: 0.0 is returned
        when there is no cost at all, infinity otherwise.
        """
        reference_len = sum(
            1 for step in steps if step.step_type.contain_reference_word())
        total_cost = sum(step.step_cost for step in steps)
        if reference_len == 0:
            return 0.0 if total_cost == 0 else float('inf')
        return total_cost / reference_len

    def _calculate_steps_path(
        self,
        reference: List[str],
        hypothesis: List[str]
    ) -> List[WerProcessingStep]:
        """Align the sequences and return the chosen steps in order."""
        distance_between_words = self.get_distance_matrix_between_words(
            reference, hypothesis)
        _, steps_matrix = self.get_levenshtein_embedding_based(
            reference, hypothesis, distance_between_words)
        return self.extract_steps_path(steps_matrix)

    def calculate_wer(
        self,
        reference: List[str],
        hypothesis: List[str]
    ) -> Tuple[float, List[WerStep]]:
        """Return the WER value and the alignment steps."""
        steps_path = self._calculate_steps_path(reference, hypothesis)
        steps = self.convert_processing_steps_to_result(steps_path)
        return self._calculate_wer(steps), steps

    def calculate_wer_for_spans(
        self,
        reference: List[str],
        hypothesis: List[str],
        spans: List[Span]
    ) -> List[float]:
        """Return one WER value per span, weighting steps by span membership."""
        steps_path = self._calculate_steps_path(reference, hypothesis)
        reference_len = len(reference)
        return [
            self._calculate_wer(self.convert_processing_steps_to_result(
                processing_steps=steps_path,
                reference_weights=span.get_reference_weights_table(
                    reference_len)
            ))
            for span in spans
        ]

    def calculate_wer_weighted(
        self,
        reference: List[str],
        hypothesis: List[str],
        weights: List[float]
    ) -> Tuple[float, List[WerStep]]:
        """Return WER and steps with costs scaled by per-word ``weights``."""
        steps_path = self._calculate_steps_path(reference, hypothesis)
        steps = self.convert_processing_steps_to_result(steps_path, weights)
        return self._calculate_wer(steps), steps


# --- sziszapangma/core/wer/classic_wer_calculator.py ---

class ClassicWerCalculator(WerCalculator):
    """WER with the binary (equal / not equal) word distance."""

    def __init__(self):
        super().__init__(BinaryDistanceCalculator())


# --- sziszapangma/core/wer/wer_embedding_calculator.py ---

class WerEmbeddingCalculator(WerCalculator):
    """WER aligned with the binary distance, reported with cosine costs.

    The alignment itself uses ``BinaryDistanceCalculator``; only the
    cost reported for correct/substitution steps is replaced by the
    cosine distance between the embeddings of the two words.
    """

    _distance_calculator: DistanceCalculator

    def __init__(self, embedding_transformer: "EmbeddingTransformer"):
        super().__init__(BinaryDistanceCalculator())
        self._embedding_transformer = embedding_transformer
        self._distance_calculator = CosineDistanceCalculator(
            embedding_transformer)

    def _calculate_distance_for_word_step(self, step_words: StepWords) -> float:
        return self._distance_calculator.calculate_distance_for_words(
            step_words.reference_word,
            step_words.hypothesis_word
        )

    def _calculate_result_cost_for_step(
        self,
        processing_step: WerProcessingStep
    ) -> float:
        """Cosine distance for cross steps, the step's own cost otherwise."""
        if processing_step.step_type.is_cross_step():
            return self._calculate_distance_for_word_step(
                processing_step.step_words)
        return processing_step.step_cost


# --- sziszapangma/core/wer/wer_soft_calculator.py ---

class WerSoftCalculator(WerCalculator):
    """WER aligned with the cosine distance between word embeddings."""

    def __init__(self, embedding_transformer: "EmbeddingTransformer"):
        super().__init__(CosineDistanceCalculator(embedding_transformer))


# --- sziszapangma/core/wer/wer_util.py ---

class WerUtil:
    """Helpers for presenting WER steps."""

    @staticmethod
    def _optional_str_to_str(value: Optional[str]) -> str:
        return value if value is not None else ''

    @staticmethod
    def _wer_step_to_pandas_row_lit(step: WerStep) -> List[Any]:
        return [
            step.step_type.get_short_name(),
            WerUtil._optional_str_to_str(step.step_words.reference_word),
            WerUtil._optional_str_to_str(step.step_words.hypothesis_word),
            round(step.step_cost, 3)
        ]

    @staticmethod
    def steps_to_dataframe(steps: List[WerStep]) -> pd.DataFrame:
        """Return one row per step: type, reference, hypothesis, cost."""
        # The rows go to pandas directly: passing them through np.array
        # would coerce the numeric cost to str, and an empty list would
        # not match the four columns.
        rows = [
            WerUtil._wer_step_to_pandas_row_lit(step)
            for step in steps
        ]
        return pd.DataFrame(
            rows,
            columns=['step_type', 'reference', 'hypothesis', 'cost']
        )
0000000000000000000000000000000000000000..a18d1edc644ccabc99c7df659b505bf8aebead00 Binary files /dev/null and b/sziszapangma/integration/__pycache__/__init__.cpython-38.pyc differ diff --git a/sziszapangma/integration/__pycache__/asr_processor.cpython-38.pyc b/sziszapangma/integration/__pycache__/asr_processor.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b70aca5ff61fb49ab62aded80d52d31f3990f1a8 Binary files /dev/null and b/sziszapangma/integration/__pycache__/asr_processor.cpython-38.pyc differ diff --git a/sziszapangma/integration/__pycache__/experiment_manager.cpython-38.pyc b/sziszapangma/integration/__pycache__/experiment_manager.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..878ec62a813cd95936980836e5a04d91728a0606 Binary files /dev/null and b/sziszapangma/integration/__pycache__/experiment_manager.cpython-38.pyc differ diff --git a/sziszapangma/integration/__pycache__/gold_transcript_processor.cpython-38.pyc b/sziszapangma/integration/__pycache__/gold_transcript_processor.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..035e15e52b368db6f519a04ed2b9bbb79855db88 Binary files /dev/null and b/sziszapangma/integration/__pycache__/gold_transcript_processor.cpython-38.pyc differ diff --git a/sziszapangma/integration/__pycache__/id_generator.cpython-38.pyc b/sziszapangma/integration/__pycache__/id_generator.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..051a0798d43d79433b153ea102202bd9f1158001 Binary files /dev/null and b/sziszapangma/integration/__pycache__/id_generator.cpython-38.pyc differ diff --git a/sziszapangma/integration/__pycache__/metrics_calculator.cpython-38.pyc b/sziszapangma/integration/__pycache__/metrics_calculator.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64abcc4563ec492321106b4c30b36abb903f4342 Binary files /dev/null and 
# Reconstructed from the collapsed patch text: the modules directly
# under sziszapangma/integration/, with record_id_iterator moved ahead
# of experiment_manager so the block loads on its own.
from abc import ABC, abstractmethod
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set

if TYPE_CHECKING:
    # Used in annotations only.
    from sziszapangma.integration.repository.experiment_repository import \
        ExperimentRepository
    from sziszapangma.integration.task.processing_task import ProcessingTask


# --- sziszapangma/integration/asr_processor.py ---

class AsrProcessor(ABC):
    """Interface of speech recognition back ends."""

    @abstractmethod
    def call_recognise(self, file_path: str) -> List[Dict[str, Any]]:
        """
        Currently most important is field `transcript` with list of transcript
        words.
        """


class AsrWebClient(AsrProcessor):
    """ASR processor that uploads the recording to an HTTP endpoint."""

    _url: str

    def __init__(self, url: str):
        super(AsrWebClient, self).__init__()
        self._url = url

    def call_recognise(self, file_path: str) -> List[Dict[str, Any]]:
        """POST the file as multipart field ``file``; return the JSON body."""
        # Imported here so that importing this module does not require
        # requests to be installed.
        import requests

        # The context manager closes the file once the upload is done;
        # it used to stay open.
        with open(file_path, 'rb') as audio_file:
            res = requests.post(self._url, files={'file': audio_file})
        json_response = res.json()
        print(json_response)
        return json_response


# --- sziszapangma/integration/record_id_iterator.py ---

class RecordIdIterator(ABC):
    """Source of the identifiers of records to process."""

    @abstractmethod
    def get_all_records(self) -> Set[str]:
        """Return the identifiers of all records."""


class RepositoryRecordIdIterator(RecordIdIterator):
    """Record ids taken from an experiment repository."""

    _experiment_repository: "ExperimentRepository"

    def __init__(self, experiment_repository: "ExperimentRepository"):
        self._experiment_repository = experiment_repository

    def get_all_records(self) -> Set[str]:
        return self._experiment_repository.get_all_record_ids()


# --- sziszapangma/integration/experiment_manager.py ---

class ExperimentManager:
    """Runs a list of processing tasks over the records of an experiment."""

    _experiment_repository: "ExperimentRepository"
    _record_id_iterator: RecordIdIterator
    _processing_tasks: "List[ProcessingTask]"

    def __init__(
        self,
        experiment_repository: "ExperimentRepository",
        record_id_iterator: RecordIdIterator,
        processing_tasks: "List[ProcessingTask]"
    ):
        self._experiment_repository = experiment_repository
        self._record_id_iterator = record_id_iterator
        self._processing_tasks = processing_tasks

    def process(self):
        """Initialise the repository, then run every task in list order."""
        self._experiment_repository.initialise()
        for processing_task in self._processing_tasks:
            processing_task.process(
                self._record_id_iterator,
                self._experiment_repository
            )


# --- sziszapangma/integration/gold_transcript_processor.py ---

class GoldTranscriptProcessor(ABC):
    """Source of reference (gold) transcripts of records."""

    @abstractmethod
    def get_gold_transcript(self, record_id: str) -> List[Dict[str, Any]]:
        """
        Currently the most important dict key is `word` – original transcript
        word.
        """


# --- sziszapangma/integration/id_generator.py ---

class IdGenerator(ABC):
    """Class for calculate id generator by record file path."""

    @abstractmethod
    def get_id(self, record_file_path: str) -> str:
        """Method to calculate id by path.

        Id should be unique and repeatable."""


# --- sziszapangma/integration/metrics_calculator.py ---

class MetricsCalculator:
    """Main calculator of metrics."""

    _calculate_classic_wer: bool

    def __init__(self, calculate_classic_wer):
        self._calculate_classic_wer = calculate_classic_wer

    # NOTE(review): simple_calculate_wer, _WORD and _CLASSIC_WER are not
    # defined or imported anywhere in the text of this module, so the
    # classic-WER path looks like it raises NameError - confirm where
    # these names are meant to come from.
    @staticmethod
    def _run_wer_calculations(
        gold_transcript: List[Dict[str, Any]],
        asr_result: List[str]
    ) -> float:
        return simple_calculate_wer(
            reference=[it[_WORD] for it in gold_transcript],
            hypothesis=list(asr_result),
        )

    def calculate_metrics(
        self,
        gold_transcript: List[Dict[str, Any]],
        asr_result: List[str]
    ) -> Dict[str, Any]:
        """Calculate all metrics for data sample."""
        metrics = dict()
        if self._calculate_classic_wer:
            metrics[_CLASSIC_WER] = MetricsCalculator._run_wer_calculations(
                gold_transcript, asr_result)
        # TODO: put additional metrics here
        return metrics


# --- sziszapangma/integration/path_filter.py ---

class PathFilter(ABC):
    """
    Abstract class of PathFilter.

    Filter provide all audio files to preprocess by ASR and calculate metrics.
    """

    @abstractmethod
    def get_list_of_files(self) -> List[str]:
        """Returns list of audio files."""


class ExtensionPathFilter(PathFilter):
    """
    Implementation of PathFilter which find all files with specified extension.
    """
    _extension: str
    _root_directory: str
    _files_limit: Optional[int]

    def __init__(
        self,
        root_directory: str,
        extension: str,
        files_limit: Optional[int] = None
    ):
        """Store the search root, the extension and the optional limit.

        ``extension`` may be given with or without the leading dot.
        """
        self._extension = extension
        self._files_limit = files_limit
        self._root_directory = root_directory

    def get_list_of_files(self) -> List[str]:
        """
        Return paths below the root directory that end with the extension.

        Paths are sorted, so the files kept by ``files_limit`` are the
        same on every run; ``glob`` alone gives no ordering guarantee.
        """
        pattern = '**/*.{}'.format(self._extension.lstrip('.'))
        all_files = sorted(
            str(it) for it in Path(self._root_directory).glob(pattern))
        if self._files_limit is None:
            return all_files
        return all_files[:self._files_limit]


# --- sziszapangma/integration/record_path_provider.py ---

class RecordPathProvider(ABC):
    """Maps a record id to the path of its file."""

    @abstractmethod
    def get_path(self, record_id: str) -> str:
        """Return the path for ``record_id``."""
0000000000000000000000000000000000000000..d82948fab5ca19a68216ed42875796cfe0a80fe8 Binary files /dev/null and b/sziszapangma/integration/repository/__pycache__/__init__.cpython-38.pyc differ diff --git a/sziszapangma/integration/repository/__pycache__/experiment_repository.cpython-38.pyc b/sziszapangma/integration/repository/__pycache__/experiment_repository.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d7e187bae3886fd9ea14a4b3f7d734667994e846 Binary files /dev/null and b/sziszapangma/integration/repository/__pycache__/experiment_repository.cpython-38.pyc differ diff --git a/sziszapangma/integration/repository/__pycache__/file_experiment_repository.cpython-38.pyc b/sziszapangma/integration/repository/__pycache__/file_experiment_repository.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dcc82e13461b3f7816892ee5636b8d7f5dbff9ef Binary files /dev/null and b/sziszapangma/integration/repository/__pycache__/file_experiment_repository.cpython-38.pyc differ diff --git a/sziszapangma/integration/repository/__pycache__/mongo_experiment_repository.cpython-38.pyc b/sziszapangma/integration/repository/__pycache__/mongo_experiment_repository.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..32f957c9503ef7ba409a19f25cb4578e649b51e8 Binary files /dev/null and b/sziszapangma/integration/repository/__pycache__/mongo_experiment_repository.cpython-38.pyc differ diff --git a/sziszapangma/integration/repository/experiment_repository.py b/sziszapangma/integration/repository/experiment_repository.py new file mode 100644 index 0000000000000000000000000000000000000000..6f6d1484b2afe8e8885bddcb4e0eece4d5c677d8 --- /dev/null +++ b/sziszapangma/integration/repository/experiment_repository.py @@ -0,0 +1,52 @@ +"""Repository to manage results of asr experiment processing.""" +from abc import ABC, abstractmethod +from typing import Optional, Set + + +class ExperimentRepository(ABC): + 
class FileExperimentRepository(ExperimentRepository):
    """ExperimentRepository that keeps every record in one JSON file.

    The file contains a single JSON object of the form
    ``{record_id: {property_name: property_value}}``. Its parsed content is
    cached in memory after the first read, and the whole file is rewritten
    on every modification.
    """

    _metadata_file_path: str
    _pretty_format: bool
    _cache_value: any

    def __init__(self, metadata_file_path: str, pretty_format: bool = False):
        """Store the configuration; the file itself is not touched here.

        :param metadata_file_path: path of the JSON file with all records.
        :param pretty_format: when True the file is written with a 4-space
            indent, otherwise without indentation.
        """
        super().__init__()
        self._metadata_file_path = metadata_file_path
        self._pretty_format = pretty_format
        self._cache_value = None

    def initialise(self):
        """Create the metadata file with an empty object if it is missing."""
        if not os.path.isfile(self._metadata_file_path):
            with open(self._metadata_file_path, 'w') as f:
                f.write(json.dumps({}))

    def property_exists(self, record_id: str, property_name: str) -> bool:
        """Return True when the record exists and has the property."""
        json_content = self._get_file_parsed_json()
        return property_name in json_content.get(record_id, {})

    def update_property_for_key(
        self,
        record_id: str,
        property_name: str,
        property_value: any
    ):
        """Set the property of the record, creating the record if needed."""
        json_content = self._get_file_parsed_json()
        json_content.setdefault(record_id, {})[property_name] = \
            property_value
        self._update_metadata_file(json_content)

    def get_property_for_key(
        self,
        record_id: str,
        property_name: str
    ) -> Optional[any]:
        """Return the property value, or None when it is not stored."""
        record = self._get_file_parsed_json().get(record_id, {})
        return record.get(property_name)

    def _get_file_parsed_json(self) -> Dict[str, any]:
        """Return the parsed file content, reading the file only once."""
        if self._cache_value is None:
            with open(self._metadata_file_path, 'r') as f:
                self._cache_value = json.loads(f.read())
        return self._cache_value

    def _update_metadata_file(self, json_content: Dict[str, any]):
        """Replace the cached content and rewrite the metadata file."""
        indent = 4 if self._pretty_format else None
        # Serialise before opening the file: opening with 'w' truncates it,
        # so a value that cannot be serialised must fail before that point
        # or the previously stored data would be lost.
        serialized = json.dumps(json_content, indent=indent)
        self._cache_value = json_content
        with open(self._metadata_file_path, 'w') as f:
            f.write(serialized)

    def get_metrics_result_to_df(self, metrics_property: str) -> pd.DataFrame:
        """Return one row per record with its metrics as columns.

        Columns are ``id`` plus ``<metrics_property>.<metric name>`` for
        every stored metric. A record without the metrics property still
        gets a row; its metric columns are left empty (NaN) instead of
        raising ``KeyError``.
        """
        list_of_dicts = []
        for item_id, properties in self._get_file_parsed_json().items():
            item_dict = {'id': item_id}
            metrics = properties.get(metrics_property) or {}
            for metric_key, metric_value in metrics.items():
                item_dict[f'{metrics_property}.{metric_key}'] = metric_value
            list_of_dicts.append(item_dict)
        return pd.DataFrame(list_of_dicts)

    def delete_property_for_key(self, record_id: str, property_name: str):
        """Remove the property from the record if it is stored.

        Deleting a property that does not exist is a no-op, matching the
        Mongo-based repository; no empty record is created for unknown ids.
        """
        json_content = self._get_file_parsed_json()
        record = json_content.get(record_id)
        if record is None or property_name not in record:
            return
        del record[property_name]
        self._update_metadata_file(json_content)

    def get_all_record_ids(self) -> Set[str]:
        """Return the ids of all stored records."""
        return set(self._get_file_parsed_json())

    def get_all_properties(self) -> Set[str]:
        """Return the names of all properties used by any record."""
        return {
            prop
            for record in self._get_file_parsed_json().values()
            for prop in record
        }
def get_all_record_ids(self) -> Set[str]:
    """Return the ids of all records stored under any property.

    Each property lives in its own collection whose documents are keyed by
    record id, so the result is the union of the document ids over all
    collections.
    """
    database = self._get_database()
    records = set()
    for collection_name in self.get_all_properties():
        # A pymongo Collection cannot be iterated directly (it raises
        # TypeError); query it with find() and fetch only the id field.
        for record in database[collection_name].find({}, {ID: 1}):
            records.add(record[ID])
    return records
a/sziszapangma/integration/task/__pycache__/classic_wer_metric_task.cpython-38.pyc b/sziszapangma/integration/task/__pycache__/classic_wer_metric_task.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..afb1c1a779039fb604f524ec832bd68c10553e17 Binary files /dev/null and b/sziszapangma/integration/task/__pycache__/classic_wer_metric_task.cpython-38.pyc differ diff --git a/sziszapangma/integration/task/__pycache__/embedding_wer_metrics_task.cpython-38.pyc b/sziszapangma/integration/task/__pycache__/embedding_wer_metrics_task.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fb121b6163c23215d419fc36220f227d69e09cb4 Binary files /dev/null and b/sziszapangma/integration/task/__pycache__/embedding_wer_metrics_task.cpython-38.pyc differ diff --git a/sziszapangma/integration/task/__pycache__/gold_transcript_task.cpython-38.pyc b/sziszapangma/integration/task/__pycache__/gold_transcript_task.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c45e39c7669d1eb90145199e976c60239df6b92 Binary files /dev/null and b/sziszapangma/integration/task/__pycache__/gold_transcript_task.cpython-38.pyc differ diff --git a/sziszapangma/integration/task/__pycache__/processing_task.cpython-38.pyc b/sziszapangma/integration/task/__pycache__/processing_task.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..46a1b9f7e3a486498a384e05811cafe91ee187cf Binary files /dev/null and b/sziszapangma/integration/task/__pycache__/processing_task.cpython-38.pyc differ diff --git a/sziszapangma/integration/task/asr_task.py b/sziszapangma/integration/task/asr_task.py new file mode 100644 index 0000000000000000000000000000000000000000..3393121424db7ec06fa2a93e26a31cb8ed37fa28 --- /dev/null +++ b/sziszapangma/integration/task/asr_task.py @@ -0,0 +1,40 @@ +from sziszapangma.integration.asr_processor import AsrProcessor +from sziszapangma.integration.record_path_provider import 
class ClassicWerMetricTask(ProcessingTask):
    """Task that calculates classic WER for a record and stores it.

    The gold transcript and the ASR result are read from the experiment
    repository, and the metrics dict is written back under
    ``metrics_property_name``.
    """

    _metrics_property_name: str
    _gold_transcript_property_name: str
    _asr_property_name: str
    _classic_wer_calculator: ClassicWerCalculator

    def __init__(
        self,
        task_name: str,
        gold_transcript_property_name: str,
        asr_property_name: str,
        metrics_property_name: str,
        require_update: bool
    ):
        """Store the names of the properties the task reads and writes."""
        super().__init__(task_name, require_update)
        self._gold_transcript_property_name = gold_transcript_property_name
        self._asr_property_name = asr_property_name
        self._metrics_property_name = metrics_property_name
        self._classic_wer_calculator = ClassicWerCalculator()

    def skip_for_record(self, record_id: str,
                        experiment_repository: ExperimentRepository) -> bool:
        """Return True when metrics are already stored for the record."""
        # Return a real bool, as declared, instead of the stored value.
        return experiment_repository.get_property_for_key(
            record_id, self._metrics_property_name) is not None

    def run_single_process(self, record_id: str,
                           experiment_repository: ExperimentRepository):
        """Calculate and store metrics for one record.

        Nothing is stored when the gold transcript or the ASR result is
        missing, or when the ASR result has no ``'transcription'`` key.
        """
        gold_transcript = experiment_repository.get_property_for_key(
            record_id, self._gold_transcript_property_name)
        asr_result = experiment_repository.get_property_for_key(
            record_id, self._asr_property_name)
        # The repository returns None for a property that was never stored;
        # without this guard the membership test below raises TypeError.
        if gold_transcript is None or asr_result is None:
            return
        if 'transcription' in asr_result:
            experiment_repository.update_property_for_key(
                record_id,
                self._metrics_property_name,
                self.calculate_metrics(
                    gold_transcript=gold_transcript,
                    asr_result=asr_result['transcription']
                )
            )

    def _run_wer_calculations(
        self,
        gold_transcript: List[Dict[str, any]],
        asr_result: List[str]
    ) -> float:
        """Return the first element of the classic WER calculator result.

        The reference is built from the ``'word'`` entry of every gold
        transcript item.
        """
        return self._classic_wer_calculator.calculate_wer(
            reference=[it[_WORD] for it in gold_transcript],
            hypothesis=list(asr_result),
        )[0]

    def calculate_metrics(
        self,
        gold_transcript: List[Dict[str, any]],
        asr_result: List[str]
    ) -> Dict[str, any]:
        """Calculate all metrics for data sample."""
        return {
            _CLASSIC_WER: self._run_wer_calculations(
                gold_transcript, asr_result),
        }
class EmbeddingWerMetricsTask(ProcessingTask):
    """Task that calculates soft WER and embedding WER for a record.

    The gold transcript and the ASR result are read from the experiment
    repository, and the metrics dict is written back under
    ``metrics_property_name``.
    """

    _metrics_property_name: str
    _gold_transcript_property_name: str
    _asr_property_name: str
    _embedding_transformer: CachedEmbeddingTransformer
    _wer_embedding_calculator: WerEmbeddingCalculator
    _wer_soft_calculator: WerSoftCalculator

    def __init__(
        self,
        task_name: str,
        gold_transcript_property_name: str,
        asr_property_name: str,
        metrics_property_name: str,
        require_update: bool,
        embedding_transformer: EmbeddingTransformer
    ):
        """Store property names and build both WER calculators.

        The given transformer is wrapped in a CachedEmbeddingTransformer
        that is shared by both calculators.
        """
        super().__init__(task_name, require_update)
        self._gold_transcript_property_name = gold_transcript_property_name
        self._asr_property_name = asr_property_name
        self._metrics_property_name = metrics_property_name
        self._embedding_transformer = \
            CachedEmbeddingTransformer(embedding_transformer)
        self._wer_embedding_calculator = \
            WerEmbeddingCalculator(self._embedding_transformer)
        self._wer_soft_calculator = \
            WerSoftCalculator(self._embedding_transformer)

    def skip_for_record(self, record_id: str,
                        experiment_repository: ExperimentRepository) -> bool:
        """Return True when metrics are already stored for the record."""
        # Return a real bool, as declared, instead of the stored value.
        return experiment_repository.get_property_for_key(
            record_id, self._metrics_property_name) is not None

    def run_single_process(self, record_id: str,
                           experiment_repository: ExperimentRepository):
        """Calculate and store metrics for one record.

        Nothing is stored when the gold transcript or the ASR result is
        missing, or when the ASR result has no ``'transcription'`` key.
        """
        gold_transcript = experiment_repository.get_property_for_key(
            record_id, self._gold_transcript_property_name)
        asr_result = experiment_repository.get_property_for_key(
            record_id, self._asr_property_name)
        # The repository returns None for a property that was never stored;
        # check that first so the membership test cannot raise TypeError.
        if gold_transcript is not None and asr_result is not None \
                and 'transcription' in asr_result:
            experiment_repository.update_property_for_key(
                record_id,
                self._metrics_property_name,
                self.calculate_metrics(
                    gold_transcript=gold_transcript,
                    asr_result=asr_result['transcription']
                )
            )
        # Reset the cached transformer once the record is done.
        # NOTE(review): presumably this drops the cached embeddings --
        # confirm against CachedEmbeddingTransformer.clear().
        self._embedding_transformer.clear()

    def calculate_metrics(
        self,
        gold_transcript: List[Dict[str, any]],
        asr_result: List[str]
    ) -> Dict[str, any]:
        """Calculate all metrics for data sample.

        The reference is built from the ``'word'`` entry of every gold
        transcript item; the first element of each calculator result is
        stored as the metric value.
        """
        reference = [it[_WORD] for it in gold_transcript]
        return {
            _SOFT_WER: self._wer_soft_calculator.calculate_wer(
                reference, asr_result)[0],
            _EMBEDDING_WER: self._wer_embedding_calculator.calculate_wer(
                reference, asr_result)[0],
        }
class ProcessingTask(ABC):
    """Base class of a processing step that is run for every record.

    Subclasses implement the work done for one record and the check that
    decides whether the record can be skipped.
    """

    _require_update: bool
    _task_name: str

    def __init__(self, task_name: str, require_update: bool):
        """Store the task name used in logs and the force-update flag."""
        self._task_name = task_name
        self._require_update = require_update

    @abstractmethod
    def run_single_process(
        self,
        record_id: str,
        experiment_repository: ExperimentRepository
    ):
        """Process a single record."""

    @abstractmethod
    def skip_for_record(
        self,
        record_id: str,
        experiment_repository: ExperimentRepository
    ) -> bool:
        """Tell whether processing of the record can be skipped."""

    def process(
        self,
        record_id_iterator: RecordIdIterator,
        experiment_repository: ExperimentRepository
    ):
        """Run the task for every record returned by the iterator.

        A record is processed when ``skip_for_record`` is falsy or when the
        task was created with ``require_update``; otherwise it is only
        reported as skipped. Progress is printed for every record.
        """
        all_ids = list(record_id_iterator.get_all_records())
        total = len(all_ids)
        for position, record_id in enumerate(all_ids, start=1):
            part = f'{position} / {total}'
            can_skip = self.skip_for_record(
                record_id=record_id,
                experiment_repository=experiment_repository
            )
            base_log = f'{self._task_name} processing {part} {record_id}'
            if can_skip and not self._require_update:
                print(f'{base_log} -- skipped')
                continue
            print(base_log)
            self.run_single_process(record_id, experiment_repository)
0.005148351192474365, + 0.033908747136592865, + 0.010332940146327019, + -0.0023164614103734493, + 0.11763325333595276, + 0.07122141867876053, + 0.029413599520921707, + -0.02747083455324173, + 0.02197670005261898, + 0.0697161927819252, + -0.1450590342283249, + -0.027237500995397568, + -0.053309470415115356, + 0.03383652865886688, + -0.008408180437982082, + 0.06446542590856552, + -0.0007105203112587333, + -0.08366017788648605, + 0.0050771646201610565, + 0.04338033124804497, + 0.029303759336471558, + 0.02494737133383751, + 0.030596241354942322, + -0.0253981351852417, + 0.032596975564956665, + 0.03707187995314598, + -0.018734438344836235, + 0.006434278097003698, + -0.06769295036792755, + -0.12360601872205734, + 0.0838269293308258, + -0.031027745455503464, + 0.2682488262653351, + -0.030780944973230362, + 0.04645909368991852, + -0.017756134271621704, + -0.08343406766653061, + -0.18675582110881805, + -0.05608057975769043, + 0.03880838304758072, + 0.06651925295591354, + 0.12371407449245453, + -0.07567261904478073, + 0.0946085974574089, + -0.02659708820283413, + -0.0827547237277031, + -0.04034993052482605, + 0.01702699065208435, + 0.06940530240535736, + 0.013810629025101662, + 0.11997637152671814, + -0.14746129512786865, + 0.131831094622612, + -0.014702586457133293, + -0.13849946856498718, + -0.006234161555767059, + 0.04045787826180458, + 0.05735635757446289, + 0.0066235922276973724, + 0.012200994417071342, + -0.006246550008654594, + 0.10443231463432312, + 0.038930173963308334, + 0.012603623792529106, + 0.0642445981502533, + 0.012551533989608288, + -0.06847232580184937, + 0.0003183564986102283, + 0.09870034456253052, + -0.006378100719302893, + 0.11994011700153351, + 0.07558205723762512, + 0.013828049413859844, + 0.11783914268016815, + -0.05701836943626404, + -0.025578824803233147, + 0.08175185322761536, + -0.009828973561525345, + 0.04556669294834137, + -0.0019460500916466117, + -0.10199606418609619, + 0.007795512676239014, + -0.0326656699180603, + 0.12185566127300262, + 
-0.04753127694129944, + -0.037251636385917664, + -0.10819704830646515, + 0.06959820538759232, + -0.016401033848524094, + -0.03345759958028793, + 0.032997798174619675, + -0.13399460911750793, + -0.05238836631178856, + -0.22104597091674805, + -0.03195151314139366, + -0.05830279737710953, + -0.07016734778881073, + 0.12436967343091965, + -0.11996293067932129, + -0.04278447479009628, + 0.05688241869211197, + 0.03518470376729965, + 0.035848721861839294, + -0.0497179739177227, + 0.1173618733882904, + -0.026286771520972252, + 0.11869470775127411, + -0.06522578001022339, + -0.02036689594388008, + -0.049065422266721725, + 0.09066169708967209, + 0.10371477901935577, + 0.07177156209945679, + -0.01686999574303627, + 0.004568031057715416, + 0.016954246908426285, + 0.024920357391238213, + 0.20036350190639496, + 0.07762596011161804, + 0.042032480239868164, + -0.06874687969684601, + -0.08206215500831604, + -0.059366341680288315, + 0.21933695673942566, + 0.07867085933685303, + -0.041550636291503906, + 0.045646168291568756, + -0.0058541386388242245, + -0.030714213848114014, + 0.01864239200949669, + -0.07514034956693649, + -0.04487421736121178, + -0.0725104957818985, + -0.09751518815755844, + -0.15162283182144165, + 0.06641946732997894, + 0.012081718072295189, + 0.02505236491560936, + 0.019052943214774132, + -0.011952199041843414, + 0.02710006758570671, + 0.07528761029243469, + 0.0053316703997552395, + -0.05752786621451378, + -0.13719043135643005, + 0.10667595267295837, + 0.07605913281440735, + 0.20441934466362, + 0.011313196271657944, + -0.04499569162726402, + -0.010336518287658691, + -0.04517510533332825, + -0.10358977317810059, + -0.08371572196483612, + 0.07000277936458588, + -0.12187262624502182, + 0.03666067123413086, + 0.03201401233673096, + 0.027945850044488907, + 0.06616507470607758, + 0.047266267240047455, + -0.11192768812179565, + 0.02800518088042736, + 0.04106767103075981, + 0.044681113213300705, + -0.03224359452724457, + -0.05946209281682968, + -0.06543562561273575, + 
-0.003850643988698721, + 0.09258010238409042, + 0.11185108125209808, + -0.04267556592822075, + -0.03479834645986557, + -0.09976062178611755, + -0.01968393474817276, + 0.04705026373267174, + -0.03983002156019211, + 0.20730488002300262, + 0.014524552039802074, + 0.07460898905992508, + -0.07390899956226349, + 0.032891854643821716, + -0.01906980760395527, + 0.006029362790286541, + 0.045801129192113876, + -0.002989241387695074, + -0.022413700819015503, + 0.054536692798137665, + -0.022095564752817154, + -0.16709403693675995, + -0.07985913008451462, + 0.07989532500505447, + -0.02780805341899395, + -0.1545652449131012, + -0.05080298334360123, + -0.13523483276367188, + 0.030402880162000656, + 0.024189475923776627, + 0.1913859099149704, + -0.037512216717004776, + -0.04817260056734085, + -0.03770771622657776, + 0.06344256550073624, + -0.0014704714994877577, + -0.06589218974113464, + 0.01659468002617359, + 0.01861189678311348, + -0.003923402633517981, + -0.02344740927219391, + -0.0045809210278093815, + 0.02147747576236725, + 0.08546024560928345, + 0.05269842594861984, + -0.15124723315238953, + 0.056306660175323486, + -0.07883468270301819, + -0.029240280389785767, + -0.18876883387565613, + 0.001567062921822071, + -0.04881870746612549, + -0.0008857144857756793, + -0.08416055142879486, + -0.0807884931564331, + 0.0021422021090984344, + 0.028993740677833557, + 0.027406230568885803, + -0.06310993432998657, + 0.0981152206659317, + -0.07681175321340561, + 0.060740552842617035, + -0.30095866322517395, + -0.00856530200690031, + -0.07610736787319183, + 0.012243842706084251, + 0.08163291215896606, + -0.05355709791183472, + -0.09321880340576172, + -0.01624113880097866, + 0.08702488988637924, + 0.08788813650608063, + -0.12482321262359619, + 0.21521306037902832, + 0.12818582355976105, + 0.028399482369422913, + 0.07849821448326111, + 0.0042789168655872345, + 0.0586482435464859, + -0.07221205532550812, + 0.08500781655311584, + 0.04169676452875137 + ], + "kruchego": [ + 0.016303671523928642, + 
-0.019872045144438744, + -0.0092224245890975, + 0.014146615751087666, + 0.015383906662464142, + 0.002108193002641201, + -0.062140874564647675, + -0.025106163695454597, + 0.08381172269582748, + 0.05710531026124954, + -0.01848313771188259, + -0.04641922935843468, + -0.10143981128931046, + -0.0652991458773613, + 0.018572727218270302, + -0.047973163425922394, + 0.014040370471775532, + 0.005996278487145901, + -0.023732611909508705, + -0.02009929157793522, + 0.0007243369473144412, + -0.08037777245044708, + 0.08096491545438766, + 0.032353661954402924, + 0.013201138935983181, + 0.029858581721782684, + -0.08444873243570328, + -0.035845350474119186, + 0.041100140661001205, + 0.04591462388634682, + 0.006408806424587965, + 0.08591236174106598, + -0.015455985441803932, + -0.08906127512454987, + -0.08432091027498245, + 0.05640733242034912, + 0.012656928040087223, + -0.08276824653148651, + 0.034724295139312744, + -0.0023717572912573814, + -0.03359077125787735, + -0.0729396641254425, + 0.009350333362817764, + -0.0018471118528395891, + 0.01208903081715107, + 0.021758658811450005, + -0.03382786363363266, + -0.04432319104671478, + -0.008544672280550003, + 0.002590786200016737, + -0.12001265585422516, + 0.007330841384828091, + 0.01335948333144188, + -0.006848776713013649, + -0.01573527418076992, + 0.03964998200535774, + 0.04370586946606636, + -0.07679811120033264, + -0.028589816763997078, + -0.03548365831375122, + -0.018836772069334984, + -0.036120202392339706, + -0.15691232681274414, + -0.03119748830795288, + -0.03921635448932648, + 0.03989606723189354, + -0.011425910517573357, + -0.05037058889865875, + 0.020870450884103775, + -0.010966272093355656, + 0.0342303030192852, + -0.014046432450413704, + 0.043870583176612854, + 0.001676222076639533, + -0.0018626556266099215, + 0.0322086326777935, + -0.04106687009334564, + 0.023960450664162636, + 0.0004788232909049839, + -0.014335878193378448, + 0.03212282061576843, + -0.03531443700194359, + 0.11197002977132797, + 0.0059305643662810326, + 
-0.051486533135175705, + -0.06372729688882828, + -0.012052038684487343, + -0.036066845059394836, + 0.025950072333216667, + 0.007260511163622141, + 0.011519390158355236, + -0.0005208541406318545, + -0.07554544508457184, + -0.056650351732969284, + -0.03829614445567131, + 0.04423166438937187, + 0.04932614788413048, + -0.046451959758996964, + 0.07947622984647751, + 0.009505164809525013, + 0.02357821725308895, + -0.02505599893629551, + 0.029041992500424385, + 0.003937460482120514, + -0.03490827977657318, + 0.019778598099946976, + -0.0485965870320797, + 0.031887635588645935, + -0.06730393320322037, + -0.0526353195309639, + 0.026736879721283913, + -0.00929854717105627, + 0.02515203133225441, + 0.03929869458079338, + -0.04754827171564102, + 0.03447788581252098, + 0.026109604164958, + -0.009087104350328445, + 0.017221178859472275, + -0.020497629418969154, + -0.07402393966913223, + 0.002154354704543948, + -0.048016391694545746, + 0.017812075093388557, + 0.04352061450481415, + 0.038450323045253754, + 0.09475485235452652, + -0.0038469440769404173, + 0.015702111646533012, + -0.015994977205991745, + -0.01263596210628748, + 0.023923177272081375, + -0.013737129047513008, + 0.052358273416757584, + -0.10664000362157822, + 0.028132114559412003, + 0.009037270210683346, + 0.03250018507242203, + -0.0019254302605986595, + 0.03156875818967819, + -0.035373505204916, + -0.014041352085769176, + -0.05016426369547844, + -0.09106452763080597, + -0.00925979483872652, + -0.060555923730134964, + -0.017428983002901077, + -0.04451008141040802, + -0.06614529341459274, + -0.02807685174047947, + -0.04897867888212204, + 0.021360717713832855, + 0.06190156936645508, + -0.03151264786720276, + 0.09433288127183914, + -0.036462247371673584, + 0.025535333901643753, + -0.020453132688999176, + 0.01571831665933132, + 0.018754001706838608, + 0.06105025112628937, + -0.08530690521001816, + -0.03154398500919342, + 0.02956015057861805, + -0.01883108541369438, + -0.03418896347284317, + -0.07070766389369965, + 
-0.04285828769207001, + 0.03157927095890045, + -0.018144255504012108, + -0.03566468507051468, + 0.017696401104331017, + -0.1211676299571991, + 0.06651831418275833, + -0.028725869953632355, + 0.03908716142177582, + -0.025371583178639412, + -0.09087547659873962, + -0.0053370059467852116, + 0.02943328209221363, + -0.01232836302369833, + 0.014140562154352665, + -0.132462739944458, + -0.033594027161598206, + -0.06298306584358215, + -0.06971868872642517, + 0.031759992241859436, + -0.05055658891797066, + 0.021255437284708023, + 0.0643974095582962, + 0.03741704672574997, + -0.042381517589092255, + 0.02649880386888981, + 0.051538918167352676, + 0.015897568315267563, + -0.019174762070178986, + 0.019789621233940125, + -0.007294032722711563, + 0.007013384252786636, + 0.017515065148472786, + 0.012397066690027714, + -0.013402302749454975, + -0.003292143577709794, + -0.01289359200745821, + -0.050516702234745026, + -0.020899120718240738, + 0.010199878364801407, + 0.05411527678370476, + -0.09620281308889389, + -0.012224164791405201, + 0.03825684264302254, + 0.007434458006173372, + -0.04569121077656746, + 0.058289967477321625, + -0.00944305770099163, + -0.022278716787695885, + -0.14312857389450073, + 0.05767802894115448, + 0.06804315745830536, + 0.012865548022091389, + 0.05509680509567261, + 0.038668449968099594, + -0.004185462836176157, + -0.07543434947729111, + 0.05272316187620163, + -0.04099275916814804, + 0.11773957312107086, + 0.049500640481710434, + -0.029802843928337097, + -0.03579108417034149, + -0.006666944362223148, + -0.0681883841753006, + 0.05225653946399689, + 0.021549444645643234, + -0.012632218189537525, + 0.02295316383242607, + -0.007066467311233282, + 0.03605067729949951, + 0.02144845947623253, + 0.046653252094984055, + 0.022743625566363335, + -0.005440773908048868, + -0.086735300719738, + -0.008057722821831703, + -0.03225211426615715, + 0.05366796255111694, + 0.006222546566277742, + 0.0432293564081192, + 0.05440787971019745, + -0.05515147000551224, + 
-0.011478396132588387, + 0.022528432309627533, + -0.04767437279224396, + -0.042326103895902634, + -0.010052939876914024, + 0.015939511358737946, + -0.00036999586154706776, + -0.023475253954529762, + 0.055372823029756546, + -0.0001733503449941054, + -0.005122533068060875, + -0.0006684677209705114, + 0.06594069302082062, + 0.006065706256777048, + 0.029512591660022736, + 0.05839845910668373, + -0.03526012971997261, + 0.025412848219275475, + -0.02851499617099762, + -0.05593691021203995, + 0.0002525225281715393, + -0.0645996704697609, + -0.01837906241416931, + 0.013739209622144699, + 0.08159346878528595, + 0.07416143268346786, + -0.019867463037371635, + 0.021767443045973778, + 0.050454869866371155, + -0.05333651974797249, + -0.06338602304458618, + -0.004230514168739319, + 0.028763094916939735, + 0.09021307528018951, + 0.062178753316402435, + -0.05397014319896698, + 0.019923459738492966, + -0.02907441556453705, + -0.05785417929291725, + -0.009038555435836315, + 0.0461433045566082, + -0.012566862627863884, + -0.06420175731182098, + 0.03078165091574192, + -0.04840996488928795, + -0.001747101778164506, + 0.03705829381942749, + -0.0348169319331646, + -0.019208354875445366, + -0.01984819397330284 + ], + "dobrego": [ + -0.05503441020846367, + 0.050614506006240845, + 0.014939900487661362, + 0.022841446101665497, + -0.035710658878088, + -0.00022126563999336213, + -0.055314503610134125, + -0.011580642312765121, + 0.09758414328098297, + -0.0014309933176264167, + -0.00970902107656002, + 0.009510613046586514, + -0.060983806848526, + -0.049659788608551025, + -0.00969758816063404, + 0.001697653322480619, + 0.06200318783521652, + 0.014445019885897636, + -0.007899831049144268, + 0.015542875975370407, + 0.056988172233104706, + -0.01890401542186737, + 0.016180939972400665, + -0.03481048345565796, + -0.07366231083869934, + -0.020606830716133118, + -0.07077424228191376, + 0.02347145974636078, + 0.04351171478629112, + 0.04190950095653534, + 0.08247499167919159, + 0.07471105456352234, + 
0.016334928572177887, + -0.047066256403923035, + -0.037381626665592194, + -0.007403416093438864, + 0.07530312985181808, + -0.1086779460310936, + 0.03151707351207733, + -0.007788960821926594, + -0.013699382543563843, + -0.026787307113409042, + 0.003590720472857356, + -0.029651783406734467, + -0.029520414769649506, + 0.09409046173095703, + 0.010464581660926342, + -0.0015392971690744162, + 0.011096972040832043, + 0.0061229499988257885, + -0.03382788598537445, + -0.01570059172809124, + -0.0022417332511395216, + 0.005053428001701832, + 0.006914355792105198, + -0.005147556774318218, + -0.0021074186079204082, + -0.1275118887424469, + -0.0010959183564409614, + -0.05029448866844177, + 0.003498578444123268, + -0.03031982108950615, + -0.1730177402496338, + 0.058089450001716614, + 0.02865409106016159, + 0.014864973723888397, + -0.07246039807796478, + -0.02818896993994713, + -0.008460788056254387, + 0.03579428791999817, + 0.0058321887627244, + 0.0035161348059773445, + -0.007880325429141521, + 0.009851514361798763, + -0.0072380900382995605, + 0.0053444793447852135, + 0.03675371780991554, + -0.007921168580651283, + -0.036690182983875275, + 0.006854891311377287, + -0.058318451046943665, + 0.0952877625823021, + 0.09722201526165009, + 0.04418746754527092, + -0.011816742829978466, + -0.07397593557834625, + 0.021046597510576248, + 0.028026368468999863, + -0.018990855664014816, + 0.03698477894067764, + 0.013694602064788342, + 0.023896997794508934, + -0.034639257937669754, + -0.016944263130426407, + 0.03237055242061615, + 0.041516393423080444, + 0.03431608900427818, + 0.016778334975242615, + 0.02164391800761223, + -0.016220778226852417, + 0.005468351300805807, + 0.07054583728313446, + 0.033825766295194626, + 0.012653696350753307, + 0.009620989672839642, + -0.013323506340384483, + 0.0011839298531413078, + 0.0032863817177712917, + -0.026157012209296227, + 0.005652490071952343, + 0.00202672160230577, + 0.019793830811977386, + -0.047925181686878204, + 0.023398518562316895, + 
-0.006825267802923918, + -0.010128451511263847, + 0.0003894492983818054, + -0.008031472563743591, + 0.0025933757424354553, + -0.01935240998864174, + -0.003638872876763344, + 0.03990951180458069, + -0.04441092163324356, + 0.024207061156630516, + 0.07817339897155762, + 0.05504067242145538, + 0.08335605263710022, + 0.011402687057852745, + -0.015040148049592972, + -0.05686502158641815, + 0.005943832453340292, + -0.0012180309277027845, + -0.02516915462911129, + -0.01568392664194107, + -0.08357439935207367, + 0.02690807729959488, + -0.04646637290716171, + -0.003297194605693221, + -0.05312460660934448, + 0.023275455459952354, + 0.008113538846373558, + -0.00679142028093338, + -0.07799074798822403, + -0.13153618574142456, + -0.02928849682211876, + -0.007434172090142965, + 0.01668117195367813, + -0.00548437237739563, + -0.14418922364711761, + -0.024840187281370163, + 0.05023394152522087, + -0.047024182975292206, + -0.017895188182592392, + -0.016912518069148064, + 0.11875057220458984, + -0.059332262724637985, + 0.013615688309073448, + -0.0350460410118103, + -0.033755674958229065, + -0.010012625716626644, + 0.00852448120713234, + -0.09327387064695358, + -0.09281349182128906, + 0.002719936426728964, + 0.023487087339162827, + -0.009898770600557327, + -0.005996163003146648, + 0.07511038333177567, + 0.023885391652584076, + -0.008119173347949982, + 0.015617724508047104, + -0.019267044961452484, + -0.14018207788467407, + 0.03772170469164848, + 0.008187171071767807, + 0.028302377089858055, + -0.07401987165212631, + -0.13756874203681946, + -0.012730449438095093, + 0.007814953103661537, + -0.008676527999341488, + -0.007358209695667028, + -0.14923608303070068, + 0.022795915603637695, + -0.038868680596351624, + -0.007667593192309141, + -0.009204352274537086, + -0.014817837625741959, + 0.03745538741350174, + 0.014328669756650925, + -0.011396514251828194, + 0.007435772102326155, + 0.03275608643889427, + 0.051575761288404465, + 0.039839815348386765, + -0.03059149906039238, + 
0.03545346111059189, + -0.00953246932476759, + 0.014869826845824718, + 0.039299629628658295, + 0.005853123962879181, + 0.039247822016477585, + -0.00855233334004879, + -0.04278772324323654, + -0.03592199087142944, + 0.016183575615286827, + 0.037463702261447906, + -0.021429400891065598, + -0.008685288950800896, + 0.01981208845973015, + -0.04635133594274521, + -0.01600891351699829, + -0.023495368659496307, + -0.021313704550266266, + -0.007663375698029995, + 0.001660804613493383, + -0.07794070988893509, + -0.03147993981838226, + 0.081428162753582, + 0.022639550268650055, + -0.021628186106681824, + 0.018755175173282623, + 0.05396069586277008, + -0.015229545533657074, + 0.093647301197052, + 0.018851300701498985, + 0.07163792848587036, + 0.04553893208503723, + -0.012650231830775738, + 0.008197706192731857, + -0.036312516778707504, + -0.05427968502044678, + -0.07333257794380188, + -0.03381696715950966, + -0.024884497746825218, + -0.010352661833167076, + -0.03323546424508095, + -0.03927123174071312, + 0.04288269206881523, + -0.025867480784654617, + 0.0055990070104599, + -0.012100070714950562, + -0.06686313450336456, + -0.012311631813645363, + -0.011405255645513535, + 0.0791744664311409, + -0.025842085480690002, + 0.031465329229831696, + 0.0641825944185257, + -0.03286121040582657, + -0.03579680621623993, + -0.007758041378110647, + 0.05622842162847519, + -0.003918825648725033, + 0.08444945514202118, + 0.024446159601211548, + 0.031444065272808075, + -0.05884227901697159, + 0.061347395181655884, + 0.016597749665379524, + -8.372985757887363e-05, + -0.061184417456388474, + 0.02964228205382824, + 0.011809336952865124, + 0.01711207628250122, + -0.06209190934896469, + -0.05188027769327164, + -0.03551027923822403, + 0.02497977390885353, + 0.01093998458236456, + 0.04215599223971367, + -0.07007014006376266, + -0.007190425880253315, + -0.0012528281658887863, + 0.0788157731294632, + -0.001301859854720533, + -0.01195349358022213, + -0.06861717253923416, + -0.008431742899119854, + 
0.003524206578731537, + -0.014077512547373772, + 0.012319128029048443, + -0.005923453718423843, + -0.009170498698949814, + 0.011832969263195992, + 0.007556426804512739, + 0.026151878759264946, + -0.005054730921983719, + -0.02938242256641388, + 0.007560659199953079, + 0.0022403658367693424, + 0.01216946728527546, + 0.010623137466609478, + 0.049052171409130096, + -0.0782318189740181, + -0.07438185811042786, + -0.005524127744138241, + 0.027553638443350792, + -0.012044156901538372, + 0.00031448499066755176 + ], + "psa": [ + 0.057362064719200134, + -0.0021233586594462395, + 0.0852590873837471, + -0.00017283856868743896, + -0.07565198838710785, + -0.08121095597743988, + -0.03883061558008194, + -0.24827690422534943, + -0.04554160684347153, + -0.06741677224636078, + 0.01622174307703972, + 0.04753381758928299, + -0.034107424318790436, + -0.1321285367012024, + 0.2126137912273407, + 0.004417058080434799, + 0.24809572100639343, + -0.013338938355445862, + 0.02788003347814083, + -0.022250644862651825, + 0.05647706240415573, + -0.04903966560959816, + 0.08530472218990326, + 0.057897310703992844, + -0.05830305069684982, + 0.07067838311195374, + 0.012787431478500366, + 0.03585117682814598, + -0.029449574649333954, + -0.10926508903503418, + 0.14043037593364716, + -0.08161585032939911, + 0.06856999546289444, + 0.046775929629802704, + 0.00400913879275322, + -0.123019739985466, + -0.10679252445697784, + -0.3075258135795593, + -0.027423249557614326, + 0.11304585635662079, + 0.004162512719631195, + -0.05730903893709183, + 0.028299182653427124, + -0.2490377575159073, + -0.10130404680967331, + 0.035720933228731155, + 0.1140424832701683, + -0.12318414449691772, + 0.07150633633136749, + 0.07565151154994965, + 0.3346860408782959, + -0.41780269145965576, + 0.1797449290752411, + -0.19060444831848145, + -0.03451978415250778, + 0.05773979797959328, + -0.013813059777021408, + 0.06421604752540588, + -0.022001102566719055, + -0.07702115178108215, + 0.039187997579574585, + -0.1147729754447937, + 
-0.18180298805236816, + 0.030310610309243202, + 0.2371119111776352, + 0.06150751933455467, + 0.06664823740720749, + -0.11622954905033112, + -0.2445794939994812, + 0.1371057778596878, + 0.1067652553319931, + -0.22976189851760864, + -0.07021855562925339, + 0.07665050774812698, + 0.12312096357345581, + -0.15748558938503265, + -0.0540604330599308, + -0.3322606384754181, + 0.025297746062278748, + -0.05489290505647659, + -0.31037935614585876, + 0.052722714841365814, + 0.3149702548980713, + -0.013982502743601799, + -0.19472122192382812, + -0.4911949932575226, + -0.12228991091251373, + -0.09965498745441437, + -0.007575744763016701, + 0.22067439556121826, + -0.18881841003894806, + 0.14516963064670563, + -0.09653589129447937, + -0.07203446328639984, + 0.10860505700111389, + 0.06878969073295593, + -0.08648698031902313, + 0.0617685467004776, + 0.0033463016152381897, + -0.007826440036296844, + 0.03804641589522362, + -0.10928153991699219, + -0.012081462889909744, + 0.050878673791885376, + -0.01474258117377758, + -0.003034410998225212, + 0.0524611696600914, + -0.12404404580593109, + 0.07047775387763977, + -0.16520074009895325, + 0.09477685391902924, + 0.09919562190771103, + 0.17180292308330536, + 0.04514208436012268, + -0.022381702437996864, + 0.016167882829904556, + -0.1328677535057068, + -0.028757061809301376, + -0.12459208071231842, + 0.16779808700084686, + -0.0913533866405487, + -0.04869356378912926, + -0.08780577033758163, + 0.011766023933887482, + 0.1935703605413437, + -0.1136653944849968, + 0.062263745814561844, + -0.028202399611473083, + 0.02112056314945221, + -0.12952084839344025, + 0.18272802233695984, + -0.14360137283802032, + -0.13668759167194366, + 0.04907786846160889, + -0.0533972904086113, + -0.05436975508928299, + 0.0009280666708946228, + -0.08174237608909607, + -0.0355672761797905, + 0.004176635295152664, + 0.07150948792695999, + 0.22621557116508484, + 0.007818220183253288, + -0.35068655014038086, + 0.0303746797144413, + -0.10332699120044708, + 
-0.021822044625878334, + -0.06092840060591698, + -0.020471841096878052, + -0.21362267434597015, + -0.08224484324455261, + 0.03248654678463936, + -0.134386345744133, + 0.09233617782592773, + 0.1559479683637619, + -0.25665464997291565, + -0.05508199706673622, + -0.02319049835205078, + 0.024425826966762543, + -0.006014466285705566, + 0.16134054958820343, + 0.11818048357963562, + -0.06406328082084656, + 0.06898031383752823, + 0.12925899028778076, + -0.06003619357943535, + -0.1294277012348175, + -0.004379419609904289, + 0.11942613869905472, + 0.1586723029613495, + 0.11098378896713257, + 0.19619882106781006, + -0.21351541578769684, + 0.06364531069993973, + -0.18440209329128265, + -0.0919143483042717, + -0.01907193847000599, + -0.2686134874820709, + -0.22919818758964539, + 0.1355423927307129, + 0.1894533634185791, + -0.07859388738870621, + -0.285798579454422, + 0.10207819193601608, + 0.047448981553316116, + -0.10605946183204651, + -0.1191759929060936, + -0.04839218407869339, + 0.029088757932186127, + 0.3339240550994873, + 0.04115835577249527, + -0.2934126853942871, + 0.17244583368301392, + 0.06257472932338715, + 0.027948465198278427, + 0.0893276184797287, + 0.061908647418022156, + 0.2384718954563141, + -0.04881766065955162, + 0.07925871759653091, + 0.058146096765995026, + -0.19848506152629852, + -0.11947030574083328, + -0.025473620742559433, + -0.07998187839984894, + 0.07768119126558304, + 0.1560533046722412, + -0.11095115542411804, + 0.016813859343528748, + 0.06978719681501389, + -0.007182464003562927, + -0.21029652655124664, + 0.19573143124580383, + -0.07578675448894501, + -0.0630551353096962, + 0.07033172249794006, + -0.09898793697357178, + -0.20700423419475555, + 0.24050123989582062, + 0.019243769347667694, + -0.012027114629745483, + 0.14534005522727966, + -0.05904839187860489, + -0.10008929669857025, + 0.1328340619802475, + -0.005067941267043352, + 0.30113252997398376, + 0.14709892868995667, + 0.10914036631584167, + -0.2452794313430786, + 0.1597498059272766, + 
-0.19580554962158203, + 0.05337795615196228, + 0.009804232977330685, + -0.0010682251304388046, + -0.14123985171318054, + -0.14881402254104614, + -0.061318617314100266, + -0.01960357278585434, + -0.09682243317365646, + 0.001654595136642456, + -0.05271979421377182, + -0.04621410369873047, + -0.16775497794151306, + -0.041242245584726334, + 0.09836067259311676, + -0.21611469984054565, + 0.20738446712493896, + -0.11346103996038437, + -0.08157006651163101, + -0.13330580294132233, + 0.08540618419647217, + 0.0015641041100025177, + 0.027379745617508888, + 0.0014384165406227112, + -0.03790559247136116, + 0.025344736874103546, + -0.13137786090373993, + 0.11160910874605179, + -0.053189780563116074, + 0.11196362972259521, + 0.015298347920179367, + 0.052557751536369324, + 0.01666770875453949, + 0.05463384836912155, + -0.1502702683210373, + 0.06611311435699463, + -0.0667155385017395, + 0.14284445345401764, + 0.08641599118709564, + -0.19731296598911285, + -0.08339843899011612, + 0.09820728003978729, + 0.03302033245563507, + 0.08921126276254654, + 0.03478021174669266, + -0.15989071130752563, + -0.013648039661347866, + -0.16263335943222046, + -0.0749879777431488, + -0.3541197180747986, + 0.12606799602508545, + 0.2353769838809967, + -0.014140097424387932, + -0.09331642091274261, + 0.2911835312843323, + 0.07249271869659424, + 0.019088830798864365, + -0.22341814637184143, + 0.06633023172616959, + -0.0937628448009491, + 0.038561124354600906, + 0.07998501509428024, + 0.03847867250442505, + 0.23800534009933472, + -0.02517944574356079, + -0.05979456380009651, + 0.14871375262737274, + -0.11308622360229492, + -0.08662764728069305 + ], + "wielkiego": [ + -0.007335918955504894, + 0.045563600957393646, + -0.009011227637529373, + -0.006715674884617329, + -0.026933681219816208, + 0.012662549503147602, + 0.012494136579334736, + 0.008883165195584297, + 0.012246189638972282, + -0.03626121208071709, + 0.007100577466189861, + -0.005960305221378803, + -0.07582269608974457, + -0.07219238579273224, + 
0.03428555279970169, + -0.010050034150481224, + 0.04782329127192497, + -0.00587445218116045, + 0.01832985319197178, + 0.0006436360999941826, + -0.007253450341522694, + -0.05597827956080437, + -0.004176432266831398, + 0.00509627815335989, + -0.02963927760720253, + -0.007906301878392696, + -0.07323390990495682, + 0.010382292792201042, + 0.021376198157668114, + -0.015944762155413628, + 0.08404891192913055, + 0.04819642752408981, + -0.01974131353199482, + -0.017839740961790085, + -0.021567292511463165, + -0.011944983154535294, + -0.03820865973830223, + -0.06094135344028473, + 0.015175022184848785, + 0.0021336509380489588, + -0.011225801892578602, + -0.011345837265253067, + 0.022221948951482773, + -0.03339093178510666, + 0.010368917137384415, + 0.08850111067295074, + -0.028319694101810455, + -0.056911665946245193, + 0.013130288571119308, + -0.057576969265937805, + -0.03817208856344223, + -0.02077348157763481, + -0.002910812385380268, + 0.01794436387717724, + 0.0090854587033391, + 0.026645468547940254, + 0.020759597420692444, + -0.0730491429567337, + -0.004025932401418686, + -0.008608357980847359, + 0.006705998908728361, + -0.062277913093566895, + -0.09697772562503815, + -0.0045755901373922825, + 0.022000394761562347, + 0.04762493073940277, + -0.0174410417675972, + -0.01452673226594925, + -0.03763464838266373, + -0.007314097136259079, + -0.02416522428393364, + -0.009630979038774967, + -0.008832407183945179, + -0.009594069793820381, + 0.0010206708684563637, + -0.001869378611445427, + 0.0013620385434478521, + 0.019671745598316193, + -0.0298677496612072, + 0.037589557468891144, + 0.006166642531752586, + -0.01747388020157814, + 0.07862847298383713, + -0.004796740598976612, + -0.001299227587878704, + -0.06612580269575119, + 0.010320048779249191, + -0.057449087500572205, + 0.017147401347756386, + 0.018646012991666794, + 0.027461230754852295, + 0.029924925416707993, + -0.04038987308740616, + 0.019173771142959595, + -0.019169077277183533, + 0.01183535810559988, + 
0.004596712999045849, + 0.013401293195784092, + 0.015031768009066582, + -0.006155865266919136, + 0.0016141459345817566, + 0.0016486234962940216, + -0.0016265441663563251, + 0.011905558407306671, + -0.024959390982985497, + -0.002869216725230217, + 0.003961748443543911, + 0.005381135735660791, + -0.010750019922852516, + -0.0092091616243124, + 0.0033775819465517998, + -0.009264200925827026, + 0.002375534502789378, + 0.014500413089990616, + 0.017437733709812164, + -0.016412558034062386, + -0.02643391117453575, + -0.010554300621151924, + -0.01729186624288559, + -0.011880900710821152, + -0.02654194086790085, + 0.08797746151685715, + -0.004345945548266172, + 0.018173690885305405, + 0.05139186978340149, + -0.021036317571997643, + 0.0019254737999290228, + 0.03657185286283493, + 0.05179467052221298, + -0.03216397762298584, + 0.02832106500864029, + -0.00732059171423316, + 0.001953238621354103, + 0.0552280955016613, + -0.0816313698887825, + -0.04635390639305115, + -0.0010044174268841743, + -0.01145216915756464, + -0.024835683405399323, + 0.004363479092717171, + -0.023021824657917023, + -0.028819985687732697, + -0.012252752669155598, + -0.05512222647666931, + 0.0018695993348956108, + -0.003732760902494192, + -0.0033529000356793404, + 0.0055102757178246975, + -0.0353131964802742, + 0.009309625253081322, + 0.017049534246325493, + -0.01661543920636177, + -0.037630483508110046, + 0.04413406550884247, + 0.10759931057691574, + 0.006216149777173996, + 0.01743883453309536, + -0.0321224182844162, + -0.04338386654853821, + -0.010352222248911858, + 0.019557559862732887, + -0.03616500273346901, + -0.031152736395597458, + -0.03747161850333214, + 0.03515058010816574, + 0.021793408319354057, + -0.006729810032993555, + -0.005254229065030813, + 0.015773463994264603, + 0.023700086399912834, + -0.0074120257049798965, + -0.02505037561058998, + -0.10634505748748779, + 0.044078417122364044, + 0.017648646607995033, + 0.024428721517324448, + -0.03220108523964882, + -0.10613435506820679, + 
0.009063808247447014, + -0.004706214182078838, + 0.015496071428060532, + 0.007620756048709154, + -0.10322338342666626, + 0.004127911292016506, + -0.042836785316467285, + -0.005709556862711906, + 0.014780810102820396, + -0.013590950518846512, + 0.0009020622237585485, + -0.011628083884716034, + 0.0026952442713081837, + 0.02772394008934498, + -0.013394390232861042, + 0.08837705105543137, + 0.034877389669418335, + 0.00796973891556263, + 0.0036066118627786636, + -0.007537400349974632, + 0.009657973423600197, + 0.015483388677239418, + 0.009112080559134483, + 0.016993626952171326, + -0.009587174281477928, + -0.01089375652372837, + 0.020153310149908066, + -0.03212655708193779, + -0.009104751981794834, + -0.017992831766605377, + 0.027265973389148712, + 0.016177978366613388, + -0.00933571346104145, + -0.001989271491765976, + -0.011062455363571644, + 0.02645493671298027, + -0.02389197051525116, + -0.014967194758355618, + 0.04017965495586395, + 0.013360623270273209, + 0.025009073317050934, + 0.03212787210941315, + -0.018112191930413246, + 0.029857879504561424, + 0.0026419730857014656, + 0.0054023731499910355, + 0.038368940353393555, + -0.006983892060816288, + 0.11624155193567276, + 0.04358835518360138, + 0.01519742701202631, + 0.0025886106304824352, + -0.009856360964477062, + -0.034163057804107666, + -0.02339169755578041, + 0.007131385151296854, + 0.012214010581374168, + 0.009165623225271702, + 0.01322084292769432, + -0.001932912040501833, + -0.03742665797472, + -0.0019902572967112064, + -0.03884515166282654, + -0.0014395690523087978, + -0.08395906537771225, + -0.000144916120916605, + 0.011286070570349693, + 0.02790246345102787, + 0.02004914917051792, + 0.027476679533720016, + -0.014214974828064442, + -0.055873312056064606, + -0.033963315188884735, + 0.0310048870742321, + 0.026784010231494904, + -0.01679147407412529, + -0.01151774637401104, + 0.037868570536375046, + 0.00873725488781929, + 0.0177710372954607, + 0.025565484538674355, + 0.01120650302618742, + 
-0.01566700078547001, + 0.010263899341225624, + 0.029856469482183456, + -0.04190037399530411, + 0.03027251549065113, + -0.016120538115501404, + 0.011049199849367142, + -0.007124597672373056, + -0.021132146939635277, + 0.004725765436887741, + -0.006855126470327377, + 0.016022494062781334, + -0.0004426361992955208, + 0.009785481728613377, + 0.03574788570404053, + -0.0002664937637746334, + -0.024519827216863632, + -0.017815103754401207, + 0.0001179906539618969, + 0.001410734374076128, + -0.060682058334350586, + -0.030002214014530182, + 0.030191028490662575, + 0.003258659504354, + 0.03131566196680069, + -0.010314303450286388, + 0.01821562834084034, + 0.0028082067146897316, + -0.02450772374868393, + 0.014137959107756615, + -0.015743648633360863, + -0.0011878833174705505, + 0.027442604303359985, + 0.062496740370988846, + -0.0329519584774971, + -0.022016093134880066, + 0.0022447926457971334, + 0.010238151997327805, + 0.011039968580007553, + -0.016544777899980545 + ], + "rasowego": [ + -0.021374976262450218, + 0.022061966359615326, + -0.05104579031467438, + 0.02448401413857937, + -0.08397653698921204, + -0.04359633848071098, + -0.04844851791858673, + -0.014722022227942944, + -0.014786135405302048, + -0.0075484081171453, + -0.024928996339440346, + 0.020970119163393974, + -0.07415851205587387, + -0.05593620985746384, + -0.02552773244678974, + -0.017214473336935043, + 0.005021178629249334, + -0.019948236644268036, + -0.030592218041419983, + -0.0011016674106940627, + -0.05149977654218674, + -0.07006055116653442, + -0.02875533327460289, + -0.005115842446684837, + -0.00615012226626277, + -0.022595401853322983, + -0.009116356261074543, + 0.0655575841665268, + 0.02221864089369774, + 0.007018675096333027, + 0.012067667208611965, + 0.06786805391311646, + 0.052019406110048294, + -0.05035649240016937, + 0.03981306031346321, + 0.00491845328360796, + -0.023388657718896866, + -0.14638745784759521, + 0.007331090047955513, + 0.018253570422530174, + 0.0010674468940123916, + 
-0.10486278682947159, + -0.016898659989237785, + -0.0384083166718483, + -0.015604713000357151, + 0.0340690091252327, + 0.06729978322982788, + -0.001415115431882441, + 0.012272392399609089, + 0.007575922645628452, + 0.02495703287422657, + -0.07444107532501221, + 0.04819320887327194, + -0.024856077507138252, + -0.020392099395394325, + -0.011424148455262184, + 0.006205352023243904, + -0.03662575036287308, + 0.0034616547636687756, + 0.005699946079403162, + 0.03262940049171448, + -0.011213726364076138, + -0.06626541912555695, + -0.005661826115101576, + 0.0671444833278656, + -0.031498223543167114, + 0.07749772071838379, + -0.05587504804134369, + -0.014321302995085716, + -0.05381925776600838, + 0.016615932807326317, + -0.00021696757175959647, + 0.006970888003706932, + 0.029460981488227844, + 0.01869632676243782, + -0.0015935662668198347, + 0.052388809621334076, + 0.0024564608465880156, + -0.0073577179573476315, + 0.03273550420999527, + -0.025524629279971123, + 0.04064859822392464, + 0.09277748316526413, + -0.015083663165569305, + -0.04081587493419647, + -0.08818261325359344, + -0.04710976779460907, + 0.03098716214299202, + -0.00924242939800024, + 0.017826063558459282, + -0.011905795894563198, + 0.03991704434156418, + -0.06569720804691315, + -0.05167306214570999, + 0.08313693106174469, + 0.09779570996761322, + 0.022606493905186653, + 0.042619917541742325, + 0.039925672113895416, + -0.001008810824714601, + 0.017232539132237434, + -0.03307091444730759, + -0.046558354049921036, + -0.009505311958491802, + -0.03368028625845909, + -0.04864661023020744, + -0.027219058945775032, + 0.024790547788143158, + 0.016621660441160202, + 0.024113919585943222, + -0.005963386036455631, + 0.01810196414589882, + 0.054781120270490646, + 0.03975182771682739, + 0.03511524945497513, + 0.0995873436331749, + -0.022380894050002098, + -0.0230866726487875, + 0.018141157925128937, + 0.02894613891839981, + -0.04706616327166557, + -8.305055962409824e-05, + 0.017096150666475296, + 0.0054153925739228725, + 
0.05484890937805176, + 0.01858840510249138, + 0.04709519073367119, + 0.023030517622828484, + 0.002663001650944352, + -0.00626006256788969, + 0.04791156202554703, + -0.0452740304172039, + -0.01642681285738945, + 0.0029099690727889538, + -0.13237300515174866, + -0.03394050896167755, + -0.09977129846811295, + 0.06643113493919373, + -0.05048809200525284, + 0.005331167485564947, + -0.039210133254528046, + 0.019543815404176712, + 0.011891878210008144, + -0.10855355113744736, + -0.024832867085933685, + 0.018708225339651108, + 0.0241832472383976, + 0.02966644987463951, + -0.03818413242697716, + -0.06066809594631195, + -0.06038204953074455, + 0.019552376121282578, + 0.03553164377808571, + 0.008572856895625591, + 0.09808126837015152, + -0.060862522572278976, + 0.02406022883951664, + -0.02335485629737377, + -0.03715915605425835, + 0.02603251300752163, + -0.017873579636216164, + -0.03878195583820343, + 0.03942133113741875, + 0.01384984515607357, + 0.029413016512989998, + 0.03399429842829704, + -0.04369866102933884, + 0.0016028174431994557, + 0.010400917381048203, + 0.0596524178981781, + 0.04103970527648926, + 0.02264336682856083, + -0.05215459689497948, + -0.0068299793638288975, + 0.008646439760923386, + -0.06566585600376129, + -0.016624554991722107, + -0.10221347212791443, + 0.036093540489673615, + -0.0018918951973319054, + 0.062389664351940155, + -0.06332951784133911, + -0.126982182264328, + 0.013618029654026031, + 0.032118864357471466, + -0.020803697407245636, + 0.01726769097149372, + -0.015036938712000847, + 0.005806658882647753, + 0.13138039410114288, + -0.024694399908185005, + -0.020180627703666687, + 0.02100687474012375, + 0.07018229365348816, + -0.0117644676938653, + -0.015375608578324318, + 0.004260370973497629, + 0.025789979845285416, + 0.04272473603487015, + -0.008279502391815186, + 0.05301598832011223, + -0.00556846521794796, + -0.0681459829211235, + 0.021686162799596786, + 0.008348362520337105, + -0.009950746782124043, + 0.07436788082122803, + 
0.039569661021232605, + -0.00981104839593172, + 0.018843822181224823, + -0.0018322996329516172, + -0.0871618464589119, + -0.029130319133400917, + -0.05660361051559448, + -0.03280451148748398, + 0.009062877856194973, + -0.0653960108757019, + -0.04036419838666916, + 0.10359936207532883, + 0.06099063530564308, + 0.014591563493013382, + 0.10982383042573929, + -0.014729869551956654, + 0.011956637725234032, + 0.06778937578201294, + -0.03720470145344734, + 0.11712870746850967, + 0.10046470910310745, + 0.0196642205119133, + -0.004155087284743786, + -0.045008398592472076, + -0.05250081792473793, + -0.01716792583465576, + -0.002933334093540907, + -0.00195779325440526, + -0.03827226907014847, + -0.014527697116136551, + -0.0639154240489006, + 0.028759459033608437, + -0.028576435521245003, + 0.08100293576717377, + -0.04702320322394371, + -0.016875263303518295, + -0.024568067863583565, + -0.016138112172484398, + 0.04338507354259491, + -0.01013384759426117, + 0.01467440277338028, + 0.013792979530990124, + -0.02351970039308071, + -0.09970521181821823, + 0.04032248258590698, + 0.008005659095942974, + -0.017492681741714478, + 0.010736191645264626, + 0.0822356790304184, + -0.039843007922172546, + -0.04999406635761261, + 0.04765477403998375, + -0.043019380420446396, + -0.017399491742253304, + -0.0477265864610672, + 0.008344064466655254, + -0.027777623385190964, + 0.03964628279209137, + -0.051099471747875214, + -0.005542307160794735, + 0.031032240018248558, + 0.015952764078974724, + 0.02505410648882389, + -0.01774345338344574, + 0.030676454305648804, + -0.015608660876750946, + 0.010213768109679222, + 0.014368381351232529, + 0.025099189952015877, + -0.0523533970117569, + -0.013378938660025597, + 0.03179111331701279, + 0.009646384045481682, + -0.11224357783794403, + 0.054288893938064575, + -0.015104515478014946, + -0.045272890478372574, + -0.016325663775205612, + 0.02102903090417385, + -0.0095363212749362, + 0.049041442573070526, + -0.01453242264688015, + 0.05265913903713226, + 
-0.0029614169616252184, + -0.01707213744521141, + 0.038969986140728, + 0.06106139346957207, + -0.015846004709601402, + -0.01887490227818489, + 0.031026743352413177, + 0.03736288845539093, + -0.0058111087419092655, + 0.014338910579681396 + ], + "ala": [ + -0.08510930836200714, + 0.08026311546564102, + 0.005172867327928543, + -0.23018845915794373, + -0.1327226310968399, + 0.0936470776796341, + -0.06244175136089325, + -0.061952557414770126, + 0.05288635939359665, + 0.24452939629554749, + -0.08917640149593353, + -0.027700629085302353, + -0.05412629246711731, + 0.09416685998439789, + 0.05794994905591011, + -0.07576432079076767, + 0.06472751498222351, + -0.07342499494552612, + 0.09097124636173248, + -0.060087792575359344, + -0.003248483408242464, + 0.10523930191993713, + 0.008373997174203396, + -0.017618943005800247, + 0.07374531030654907, + 0.024025272578001022, + -0.018744762986898422, + 0.02191735990345478, + -0.09690085053443909, + -0.03857237845659256, + -0.03770364448428154, + 0.2185291349887848, + 0.006812140345573425, + 0.03166177496314049, + -0.04394921660423279, + -0.13524998724460602, + -0.006302122492343187, + -0.09569491446018219, + 0.06988120079040527, + 0.10575003921985626, + 0.11440394818782806, + -0.062137097120285034, + -0.045692794024944305, + -0.12013816088438034, + -0.041717927902936935, + -0.11439419537782669, + -0.02021404169499874, + -0.02632732130587101, + 0.04471046105027199, + -0.10949230194091797, + -0.012956470251083374, + 0.029622625559568405, + -0.1134611964225769, + 0.11538143455982208, + 0.2252194583415985, + -0.12420423328876495, + 0.062214866280555725, + -0.045071493834257126, + -0.3804093897342682, + -0.11720050871372223, + -0.006554341875016689, + -0.07014153897762299, + 0.011116482317447662, + -0.022332951426506042, + 0.1854228675365448, + -0.06904476881027222, + -0.01595909520983696, + -0.17870397865772247, + -0.014215500093996525, + -0.03437959775328636, + -0.04773184284567833, + 0.011287234723567963, + 0.052162572741508484, + 
-0.10293835401535034, + -0.05555473640561104, + 0.1461411565542221, + -0.06653688848018646, + -0.05456464737653732, + 0.0390891507267952, + -0.008416191674768925, + -0.037790387868881226, + -0.11679981648921967, + 0.3891960680484772, + -0.007345590740442276, + -0.19290880858898163, + 0.053135018795728683, + -0.1099163144826889, + 0.09811556339263916, + 0.01629701629281044, + 0.03220151364803314, + 0.01977045275270939, + 0.03380807489156723, + -0.1566382199525833, + -0.06573638319969177, + 0.029347993433475494, + 0.08862161636352539, + 0.004392128437757492, + -0.16510900855064392, + -0.0026791300624608994, + -0.07369282841682434, + -0.1060524582862854, + 0.10011787712574005, + -0.016776975244283676, + 0.07388751208782196, + 0.04492664337158203, + -0.11080620437860489, + 0.06120843067765236, + 0.023568084463477135, + 0.17439115047454834, + -0.18897901475429535, + 0.0991172194480896, + 0.025369100272655487, + -0.09956826269626617, + 0.010338149964809418, + 0.16792134940624237, + 0.10722075402736664, + -0.04778091609477997, + 0.004353707656264305, + -0.10673243552446365, + 0.040953852236270905, + -0.0960560292005539, + 0.09371501207351685, + 0.10394444316625595, + -0.0056099677458405495, + 0.06729613244533539, + -0.06795096397399902, + 0.17284581065177917, + 0.0033453311771154404, + 0.018823932856321335, + -0.1607680469751358, + 0.116116002202034, + -0.10165654122829437, + 0.19662612676620483, + 0.06528636068105698, + -0.11422064155340195, + -0.06504455953836441, + 0.029768574982881546, + -0.0484953373670578, + -0.11648310720920563, + -0.12250812351703644, + -0.01510370522737503, + 0.15246586501598358, + -0.07638084888458252, + 0.07381592690944672, + -0.05145636945962906, + 0.11254043877124786, + -0.035568609833717346, + 0.007457010447978973, + 0.09195952862501144, + 0.08615238219499588, + 0.007009610533714294, + -0.004218161106109619, + -0.01877135969698429, + 0.006081234663724899, + -0.15653260052204132, + -0.10768484324216843, + 0.24579471349716187, + 
-0.08761277794837952, + -0.10177511721849442, + 0.04498010873794556, + -0.09588050842285156, + -0.13153505325317383, + -0.02207024022936821, + 0.07547876238822937, + 0.032599080353975296, + -0.08592616021633148, + -0.13776612281799316, + 0.03250812739133835, + 0.26500555872917175, + 0.11078671365976334, + -0.02925412729382515, + 0.13246124982833862, + -0.22584369778633118, + -0.08997879922389984, + 0.053682196885347366, + -0.14366936683654785, + 0.2318786084651947, + -0.11107944697141647, + 0.16802093386650085, + 0.1674017310142517, + -0.08454294502735138, + -0.02038326859474182, + -0.18258585035800934, + 0.023696796968579292, + 0.07218381017446518, + 0.03743196278810501, + 0.08329722285270691, + -0.2355189323425293, + -0.05182601884007454, + 0.005523643456399441, + -0.042271655052900314, + 0.07470633089542389, + 0.016380978748202324, + 0.12326782941818237, + -0.0034765824675559998, + 0.02539176121354103, + 0.034960951656103134, + 0.07186581194400787, + -0.00383613258600235, + 0.03699350357055664, + -0.04867836833000183, + 0.09089645743370056, + -0.1530541181564331, + -0.14013057947158813, + 0.14007122814655304, + 0.14388898015022278, + 0.1655329465866089, + 0.06484488397836685, + -0.05194415524601936, + -0.14215832948684692, + 0.029214072972536087, + -0.006806789897382259, + 0.20004573464393616, + -0.07185720652341843, + 0.002342197112739086, + -0.11681094765663147, + 0.07678108662366867, + 0.20371383428573608, + -0.14282578229904175, + -0.10786132514476776, + -0.12049733847379684, + -0.05071673542261124, + -0.02978406846523285, + 0.039153002202510834, + 0.03860694169998169, + 0.025827309116721153, + -0.04815760254859924, + 0.1684526652097702, + -0.06348452717065811, + -0.036018870770931244, + 0.11284859478473663, + -0.18286296725273132, + -0.018906600773334503, + 0.09744884073734283, + 0.13633206486701965, + 0.07883652299642563, + 0.04424794763326645, + -0.14852368831634521, + -0.046556584537029266, + -0.14331218600273132, + 0.12250048667192459, + 
0.09036845713853836, + 0.003670506179332733, + 0.06500013172626495, + 0.048821426928043365, + -0.07175587117671967, + -0.030644508078694344, + 0.05143702030181885, + 0.062334153801202774, + 0.027922067791223526, + -0.10170811414718628, + 0.02496853470802307, + -0.05066241696476936, + -0.08822708576917648, + 0.015883728861808777, + -0.013464706018567085, + -0.19596795737743378, + -0.1251690536737442, + -0.034124307334423065, + -0.09212999045848846, + -0.048308201134204865, + 0.07198561728000641, + 0.051106300204992294, + -0.08067712932825089, + 0.18825247883796692, + 0.04958396404981613, + -0.11096809804439545, + 0.2280646711587906, + -0.013503428548574448, + 0.08193613588809967, + -0.057304851710796356, + 0.08633160591125488, + -0.14459016919136047, + 0.05562886223196983, + 0.16470640897750854, + -0.23238077759742737, + -0.09258431196212769, + 0.05243774875998497, + 0.11136229336261749, + 0.09222377836704254, + -0.3185659348964691, + -0.1533215492963791, + -0.09634050726890564, + 0.14749886095523834, + -0.03518515080213547, + -0.03340547904372215, + -0.031760625541210175, + -0.007977711036801338, + -0.1727142333984375, + 0.044638026505708694, + 0.03248946741223335, + 0.03752497583627701, + 0.13379395008087158, + -0.06517000496387482, + -0.09402096271514893, + -0.01689091883599758, + 0.1753440946340561, + 0.149289071559906, + -0.03721372038125992, + -0.02596273459494114 + ], + "alana": [ + -0.008905170485377312, + 0.00797797366976738, + -0.020857801660895348, + -0.05502761900424957, + -0.026971876621246338, + 0.021845532581210136, + 0.0031013954430818558, + 0.04111144319176674, + 0.007476105820387602, + 0.04155835136771202, + -0.040180355310440063, + 0.04202931374311447, + 0.002554595470428467, + 0.010071192868053913, + -0.02424287237226963, + -0.05443716421723366, + 0.1433088332414627, + 0.049010977149009705, + -0.012187544256448746, + 0.01602715253829956, + 0.05048424378037453, + 0.02727050520479679, + -0.04620783403515816, + -0.0734170526266098, + 
0.050709083676338196, + -0.026002367958426476, + -0.04151768237352371, + -0.0624970942735672, + -0.09327693283557892, + -0.03458327054977417, + -0.03242221102118492, + -0.01252170279622078, + 0.015669796615839005, + 0.027511216700077057, + 0.018900066614151, + 0.01364364568144083, + 0.04275044798851013, + -0.0047136880457401276, + -0.012517403811216354, + -0.01921612210571766, + 0.09476295113563538, + -0.06644897162914276, + 0.006958749145269394, + 0.041043996810913086, + 0.030241474509239197, + -0.03629227355122566, + -0.019569141790270805, + -0.06644894182682037, + 0.01731383055448532, + 0.0049531301483511925, + 0.05581922084093094, + -0.020732585340738297, + 0.020846780389547348, + -0.0034689255990087986, + -0.0023536141961812973, + 0.043540552258491516, + -0.00607583113014698, + 0.13842181861400604, + -0.08284986019134521, + 0.01645781844854355, + 0.10372365266084671, + 0.020972564816474915, + -0.014481345191597939, + 0.008332233875989914, + 0.11382568627595901, + -0.04178755730390549, + -0.00029972195625305176, + -0.10192164778709412, + -0.010269921272993088, + -0.04217970371246338, + 0.002580307424068451, + -0.014096757397055626, + 0.015790149569511414, + -0.0542927086353302, + -0.03320926055312157, + -0.01713576726615429, + -0.004578539170324802, + -0.049160730093717575, + 0.027759740129113197, + -0.087232306599617, + 0.000209444435313344, + -0.027234351262450218, + 0.11440762877464294, + -0.008088681846857071, + -0.0016661311965435743, + -0.041061677038669586, + -0.03332071006298065, + 0.043460845947265625, + 0.024811111390590668, + -0.011206693015992641, + 0.042661525309085846, + 0.05522497370839119, + -0.15101587772369385, + 0.0034809946082532406, + 0.042527489364147186, + -0.09491441398859024, + -0.025948340073227882, + -0.028411749750375748, + -0.10226449370384216, + 0.0067985886707901955, + -0.014013301581144333, + 0.05011998862028122, + 0.02444552071392536, + -0.039902541786432266, + -4.013441503047943e-05, + -0.056684911251068115, + 
-0.004129653796553612, + -0.06479571759700775, + 0.040350291877985, + -0.029520612210035324, + -0.04164367541670799, + 0.000608866335824132, + 0.05645133554935455, + -0.014965702779591084, + 0.02611115388572216, + -0.05806443467736244, + 0.0641874447464943, + -0.05745403841137886, + -0.06339256465435028, + 0.029419532045722008, + 0.0014499658718705177, + 0.026731204241514206, + 0.035704415291547775, + -0.0008268561214208603, + -0.0939759686589241, + 0.0077078393660485744, + 0.1284271776676178, + 0.047485750168561935, + -0.0011111609637737274, + -0.09236142784357071, + 0.029201526194810867, + -0.06830106675624847, + 0.042804352939128876, + 0.045423250645399094, + -0.07132693380117416, + -0.0472111701965332, + -0.02110600657761097, + -0.000526216346770525, + -0.010649396106600761, + -0.0013150530867278576, + -0.05174721032381058, + 0.01884961687028408, + 0.012327498756349087, + -0.11739764362573624, + 0.009444170631468296, + 0.1098916232585907, + -0.05842617154121399, + 0.04192979261279106, + 0.07221657037734985, + 0.022518323734402657, + -0.07432927191257477, + 0.023226700723171234, + 0.020223218947649002, + 0.012946680188179016, + 0.01574738323688507, + -0.06886599957942963, + -0.0070308903232216835, + 0.025227678939700127, + 0.07279892265796661, + 0.059939946979284286, + 0.03627409785985947, + 0.029419073835015297, + 0.004540672525763512, + -0.012865041382610798, + -0.04613533988595009, + -0.032980334013700485, + -0.06465771794319153, + 0.0077165658585727215, + 0.18178483843803406, + -0.020831923931837082, + -0.07532940059900284, + 0.03866453468799591, + -0.07253919541835785, + -0.034813571721315384, + 0.020511168986558914, + -0.06999587267637253, + -0.031060680747032166, + -0.00856416393071413, + 0.02916804328560829, + -0.019706588238477707, + 0.09040533006191254, + -0.05908718332648277, + -0.04164492338895798, + -0.027276139706373215, + -0.004476554226130247, + -0.03351961821317673, + 0.05742602050304413, + -0.07617287337779999, + 0.016344115138053894, + 
-0.020810041576623917, + -0.024502446874976158, + -0.06571627408266068, + -0.010821627452969551, + -0.023639731109142303, + -0.01641128398478031, + 0.01913810521364212, + -0.048936039209365845, + -0.0062243156135082245, + -0.038358692079782486, + 0.002915005199611187, + -0.06888172030448914, + 0.11447683721780777, + -0.04947744682431221, + 0.08107712864875793, + -0.038916926831007004, + 0.047509852796792984, + -0.001883181743323803, + -0.02029532380402088, + 0.014639850705862045, + -0.02925298735499382, + 0.0033461658749729395, + -0.06095549091696739, + 0.008958661928772926, + 0.01875258982181549, + -0.026076897978782654, + 0.009152323007583618, + -0.021915778517723083, + 0.016577403992414474, + 0.07379716634750366, + -0.042609456926584244, + 0.010794570669531822, + 0.03872985765337944, + -0.024744680151343346, + -0.017851850017905235, + 0.02018165774643421, + 0.026857100427150726, + -0.009717192500829697, + 0.07921796292066574, + 0.008214155212044716, + 0.033190082758665085, + -0.05482260882854462, + -0.059472158551216125, + 0.09949895739555359, + -0.06345643103122711, + -0.01975860260426998, + -0.067245252430439, + 0.020705673843622208, + -0.030896490439772606, + 0.0023532509803771973, + -0.04635317996144295, + 0.01883975975215435, + 0.039829663932323456, + -0.011591881513595581, + 0.003323711920529604, + 0.05427715927362442, + 0.01540394313633442, + -0.017894219607114792, + -0.038341909646987915, + 0.06111633777618408, + -0.07981638610363007, + 0.003194071352481842, + -0.0013840049505233765, + -0.015772690996527672, + 0.011809502728283405, + -0.03507591784000397, + -0.019812079146504402, + 0.05649995803833008, + -0.01170569472014904, + -0.010453758761286736, + 0.038305509835481644, + -0.004376763012260199, + 0.0023491024039685726, + -0.007492097560316324, + -0.014653166756033897, + 0.031125254929065704, + -0.007065157406032085, + -0.017799459397792816, + -0.018134284764528275, + 0.03267719969153404, + -0.002434193156659603, + 0.03548000752925873, + 
-0.00011319480836391449, + -0.04457104206085205, + -0.017937686294317245, + -0.02655956894159317, + -0.04748003929853439, + -0.018020927906036377, + 0.011457541026175022, + 0.08198951929807663, + -0.01670980453491211, + -0.037620238959789276, + -0.015428856015205383, + 0.07995229214429855, + 0.01623024418950081, + -0.026009880006313324, + 0.007154380902647972, + 0.026373393833637238, + -0.027543775737285614, + -0.0497322604060173, + -0.004626482725143433, + 0.04496750980615616, + -0.0015953592956066132, + 0.02821824885904789, + -0.0577494315803051, + -0.012989328242838383, + -0.014337323606014252, + 0.050722211599349976, + -0.012495487928390503, + -0.00028792815282940865, + 0.06026756018400192 + ], + "ma": [ + -0.017319366335868835, + 0.19939786195755005, + -0.002433155430480838, + -0.08989187330007553, + -0.6585466265678406, + -0.029773512855172157, + -0.06268081814050674, + 0.059397727251052856, + -0.23301823437213898, + -0.06189410760998726, + -0.14514213800430298, + -0.12059704959392548, + 0.28610071539878845, + 0.11204845458269119, + 0.017116647213697433, + 0.19462355971336365, + -0.014472516253590584, + -0.1369037926197052, + -0.1411769837141037, + -0.07382507622241974, + 0.05684584006667137, + 0.30498403310775757, + 0.07963626086711884, + 0.04957783594727516, + -0.7672253251075745, + -0.03167421743273735, + -0.049892496317625046, + 0.12684543430805206, + -0.07745488733053207, + 0.0686381608247757, + -0.04825098440051079, + -0.062087059020996094, + -0.1282641738653183, + -0.032829031348228455, + -0.00793223362416029, + 0.008400922641158104, + 0.1427750289440155, + -0.09181278198957443, + 0.13264483213424683, + 0.08397654443979263, + 0.02419828064739704, + 0.27262043952941895, + -0.04234355688095093, + 0.12569856643676758, + -0.2034372240304947, + -0.05482612922787666, + -0.044704314321279526, + -0.23985600471496582, + 0.01257017720490694, + 0.05313592031598091, + -0.0772976502776146, + -0.13264617323875427, + -0.10442794859409332, + -0.03915044665336609, + 
0.9156799912452698, + -0.1419679969549179, + -0.06024566665291786, + -0.1681678742170334, + 0.033169180154800415, + 0.045470189303159714, + -0.23528966307640076, + -0.16084609925746918, + 0.09364213794469833, + -0.01774413324892521, + 0.08702721446752548, + 0.23189006745815277, + 0.051224786788225174, + -0.0024700334761291742, + -0.061074648052453995, + 0.10324984043836594, + 0.0466703362762928, + 0.024757828563451767, + 0.13603328168392181, + -0.10486587136983871, + 0.1308596432209015, + -0.08184386789798737, + 0.013749654404819012, + -0.03720531985163689, + -0.005877527873963118, + 0.008133389987051487, + -0.1295010894536972, + 0.01219901442527771, + 0.9368226528167725, + 0.030340071767568588, + -0.004245404619723558, + -0.05012226477265358, + 0.01501045748591423, + 0.1663505882024765, + 0.03797484561800957, + -0.03615853562951088, + 0.030388187617063522, + 0.10608846694231033, + 0.3501804769039154, + -0.14999425411224365, + 0.19569289684295654, + 0.02038898691534996, + 0.08674237132072449, + 0.00725436769425869, + 0.03604792058467865, + -0.043699733912944794, + -0.0020810714922845364, + -0.1282309740781784, + 0.0036961385048925877, + 0.0073354230262339115, + -0.008381481282413006, + 0.08217158168554306, + 0.12295476347208023, + 0.07235073298215866, + 0.0834241732954979, + -0.08394422382116318, + -0.1825396716594696, + 0.3363122045993805, + 0.1044616624712944, + -0.14969971776008606, + -0.07193519175052643, + 0.17450383305549622, + -0.02342291921377182, + 0.03796623647212982, + -0.1543733775615692, + 0.0026052214670926332, + 0.13685671985149384, + 0.05383211374282837, + 0.0909600704908371, + -0.024072518572211266, + 0.14813996851444244, + -0.10074421018362045, + -0.03563304618000984, + 0.11980004608631134, + 0.015190711244940758, + 0.028492972254753113, + -0.13054578006267548, + 0.0598987452685833, + -0.0018208019901067019, + -0.26574867963790894, + -0.1046622022986412, + 0.025759147480130196, + 0.22038379311561584, + 0.0029779020696878433, + 
-0.32297682762145996, + -0.13471390306949615, + 0.01832430809736252, + -0.11024904251098633, + -0.05563639476895332, + -0.10372592508792877, + 0.12006429582834244, + -0.1740075647830963, + -0.38642027974128723, + 0.1258246898651123, + -0.14513635635375977, + 0.08748295158147812, + 0.046368662267923355, + -0.1185491606593132, + 0.1896124631166458, + 0.08767387270927429, + 0.10766582190990448, + 0.11399984359741211, + -0.014304372482001781, + -0.017331356182694435, + -0.12349628657102585, + 0.27972283959388733, + -0.03475717082619667, + -0.3587721586227417, + 0.16097621619701385, + -0.026457762345671654, + -0.013791014440357685, + -0.09797482937574387, + -0.19777368009090424, + -0.2188035100698471, + 0.11841845512390137, + -0.14907479286193848, + 0.1518198847770691, + 0.03868028521537781, + 0.03809288889169693, + 0.4041370451450348, + 0.00430183345451951, + -0.03444720059633255, + -0.18718919157981873, + -0.23583096265792847, + -0.016519270837306976, + 0.18206454813480377, + -0.027213064953684807, + -0.252016544342041, + 0.0491454154253006, + 0.014860464259982109, + -0.06894470751285553, + 0.17181752622127533, + 0.04482664912939072, + -0.09661887586116791, + -0.1286282241344452, + -0.058976612985134125, + 0.09160139411687851, + 0.051311444491147995, + -0.14805659651756287, + -0.3604443073272705, + -0.09560105949640274, + 0.06101277098059654, + 0.011444945819675922, + 0.027357032522559166, + 0.12012814730405807, + -0.07233128696680069, + -0.34351375699043274, + 0.08263317495584488, + 0.10284171998500824, + -0.09838875383138657, + 0.12973551452159882, + 0.021962497383356094, + 0.038289088755846024, + -0.15210531651973724, + 0.01791217364370823, + -0.07782623916864395, + 0.05756104364991188, + -0.002721688710153103, + 0.24015699326992035, + -0.0803576409816742, + -0.11913346499204636, + -0.003947827965021133, + 0.34993964433670044, + -0.09963877499103546, + -0.2036137729883194, + -0.09120816737413406, + 0.010198036208748817, + 0.039048973470926285, + 
0.11832015961408615, + -0.2459758073091507, + -0.1269475370645523, + 0.08283139020204544, + 0.036106932908296585, + -0.13724209368228912, + 0.027216419577598572, + -0.034134261310100555, + -0.05846599116921425, + -0.046584442257881165, + -0.1039048284292221, + 0.19857743382453918, + 0.018287429586052895, + 0.06368196755647659, + -0.03516785800457001, + -0.06189749762415886, + 0.18062718212604523, + 0.039525266736745834, + -0.43550774455070496, + 0.0037974969018250704, + 0.17117415368556976, + 0.07821366935968399, + 0.5407046675682068, + -0.2062196284532547, + 0.08034863322973251, + 0.015322472900152206, + 0.029957646504044533, + -0.20152480900287628, + -0.09223990142345428, + -0.06302131712436676, + 0.0019804774783551693, + 0.025030547752976418, + 0.14647270739078522, + -0.09102798998355865, + 0.04946552962064743, + 0.0033420773688703775, + -0.18276983499526978, + 0.07320235669612885, + 0.07692569494247437, + 0.15848369896411896, + -0.11740195751190186, + -0.18770445883274078, + 0.40553897619247437, + 0.07385458052158356, + -0.05059506744146347, + 0.06969370692968369, + 0.007246922701597214, + 0.023643385618925095, + -0.20888538658618927, + -0.14561277627944946, + 0.048460979014635086, + 0.11156976222991943, + 0.40917372703552246, + -0.01046369131654501, + -0.053318917751312256, + -0.06229471042752266, + -0.07372928410768509, + 0.058629900217056274, + -0.2429196834564209, + -0.05559220165014267, + 0.19617997109889984, + 0.007329883985221386, + -0.028705155476927757, + -0.09250165522098541, + 0.3471665680408478, + -0.016368677839636803, + -0.27010607719421387, + -0.05045217275619507, + -0.026646358892321587, + 0.15299373865127563, + 0.3539401888847351, + 0.1516050547361374, + -0.15655280649662018, + 0.08041416108608246, + -0.08694977313280106, + -0.010244608856737614, + 0.03272118791937828, + -0.15399935841560364 + ] +} diff --git a/tests/file_stored_embedding_transformer.py b/tests/file_stored_embedding_transformer.py new file mode 100644 index 
0000000000000000000000000000000000000000..2e329fa3d553daeed4c8533c8fc23e81140cbc79 --- /dev/null +++ b/tests/file_stored_embedding_transformer.py @@ -0,0 +1,28 @@ +import json +from typing import List, Dict + +import numpy as np + +from sziszapangma.core.transformer.embedding_transformer import \ + EmbeddingTransformer + + +class FileStoredEmbeddingTransformer(EmbeddingTransformer): + _cache: Dict[str, np.array] + + def __init__(self, file_path: str): + with open(file_path, 'r') as f: + json_content = json.loads(f.read()) + self._cache = dict({ + key: np.array(json_content[key]) + for key in json_content.keys() + }) + + def get_embeddings(self, words: List[str]) -> Dict[str, np.ndarray]: + return dict({ + word: self._cache[word] + for word in words + }) + + def get_embedding(self, word: str) -> np.ndarray: + return self._cache[word] diff --git a/tests/test_classic_wer.py b/tests/test_classic_wer.py new file mode 100644 index 0000000000000000000000000000000000000000..90a29bc60254ef54c6ca335629772ca4e6862d72 --- /dev/null +++ b/tests/test_classic_wer.py @@ -0,0 +1,42 @@ +from typing import List, Tuple + +import pytest + +from sziszapangma.core.wer.classic_wer_calculator import ClassicWerCalculator +from sziszapangma.core.wer.step_type import StepType +from sziszapangma.core.wer.step_words import StepWords + + +def get_sample_data() -> Tuple[List[str], List[str]]: + reference = ['This', 'great', 'machine', 'can', 'recognize', 'speech'] + hypothesis = ['This', 'machine', 'can', 'wreck', 'a', 'nice', 'beach'] + return reference, hypothesis + + +def test_classic_calculate_wer_value(): + """Sample test for core calculate.""" + reference, hypothesis = get_sample_data() + wer_result = ClassicWerCalculator().calculate_wer(reference, hypothesis) + assert pytest.approx(wer_result[0]) == 0.8333333 + + +def test_classic_calculate_wer_steps(): + """Sample test for core calculate.""" + reference, hypothesis = get_sample_data() + wer_result = 
ClassicWerCalculator().calculate_wer(reference, hypothesis) + + reference_words = [ + StepWords('This', 'This'), StepWords('great', None), + StepWords('machine', 'machine'), StepWords('can', 'can'), + StepWords(None, 'wreck'), StepWords(None, 'a'), + StepWords('recognize', 'nice'), + StepWords('speech', 'beach')] + step_types = [ + StepType.CORRECT, StepType.DELETION, StepType.CORRECT, StepType.CORRECT, + StepType.INSERTION, StepType.INSERTION, StepType.SUBSTITUTION, + StepType.SUBSTITUTION] + + assert len(wer_result[1]) == 8 + assert [it.step_type for it in wer_result[1]] == step_types + assert [it.step_cost for it in wer_result[1]] == [0, 1, 0, 0, 1, 1, 1, 1] + assert [it.step_words for it in wer_result[1]] == reference_words diff --git a/tests/test_embedding_wer.py b/tests/test_embedding_wer.py new file mode 100644 index 0000000000000000000000000000000000000000..876af9456e8f104d3761d6f5c3042ba9ed5e7623 --- /dev/null +++ b/tests/test_embedding_wer.py @@ -0,0 +1,27 @@ +from typing import List, Tuple + +import pytest + +from sziszapangma.core.wer.wer_embedding_calculator import \ + WerEmbeddingCalculator +from tests.file_stored_embedding_transformer import \ + FileStoredEmbeddingTransformer + + +def get_sample_data() -> Tuple[List[str], List[str]]: + reference = ['ala', 'ma', 'dobrego', 'wielkiego', 'psa', 'rasowego'] + hypothesis = ['alana', 'rego', 'kruchego', 'psa', 'rasowego'] + return reference, hypothesis + + +def get_calculator() -> WerEmbeddingCalculator: + return WerEmbeddingCalculator( + FileStoredEmbeddingTransformer('tests/embeddings_pl.json')) + + +def test_classic_calculate_wer_value(): + """Sample test for core calculate.""" + reference, hypothesis = get_sample_data() + wer_result = get_calculator().calculate_wer(reference, hypothesis) + print(wer_result[0]) + assert pytest.approx(wer_result[0]) == 0.55879563 diff --git a/tests/test_soft_wer.py b/tests/test_soft_wer.py new file mode 100644 index 
0000000000000000000000000000000000000000..c72b97f071b382b1b7f224d4692e46d99ce18b08 --- /dev/null +++ b/tests/test_soft_wer.py @@ -0,0 +1,26 @@ +from typing import List, Tuple + +import pytest + +from sziszapangma.core.wer.wer_soft_calculator import WerSoftCalculator +from tests.file_stored_embedding_transformer import \ + FileStoredEmbeddingTransformer + + +def get_sample_data() -> Tuple[List[str], List[str]]: + reference = ['ala', 'ma', 'dobrego', 'wielkiego', 'psa', 'rasowego'] + hypothesis = ['alana', 'rego', 'kruchego', 'psa', 'rasowego'] + return reference, hypothesis + + +def get_calculator() -> WerSoftCalculator: + return WerSoftCalculator( + FileStoredEmbeddingTransformer('tests/embeddings_pl.json')) + + +def test_classic_calculate_wer_value(): + """Sample test for core calculate.""" + reference, hypothesis = get_sample_data() + wer_result = get_calculator().calculate_wer(reference, hypothesis) + print(wer_result[0]) + assert pytest.approx(wer_result[0]) == 0.50186761 diff --git a/tests/test_sziszapangma.py b/tests/test_sziszapangma.py new file mode 100644 index 0000000000000000000000000000000000000000..838dc136662e281f6cb87781d10c48f55bd3a5ef --- /dev/null +++ b/tests/test_sziszapangma.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python + +"""Tests for `sziszapangma` package.""" + +import pytest + +from click.testing import CliRunner + +from sziszapangma.core import cli + + +@pytest.fixture +def response(): + """Sample pytest fixture. 
+ + See more at: http://doc.pytest.org/en/latest/fixture.html + """ + # import requests + # return requests.get('https://github.com/audreyr/cookiecutter-pypackage') + + +def test_content(response): + """Sample pytest test function with the pytest fixture as an argument.""" + # from bs4 import BeautifulSoup + # assert 'GitHub' in BeautifulSoup(response.content).title.string + + +def test_command_line_interface(): + """Test the CLI.""" + runner = CliRunner() + result = runner.invoke(cli.main) + assert result.exit_code == 0 + assert 'sziszapangma.cli.main' in result.output + help_result = runner.invoke(cli.main, ['--help']) + assert help_result.exit_code == 0 + assert '--help Show this message and exit.' in help_result.output diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000000000000000000000000000000000000..c5655d8732339d25937df8285fa3c7fb046f6df6 --- /dev/null +++ b/tox.ini @@ -0,0 +1,25 @@ +[tox] +envlist = py38, py39, flake8 + +;[travis] +;python = +; 3.9: py39 +; 3.8: py38 + +[testenv:flake8] +basepython = python +deps = flake8 +commands = flake8 sziszapangma tests + +[testenv] +setenv = + PYTHONPATH = {toxinidir} +deps = + -r{toxinidir}/requirements_dev.txt +; If you want to make tox run the tests with the same versions, create a +; requirements.txt with the pinned versions and uncomment the following line: +; -r{toxinidir}/requirements.txt +commands = + pip install -U pip + pytest --basetemp={envtmpdir} +