diff --git a/.github/workflows/python-master.yml b/.github/workflows/python-master.yml new file mode 100644 index 0000000000000000000000000000000000000000..315683c453d04f6f49dad54d9e92d3d3c8a2a4db --- /dev/null +++ b/.github/workflows/python-master.yml @@ -0,0 +1,80 @@ +name: CI Main +on: + push: + branches: + - main + +jobs: + tests: + strategy: + fail-fast: false + matrix: + python-version: [ 3.9.6 ] + poetry-version: [ 1.1.5 ] + os: [ ubuntu-20.04, macos-latest, windows-latest ] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install poetry + uses: abatilo/actions-poetry@v2.0.0 + with: + poetry-version: ${{ matrix.poetry-version }} + - name: Install deps + run: poetry install -vv + - name: Run tests + run: poetry run poe test + lint: + strategy: + fail-fast: false + matrix: + python-version: [ 3.9.6 ] + poetry-version: [ 1.1.5 ] + os: [ ubuntu-20.04 ] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install poetry + uses: abatilo/actions-poetry@v2.0.0 + with: + poetry-version: ${{ matrix.poetry-version }} + - name: Install deps + run: poetry install -vv + - name: Check + run: poetry run poe check + publish: + needs: + - tests + - lint + environment: Deployment + strategy: + fail-fast: false + matrix: + python-version: [ 3.9.6 ] + poetry-version: [ 1.1.5 ] + os: [ ubuntu-20.04 ] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install poetry + uses: abatilo/actions-poetry@v2.0.0 + with: + poetry-version: ${{ matrix.poetry-version }} + - name: Install deps + run: poetry install -vv + - name: Build and publish + run: | + poetry version "$(poetry version --short)-beta.${GITHUB_RUN_NUMBER}" + poetry build + poetry config repositories.theliverpypi https://nexus.theliver.pl/repository/pypi-registry/ + poetry publish -r theliverpypi --username ${{ secrets.THELIVER_PYPI_USER }} --password ${{ secrets.THELIVER_PYPI_PASS }} diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 71463e777c56f3e1e24207a3c2d57bd249820617..ab4c4f49a8be91d52914267a36c50dc62546ca4f 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -1,29 +1,77 @@ -name: Python package - -on: - pull_request: - branches: [ main, develop ] +name: CI +on: pull_request jobs: - build: - - runs-on: ubuntu-18.04 - strategy: - matrix: - python-version: [ '3.8', '3.9' ] - - steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - ls -la - python -m pip install --upgrade pip - pip install tox - pip install --upgrade -r requirements.txt - pip install --upgrade -r requirements_dev.txt - - name: Run tox - run: tox -v + tests: + strategy: + fail-fast: false + matrix: + python-version: [ 3.9.6 ] + poetry-version: [ 1.1.5 ] + os: [ ubuntu-20.04, macos-latest, windows-latest ] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install poetry + uses: abatilo/actions-poetry@v2.0.0 + with: + poetry-version: ${{ matrix.poetry-version }} + - name: Install deps + run: poetry install -vv + - name: Run tests + run: poetry run poe test + lint: + strategy: + fail-fast: false + matrix: + python-version: [ 3.9.6 ] + poetry-version: [ 1.1.5 ] + os: [ ubuntu-20.04 ] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install poetry + uses: abatilo/actions-poetry@v2.0.0 + with: + poetry-version: ${{ matrix.poetry-version }} + - name: Install deps + run: poetry install -vv + - name: Check + run: poetry run poe check + publish: + needs: + - tests + - lint + environment: Test deployment + strategy: + fail-fast: false + matrix: + python-version: [ 3.9.6 ] + poetry-version: [ 1.1.5 ] + os: [ ubuntu-20.04 ] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install poetry + uses: abatilo/actions-poetry@v2.0.0 + with: + poetry-version: ${{ matrix.poetry-version }} + - name: Install deps + run: poetry install -vv + - name: Build and publish + run: | + poetry version "$(poetry version --short)-alpha.${GITHUB_RUN_NUMBER}" + poetry build + poetry config repositories.theliverpypi https://nexus.theliver.pl/repository/pypi-registry/ + poetry publish -r theliverpypi --username ${{ secrets.THELIVER_PYPI_USER }} --password ${{ secrets.THELIVER_PYPI_PASS }} diff --git a/.github/workflows/python-release.yml b/.github/workflows/python-release.yml new file mode 100644 index 0000000000000000000000000000000000000000..ddfee7c0a2b04d38321737ed911d5b182858250c --- /dev/null +++ b/.github/workflows/python-release.yml @@ -0,0 +1,32 @@ +name: CI Main +on: release + +jobs: + publish: + environment: Deployment + strategy: + fail-fast: false + matrix: + python-version: [ 3.9.6 ] + poetry-version: [ 1.1.5 ] + os: [ ubuntu-20.04 ] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install poetry + uses: abatilo/actions-poetry@v2.0.0 + with: + poetry-version: ${{ matrix.poetry-version }} + - name: Install deps + run: poetry install -vv + - name: Build and publish + run: | + poetry version "$(git describe --tags --abbrev=0)" + poetry build + poetry config repositories.theliverpypi https://nexus.theliver.pl/repository/pypi-registry/ + poetry publish -r theliverpypi --username ${{ secrets.THELIVER_PYPI_USER }} --password ${{ secrets.THELIVER_PYPI_PASS }} diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..73f69e0958611ac6e00bde95641f6699030ad235 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/asr-benchmarks.iml b/.idea/asr-benchmarks.iml new file mode 100644 index 0000000000000000000000000000000000000000..de5fcde38002e8a5d853278c1c1dbc5c302a7115 --- /dev/null +++ b/.idea/asr-benchmarks.iml @@ -0,0 +1,17 @@ +<?xml version="1.0" encoding="UTF-8"?> +<module type="PYTHON_MODULE" version="4"> + <component name="NewModuleRootManager"> + <content url="file://$MODULE_DIR$"> + <excludeFolder url="file://$MODULE_DIR$/venv" /> + </content> + <orderEntry type="inheritedJdk" /> + <orderEntry type="sourceFolder" forTests="false" /> + </component> + <component name="PyDocumentationSettings"> + <option name="format" value="PLAIN" /> + <option name="myDocStringFormat" value="Plain" /> + </component> + <component name="TestRunnerService"> + <option name="PROJECT_TEST_RUNNER" value="pytest" /> + </component> +</module> \ No newline at end of file diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml new file mode 100644 index 0000000000000000000000000000000000000000..a55e7a179bde3e4e772c29c0c85e53354aa54618 --- /dev/null +++ b/.idea/codeStyles/codeStyleConfig.xml @@ -0,0 +1,5 @@ +<component name="ProjectCodeStyleConfiguration"> + <state> + <option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" /> + </state> +</component> \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000000000000000000000000000000000000..95f8bc942df360987a0b69d6a0d51132507f382d --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,34 @@ +<component name="InspectionProjectProfileManager"> + <profile version="1.0"> + <option name="myName" value="Project Default" /> + <inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" /> + <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true"> + <option name="ignoredPackages"> + <value> + <list size="6"> + <item index="0" class="java.lang.String" itemvalue="tensorflow-gpu" /> + <item index="1" class="java.lang.String" itemvalue="tensorflow" /> + <item index="2" class="java.lang.String" itemvalue="mysql-connector-python" /> + <item index="3" class="java.lang.String" itemvalue="dateutil" /> + <item index="4" class="java.lang.String" itemvalue="pytest" /> + <item index="5" class="java.lang.String" itemvalue="celery" /> + </list> + </value> + </option> + </inspection_tool> + <inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true"> + <option name="ignoredErrors"> + <list> + <option value="E402" /> + </list> + </option> + </inspection_tool> + <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true"> + <option name="ignoredIdentifiers"> + <list> + <option value="dict.__getitem__" /> + </list> + </option> + </inspection_tool> + </profile> +</component> \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000000000000000000000000000000000000..105ce2da2d6447d11dfe32bfb846c3d5b199fc99 --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ +<component name="InspectionProjectProfileManager"> + <settings> + <option name="USE_PROJECT_PROFILE" value="false" /> + <version value="1.0" /> + </settings> +</component> \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000000000000000000000000000000000000..e4cf2e31963eccaa9c43fcc4a2c9d0ee8396d9be --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (asr-benchmarks)" project-jdk-type="Python SDK" /> +</project> \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000000000000000000000000000000000000..93c2771d93497aacc1f0746e02681955ef706eb7 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="ProjectModuleManager"> + <modules> + <module fileurl="file://$PROJECT_DIR$/.idea/asr-benchmarks.iml" filepath="$PROJECT_DIR$/.idea/asr-benchmarks.iml" /> + </modules> + </component> +</project> \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000000000000000000000000000000000000..94a25f7f4cb416c083d265558da75d457237d671 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="VcsDirectoryMappings"> + <mapping directory="$PROJECT_DIR$" vcs="Git" /> + </component> +</project> \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 3abaefc3b8e75e64c6611f87c9884d41685b4f64..b16dce074c1e8940245be78eed07122365fbcf48 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -19,7 +19,8 @@ # import os import sys -sys.path.insert(0, os.path.abspath('..')) + +sys.path.insert(0, os.path.abspath("..")) import sziszapangma @@ -31,22 +32,22 @@ import sziszapangma # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'sziszapangma' +project = "sziszapangma" copyright = "2021, Piotr Szymański" author = "Piotr Szymański" @@ -69,10 +70,10 @@ language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False @@ -83,7 +84,7 @@ todo_include_todos = False # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'alabaster' +html_theme = "alabaster" # Theme options are theme-specific and customize the look and feel of a # theme further. For a list of options available for each theme, see the @@ -94,13 +95,13 @@ html_theme = 'alabaster' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # -- Options for HTMLHelp output --------------------------------------- # Output file base name for HTML help builder. -htmlhelp_basename = 'sziszapangmadoc' +htmlhelp_basename = "sziszapangmadoc" # -- Options for LaTeX output ------------------------------------------ @@ -109,15 +110,12 @@ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -127,9 +125,7 @@ latex_elements = { # (source start file, target name, title, author, documentclass # [howto, manual, or own class]). latex_documents = [ - (master_doc, 'sziszapangma.tex', - 'sziszapangma Documentation', - 'Piotr Szymański', 'manual'), + (master_doc, "sziszapangma.tex", "sziszapangma Documentation", "Piotr Szymański", "manual"), ] @@ -137,11 +133,7 @@ latex_documents = [ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'sziszapangma', - 'sziszapangma Documentation', - [author], 1) -] +man_pages = [(master_doc, "sziszapangma", "sziszapangma Documentation", [author], 1)] # -- Options for Texinfo output ---------------------------------------- @@ -150,13 +142,13 @@ man_pages = [ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'sziszapangma', - 'sziszapangma Documentation', - author, - 'sziszapangma', - 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "sziszapangma", + "sziszapangma Documentation", + author, + "sziszapangma", + "One line description of project.", + "Miscellaneous", + ), ] - - - diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000000000000000000000000000000000000..0baeb890b9d8930303cf8605c02aad391915f078 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,42 @@ +[mypy-transformers.*] +ignore_missing_imports = True + +[mypy-tensorflow.*] +ignore_missing_imports = True + +[mypy-srsly.*] +ignore_missing_imports = True + +[mypy-spacy.*] +ignore_missing_imports = True +implicit_reexport = True + +[mypy-flair.*] +ignore_missing_imports = True + +[mypy-sklearn.*] +ignore_missing_imports = True + +[mypy-tqdm.*] +ignore_missing_imports = True + +[mypy-torch.*] +ignore_missing_imports = True + +[mypy-datasets.*] +ignore_missing_imports = True + +[mypy-numpy.*] +ignore_missing_imports = True + +[mypy-pandas.*] +ignore_missing_imports = True + +[mypy-pymongo.*] +ignore_missing_imports = True + +[mypy-fasttext.*] +ignore_missing_imports = True + +[mypy-pytest.*] +ignore_missing_imports = True diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000000000000000000000000000000000000..6af56bfabea098d49b8ec52b29590d304ec1d433 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,797 @@ +[[package]] +name = "appdirs" +version = "1.4.4" +description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "atomicwrites" +version = "1.4.0" +description = "Atomic file writes." +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "attrs" +version = "21.2.0" +description = "Classes Without Boilerplate" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[package.extras] +dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit"] +docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] +tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface"] +tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins"] + +[[package]] +name = "black" +version = "20.8b1" +description = "The uncompromising code formatter." +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +appdirs = "*" +click = ">=7.1.2" +mypy-extensions = ">=0.4.3" +pathspec = ">=0.6,<1" +regex = ">=2020.1.8" +toml = ">=0.10.1" +typed-ast = ">=1.4.0" +typing-extensions = ">=3.7.4" + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.3.2)", "aiohttp-cors"] + +[[package]] +name = "certifi" +version = "2021.5.30" +description = "Python package for providing Mozilla's CA Bundle." +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "charset-normalizer" +version = "2.0.4" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" +optional = false +python-versions = ">=3.5.0" + +[package.extras] +unicode_backport = ["unicodedata2"] + +[[package]] +name = "click" +version = "8.0.1" +description = "Composable command line interface toolkit" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.4" +description = "Cross-platform colored terminal text." +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "fasttext" +version = "0.9.2" +description = "fasttext Python bindings" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +numpy = "*" +pybind11 = ">=2.2" + +[[package]] +name = "idna" +version = "3.2" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "isort" +version = "5.9.3" +description = "A Python utility / library to sort Python imports." +category = "dev" +optional = false +python-versions = ">=3.6.1,<4.0" + +[package.extras] +pipfile_deprecated_finder = ["pipreqs", "requirementslib"] +requirements_deprecated_finder = ["pipreqs", "pip-api"] +colors = ["colorama (>=0.4.3,<0.5.0)"] +plugins = ["setuptools"] + +[[package]] +name = "more-itertools" +version = "8.8.0" +description = "More routines for operating on iterables, beyond itertools" +category = "dev" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "mypy" +version = "0.812" +description = "Optional static typing for Python" +category = "dev" +optional = false +python-versions = ">=3.5" + +[package.dependencies] +mypy-extensions = ">=0.4.3,<0.5.0" +typed-ast = ">=1.4.0,<1.5.0" +typing-extensions = ">=3.7.4" + +[package.extras] +dmypy = ["psutil (>=4.0)"] + +[[package]] +name = "mypy-extensions" +version = "0.4.3" +description = "Experimental type system extensions for programs checked with the mypy typechecker." +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "numpy" +version = "1.21.1" +description = "NumPy is the fundamental package for array computing with Python." +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "packaging" +version = "21.0" +description = "Core utilities for Python packages" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +pyparsing = ">=2.0.2" + +[[package]] +name = "pandas" +version = "1.3.1" +description = "Powerful data structures for data analysis, time series, and statistics" +category = "main" +optional = false +python-versions = ">=3.7.1" + +[package.dependencies] +numpy = ">=1.17.3" +python-dateutil = ">=2.7.3" +pytz = ">=2017.3" + +[package.extras] +test = ["hypothesis (>=3.58)", "pytest (>=6.0)", "pytest-xdist"] + +[[package]] +name = "pastel" +version = "0.2.1" +description = "Bring colors to your terminal." +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "pathspec" +version = "0.9.0" +description = "Utility library for gitignore style pattern matching of file paths." +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" + +[[package]] +name = "pluggy" +version = "0.13.1" +description = "plugin and hook calling mechanisms for python" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[package.extras] +dev = ["pre-commit", "tox"] + +[[package]] +name = "poethepoet" +version = "0.10.0" +description = "A task runner that works well with poetry." +category = "dev" +optional = false +python-versions = ">=3.6,<4.0" + +[package.dependencies] +pastel = ">=0.2.0,<0.3.0" +tomlkit = ">=0.6.0,<1.0.0" + +[[package]] +name = "py" +version = "1.10.0" +description = "library with cross-python path, ini-parsing, io, code, log facilities" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "pybind11" +version = "2.7.0" +description = "Seamless operability between C++11 and Python" +category = "main" +optional = false +python-versions = "!=3.0,!=3.1,!=3.2,!=3.3,!=3.4,>=2.7" + +[package.extras] +global = ["pybind11-global (==2.7.0)"] + +[[package]] +name = "pyflakes" +version = "2.3.1" +description = "passive checker of Python programs" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "pymongo" +version = "3.12.0" +description = "Python driver for MongoDB <http://www.mongodb.org>" +category = "main" +optional = false +python-versions = "*" + +[package.extras] +aws = ["pymongo-auth-aws (<2.0.0)"] +encryption = ["pymongocrypt (>=1.1.0,<2.0.0)"] +gssapi = ["pykerberos"] +ocsp = ["pyopenssl (>=17.2.0)", "requests (<3.0.0)", "service-identity (>=18.1.0)", "certifi"] +snappy = ["python-snappy"] +srv = ["dnspython (>=1.16.0,<1.17.0)"] +tls = ["ipaddress"] +zstd = ["zstandard"] + +[[package]] +name = "pyparsing" +version = "2.4.7" +description = "Python parsing module" +category = "dev" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "pytest" +version = "5.4.3" +description = "pytest: simple powerful testing with Python" +category = "dev" +optional = false +python-versions = ">=3.5" + +[package.dependencies] +atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} +attrs = ">=17.4.0" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +more-itertools = ">=4.0.0" +packaging = "*" +pluggy = ">=0.12,<1.0" +py = ">=1.5.0" +wcwidth = "*" + +[package.extras] +checkqa-mypy = ["mypy (==v0.761)"] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pytz" +version = "2021.1" +description = "World timezone definitions, modern and historical" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "regex" +version = "2021.7.6" +description = "Alternative regular expression module, to replace re." +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "requests" +version = "2.26.0" +description = "Python HTTP for Humans." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = {version = ">=2.0.0,<2.1.0", markers = "python_version >= \"3\""} +idna = {version = ">=2.5,<4", markers = "python_version >= \"3\""} +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] +use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +category = "dev" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "tomlkit" +version = "0.7.2" +description = "Style preserving TOML library" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "typed-ast" +version = "1.4.3" +description = "a fork of Python 2 and 3 ast modules with type comment support" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "typing-extensions" +version = "3.10.0.0" +description = "Backported and Experimental Type Hints for Python 3.5+" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "urllib3" +version = "1.26.6" +description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" + +[package.extras] +brotli = ["brotlipy (>=0.6.0)"] +secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[[package]] +name = "wcwidth" +version = "0.2.5" +description = "Measures the displayed width of unicode strings in a terminal" +category = "dev" +optional = false +python-versions = "*" + +[extras] +developer = [] + +[metadata] +lock-version = "1.1" +python-versions = "^3.8" +content-hash = "78c43ede773a3caa4d3c131a3faf75975279c5d38ba6c9fe15b3653515e475ce" + +[metadata.files] +appdirs = [ + {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"}, + {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"}, +] +atomicwrites = [ + {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"}, + {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, +] +attrs = [ + {file = "attrs-21.2.0-py2.py3-none-any.whl", hash = "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1"}, + {file = "attrs-21.2.0.tar.gz", hash = "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb"}, +] +black = [ + {file = "black-20.8b1.tar.gz", hash = "sha256:1c02557aa099101b9d21496f8a914e9ed2222ef70336404eeeac8edba836fbea"}, +] +certifi = [ + {file = "certifi-2021.5.30-py2.py3-none-any.whl", hash = "sha256:50b1e4f8446b06f41be7dd6338db18e0990601dce795c2b1686458aa7e8fa7d8"}, + {file = "certifi-2021.5.30.tar.gz", hash = "sha256:2bbf76fd432960138b3ef6dda3dde0544f27cbf8546c458e60baf371917ba9ee"}, +] +charset-normalizer = [ + {file = "charset-normalizer-2.0.4.tar.gz", hash = "sha256:f23667ebe1084be45f6ae0538e4a5a865206544097e4e8bbcacf42cd02a348f3"}, + {file = "charset_normalizer-2.0.4-py3-none-any.whl", hash = "sha256:0c8911edd15d19223366a194a513099a302055a962bca2cec0f54b8b63175d8b"}, +] +click = [ + {file = "click-8.0.1-py3-none-any.whl", hash = "sha256:fba402a4a47334742d782209a7c79bc448911afe1149d07bdabdf480b3e2f4b6"}, + {file = "click-8.0.1.tar.gz", hash = "sha256:8c04c11192119b1ef78ea049e0a6f0463e4c48ef00a30160c704337586f3ad7a"}, +] +colorama = [ + {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, + {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, +] +fasttext = [ + {file = "fasttext-0.9.2.tar.gz", hash = "sha256:665556f1f6dcb4fcbe25fa8ebcd4f71b18fa96a090de09d88d97a60cbd29dcb5"}, +] +idna = [ + {file = "idna-3.2-py3-none-any.whl", hash = "sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a"}, + {file = "idna-3.2.tar.gz", hash = "sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3"}, +] +isort = [ + {file = "isort-5.9.3-py3-none-any.whl", hash = "sha256:e17d6e2b81095c9db0a03a8025a957f334d6ea30b26f9ec70805411e5c7c81f2"}, + {file = "isort-5.9.3.tar.gz", hash = "sha256:9c2ea1e62d871267b78307fe511c0838ba0da28698c5732d54e2790bf3ba9899"}, +] +more-itertools = [ + {file = "more-itertools-8.8.0.tar.gz", hash = "sha256:83f0308e05477c68f56ea3a888172c78ed5d5b3c282addb67508e7ba6c8f813a"}, + {file = "more_itertools-8.8.0-py3-none-any.whl", hash = "sha256:2cf89ec599962f2ddc4d568a05defc40e0a587fbc10d5989713638864c36be4d"}, +] +mypy = [ + {file = "mypy-0.812-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:a26f8ec704e5a7423c8824d425086705e381b4f1dfdef6e3a1edab7ba174ec49"}, + {file = "mypy-0.812-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:28fb5479c494b1bab244620685e2eb3c3f988d71fd5d64cc753195e8ed53df7c"}, + {file = "mypy-0.812-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:9743c91088d396c1a5a3c9978354b61b0382b4e3c440ce83cf77994a43e8c521"}, + {file = "mypy-0.812-cp35-cp35m-win_amd64.whl", hash = "sha256:d7da2e1d5f558c37d6e8c1246f1aec1e7349e4913d8fb3cb289a35de573fe2eb"}, + {file = "mypy-0.812-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:4eec37370483331d13514c3f55f446fc5248d6373e7029a29ecb7b7494851e7a"}, + {file = "mypy-0.812-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:d65cc1df038ef55a99e617431f0553cd77763869eebdf9042403e16089fe746c"}, + {file = "mypy-0.812-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:61a3d5b97955422964be6b3baf05ff2ce7f26f52c85dd88db11d5e03e146a3a6"}, + {file = "mypy-0.812-cp36-cp36m-win_amd64.whl", hash = "sha256:25adde9b862f8f9aac9d2d11971f226bd4c8fbaa89fb76bdadb267ef22d10064"}, + {file = "mypy-0.812-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:552a815579aa1e995f39fd05dde6cd378e191b063f031f2acfe73ce9fb7f9e56"}, + {file = "mypy-0.812-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:499c798053cdebcaa916eef8cd733e5584b5909f789de856b482cd7d069bdad8"}, + {file = "mypy-0.812-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:5873888fff1c7cf5b71efbe80e0e73153fe9212fafdf8e44adfe4c20ec9f82d7"}, + {file = "mypy-0.812-cp37-cp37m-win_amd64.whl", hash = "sha256:9f94aac67a2045ec719ffe6111df543bac7874cee01f41928f6969756e030564"}, + {file = "mypy-0.812-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d23e0ea196702d918b60c8288561e722bf437d82cb7ef2edcd98cfa38905d506"}, + {file = "mypy-0.812-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:674e822aa665b9fd75130c6c5f5ed9564a38c6cea6a6432ce47eafb68ee578c5"}, + {file = "mypy-0.812-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:abf7e0c3cf117c44d9285cc6128856106183938c68fd4944763003decdcfeb66"}, + {file = "mypy-0.812-cp38-cp38-win_amd64.whl", hash = "sha256:0d0a87c0e7e3a9becdfbe936c981d32e5ee0ccda3e0f07e1ef2c3d1a817cf73e"}, + {file = "mypy-0.812-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7ce3175801d0ae5fdfa79b4f0cfed08807af4d075b402b7e294e6aa72af9aa2a"}, + {file = "mypy-0.812-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:b09669bcda124e83708f34a94606e01b614fa71931d356c1f1a5297ba11f110a"}, + {file = "mypy-0.812-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:33f159443db0829d16f0a8d83d94df3109bb6dd801975fe86bacb9bf71628e97"}, + {file = "mypy-0.812-cp39-cp39-win_amd64.whl", hash = "sha256:3f2aca7f68580dc2508289c729bd49ee929a436208d2b2b6aab15745a70a57df"}, + {file = "mypy-0.812-py3-none-any.whl", hash = "sha256:2f9b3407c58347a452fc0736861593e105139b905cca7d097e413453a1d650b4"}, + {file = "mypy-0.812.tar.gz", hash = "sha256:cd07039aa5df222037005b08fbbfd69b3ab0b0bd7a07d7906de75ae52c4e3119"}, +] +mypy-extensions = [ + {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, + {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"}, +] +numpy = [ + {file = "numpy-1.21.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:38e8648f9449a549a7dfe8d8755a5979b45b3538520d1e735637ef28e8c2dc50"}, + {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:fd7d7409fa643a91d0a05c7554dd68aa9c9bb16e186f6ccfe40d6e003156e33a"}, + {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a75b4498b1e93d8b700282dc8e655b8bd559c0904b3910b144646dbbbc03e062"}, + {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1412aa0aec3e00bc23fbb8664d76552b4efde98fb71f60737c83efbac24112f1"}, + {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e46ceaff65609b5399163de5893d8f2a82d3c77d5e56d976c8b5fb01faa6b671"}, + {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:c6a2324085dd52f96498419ba95b5777e40b6bcbc20088fddb9e8cbb58885e8e"}, + {file = "numpy-1.21.1-cp37-cp37m-win32.whl", hash = "sha256:73101b2a1fef16602696d133db402a7e7586654682244344b8329cdcbbb82172"}, + {file = "numpy-1.21.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7a708a79c9a9d26904d1cca8d383bf869edf6f8e7650d85dbc77b041e8c5a0f8"}, + {file = "numpy-1.21.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95b995d0c413f5d0428b3f880e8fe1660ff9396dcd1f9eedbc311f37b5652e16"}, + {file = "numpy-1.21.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:635e6bd31c9fb3d475c8f44a089569070d10a9ef18ed13738b03049280281267"}, + {file = "numpy-1.21.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4a3d5fb89bfe21be2ef47c0614b9c9c707b7362386c9a3ff1feae63e0267ccb6"}, + {file = "numpy-1.21.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8a326af80e86d0e9ce92bcc1e65c8ff88297de4fa14ee936cb2293d414c9ec63"}, + {file = "numpy-1.21.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:791492091744b0fe390a6ce85cc1bf5149968ac7d5f0477288f78c89b385d9af"}, + {file = "numpy-1.21.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0318c465786c1f63ac05d7c4dbcecd4d2d7e13f0959b01b534ea1e92202235c5"}, + {file = "numpy-1.21.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a513bd9c1551894ee3d31369f9b07460ef223694098cf27d399513415855b68"}, + {file = "numpy-1.21.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:91c6f5fc58df1e0a3cc0c3a717bb3308ff850abdaa6d2d802573ee2b11f674a8"}, + {file = "numpy-1.21.1-cp38-cp38-win32.whl", hash = "sha256:978010b68e17150db8765355d1ccdd450f9fc916824e8c4e35ee620590e234cd"}, + {file = "numpy-1.21.1-cp38-cp38-win_amd64.whl", hash = "sha256:9749a40a5b22333467f02fe11edc98f022133ee1bfa8ab99bda5e5437b831214"}, + {file = "numpy-1.21.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d7a4aeac3b94af92a9373d6e77b37691b86411f9745190d2c351f410ab3a791f"}, + {file = "numpy-1.21.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d9e7912a56108aba9b31df688a4c4f5cb0d9d3787386b87d504762b6754fbb1b"}, + {file = "numpy-1.21.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:25b40b98ebdd272bc3020935427a4530b7d60dfbe1ab9381a39147834e985eac"}, + {file = "numpy-1.21.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8a92c5aea763d14ba9d6475803fc7904bda7decc2a0a68153f587ad82941fec1"}, + {file = "numpy-1.21.1-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:05a0f648eb28bae4bcb204e6fd14603de2908de982e761a2fc78efe0f19e96e1"}, + {file = "numpy-1.21.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f01f28075a92eede918b965e86e8f0ba7b7797a95aa8d35e1cc8821f5fc3ad6a"}, + {file = "numpy-1.21.1-cp39-cp39-win32.whl", hash = "sha256:88c0b89ad1cc24a5efbb99ff9ab5db0f9a86e9cc50240177a571fbe9c2860ac2"}, + {file = "numpy-1.21.1-cp39-cp39-win_amd64.whl", hash = "sha256:01721eefe70544d548425a07c80be8377096a54118070b8a62476866d5208e33"}, + {file = "numpy-1.21.1-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2d4d1de6e6fb3d28781c73fbde702ac97f03d79e4ffd6598b880b2d95d62ead4"}, + {file = "numpy-1.21.1.zip", hash = "sha256:dff4af63638afcc57a3dfb9e4b26d434a7a602d225b42d746ea7fe2edf1342fd"}, +] +packaging = [ + {file = "packaging-21.0-py3-none-any.whl", hash = "sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14"}, + {file = "packaging-21.0.tar.gz", hash = "sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7"}, +] +pandas = [ + {file = "pandas-1.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1ee8418d0f936ff2216513aa03e199657eceb67690995d427a4a7ecd2e68f442"}, + {file = "pandas-1.3.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d9acfca191140a518779d1095036d842d5e5bc8e8ad8b5eaad1aff90fe1870d"}, + {file = "pandas-1.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e323028ab192fcfe1e8999c012a0fa96d066453bb354c7e7a4a267b25e73d3c8"}, + {file = "pandas-1.3.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9d06661c6eb741ae633ee1c57e8c432bb4203024e263fe1a077fa3fda7817fdb"}, + {file = "pandas-1.3.1-cp37-cp37m-win32.whl", hash = "sha256:23c7452771501254d2ae23e9e9dac88417de7e6eff3ce64ee494bb94dc88c300"}, + {file = "pandas-1.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7150039e78a81eddd9f5a05363a11cadf90a4968aac6f086fd83e66cf1c8d1d6"}, + {file = "pandas-1.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5c09a2538f0fddf3895070579082089ff4ae52b6cb176d8ec7a4dacf7e3676c1"}, + {file = "pandas-1.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:905fc3e0fcd86b0a9f1f97abee7d36894698d2592b22b859f08ea5a8fe3d3aab"}, + {file = "pandas-1.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ee927c70794e875a59796fab8047098aa59787b1be680717c141cd7873818ae"}, + {file = "pandas-1.3.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c976e023ed580e60a82ccebdca8e1cc24d8b1fbb28175eb6521025c127dab66"}, + {file = "pandas-1.3.1-cp38-cp38-win32.whl", hash = "sha256:22f3fcc129fb482ef44e7df2a594f0bd514ac45aabe50da1a10709de1b0f9d84"}, + {file = "pandas-1.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:45656cd59ae9745a1a21271a62001df58342b59c66d50754390066db500a8362"}, + {file = "pandas-1.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:114c6789d15862508900a25cb4cb51820bfdd8595ea306bab3b53cd19f990b65"}, + {file = "pandas-1.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:527c43311894aff131dea99cf418cd723bfd4f0bcf3c3da460f3b57e52a64da5"}, + {file = "pandas-1.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdb3b33dde260b1766ea4d3c6b8fbf6799cee18d50a2a8bc534cf3550b7c819a"}, + {file = "pandas-1.3.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c28760932283d2c9f6fa5e53d2f77a514163b9e67fd0ee0879081be612567195"}, + {file = "pandas-1.3.1-cp39-cp39-win32.whl", hash = "sha256:be12d77f7e03c40a2466ed00ccd1a5f20a574d3c622fe1516037faa31aa448aa"}, + {file = "pandas-1.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:9e1fe6722cbe27eb5891c1977bca62d456c19935352eea64d33956db46139364"}, + {file = "pandas-1.3.1.tar.gz", hash = "sha256:341935a594db24f3ff07d1b34d1d231786aa9adfa84b76eab10bf42907c8aed3"}, +] +pastel = [ + {file = "pastel-0.2.1-py2.py3-none-any.whl", hash = "sha256:4349225fcdf6c2bb34d483e523475de5bb04a5c10ef711263452cb37d7dd4364"}, + {file = "pastel-0.2.1.tar.gz", hash = "sha256:e6581ac04e973cac858828c6202c1e1e81fee1dc7de7683f3e1ffe0bfd8a573d"}, +] +pathspec = [ + {file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"}, + {file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"}, +] +pluggy = [ + {file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"}, + {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"}, +] +poethepoet = [ + {file = "poethepoet-0.10.0-py3-none-any.whl", hash = "sha256:6fb3021603d4421c6fcc40072bbcf150a6c52ef70ff4d3be089b8b04e015ef5a"}, + {file = "poethepoet-0.10.0.tar.gz", hash = "sha256:70b97cb194b978dc464c70793e85e6f746cddf82b84a38bfb135946ad71ae19c"}, +] +py = [ + {file = "py-1.10.0-py2.py3-none-any.whl", hash = "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a"}, + {file = "py-1.10.0.tar.gz", hash = "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3"}, +] +pybind11 = [ + {file = "pybind11-2.7.0-py2.py3-none-any.whl", hash = "sha256:71dfd6e61f6aef3e24eda3b9770a0d53072346871f9f5a0510598ec86b5f9dc2"}, + {file = "pybind11-2.7.0.tar.gz", hash = "sha256:3e2a9a94396fbb27e75acf28d3de26e029576be1d4b38acc846ae08ef0eb3033"}, +] +pyflakes = [ + {file = "pyflakes-2.3.1-py2.py3-none-any.whl", hash = "sha256:7893783d01b8a89811dd72d7dfd4d84ff098e5eed95cfa8905b22bbffe52efc3"}, + {file = "pyflakes-2.3.1.tar.gz", hash = "sha256:f5bc8ecabc05bb9d291eb5203d6810b49040f6ff446a756326104746cc00c1db"}, +] +pymongo = [ + {file = "pymongo-3.12.0-cp27-cp27m-macosx_10_14_intel.whl", hash = "sha256:072ba7cb65c8aa4d5c5659bf6722ee85781c9d7816dc00679b8b6f3dff1ddafc"}, + {file = "pymongo-3.12.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:d6e11ffd43184d529d6752d6dcb62b994f903038a17ea2168ef1910c96324d26"}, + {file = "pymongo-3.12.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:7412a36798966624dc4c57d64aa43c2d1100b348abd98daaac8e99e57d87e1d7"}, + {file = "pymongo-3.12.0-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e8a82e35d52ad6f867e88096a1a2b9bdc7ec4d5e65c7b4976a248bf2d1a32a93"}, + {file = "pymongo-3.12.0-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:dcd3d0009fbb6e454d729f8b22d0063bd9171c31a55e0f0271119bd4f2700023"}, + {file = "pymongo-3.12.0-cp27-cp27m-win32.whl", hash = "sha256:1bc6fe7279ff40c6818db002bf5284aa03ec181ea1b1ceaeee33c289d412afa7"}, + {file = "pymongo-3.12.0-cp27-cp27m-win_amd64.whl", hash = "sha256:e2b7670c0c8c6b501464150dd49dd0d6be6cb7f049e064124911cec5514fa19e"}, + {file = "pymongo-3.12.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:316c1b8723afa9870567cd6dff35d440b2afeda53aa13da6c5ab85f98ed6f5ca"}, + {file = "pymongo-3.12.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:255a35bf29185f44b412e31a927d9dcedda7c2c380127ecc4fbf2f61b72fa978"}, + {file = "pymongo-3.12.0-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ffbae429ba9e42d0582d3ac63fdb410338892468a2107d8ff68228ec9a39a0ed"}, + {file = "pymongo-3.12.0-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:c188db6cf9e14dbbb42f5254292be96f05374a35e7dfa087cc2140f0ff4f10f6"}, + {file = "pymongo-3.12.0-cp34-cp34m-macosx_10_6_intel.whl", hash = "sha256:6fb3f85870ae26896bb44e67db94045f2ebf00c5d41e6b66cdcbb5afd644fc18"}, + {file = "pymongo-3.12.0-cp34-cp34m-manylinux1_i686.whl", hash = "sha256:aaa038eafb7186a4abbb311fcf20724be9363645882bbce540bef4797e812a7a"}, + {file = "pymongo-3.12.0-cp34-cp34m-manylinux1_x86_64.whl", hash = "sha256:7d98ce3c42921bb91566121b658e0d9d59a9082a9bd6f473190607ff25ab637f"}, + {file = "pymongo-3.12.0-cp34-cp34m-win32.whl", hash = "sha256:b0a0cf39f589e52d801fdef418305562bc030cdf8929217463c8433c65fd5c2f"}, + {file = "pymongo-3.12.0-cp34-cp34m-win_amd64.whl", hash = "sha256:ceae3ab9e11a27aaab42878f1d203600dfd24f0e43678b47298219a0f10c0d30"}, + {file = "pymongo-3.12.0-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:5e574664f1468872cd40f74e4811e22b1aa4de9399d6bcfdf1ee6ea94c017fcf"}, + {file = "pymongo-3.12.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:73b400fdc22de84bae0dbf1a22613928a41612ec0a3d6ed47caf7ad4d3d0f2ff"}, + {file = "pymongo-3.12.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:cbf8672edeb7b7128c4a939274801f0e32bbf5159987815e3d1eace625264a46"}, + {file = "pymongo-3.12.0-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:a634a4730ce0b0934ed75e45beba730968e12b4dafbb22f69b3b2f616d9e644e"}, + {file = "pymongo-3.12.0-cp35-cp35m-manylinux2014_i686.whl", hash = "sha256:c55782a55f4a013a78ac5b6ee4b8731a192dea7ab09f1b6b3044c96d5128edd4"}, + {file = "pymongo-3.12.0-cp35-cp35m-manylinux2014_ppc64le.whl", hash = "sha256:11f9e0cfc84ade088a38df2708d0b958bb76360181df1b2e1e1a41beaa57952b"}, + {file = "pymongo-3.12.0-cp35-cp35m-manylinux2014_s390x.whl", hash = "sha256:186104a94d39b8412f8e3de385acd990a628346a4402d4f3a288a82b8660bd22"}, + {file = "pymongo-3.12.0-cp35-cp35m-manylinux2014_x86_64.whl", hash = "sha256:70761fd3c576b027eec882b43ee0a8e5b22ff9c20cdf4d0400e104bc29e53e34"}, + {file = "pymongo-3.12.0-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:333bfad77aa9cd11711febfb75eed0bb537a1d022e1c252714dad38993590240"}, + {file = "pymongo-3.12.0-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:fa8957e9a1b202cb45e6b839c241cd986c897be1e722b81d2f32e9c6aeee80b0"}, + {file = "pymongo-3.12.0-cp35-cp35m-win32.whl", hash = "sha256:4ba0def4abef058c0e5101e05e3d5266e6fffb9795bbf8be0fe912a7361a0209"}, + {file = "pymongo-3.12.0-cp35-cp35m-win_amd64.whl", hash = "sha256:a0e5dff6701fa615f165306e642709e1c1550d5b237c5a7a6ea299886828bd50"}, + {file = "pymongo-3.12.0-cp36-cp36m-macosx_10_6_intel.whl", hash = "sha256:b542d56ed1b8d5cf3bb36326f814bd2fbe8812dfd2582b80a15689ea433c0e35"}, + {file = "pymongo-3.12.0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:a325600c83e61e3c9cebc0c2b1c8c4140fa887f789085075e8f44c8ff2547eb9"}, + {file = "pymongo-3.12.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:48d5bc80ab0af6b60c4163c5617f5cd23f2f880d7600940870ea5055816af024"}, + {file = "pymongo-3.12.0-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:c5cab230e7cabdae9ff23c12271231283efefb944c1b79bed79a91beb65ba547"}, + {file = "pymongo-3.12.0-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:d73e10772152605f6648ba4410318594f1043bbfe36d2fadee7c4b8912eff7c5"}, + {file = "pymongo-3.12.0-cp36-cp36m-manylinux2014_ppc64le.whl", hash = "sha256:b1c4874331ab960429caca81acb9d2932170d66d6d6f87e65dc4507a85aca152"}, + {file = "pymongo-3.12.0-cp36-cp36m-manylinux2014_s390x.whl", hash = "sha256:a3566acfbcde46911c52810374ecc0354fdb841284a3efef6ff7105bc007e9a8"}, + {file = "pymongo-3.12.0-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:b3b5b3cbc3fdf4fcfa292529df2a85b5d9c7053913a739d3069af1e12e12219f"}, + {file = "pymongo-3.12.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd3854148005c808c485c754a184c71116372263709958b42aefbef2e5dd373a"}, + {file = "pymongo-3.12.0-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f55c1ddcc1f6050b07d468ce594f55dbf6107b459e16f735d26818d7be1e9538"}, + {file = "pymongo-3.12.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ced944dcdd561476deef7cb7bfd4987c69fffbfeff6d02ca4d5d4fd592d559b7"}, + {file = "pymongo-3.12.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78ecb8d42f50d393af912bfb1fb1dcc9aabe9967973efb49ee577e8f1cea494c"}, + {file = "pymongo-3.12.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1970cfe2aec1bf74b40cf30c130ad10cd968941694630386db33e1d044c22a2e"}, + {file = "pymongo-3.12.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b8bf42d3b32f586f4c9e37541769993783a534ad35531ce8a4379f6fa664fba9"}, + {file = "pymongo-3.12.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:bc9ac81e73573516070d24ce15da91281922811f385645df32bd3c8a45ab4684"}, + {file = "pymongo-3.12.0-cp36-cp36m-win32.whl", hash = "sha256:d04ca462cb99077e6c059e97c072957caf2918e6e4191e3161c01c439e0193de"}, + {file = "pymongo-3.12.0-cp36-cp36m-win_amd64.whl", hash = "sha256:f2acf9bbcd514e901f82c4ca6926bbd2ae61716728f110b4343eb0a69612d018"}, + {file = "pymongo-3.12.0-cp37-cp37m-macosx_10_6_intel.whl", hash = "sha256:b754240daafecd9d5fce426b0fbaaed03f4ebb130745c8a4ae9231fffb8d75e5"}, + {file = "pymongo-3.12.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:af586e85144023686fb0af09c8cdf672484ea182f352e7ceead3d832de381e1b"}, + {file = "pymongo-3.12.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:fe5872ce6f9627deac8314bdffd3862624227c3de4c17ef0cc78bbf0402999eb"}, + {file = "pymongo-3.12.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:f6977a520bd96e097c8a37a8cbb9faa1ea99d21bf84190195056e25f688af73d"}, + {file = "pymongo-3.12.0-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:2dbfbbded947a83a3dffc2bd1ec4750c17e40904692186e2c55a3ad314ca0222"}, + {file = "pymongo-3.12.0-cp37-cp37m-manylinux2014_ppc64le.whl", hash = "sha256:a752ecd1a26000a6d67be7c9a2e93801994a8b3f866ac95b672fbc00225ca91a"}, + {file = "pymongo-3.12.0-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:1bab889ae7640eba739f67fcbf8eff252dddc60d4495e6ddd3a87cd9a95fdb52"}, + {file = "pymongo-3.12.0-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:f94c7d22fb36b184734dded7345a04ec5f95130421c775b8b0c65044ef073f34"}, + {file = "pymongo-3.12.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec5ca7c0007ce268048bbe0ffc6846ed1616cf3d8628b136e81d5e64ff3f52a2"}, + {file = "pymongo-3.12.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7c72d08acdf573455b2b9d2b75b8237654841d63a48bc2327dc102c6ee89b75a"}, + {file = "pymongo-3.12.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b6ea08758b6673610b3c5bdf47189286cf9c58b1077558706a2f6f8744922527"}, + {file = "pymongo-3.12.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46d5ec90276f71af3a29917b30f2aec2315a2759b5f8d45b3b63a07ca8a070a3"}, + {file = "pymongo-3.12.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:625befa3bc9b40746a749115cc6a15bf20b9bd7597ca55d646205b479a2c99c7"}, + {file = "pymongo-3.12.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d1131562ddc2ea8a446f66c2648d7dabec2b3816fc818528eb978a75a6d23b2e"}, + {file = "pymongo-3.12.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:eee42a1cc06565f6b21caa1f504ec15e07de7ebfd520ab57f8cb3308bc118e22"}, + {file = "pymongo-3.12.0-cp37-cp37m-win32.whl", hash = "sha256:94d38eba4d1b5eb3e6bfece0651b855a35c44f32fd91f512ab4ba41b8c0d3e66"}, + {file = "pymongo-3.12.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e018a4921657c2d3f89c720b7b90b9182e277178a04a7e9542cc79d7d787ca51"}, + {file = "pymongo-3.12.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7c6a9948916a7bbcc6d3a9f6fb75db1acb5546078023bfb3db6efabcd5a67527"}, + {file = "pymongo-3.12.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:e9faf8d4712d5ea301d74abfcf6dafe4b7f4af7936e91f283b0ad7bf69ed3e3a"}, + {file = "pymongo-3.12.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:cc2894fe91f31a513860238ede69fe47fada21f9e7ddfe73f7f9fef93a971e41"}, + {file = "pymongo-3.12.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:053b4ebf91c7395d1fcd2ce6a9edff0024575b7b2de6781554a4114448a8adc9"}, + {file = "pymongo-3.12.0-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:39dafa2eaf577d1969f289dc9a44501859a1897eb45bd589e93ce843fc610800"}, + {file = "pymongo-3.12.0-cp38-cp38-manylinux2014_ppc64le.whl", hash = "sha256:246ec420e4c8744fceb4e259f906211b9c198e1f345e6158dcd7cbad3737e11e"}, + {file = "pymongo-3.12.0-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:208debdcf76ed39ebf24f38509f50dc1c100e31e8653817fedb8e1f867850a13"}, + {file = "pymongo-3.12.0-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:18290649759f9db660972442aa606f845c368db9b08c4c73770f6da14113569b"}, + {file = "pymongo-3.12.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:657ad80de8ec9ed656f28844efc801a0802961e8c6a85038d97ff6f555ef4919"}, + {file = "pymongo-3.12.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b772bab31cbd9cb911e41e1a611ebc9497f9a32a7348e2747c38210f75c00f41"}, + {file = "pymongo-3.12.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2399a85b54f68008e483b2871f4a458b4c980469c7fe921595ede073e4844f1e"}, + {file = "pymongo-3.12.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e66780f14c2efaf989cd3ac613b03ee6a8e3a0ba7b96c0bb14adca71a427e55"}, + {file = "pymongo-3.12.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:02dc0b0f48ed3cd06c13b7e31b066bf91e00dac5f8147b0a0a45f9009bfab857"}, + {file = "pymongo-3.12.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:070a4ef689c9438a999ec3830e69b208ff0d12251846e064d947f97d819d1d05"}, + {file = "pymongo-3.12.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:db93608a246da44d728842b8fa9e45aa9782db76955f634a707739a8d53ff544"}, + {file = "pymongo-3.12.0-cp38-cp38-win32.whl", hash = "sha256:5af390fa9faf56c93252dab09ea57cd020c9123aa921b63a0ed51832fdb492e7"}, + {file = "pymongo-3.12.0-cp38-cp38-win_amd64.whl", hash = "sha256:a2239556ff7241584ce57be1facf25081669bb457a9e5cbe68cce4aae6567aa1"}, + {file = "pymongo-3.12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cda9e628b1315beec8341e8c04aac9a0b910650b05e0751e42e399d5694aeacb"}, + {file = "pymongo-3.12.0-cp39-cp39-manylinux1_i686.whl", hash = "sha256:845a8b83798b2fb11b09928413cb32692866bfbc28830a433d9fa4c8c3720dd0"}, + {file = "pymongo-3.12.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:da8288bc4a7807c6715416deed1c57d94d5e03e93537889e002bf985be503f1a"}, + {file = "pymongo-3.12.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:a9ba2a63777027b06b116e1ea8248e66fd1bedc2c644f93124b81a91ddbf6d88"}, + {file = "pymongo-3.12.0-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:9a13661681d17e43009bb3e85e837aa1ec5feeea1e3654682a01b8821940f8b3"}, + {file = "pymongo-3.12.0-cp39-cp39-manylinux2014_ppc64le.whl", hash = "sha256:6b89dc51206e4971c5568c797991eaaef5dc2a6118d67165858ad11752dba055"}, + {file = "pymongo-3.12.0-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:701e08457183da70ed96b35a6b43e6ba1df0b47c837b063cde39a1fbe1aeda81"}, + {file = "pymongo-3.12.0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:e7a33322e08021c37e89cae8ff06327503e8a1719e97c69f32c31cbf6c30d72c"}, + {file = "pymongo-3.12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd1f49f949a658c4e8f81ed73f9aad25fcc7d4f62f767f591e749e30038c4e1d"}, + {file = "pymongo-3.12.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a6d055f01b83b1a4df8bb0c61983d3bdffa913764488910af3620e5c2450bf83"}, + {file = "pymongo-3.12.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd6ff2192f34bd622883c745a56f492b1c9ccd44e14953e8051c33024a2947d5"}, + {file = "pymongo-3.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19d4bd0fc29aa405bb1781456c9cfff9fceabb68543741eb17234952dbc2bbb0"}, + {file = "pymongo-3.12.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24f8aeec4d6b894a6128844e50ff423dd02462ee83addf503c598ee3a80ddf3d"}, + {file = "pymongo-3.12.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0b6055e0ef451ff73c93d0348d122a0750dddf323b9361de5835dac2f6cf7fc1"}, + {file = "pymongo-3.12.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6261bee7c5abadeac7497f8f1c43e521da78dd13b0a2439f526a7b0fc3788824"}, + {file = "pymongo-3.12.0-cp39-cp39-win32.whl", hash = "sha256:2e92aa32300a0b5e4175caec7769f482b292769807024a86d674b3f19b8e3755"}, + {file = "pymongo-3.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:3ce83f17f641a62a4dfb0ba1b8a3c1ced7c842f511b5450d90c030c7828e3693"}, + {file = "pymongo-3.12.0-py2.7-macosx-10.14-intel.egg", hash = "sha256:d1740776b70367277323fafb76bcf09753a5cc9824f5d705bac22a34ff3668ea"}, + {file = "pymongo-3.12.0.tar.gz", hash = "sha256:b88d1742159bc93a078733f9789f563cef26f5e370eba810476a71aa98e5fbc2"}, +] +pyparsing = [ + {file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"}, + {file = "pyparsing-2.4.7.tar.gz", hash = "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1"}, +] +pytest = [ + {file = "pytest-5.4.3-py3-none-any.whl", hash = "sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1"}, + {file = "pytest-5.4.3.tar.gz", hash = "sha256:7979331bfcba207414f5e1263b5a0f8f521d0f457318836a7355531ed1a4c7d8"}, +] +python-dateutil = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] +pytz = [ + {file = "pytz-2021.1-py2.py3-none-any.whl", hash = "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798"}, + {file = "pytz-2021.1.tar.gz", hash = "sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da"}, +] +regex = [ + {file = "regex-2021.7.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e6a1e5ca97d411a461041d057348e578dc344ecd2add3555aedba3b408c9f874"}, + {file = "regex-2021.7.6-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:6afe6a627888c9a6cfbb603d1d017ce204cebd589d66e0703309b8048c3b0854"}, + {file = "regex-2021.7.6-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:ccb3d2190476d00414aab36cca453e4596e8f70a206e2aa8db3d495a109153d2"}, + {file = "regex-2021.7.6-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:ed693137a9187052fc46eedfafdcb74e09917166362af4cc4fddc3b31560e93d"}, + {file = "regex-2021.7.6-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:99d8ab206a5270c1002bfcf25c51bf329ca951e5a169f3b43214fdda1f0b5f0d"}, + {file = "regex-2021.7.6-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:b85ac458354165405c8a84725de7bbd07b00d9f72c31a60ffbf96bb38d3e25fa"}, + {file = "regex-2021.7.6-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:3f5716923d3d0bfb27048242a6e0f14eecdb2e2a7fac47eda1d055288595f222"}, + {file = "regex-2021.7.6-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5983c19d0beb6af88cb4d47afb92d96751fb3fa1784d8785b1cdf14c6519407"}, + {file = "regex-2021.7.6-cp36-cp36m-win32.whl", hash = "sha256:c92831dac113a6e0ab28bc98f33781383fe294df1a2c3dfd1e850114da35fd5b"}, + {file = "regex-2021.7.6-cp36-cp36m-win_amd64.whl", hash = "sha256:791aa1b300e5b6e5d597c37c346fb4d66422178566bbb426dd87eaae475053fb"}, + {file = "regex-2021.7.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:59506c6e8bd9306cd8a41511e32d16d5d1194110b8cfe5a11d102d8b63cf945d"}, + {file = "regex-2021.7.6-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:564a4c8a29435d1f2256ba247a0315325ea63335508ad8ed938a4f14c4116a5d"}, + {file = "regex-2021.7.6-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:59c00bb8dd8775473cbfb967925ad2c3ecc8886b3b2d0c90a8e2707e06c743f0"}, + {file = "regex-2021.7.6-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:9a854b916806c7e3b40e6616ac9e85d3cdb7649d9e6590653deb5b341a736cec"}, + {file = "regex-2021.7.6-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:db2b7df831c3187a37f3bb80ec095f249fa276dbe09abd3d35297fc250385694"}, + {file = "regex-2021.7.6-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:173bc44ff95bc1e96398c38f3629d86fa72e539c79900283afa895694229fe6a"}, + {file = "regex-2021.7.6-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:15dddb19823f5147e7517bb12635b3c82e6f2a3a6b696cc3e321522e8b9308ad"}, + {file = "regex-2021.7.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ddeabc7652024803666ea09f32dd1ed40a0579b6fbb2a213eba590683025895"}, + {file = "regex-2021.7.6-cp37-cp37m-win32.whl", hash = "sha256:f080248b3e029d052bf74a897b9d74cfb7643537fbde97fe8225a6467fb559b5"}, + {file = "regex-2021.7.6-cp37-cp37m-win_amd64.whl", hash = "sha256:d8bbce0c96462dbceaa7ac4a7dfbbee92745b801b24bce10a98d2f2b1ea9432f"}, + {file = "regex-2021.7.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:edd1a68f79b89b0c57339bce297ad5d5ffcc6ae7e1afdb10f1947706ed066c9c"}, + {file = "regex-2021.7.6-cp38-cp38-manylinux1_i686.whl", hash = "sha256:422dec1e7cbb2efbbe50e3f1de36b82906def93ed48da12d1714cabcd993d7f0"}, + {file = "regex-2021.7.6-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:cbe23b323988a04c3e5b0c387fe3f8f363bf06c0680daf775875d979e376bd26"}, + {file = "regex-2021.7.6-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:0eb2c6e0fcec5e0f1d3bcc1133556563222a2ffd2211945d7b1480c1b1a42a6f"}, + {file = "regex-2021.7.6-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:1c78780bf46d620ff4fff40728f98b8afd8b8e35c3efd638c7df67be2d5cddbf"}, + {file = "regex-2021.7.6-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:bc84fb254a875a9f66616ed4538542fb7965db6356f3df571d783f7c8d256edd"}, + {file = "regex-2021.7.6-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:598c0a79b4b851b922f504f9f39a863d83ebdfff787261a5ed061c21e67dd761"}, + {file = "regex-2021.7.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:875c355360d0f8d3d827e462b29ea7682bf52327d500a4f837e934e9e4656068"}, + {file = "regex-2021.7.6-cp38-cp38-win32.whl", hash = "sha256:e586f448df2bbc37dfadccdb7ccd125c62b4348cb90c10840d695592aa1b29e0"}, + {file = "regex-2021.7.6-cp38-cp38-win_amd64.whl", hash = "sha256:2fe5e71e11a54e3355fa272137d521a40aace5d937d08b494bed4529964c19c4"}, + {file = "regex-2021.7.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6110bab7eab6566492618540c70edd4d2a18f40ca1d51d704f1d81c52d245026"}, + {file = "regex-2021.7.6-cp39-cp39-manylinux1_i686.whl", hash = "sha256:4f64fc59fd5b10557f6cd0937e1597af022ad9b27d454e182485f1db3008f417"}, + {file = "regex-2021.7.6-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:89e5528803566af4df368df2d6f503c84fbfb8249e6631c7b025fe23e6bd0cde"}, + {file = "regex-2021.7.6-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:2366fe0479ca0e9afa534174faa2beae87847d208d457d200183f28c74eaea59"}, + {file = "regex-2021.7.6-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:f9392a4555f3e4cb45310a65b403d86b589adc773898c25a39184b1ba4db8985"}, + {file = "regex-2021.7.6-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:2bceeb491b38225b1fee4517107b8491ba54fba77cf22a12e996d96a3c55613d"}, + {file = "regex-2021.7.6-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:f98dc35ab9a749276f1a4a38ab3e0e2ba1662ce710f6530f5b0a6656f1c32b58"}, + {file = "regex-2021.7.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:319eb2a8d0888fa6f1d9177705f341bc9455a2c8aca130016e52c7fe8d6c37a3"}, + {file = "regex-2021.7.6-cp39-cp39-win32.whl", hash = "sha256:eaf58b9e30e0e546cdc3ac06cf9165a1ca5b3de8221e9df679416ca667972035"}, + {file = "regex-2021.7.6-cp39-cp39-win_amd64.whl", hash = "sha256:4c9c3155fe74269f61e27617529b7f09552fbb12e44b1189cebbdb24294e6e1c"}, + {file = "regex-2021.7.6.tar.gz", hash = "sha256:8394e266005f2d8c6f0bc6780001f7afa3ef81a7a2111fa35058ded6fce79e4d"}, +] +requests = [ + {file = "requests-2.26.0-py2.py3-none-any.whl", hash = "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24"}, + {file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"}, +] +six = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] +toml = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] +tomlkit = [ + {file = "tomlkit-0.7.2-py2.py3-none-any.whl", hash = "sha256:173ad840fa5d2aac140528ca1933c29791b79a374a0861a80347f42ec9328117"}, + {file = "tomlkit-0.7.2.tar.gz", hash = "sha256:d7a454f319a7e9bd2e249f239168729327e4dd2d27b17dc68be264ad1ce36754"}, +] +typed-ast = [ + {file = "typed_ast-1.4.3-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:2068531575a125b87a41802130fa7e29f26c09a2833fea68d9a40cf33902eba6"}, + {file = "typed_ast-1.4.3-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:c907f561b1e83e93fad565bac5ba9c22d96a54e7ea0267c708bffe863cbe4075"}, + {file = "typed_ast-1.4.3-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:1b3ead4a96c9101bef08f9f7d1217c096f31667617b58de957f690c92378b528"}, + {file = "typed_ast-1.4.3-cp35-cp35m-win32.whl", hash = "sha256:dde816ca9dac1d9c01dd504ea5967821606f02e510438120091b84e852367428"}, + {file = "typed_ast-1.4.3-cp35-cp35m-win_amd64.whl", hash = "sha256:777a26c84bea6cd934422ac2e3b78863a37017618b6e5c08f92ef69853e765d3"}, + {file = "typed_ast-1.4.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f8afcf15cc511ada719a88e013cec87c11aff7b91f019295eb4530f96fe5ef2f"}, + {file = "typed_ast-1.4.3-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:52b1eb8c83f178ab787f3a4283f68258525f8d70f778a2f6dd54d3b5e5fb4341"}, + {file = "typed_ast-1.4.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:01ae5f73431d21eead5015997ab41afa53aa1fbe252f9da060be5dad2c730ace"}, + {file = "typed_ast-1.4.3-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:c190f0899e9f9f8b6b7863debfb739abcb21a5c054f911ca3596d12b8a4c4c7f"}, + {file = "typed_ast-1.4.3-cp36-cp36m-win32.whl", hash = "sha256:398e44cd480f4d2b7ee8d98385ca104e35c81525dd98c519acff1b79bdaac363"}, + {file = "typed_ast-1.4.3-cp36-cp36m-win_amd64.whl", hash = "sha256:bff6ad71c81b3bba8fa35f0f1921fb24ff4476235a6e94a26ada2e54370e6da7"}, + {file = "typed_ast-1.4.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0fb71b8c643187d7492c1f8352f2c15b4c4af3f6338f21681d3681b3dc31a266"}, + {file = "typed_ast-1.4.3-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:760ad187b1041a154f0e4d0f6aae3e40fdb51d6de16e5c99aedadd9246450e9e"}, + {file = "typed_ast-1.4.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5feca99c17af94057417d744607b82dd0a664fd5e4ca98061480fd8b14b18d04"}, + {file = "typed_ast-1.4.3-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:95431a26309a21874005845c21118c83991c63ea800dd44843e42a916aec5899"}, + {file = "typed_ast-1.4.3-cp37-cp37m-win32.whl", hash = "sha256:aee0c1256be6c07bd3e1263ff920c325b59849dc95392a05f258bb9b259cf39c"}, + {file = "typed_ast-1.4.3-cp37-cp37m-win_amd64.whl", hash = "sha256:9ad2c92ec681e02baf81fdfa056fe0d818645efa9af1f1cd5fd6f1bd2bdfd805"}, + {file = "typed_ast-1.4.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b36b4f3920103a25e1d5d024d155c504080959582b928e91cb608a65c3a49e1a"}, + {file = "typed_ast-1.4.3-cp38-cp38-manylinux1_i686.whl", hash = "sha256:067a74454df670dcaa4e59349a2e5c81e567d8d65458d480a5b3dfecec08c5ff"}, + {file = "typed_ast-1.4.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7538e495704e2ccda9b234b82423a4038f324f3a10c43bc088a1636180f11a41"}, + {file = "typed_ast-1.4.3-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:af3d4a73793725138d6b334d9d247ce7e5f084d96284ed23f22ee626a7b88e39"}, + {file = "typed_ast-1.4.3-cp38-cp38-win32.whl", hash = "sha256:f2362f3cb0f3172c42938946dbc5b7843c2a28aec307c49100c8b38764eb6927"}, + {file = "typed_ast-1.4.3-cp38-cp38-win_amd64.whl", hash = "sha256:dd4a21253f42b8d2b48410cb31fe501d32f8b9fbeb1f55063ad102fe9c425e40"}, + {file = "typed_ast-1.4.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f328adcfebed9f11301eaedfa48e15bdece9b519fb27e6a8c01aa52a17ec31b3"}, + {file = "typed_ast-1.4.3-cp39-cp39-manylinux1_i686.whl", hash = "sha256:2c726c276d09fc5c414693a2de063f521052d9ea7c240ce553316f70656c84d4"}, + {file = "typed_ast-1.4.3-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:cae53c389825d3b46fb37538441f75d6aecc4174f615d048321b716df2757fb0"}, + {file = "typed_ast-1.4.3-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:b9574c6f03f685070d859e75c7f9eeca02d6933273b5e69572e5ff9d5e3931c3"}, + {file = "typed_ast-1.4.3-cp39-cp39-win32.whl", hash = "sha256:209596a4ec71d990d71d5e0d312ac935d86930e6eecff6ccc7007fe54d703808"}, + {file = "typed_ast-1.4.3-cp39-cp39-win_amd64.whl", hash = "sha256:9c6d1a54552b5330bc657b7ef0eae25d00ba7ffe85d9ea8ae6540d2197a3788c"}, + {file = "typed_ast-1.4.3.tar.gz", hash = "sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65"}, +] +typing-extensions = [ + {file = "typing_extensions-3.10.0.0-py2-none-any.whl", hash = "sha256:0ac0f89795dd19de6b97debb0c6af1c70987fd80a2d62d1958f7e56fcc31b497"}, + {file = "typing_extensions-3.10.0.0-py3-none-any.whl", hash = "sha256:779383f6086d90c99ae41cf0ff39aac8a7937a9283ce0a414e5dd782f4c94a84"}, + {file = "typing_extensions-3.10.0.0.tar.gz", hash = "sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342"}, +] +urllib3 = [ + {file = "urllib3-1.26.6-py2.py3-none-any.whl", hash = "sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4"}, + {file = "urllib3-1.26.6.tar.gz", hash = "sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f"}, +] +wcwidth = [ + {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, + {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, +] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..fa3d0f3973c7b9c64933394debc9c7a1ac636712 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,60 @@ +[tool.poetry] +name = "asr-benchmarks" +version = "0.0.1" +description = "Library to benchmark ASR systems" +authors = [ + "Piotr Szymański <niedakh@gmail.com>", + "Marcin Wątroba <markowanga@gmail.com>" +] +license = "MIT license" +packages = [ + { include = "sziszapangma" } +] + +[tool.poetry.dependencies] +python = "^3.8" +numpy = "^1.20.1" +requests = "^2.25.1" +pandas = "^1.2.4" +fasttext = "^0.9.2" +pymongo = "^3.11.4" + +[tool.poetry.dev-dependencies] +pytest = "^5.2" +mypy = "^0.812" +black = "^20.8b1" +typing-extensions = "^3.7.4" +isort = { version = "^5.9.1", extras = ["pyproject"] } +pyflakes = "2.3.1" +poethepoet = "^0.10.0" + +[tool.poe.tasks] +black = "black -v --check sziszapangma" +isort = "isort sziszapangma -c" +pyflakes = "pyflakes sziszapangma" +mypy = "mypy sziszapangma" +test = "pytest" +check = ["black", "isort", "mypy", "pyflakes"] +all = ["check", "test"] + +[tool.poetry.extras] +developer = ["black", "mypy", "typing-extensions"] + +[build-system] +requires = ["poetry>=1.1.5"] +build-backend = "poetry.masonry.api" + +[mypy] +python_version = 3.8 + +[pytest] +testpaths = "tests" + +[tool.black] +line-length = 100 + +[tool.isort] +line_length = 100 +known_first_party = 'embeddings' +known_third_party = ["click", "pytest"] +profile = "black" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 6568fe0f0f68a6b0e274833a184acf33ae99408f..0000000000000000000000000000000000000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -numpy>=1.20.1 -requests>=2.25.1 -pandas>=1.2.4 -fasttext>=0.9.2 -pymongo>=3.11.4 diff --git a/requirements_dev.txt b/requirements_dev.txt deleted file mode 100644 index 91f97d1b5aba1cc29f2f9e93252ef4eb81f0c028..0000000000000000000000000000000000000000 --- a/requirements_dev.txt +++ /dev/null @@ -1,9 +0,0 @@ -wheel==0.36.2 -watchdog==2.1.3 -flake8==3.9.2 -tox==3.23.1 -coverage==5.5 -Sphinx==4.0.2 -twine==3.4.1 -pytest==6.2.4 -pytest-runner==5.3.1 diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 2642f6ad6c22dacc518a2e26b7c4078cf623ff74..0000000000000000000000000000000000000000 --- a/setup.cfg +++ /dev/null @@ -1,26 +0,0 @@ -[bumpversion] -current_version = 0.1.0 -commit = True -tag = True - -[bumpversion:file:setup.py] -search = version='{current_version}' -replace = version='{new_version}' - -[bumpversion:file:sziszapangma/__init__.py] -search = __version__ = '{current_version}' -replace = __version__ = '{new_version}' - -[bdist_wheel] -universal = 1 - -[flake8] -exclude = docs - -[aliases] -# Define setup.py command aliases here -test = pytest - -;[tool:pytest] -;collect_ignore = ['setup.py'] - diff --git a/setup.py b/setup.py deleted file mode 100644 index 4830c9e700cf3a614e22d12ddd8846380984a1fd..0000000000000000000000000000000000000000 --- a/setup.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python - -"""The setup script.""" - -from setuptools import setup, find_packages - -with open('README.rst') as readme_file: - readme = readme_file.read() - -with open('HISTORY.rst') as history_file: - history = history_file.read() - -with open("requirements.txt", "r") as fh: - requirements = fh.readlines() - -with open("requirements_dev.txt", "r") as fh: - requirements_dev = fh.readlines() + requirements - -setup_requirements = ['pytest-runner', ] - -test_requirements = ['pytest>=3', ] - -setup( - author="Piotr Szymański", - author_email='niedakh@gmail.com', - python_requires='>=3.5', - classifiers=[ - 'Development Status :: 2 - Pre-Alpha', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: MIT License', - 'Natural Language :: English', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - ], - description="A n", - entry_points={ - 'console_scripts': [ - 'sziszapangma=sziszapangma.cli:main', - ], - }, - install_requires=requirements, - license="MIT license", - long_description=readme + '\n\n' + history, - include_package_data=True, - keywords='sziszapangma', - name='sziszapangma', - packages=find_packages(include=['sziszapangma', 'sziszapangma.*']), - setup_requires=requirements_dev, - test_suite='tests', - tests_require=requirements_dev, - url='https://github.com/niedakh/sziszapangma', - version='0.1.0', - zip_safe=False, -) diff --git a/sziszapangma/__init__.py b/sziszapangma/__init__.py index 00e971cf296054b0d437ed7a068ac6856f6eb140..1ae4cd10b6a78c1981ea022709f8bfd1331630ba 100644 --- a/sziszapangma/__init__.py +++ b/sziszapangma/__init__.py @@ -1,5 +1,5 @@ """Top-level package for sziszapangma.""" __author__ = """Piotr Szymański""" -__email__ = 'niedakh@gmail.com' -__version__ = '0.1.0' +__email__ = "niedakh@gmail.com" +__version__ = "0.1.0" diff --git a/sziszapangma/__pycache__/__init__.cpython-39.pyc b/sziszapangma/__pycache__/__init__.cpython-39.pyc index 125189747dbbc47416dd2dc5f0eaaeb1d97f848b..0afbe57fd7e7215829ae3fee5d0f567fffa32ca7 100644 Binary files a/sziszapangma/__pycache__/__init__.cpython-39.pyc and b/sziszapangma/__pycache__/__init__.cpython-39.pyc differ diff --git a/sziszapangma/core/alignment/alignment_calculator.py b/sziszapangma/core/alignment/alignment_calculator.py index f69ec9537fac17d7a25d8768ea6bd6eec08c07af..0ebc6c3d74258fa51a8c1b3833cfe4a759c77987 100644 --- a/sziszapangma/core/alignment/alignment_calculator.py +++ b/sziszapangma/core/alignment/alignment_calculator.py @@ -1,15 +1,14 @@ from abc import ABC -from typing import List, Tuple, Optional +from typing import List, Optional, Tuple import numpy as np +import numpy.typing as npt -from sziszapangma.core.alignment.step_type import StepType +from sziszapangma.core.alignment.alignment_processing_step import AlignmentProcessingStep from sziszapangma.core.alignment.alignment_step import AlignmentStep -from sziszapangma.core.alignment.distance_matrix_calculator import \ - DistanceCalculator +from sziszapangma.core.alignment.distance_matrix_calculator import DistanceCalculator +from sziszapangma.core.alignment.step_type import StepType from sziszapangma.core.alignment.step_words import StepWords -from sziszapangma.core.alignment.alignment_processing_step import \ - AlignmentProcessingStep from sziszapangma.core.alignment.word import Word @@ -28,10 +27,7 @@ class AlignmentCalculator(ABC): for step in processing_steps ] - def _get_reference_indexes_per_steps( - self, - steps: List[AlignmentProcessingStep] - ) -> List[int]: + def _get_reference_indexes_per_steps(self, steps: List[AlignmentProcessingStep]) -> List[int]: counter = 0 indexes = [] for step in steps: @@ -41,99 +37,99 @@ class AlignmentCalculator(ABC): return indexes def get_distance_matrix_between_words( - self, - reference: List[Word], - hypothesis: List[Word] - ) -> np.ndarray: - return self._distance_matrix_calculator.calculate_distance_matrix( - reference, hypothesis) + self, reference: List[Word], hypothesis: List[Word] + ) -> npt.NDArray[np.float64]: + return self._distance_matrix_calculator.calculate_distance_matrix(reference, hypothesis) @staticmethod def _get_initialized_levenshtein_matrix( - reference: List[Word], - hypothesis: List[Word] - ) -> Tuple[np.ndarray, List[List[Optional[AlignmentProcessingStep]]]]: + reference: List[Word], hypothesis: List[Word] + ) -> Tuple[npt.NDArray[np.float64], List[List[Optional[AlignmentProcessingStep]]]]: # TODO: consider about remove distance_arr replaced by steps_arr reference_len = len(reference) hypothesis_len = len(hypothesis) - distance_arr = np.zeros((reference_len + 1) * (hypothesis_len + 1)) \ - .reshape((reference_len + 1, hypothesis_len + 1)) - steps_arr = [ - [None for _ in range(hypothesis_len + 1)] - for _ in range(reference_len + 1) + distance_arr = np.zeros((reference_len + 1) * (hypothesis_len + 1)).reshape( + (reference_len + 1, hypothesis_len + 1) + ) + steps_arr: List[List[Optional[AlignmentProcessingStep]]] = [ + [None for _ in range(hypothesis_len + 1)] for _ in range(reference_len + 1) ] # levenshtein initial for ref_index in range(reference_len + 1): distance_arr[ref_index][0] = ref_index - step_words = StepWords( - reference[ref_index - 1] if ref_index > 0 else None, - None + step_words = StepWords(reference[ref_index - 1] if ref_index > 0 else None, None) + steps_arr[ref_index][0] = AlignmentProcessingStep.levenshtein_deletion( + ref_index - 1, step_words ) - steps_arr[ref_index][0] = AlignmentProcessingStep\ - .levenshtein_deletion(ref_index - 1, step_words) for hyp_index in range(hypothesis_len + 1): distance_arr[0][hyp_index] = hyp_index - step_words = StepWords( - None, - hypothesis[hyp_index - 1] if hyp_index > 0 else None + step_words = StepWords(None, hypothesis[hyp_index - 1] if hyp_index > 0 else None) + steps_arr[0][hyp_index] = AlignmentProcessingStep.levenshtein_insertion( + hyp_index - 1, step_words ) - steps_arr[0][hyp_index] = AlignmentProcessingStep\ - .levenshtein_insertion(hyp_index - 1, step_words) return distance_arr, steps_arr @staticmethod def _get_levenshtein_processing_step_cross( - prev_cross_distance: float, - step_words: StepWords, - current_distance: float + prev_cross_distance: float, step_words: StepWords, current_distance: float ) -> AlignmentProcessingStep: - return AlignmentProcessingStep.levenshtein_correct( - prev_cross_distance, step_words, 0) \ - if current_distance == 0 \ + return ( + AlignmentProcessingStep.levenshtein_correct(prev_cross_distance, step_words, 0) + if current_distance == 0 else AlignmentProcessingStep.levenshtein_substitution( - prev_cross_distance, step_words, current_distance) + prev_cross_distance, step_words, current_distance + ) + ) def get_levenshtein_embedding_based( self, reference: List[Word], hypothesis: List[Word], - distance_matrix: np.ndarray - ) -> Tuple[np.ndarray, List[List[AlignmentProcessingStep]]]: + distance_matrix: npt.NDArray[np.float64], + ) -> Tuple[npt.NDArray[np.float64], List[List[AlignmentProcessingStep]]]: reference_len = len(reference) hypothesis_len = len(hypothesis) - distance_arr, steps_arr = self._get_initialized_levenshtein_matrix( - reference, hypothesis) + distance_arr, steps_arr = self._get_initialized_levenshtein_matrix(reference, hypothesis) for ref_index in range(reference_len): for hyp_index in range(hypothesis_len): - step_words = StepWords(reference[ref_index], - hypothesis[hyp_index]) + step_words = StepWords(reference[ref_index], hypothesis[hyp_index]) current_distance = distance_matrix[ref_index][hyp_index] prev_cross_distance = distance_arr[ref_index][hyp_index] cross_go_step = self._get_levenshtein_processing_step_cross( - prev_cross_distance, step_words, current_distance) + prev_cross_distance, step_words, current_distance + ) insertion_step = AlignmentProcessingStep.levenshtein_insertion( - distance_arr[ref_index + 1][hyp_index], step_words) + distance_arr[ref_index + 1][hyp_index], step_words + ) deletion_step = AlignmentProcessingStep.levenshtein_deletion( - distance_arr[ref_index][hyp_index + 1], step_words) + distance_arr[ref_index][hyp_index + 1], step_words + ) - best_step = min([cross_go_step, insertion_step, deletion_step], - key=lambda it: it.total_distance()) + best_step = min( + [cross_go_step, insertion_step, deletion_step], + key=lambda it: it.total_distance(), + ) - distance_arr[ref_index + 1][hyp_index + 1] = \ - best_step.total_distance() + distance_arr[ref_index + 1][hyp_index + 1] = best_step.total_distance() steps_arr[ref_index + 1][hyp_index + 1] = best_step - return distance_arr, steps_arr + def get_assert(item: Optional[AlignmentProcessingStep]) -> AlignmentProcessingStep: + if item is None: + raise Exception() + return item + + steps_arr_to_return = [[get_assert(itt) for itt in it] for it in steps_arr] + + return distance_arr, steps_arr_to_return def extract_steps_path( - self, - steps_matrix: List[List[AlignmentProcessingStep]] + self, steps_matrix: List[List[AlignmentProcessingStep]] ) -> List[AlignmentProcessingStep]: x = len(steps_matrix) - 1 y = len(steps_matrix[0]) - 1 @@ -151,29 +147,22 @@ class AlignmentCalculator(ABC): return to_return[::-1] def _calculate_steps_path( - self, - reference: List[Word], - hypothesis: List[Word] + self, reference: List[Word], hypothesis: List[Word] ) -> List[AlignmentProcessingStep]: - distance_between_words = self.get_distance_matrix_between_words( - reference, hypothesis) + distance_between_words = self.get_distance_matrix_between_words(reference, hypothesis) _, steps_matrix = self.get_levenshtein_embedding_based( - reference, hypothesis, distance_between_words) + reference, hypothesis, distance_between_words + ) return self.extract_steps_path(steps_matrix) def calculate_alignment( - self, - reference: List[Word], - hypothesis: List[Word] + self, reference: List[Word], hypothesis: List[Word] ) -> List[AlignmentStep]: steps_path = self._calculate_steps_path(reference, hypothesis) return self.convert_processing_steps_to_result(steps_path) def calculate_alignment_weighted( - self, - reference: List[Word], - hypothesis: List[Word], - weights: List[float] + self, reference: List[Word], hypothesis: List[Word], weights: List[float] ) -> List[AlignmentStep]: steps_path = self._calculate_steps_path(reference, hypothesis) return self.convert_processing_steps_to_result(steps_path) diff --git a/sziszapangma/core/alignment/alignment_classic_calculator.py b/sziszapangma/core/alignment/alignment_classic_calculator.py index fbf60eb310caa9d7cd2370ba253f4168da7c62f7..7d3ec171e95998f1fa4f872f149dcd92dcc1e21d 100644 --- a/sziszapangma/core/alignment/alignment_classic_calculator.py +++ b/sziszapangma/core/alignment/alignment_classic_calculator.py @@ -1,10 +1,7 @@ -from sziszapangma.core.alignment.alignment_calculator import \ - AlignmentCalculator -from sziszapangma.core.alignment.distance_matrix_calculator import \ - BinaryDistanceCalculator +from sziszapangma.core.alignment.alignment_calculator import AlignmentCalculator +from sziszapangma.core.alignment.distance_matrix_calculator import BinaryDistanceCalculator class AlignmentClassicCalculator(AlignmentCalculator): - - def __init__(self): + def __init__(self) -> None: super().__init__(BinaryDistanceCalculator()) diff --git a/sziszapangma/core/alignment/alignment_embedding_calculator.py b/sziszapangma/core/alignment/alignment_embedding_calculator.py index a20802dc2d2ad132edb57854b7c1e1ec71ef621d..da74c3820ba3d21e5e5d415e7b073245b36cb86e 100644 --- a/sziszapangma/core/alignment/alignment_embedding_calculator.py +++ b/sziszapangma/core/alignment/alignment_embedding_calculator.py @@ -1,15 +1,15 @@ from typing import List -from sziszapangma.core.alignment.alignment_calculator import \ - AlignmentCalculator -from sziszapangma.core.alignment.alignment_processing_step import \ - AlignmentProcessingStep +from sziszapangma.core.alignment.alignment_calculator import AlignmentCalculator +from sziszapangma.core.alignment.alignment_processing_step import AlignmentProcessingStep from sziszapangma.core.alignment.alignment_step import AlignmentStep -from sziszapangma.core.alignment.distance_matrix_calculator import \ - BinaryDistanceCalculator, DistanceCalculator, CosineDistanceCalculator +from sziszapangma.core.alignment.distance_matrix_calculator import ( + BinaryDistanceCalculator, + CosineDistanceCalculator, + DistanceCalculator, +) from sziszapangma.core.alignment.step_words import StepWords -from sziszapangma.core.transformer.embedding_transformer import \ - EmbeddingTransformer +from sziszapangma.core.transformer.embedding_transformer import EmbeddingTransformer class AlignmentEmbeddingCalculator(AlignmentCalculator): @@ -18,33 +18,31 @@ class AlignmentEmbeddingCalculator(AlignmentCalculator): def __init__(self, embedding_transformer: EmbeddingTransformer): super().__init__(BinaryDistanceCalculator()) self._embedding_transformer = embedding_transformer - self._distance_calculator = CosineDistanceCalculator( - embedding_transformer) + self._distance_calculator = CosineDistanceCalculator(embedding_transformer) - def _calculate_distance_for_word_step( - self, - step_words: StepWords - ) -> float: + def _calculate_distance_for_word_step(self, step_words: StepWords) -> float: + if step_words.reference_word is None: + raise Exception() + if step_words.hypothesis_word is None: + raise Exception() return self._distance_calculator.calculate_distance_for_words( - step_words.reference_word, - step_words.hypothesis_word + step_words.reference_word, step_words.hypothesis_word ) - def _calculate_result_cost_for_step( - self, - processing_step: AlignmentProcessingStep - ) -> float: + def _calculate_result_cost_for_step(self, processing_step: AlignmentProcessingStep) -> float: step_words = processing_step.step_words - return self._calculate_distance_for_word_step(step_words) \ - if processing_step.step_type.is_cross_step() \ + return ( + self._calculate_distance_for_word_step(step_words) + if processing_step.step_type.is_cross_step() else processing_step.step_cost + ) def convert_processing_steps_to_result( - self, - processing_steps: List[AlignmentProcessingStep] + self, processing_steps: List[AlignmentProcessingStep] ) -> List[AlignmentStep]: return [ - AlignmentStep(step.step_type, step.step_words, - self._calculate_result_cost_for_step(step)) + AlignmentStep( + step.step_type, step.step_words, self._calculate_result_cost_for_step(step) + ) for step in processing_steps ] diff --git a/sziszapangma/core/alignment/alignment_processing_step.py b/sziszapangma/core/alignment/alignment_processing_step.py index e4ab96dc1578348db186006797eb1801842a1588..840788371eb48e99d05608f4067b7ce43f399ee6 100644 --- a/sziszapangma/core/alignment/alignment_processing_step.py +++ b/sziszapangma/core/alignment/alignment_processing_step.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from dataclasses import dataclass from sziszapangma.core.alignment.step_type import StepType @@ -12,30 +14,32 @@ class AlignmentProcessingStep: step_cost: float @classmethod - def levenshtein_insertion(cls, previous_distance: float, - step_words: StepWords, step_cost: float = 1): + def levenshtein_insertion( + cls, previous_distance: float, step_words: StepWords, step_cost: float = 1 + ) -> AlignmentProcessingStep: words = StepWords(None, step_words.hypothesis_word) - return AlignmentProcessingStep(StepType.INSERTION, words, - previous_distance, step_cost) + return AlignmentProcessingStep(StepType.INSERTION, words, previous_distance, step_cost) @classmethod - def levenshtein_deletion(cls, previous_distance: float, - step_words: StepWords, step_cost: float = 1): + def levenshtein_deletion( + cls, previous_distance: float, step_words: StepWords, step_cost: float = 1 + ) -> AlignmentProcessingStep: words = StepWords(step_words.reference_word, None) - return AlignmentProcessingStep(StepType.DELETION, words, - previous_distance, step_cost) + return AlignmentProcessingStep(StepType.DELETION, words, previous_distance, step_cost) @classmethod - def levenshtein_substitution(cls, previous_distance: float, - step_words: StepWords, step_cost: float): - return AlignmentProcessingStep(StepType.SUBSTITUTION, step_words, - previous_distance, step_cost) + def levenshtein_substitution( + cls, previous_distance: float, step_words: StepWords, step_cost: float + ) -> AlignmentProcessingStep: + return AlignmentProcessingStep( + StepType.SUBSTITUTION, step_words, previous_distance, step_cost + ) @classmethod - def levenshtein_correct(cls, previous_distance: float, - step_words: StepWords, step_cost: float): - return AlignmentProcessingStep(StepType.CORRECT, step_words, - previous_distance, step_cost) + def levenshtein_correct( + cls, previous_distance: float, step_words: StepWords, step_cost: float + ) -> AlignmentProcessingStep: + return AlignmentProcessingStep(StepType.CORRECT, step_words, previous_distance, step_cost) def total_distance(self) -> float: return self.step_cost + self.previous_distance diff --git a/sziszapangma/core/alignment/alignment_soft_calculator.py b/sziszapangma/core/alignment/alignment_soft_calculator.py index c7de34cecef6693d6058260a1a2fd3a8997acb7b..6266390e9cf2dc873ba1a28c0db26bf7102e1ba7 100644 --- a/sziszapangma/core/alignment/alignment_soft_calculator.py +++ b/sziszapangma/core/alignment/alignment_soft_calculator.py @@ -1,12 +1,8 @@ -from sziszapangma.core.alignment.alignment_calculator import \ - AlignmentCalculator -from sziszapangma.core.alignment.distance_matrix_calculator import \ - CosineDistanceCalculator -from sziszapangma.core.transformer.embedding_transformer import \ - EmbeddingTransformer +from sziszapangma.core.alignment.alignment_calculator import AlignmentCalculator +from sziszapangma.core.alignment.distance_matrix_calculator import CosineDistanceCalculator +from sziszapangma.core.transformer.embedding_transformer import EmbeddingTransformer class AlignmentSoftCalculator(AlignmentCalculator): - def __init__(self, embedding_transformer: EmbeddingTransformer): super().__init__(CosineDistanceCalculator(embedding_transformer)) diff --git a/sziszapangma/core/alignment/alignment_step.py b/sziszapangma/core/alignment/alignment_step.py index cefd0d105f499b0e1d1ba68d29d8fedc16f2e17b..f3e802ecb8c348d1bfc59f584b2f8e44459eb769 100644 --- a/sziszapangma/core/alignment/alignment_step.py +++ b/sziszapangma/core/alignment/alignment_step.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from dataclasses import dataclass from sziszapangma.core.alignment.step_type import StepType @@ -10,9 +12,7 @@ class AlignmentStep: step_words: StepWords step_cost: float - def with_weight_multiplication(self, weight: float): + def with_weight_multiplication(self, weight: float) -> AlignmentStep: return AlignmentStep( - step_type=self.step_type, - step_words=self.step_words, - step_cost=self.step_cost * weight + step_type=self.step_type, step_words=self.step_words, step_cost=self.step_cost * weight ) diff --git a/sziszapangma/core/alignment/alignment_util.py b/sziszapangma/core/alignment/alignment_util.py index c1887317e3925e2143fd85c32b0aa82199a410f9..d90721ed7b93fe8d6f81f35cec88b687a150c21c 100644 --- a/sziszapangma/core/alignment/alignment_util.py +++ b/sziszapangma/core/alignment/alignment_util.py @@ -1,42 +1,33 @@ -from typing import List, Optional +from typing import List, Optional, Tuple import numpy as np import pandas as pd from sziszapangma.core.alignment.alignment_step import AlignmentStep +from sziszapangma.core.alignment.word import Word class AlignmentUtil: - @staticmethod - def _optional_str_to_str(value: Optional[str]) -> str: - return value if value is not None else '' + def _optional_str_to_str(word: Optional[Word]) -> str: + return word.value if word is not None else "" @staticmethod - def _wer_step_to_pandas_row_lit(step: AlignmentStep) -> List[any]: - return [ + def _wer_step_to_pandas_row_lit(step: AlignmentStep) -> Tuple[str, str, str, float]: + return ( step.step_type.get_short_name(), AlignmentUtil._optional_str_to_str(step.step_words.reference_word), - AlignmentUtil._optional_str_to_str( - step.step_words.hypothesis_word), - round(step.step_cost, 3) - ] + AlignmentUtil._optional_str_to_str(step.step_words.hypothesis_word), + round(step.step_cost, 3), + ) @staticmethod def steps_to_dataframe(steps: List[AlignmentStep]) -> pd.DataFrame: - arr = np.array([ - AlignmentUtil._wer_step_to_pandas_row_lit(step) - for step in steps - ]) - return pd.DataFrame( - arr, - columns=['step_type', 'reference', 'hypothesis', 'cost'] - ) + arr = np.array([AlignmentUtil._wer_step_to_pandas_row_lit(step) for step in steps]) + return pd.DataFrame(arr, columns=["step_type", "reference", "hypothesis", "cost"]) @staticmethod - def get_reference_indexes_per_steps( - steps: List[AlignmentStep] - ) -> List[int]: + def get_reference_indexes_per_steps(steps: List[AlignmentStep]) -> List[int]: counter = 0 indexes = [] for step in steps: @@ -47,25 +38,19 @@ class AlignmentUtil: @staticmethod def get_reference_length(steps: List[AlignmentStep]) -> int: - return sum([ - 1 if step.step_type.contain_reference_word() else 0 - for step in steps - ]) + return sum([1 if step.step_type.contain_reference_word() else 0 for step in steps]) @staticmethod def apply_weights_to_alignment( - steps: List[AlignmentStep], - weights: List[float] + steps: List[AlignmentStep], weights: List[float] ) -> List[AlignmentStep]: if AlignmentUtil.get_reference_length(steps) != len(weights): raise Exception( f"Incorrect length of weights, current={len(weights)}, " f"required={AlignmentUtil.get_reference_length(steps)}" ) - reference_indexes_per_steps = \ - AlignmentUtil.get_reference_indexes_per_steps(steps) + reference_indexes_per_steps = AlignmentUtil.get_reference_indexes_per_steps(steps) return [ - steps[index].with_weight_multiplication( - weights[reference_indexes_per_steps[index]]) + steps[index].with_weight_multiplication(weights[reference_indexes_per_steps[index]]) for index in range(len(steps)) ] diff --git a/sziszapangma/core/alignment/distance_matrix_calculator.py b/sziszapangma/core/alignment/distance_matrix_calculator.py index 5f17ea7f72e45d6f1bd0cad4f4732e786f591d22..9bbec7f81275180d31b94df0367857ca86c20aaa 100644 --- a/sziszapangma/core/alignment/distance_matrix_calculator.py +++ b/sziszapangma/core/alignment/distance_matrix_calculator.py @@ -2,19 +2,17 @@ from abc import ABC, abstractmethod from typing import List import numpy as np +import numpy.typing as npt -from sziszapangma.core.transformer.embedding_transformer import \ - EmbeddingTransformer from sziszapangma.core.alignment.word import Word +from sziszapangma.core.transformer.embedding_transformer import EmbeddingTransformer class DistanceCalculator(ABC): @abstractmethod def calculate_distance_matrix( - self, - reference: List[Word], - hypothesis: List[Word] - ) -> np.array: + self, reference: List[Word], hypothesis: List[Word] + ) -> npt.NDArray[np.float64]: pass @abstractmethod @@ -27,15 +25,17 @@ class BinaryDistanceCalculator(DistanceCalculator): return 0 if word1.value == word2.value else 1 def calculate_distance_matrix( - self, - reference: List[Word], - hypothesis: List[Word] - ) -> np.array: - return np.array([ - [self.calculate_distance_for_words(reference_word, hypothesis_word) - for hypothesis_word in hypothesis] - for reference_word in reference - ]) + self, reference: List[Word], hypothesis: List[Word] + ) -> npt.NDArray[np.float64]: + return np.array( + [ + [ + self.calculate_distance_for_words(reference_word, hypothesis_word) + for hypothesis_word in hypothesis + ] + for reference_word in reference + ] + ) class CosineDistanceCalculator(DistanceCalculator): @@ -47,19 +47,17 @@ class CosineDistanceCalculator(DistanceCalculator): def calculate_distance_for_words(self, word1: Word, word2: Word) -> float: return self.cosine_distance_between_words_embeddings( self._embedding_transformer.get_embedding(word1.value), - self._embedding_transformer.get_embedding(word2.value) + self._embedding_transformer.get_embedding(word2.value), ) @staticmethod def cosine_distance_between_words_embeddings( - word1_embedding: np.array, - word2_embedding: np.array + word1_embedding: npt.NDArray[np.float64], word2_embedding: npt.NDArray[np.float64] ) -> float: a = word1_embedding b = word2_embedding if a.shape != b.shape: - raise RuntimeError( - "array {} shape not match {}".format(a.shape, b.shape)) + raise RuntimeError("array {} shape not match {}".format(a.shape, b.shape)) if a.ndim == 1: a_norm = np.linalg.norm(a) b_norm = np.linalg.norm(b) @@ -69,22 +67,24 @@ class CosineDistanceCalculator(DistanceCalculator): else: raise RuntimeError("array dimensions {} not right".format(a.ndim)) similarity = np.dot(a, b.T) / (a_norm * b_norm) - dist = 1. - similarity - return dist + dist = 1.0 - similarity + return float(dist) def calculate_distance_matrix( - self, - reference: List[Word], - hypothesis: List[Word] - ) -> np.array: + self, reference: List[Word], hypothesis: List[Word] + ) -> npt.NDArray[np.float64]: embeddings_dict = self._embedding_transformer.get_embeddings( list(set(it.value for it in (reference + hypothesis))) ) - return np.array([[ - self.cosine_distance_between_words_embeddings( - embeddings_dict[reference_word.value], - embeddings_dict[hypothesis_word.value], - ) - for hypothesis_word in hypothesis] - for reference_word in reference - ]) + return np.array( + [ + [ + self.cosine_distance_between_words_embeddings( + embeddings_dict[reference_word.value], + embeddings_dict[hypothesis_word.value], + ) + for hypothesis_word in hypothesis + ] + for reference_word in reference + ] + ) diff --git a/sziszapangma/core/alignment/word.py b/sziszapangma/core/alignment/word.py index caf6e62b3fa7eb68a99360fa2dde03b968394d53..49ebbd4ab038985214d3e9d5e0309f5489fa56af 100644 --- a/sziszapangma/core/alignment/word.py +++ b/sziszapangma/core/alignment/word.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import uuid from dataclasses import dataclass @@ -8,5 +10,8 @@ class Word: value: str @classmethod - def from_string(cls, string: str): + def from_string(cls, string: str) -> Word: return cls(str(uuid.uuid4()), string) + + def to_lower(self) -> Word: + return Word(self.id, self.value.lower()) diff --git a/sziszapangma/core/transformer/cached_embedding_transformer.py b/sziszapangma/core/transformer/cached_embedding_transformer.py index f58fe33f260fd6c6ced5c9a0f867f15dc9ecb66a..d084fe05c3d150b3f1ed9bb71f684a85c4007f2a 100644 --- a/sziszapangma/core/transformer/cached_embedding_transformer.py +++ b/sziszapangma/core/transformer/cached_embedding_transformer.py @@ -1,32 +1,30 @@ -from typing import List, Dict +from typing import Dict, List import numpy as np +import numpy.typing as npt -from sziszapangma.core.transformer.embedding_transformer import \ - EmbeddingTransformer +from sziszapangma.core.transformer.embedding_transformer import EmbeddingTransformer class CachedEmbeddingTransformer(EmbeddingTransformer): _embeddings_transformer: EmbeddingTransformer - _cache: Dict[str, np.ndarray] + _cache: Dict[str, npt.NDArray[np.float64]] def __init__(self, embeddings_transformer: EmbeddingTransformer): self._embeddings_transformer = embeddings_transformer self._cache = dict() - def get_embedding(self, word: str) -> np.ndarray: + def get_embedding(self, word: str) -> npt.NDArray[np.float64]: return self.get_embeddings([word])[word] - def get_embeddings(self, words: List[str]) -> Dict[str, np.ndarray]: + def get_embeddings(self, words: List[str]) -> Dict[str, npt.NDArray[np.float64]]: new_words = [word for word in words if word not in self._cache] - new_embeddings = self._embeddings_transformer\ - .get_embeddings(new_words) if len(new_words) > 0 else dict() + new_embeddings = ( + self._embeddings_transformer.get_embeddings(new_words) if len(new_words) > 0 else dict() + ) for new_word in new_words: self._cache[new_word] = new_embeddings[new_word] - return { - word: self._cache[word] - for word in words - } + return {word: self._cache[word] for word in words} - def clear(self): + def clear(self) -> None: self._cache.clear() diff --git a/sziszapangma/core/transformer/embedding_transformer.py b/sziszapangma/core/transformer/embedding_transformer.py index a953f442c56d0846a1da38bb0aa888d9e1233b73..14e7db35eb96344671b1063561504f496abde590 100644 --- a/sziszapangma/core/transformer/embedding_transformer.py +++ b/sziszapangma/core/transformer/embedding_transformer.py @@ -1,15 +1,15 @@ from abc import ABC, abstractmethod -from typing import List, Dict +from typing import Dict, List import numpy as np +import numpy.typing as npt class EmbeddingTransformer(ABC): - @abstractmethod - def get_embeddings(self, words: List[str]) -> Dict[str, np.ndarray]: + def get_embeddings(self, words: List[str]) -> Dict[str, npt.NDArray[np.float64]]: pass @abstractmethod - def get_embedding(self, word: str) -> np.ndarray: + def get_embedding(self, word: str) -> npt.NDArray[np.float64]: pass diff --git a/sziszapangma/core/transformer/fasttext_embedding_transformer.py b/sziszapangma/core/transformer/fasttext_embedding_transformer.py index a74ac7e8315b44571db788be31db331773ff805d..2c40dd0f5d2788922400878f2c27f4fbcce7cac2 100644 --- a/sziszapangma/core/transformer/fasttext_embedding_transformer.py +++ b/sziszapangma/core/transformer/fasttext_embedding_transformer.py @@ -1,11 +1,11 @@ -from typing import List, Dict +from typing import Dict, List import fasttext import fasttext.util import numpy as np +import numpy.typing as npt -from sziszapangma.core.transformer.embedding_transformer import \ - EmbeddingTransformer +from sziszapangma.core.transformer.embedding_transformer import EmbeddingTransformer class FasttextEmbeddingTransformer(EmbeddingTransformer): @@ -13,15 +13,11 @@ class FasttextEmbeddingTransformer(EmbeddingTransformer): _model_name: str def __init__(self, lang_id: str): - full_model_name = fasttext.util.download_model( - lang_id, if_exists='ignore') + full_model_name = fasttext.util.download_model(lang_id, if_exists="ignore") self._fasttext_model = fasttext.load_model(full_model_name) - def get_embedding(self, word: str) -> np.ndarray: - return self._fasttext_model.get_word_vector(word) + def get_embedding(self, word: str) -> npt.NDArray[np.float64]: + return np.array(self._fasttext_model.get_word_vector(word)) - def get_embeddings(self, words: List[str]) -> Dict[str, np.ndarray]: - return { - word: self.get_embedding(word) - for word in set(words) - } + def get_embeddings(self, words: List[str]) -> Dict[str, npt.NDArray[np.float64]]: + return {word: self.get_embedding(word) for word in set(words)} diff --git a/sziszapangma/core/wer/span.py b/sziszapangma/core/wer/span.py index 44cfe840d0f2c6f68743963e749ae00a27450191..f80f5856b766e3b666fb8a2bf1941f4b0efd927d 100644 --- a/sziszapangma/core/wer/span.py +++ b/sziszapangma/core/wer/span.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from typing import List @dataclass(frozen=True) @@ -9,8 +10,5 @@ class Span: def is_index_belong(self, index: int) -> bool: return self.index_start <= index < self.index_end - def get_reference_mask_table(self, total_size: int): - return [ - self.is_index_belong(it) - for it in range(total_size) - ] + def get_reference_mask_table(self, total_size: int) -> List[bool]: + return [self.is_index_belong(it) for it in range(total_size)] diff --git a/sziszapangma/core/wer/wer_calculator.py b/sziszapangma/core/wer/wer_calculator.py index 3fa65dbe5e0ff306a6ba118e4880db724a31d5bd..b5cdab3d025fa6f206bcf968e42925b34e09cfda 100644 --- a/sziszapangma/core/wer/wer_calculator.py +++ b/sziszapangma/core/wer/wer_calculator.py @@ -7,20 +7,17 @@ from sziszapangma.core.wer.span import Span class WerCalculator(ABC): - @staticmethod def _convert_processing_steps_to_result( - input_steps: List[AlignmentStep], - span: Span + input_steps: List[AlignmentStep], span: Span ) -> List[AlignmentStep]: - indexes_per_steps = AlignmentUtil.get_reference_indexes_per_steps( - input_steps) + indexes_per_steps = AlignmentUtil.get_reference_indexes_per_steps(input_steps) return [ AlignmentStep( input_steps[step_index].step_type, input_steps[step_index].step_words, - input_steps[step_index].step_cost * span.is_index_belong( - indexes_per_steps[step_index]) + input_steps[step_index].step_cost + * span.is_index_belong(indexes_per_steps[step_index]), ) for step_index in range(len(input_steps)) ] @@ -32,21 +29,13 @@ class WerCalculator(ABC): reference_len = AlignmentUtil.get_reference_length(steps) return sum([step.step_cost for step in steps]) / reference_len - def calculate_wer( - self, - steps: List[AlignmentStep] - ) -> float: + def calculate_wer(self, steps: List[AlignmentStep]) -> float: return self._calculate_wer(steps) - def calculate_wer_for_spans( - self, - steps: List[AlignmentStep], - spans: List[Span] - ) -> List[float]: + def calculate_wer_for_spans(self, steps: List[AlignmentStep], spans: List[Span]) -> List[float]: return [ - self._calculate_wer(self._convert_processing_steps_to_result( - input_steps=steps, - span=span - )) + self._calculate_wer( + self._convert_processing_steps_to_result(input_steps=steps, span=span) + ) for span in spans ] diff --git a/sziszapangma/integration/asr_processor.py b/sziszapangma/integration/asr_processor.py index 580695a245586d7e9f02cda1ba50c7e8f59684d4..bfc26851e59664b7e66564c8ecb3b10120c765c5 100644 --- a/sziszapangma/integration/asr_processor.py +++ b/sziszapangma/integration/asr_processor.py @@ -1,13 +1,12 @@ from abc import ABC, abstractmethod -from typing import List, Dict +from typing import Any, Dict import requests class AsrProcessor(ABC): - @abstractmethod - def call_recognise(self, file_path: str) -> List[Dict[str, any]]: + def call_recognise(self, file_path: str) -> Dict[str, Any]: """ Currently most important is field `transcript` with list of transcript words. @@ -22,8 +21,8 @@ class AsrWebClient(AsrProcessor): super(AsrWebClient, self).__init__() self._url = url - def call_recognise(self, file_path: str) -> List[Dict[str, any]]: - files = {'file': open(file_path, 'rb')} + def call_recognise(self, file_path: str) -> Dict[str, Any]: + files = {"file": open(file_path, "rb")} res = requests.post(self._url, files=files) json_response = res.json() print(json_response) diff --git a/sziszapangma/integration/experiment_manager.py b/sziszapangma/integration/experiment_manager.py index b28ad39b3bf59095294c73bdd5b208f5326c552b..1dd25280ff1fd30577f0980990e230ce1cfb6a8b 100644 --- a/sziszapangma/integration/experiment_manager.py +++ b/sziszapangma/integration/experiment_manager.py @@ -1,7 +1,7 @@ from typing import List -from sziszapangma.integration.repository.experiment_repository import \ - ExperimentRepository +from sziszapangma.integration.repository.experiment_repository import ExperimentRepository + from .record_id_iterator import RecordIdIterator from .task.processing_task import ProcessingTask @@ -15,7 +15,7 @@ class ExperimentManager: self, experiment_repository: ExperimentRepository, record_id_iterator: RecordIdIterator, - processing_tasks: List[ProcessingTask] + processing_tasks: List[ProcessingTask], ): self._experiment_repository = experiment_repository self._record_id_iterator = record_id_iterator @@ -24,7 +24,4 @@ class ExperimentManager: def process(self): self._experiment_repository.initialise() for processing_task in self._processing_tasks: - processing_task.process( - self._record_id_iterator, - self._experiment_repository - ) + processing_task.process(self._record_id_iterator, self._experiment_repository) diff --git a/sziszapangma/integration/gold_transcript_processor.py b/sziszapangma/integration/gold_transcript_processor.py index a6322541c5fd81ca99e13cc5dc0282c2a8a1dca3..f88eb02832bbe350591d46400c5d51f7205977e5 100644 --- a/sziszapangma/integration/gold_transcript_processor.py +++ b/sziszapangma/integration/gold_transcript_processor.py @@ -1,12 +1,12 @@ from abc import ABC, abstractmethod -from typing import List, Dict +from typing import Any, Dict, List class GoldTranscriptProcessor(ABC): """""" @abstractmethod - def get_gold_transcript(self, record_id: str) -> List[Dict[str, any]]: + def get_gold_transcript(self, record_id: str) -> List[Dict[str, Any]]: """ Currently the most important dict key is `word` – original transcript word. diff --git a/sziszapangma/integration/mapper/alignment_step_mapper.py b/sziszapangma/integration/mapper/alignment_step_mapper.py index 8b3bf9b32aaef3b1be84cc5c036208e3b31d4bc4..66a47919ea4351dd38ad517ee36b8b3c65b4ce10 100644 --- a/sziszapangma/integration/mapper/alignment_step_mapper.py +++ b/sziszapangma/integration/mapper/alignment_step_mapper.py @@ -1,16 +1,23 @@ -from typing import Dict +from typing import Any, Dict from sziszapangma.core.alignment.alignment_step import AlignmentStep +from sziszapangma.core.alignment.step_type import StepType from sziszapangma.integration.mapper.step_words_mapper import StepWordsMapper class AlignmentStepMapper: - @staticmethod - def to_json_dict(alignment_step: AlignmentStep) -> Dict[str, any]: + def to_json_dict(alignment_step: AlignmentStep) -> Dict[str, Any]: return { - 'step_type': alignment_step.step_type.name, - 'step_words': StepWordsMapper.to_json_dict( - alignment_step.step_words), - 'step_cost': alignment_step.step_cost + "step_type": alignment_step.step_type.name, + "step_words": StepWordsMapper.to_json_dict(alignment_step.step_words), + "step_cost": float(alignment_step.step_cost), } + + @staticmethod + def from_json_dict(input_json_dict: Dict[str, Any]) -> AlignmentStep: + return AlignmentStep( + StepType[input_json_dict["step_type"]], + StepWordsMapper.from_json_dict(input_json_dict["step_words"]), + input_json_dict["step_cost"], + ) diff --git a/sziszapangma/integration/mapper/step_words_mapper.py b/sziszapangma/integration/mapper/step_words_mapper.py index a28b532411317d7510ef92723eb0274583a18430..94edd1128cbcc94c31af2447178738c0fbdf8000 100644 --- a/sziszapangma/integration/mapper/step_words_mapper.py +++ b/sziszapangma/integration/mapper/step_words_mapper.py @@ -1,27 +1,26 @@ -from typing import Dict +from typing import Any, Dict from sziszapangma.core.alignment.step_words import StepWords from sziszapangma.integration.mapper.word_mapper import WordMapper class StepWordsMapper: - @staticmethod - def to_json_dict(step_words: StepWords) -> Dict[str, any]: + def to_json_dict(step_words: StepWords) -> Dict[str, Any]: to_return = dict() if step_words.hypothesis_word is not None: - to_return['hypothesis_word'] = WordMapper.to_json_dict( - step_words.hypothesis_word) + to_return["hypothesis_word"] = WordMapper.to_json_dict(step_words.hypothesis_word) if step_words.reference_word is not None: - to_return['reference_word'] = WordMapper.to_json_dict( - step_words.reference_word) + to_return["reference_word"] = WordMapper.to_json_dict(step_words.reference_word) return to_return @staticmethod - def from_json_dict(input_json_dict: Dict[str, any]) -> StepWords: + def from_json_dict(input_json_dict: Dict[str, Any]) -> StepWords: return StepWords( - None if 'reference_word' not in input_json_dict - else WordMapper.from_json_dict(input_json_dict['reference_word']), - None if 'hypothesis_word' not in input_json_dict - else WordMapper.from_json_dict(input_json_dict['hypothesis_word']), + None + if "reference_word" not in input_json_dict + else WordMapper.from_json_dict(input_json_dict["reference_word"]), + None + if "hypothesis_word" not in input_json_dict + else WordMapper.from_json_dict(input_json_dict["hypothesis_word"]), ) diff --git a/sziszapangma/integration/mapper/word_mapper.py b/sziszapangma/integration/mapper/word_mapper.py index f7b0cd485c62cdb95b46c2154e09b91846aa8854..30d3d6adbe7a14bc868eca99c43421b0d99b88c3 100644 --- a/sziszapangma/integration/mapper/word_mapper.py +++ b/sziszapangma/integration/mapper/word_mapper.py @@ -2,18 +2,14 @@ from typing import Dict from sziszapangma.core.alignment.word import Word -_ID = 'id' -_VALUE = 'value' +_ID = "id" +_VALUE = "value" class WordMapper: - @staticmethod def to_json_dict(word: Word) -> Dict[str, str]: - return { - _ID: word.id, - _VALUE: word.value - } + return {_ID: word.id, _VALUE: word.value} @staticmethod def from_json_dict(input_json_dict: Dict[str, str]) -> Word: diff --git a/sziszapangma/integration/path_filter.py b/sziszapangma/integration/path_filter.py index 1ac6eb41e04b1fddd8930ea00c5db93321e32dc4..a5e55f90c90db25a564e94a982edd39af8ec2161 100644 --- a/sziszapangma/integration/path_filter.py +++ b/sziszapangma/integration/path_filter.py @@ -19,16 +19,12 @@ class ExtensionPathFilter(PathFilter): """ Implementation of PathFilter which find all files with specified extension. """ + _extension: str _root_directory: str _files_limit: Optional[int] - def __init__( - self, - root_directory: str, - extension: str, - files_limit: Optional[int] = None - ): + def __init__(self, root_directory: str, extension: str, files_limit: Optional[int] = None): """Constructor of class.""" self._extension = extension self._files_limit = files_limit @@ -38,12 +34,6 @@ class ExtensionPathFilter(PathFilter): """ Implementation of searching files with extension. """ - path_generator = Path(self._root_directory).glob( - f'**/*.{self._extension}') - all_files = [ - str(it) - for it in path_generator - ] - return all_files \ - if self._files_limit is None \ - else all_files[:self._files_limit] + path_generator = Path(self._root_directory).glob(f"**/*.{self._extension}") + all_files = [str(it) for it in path_generator] + return all_files if self._files_limit is None else all_files[: self._files_limit] diff --git a/sziszapangma/integration/record_id_iterator.py b/sziszapangma/integration/record_id_iterator.py index f62ad1e014cd1068a4bfe1908c484528297b3bc5..dd3ab0dfe7b9601a7c751acd52748dcf07226909 100644 --- a/sziszapangma/integration/record_id_iterator.py +++ b/sziszapangma/integration/record_id_iterator.py @@ -1,8 +1,7 @@ from abc import ABC, abstractmethod from typing import Set -from sziszapangma.integration.repository.experiment_repository import \ - ExperimentRepository +from sziszapangma.integration.repository.experiment_repository import ExperimentRepository class RecordIdIterator(ABC): diff --git a/sziszapangma/integration/repository/experiment_repository.py b/sziszapangma/integration/repository/experiment_repository.py index 6f6d1484b2afe8e8885bddcb4e0eece4d5c677d8..8e1e67da84aa1292aaccb89b909753ffbb0df8d8 100644 --- a/sziszapangma/integration/repository/experiment_repository.py +++ b/sziszapangma/integration/repository/experiment_repository.py @@ -1,13 +1,13 @@ """Repository to manage results of asr experiment processing.""" from abc import ABC, abstractmethod -from typing import Optional, Set +from typing import Any, Optional, Set class ExperimentRepository(ABC): """Repository to manage results of asr experiment processing.""" @abstractmethod - def initialise(self): + def initialise(self) -> None: """Method to initialize repository.""" @abstractmethod @@ -15,28 +15,15 @@ class ExperimentRepository(ABC): """Method checks that property in record exists.""" @abstractmethod - def update_property_for_key( - self, - record_id: str, - property_name: str, - property_value: any - ): + def update_property_for_key(self, record_id: str, property_name: str, property_value: Any): """Method updates property in record.""" @abstractmethod - def delete_property_for_key( - self, - record_id: str, - property_name: str - ): + def delete_property_for_key(self, record_id: str, property_name: str): """Method removes property in record.""" @abstractmethod - def get_property_for_key( - self, - record_id: str, - property_name: str - ) -> Optional[any]: + def get_property_for_key(self, record_id: str, property_name: str) -> Optional[Any]: """Method returns property for record.""" @abstractmethod diff --git a/sziszapangma/integration/repository/file_experiment_repository.py b/sziszapangma/integration/repository/file_experiment_repository.py index 520390e64829f63a4873fdaa53a7ff629fc31159..93e9d96658bc18f3de423e048daf142efb6405a0 100644 --- a/sziszapangma/integration/repository/file_experiment_repository.py +++ b/sziszapangma/integration/repository/file_experiment_repository.py @@ -1,17 +1,16 @@ import json import os -from typing import Optional, Dict, Set +from typing import Any, Dict, Optional, Set import pandas as pd -from sziszapangma.integration.repository.experiment_repository import \ - ExperimentRepository +from sziszapangma.integration.repository.experiment_repository import ExperimentRepository class FileExperimentRepository(ExperimentRepository): _metadata_file_path: str _pretty_format: bool - _cache_value: any + _cache_value: Any def __init__(self, metadata_file_path: str, pretty_format: bool = False): super().__init__() @@ -21,7 +20,7 @@ class FileExperimentRepository(ExperimentRepository): def initialise(self): if not os.path.isfile(self._metadata_file_path): - with open(self._metadata_file_path, 'w') as f: + with open(self._metadata_file_path, "w") as f: empty_dict = dict() f.write(json.dumps(empty_dict)) @@ -34,39 +33,30 @@ class FileExperimentRepository(ExperimentRepository): else: return True - def update_property_for_key( - self, - record_id: str, - property_name: str, - property_value: any - ): + def update_property_for_key(self, record_id: str, property_name: str, property_value: Any): json_content = self._get_file_parsed_json() if record_id not in json_content: json_content[record_id] = dict({}) json_content[record_id][property_name] = property_value self._update_metadata_file(json_content) - def get_property_for_key( - self, - record_id: str, - property_name: str - ) -> Optional[any]: + def get_property_for_key(self, record_id: str, property_name: str) -> Optional[Any]: json_content = self._get_file_parsed_json() if self.property_exists(record_id, property_name): return json_content[record_id][property_name] else: return None - def _get_file_parsed_json(self) -> Dict[str, any]: + def _get_file_parsed_json(self) -> Dict[str, Dict[Any, Any]]: if self._cache_value is None: - with open(self._metadata_file_path, 'r') as f: + with open(self._metadata_file_path, "r") as f: self._cache_value = json.loads(f.read()) return self._cache_value - def _update_metadata_file(self, json_content: Dict[str, any]): + def _update_metadata_file(self, json_content: Dict[str, Any]): self._cache_value = json_content indent = 4 if self._pretty_format else None - with open(self._metadata_file_path, 'w') as f: + with open(self._metadata_file_path, "w") as f: f.write(json.dumps(json_content, indent=indent)) def get_metrics_result_to_df(self, metrics_property: str) -> pd.DataFrame: @@ -74,10 +64,11 @@ class FileExperimentRepository(ExperimentRepository): all_metadata = self._get_file_parsed_json() for item_id in all_metadata.keys(): item_dict = dict() - item_dict['id'] = item_id + item_dict["id"] = item_id for metric_keys in all_metadata[item_id][metrics_property].keys(): - item_dict[f'{metrics_property}.{metric_keys}'] = \ - all_metadata[item_id][metrics_property][metric_keys] + item_dict[f"{metrics_property}.{metric_keys}"] = all_metadata[item_id][ + metrics_property + ][metric_keys] list_of_dicts.append(item_dict) return pd.DataFrame(list_of_dicts) diff --git a/sziszapangma/integration/repository/mongo_experiment_repository.py b/sziszapangma/integration/repository/mongo_experiment_repository.py index 19c11aa4d7215642840e8b3ca85b15b08f4fb167..45e0d96d64a1ab852e0f0b999f3924136ccbb0e0 100644 --- a/sziszapangma/integration/repository/mongo_experiment_repository.py +++ b/sziszapangma/integration/repository/mongo_experiment_repository.py @@ -1,13 +1,12 @@ -from typing import Optional, Set +from typing import Any, Optional, Set from pymongo import MongoClient from pymongo.database import Database -from sziszapangma.integration.repository.experiment_repository import \ - ExperimentRepository +from sziszapangma.integration.repository.experiment_repository import ExperimentRepository -ID = '_id' -VALUE = 'value' +ID = "_id" +VALUE = "value" class MongoExperimentRepository(ExperimentRepository): @@ -28,29 +27,19 @@ class MongoExperimentRepository(ExperimentRepository): if property_name not in all_collections: return False else: - return database[property_name].find_one( - {ID: record_id}) is not None + return database[property_name].find_one({ID: record_id}) is not None - def update_property_for_key(self, record_id: str, property_name: str, - property_value: any): + def update_property_for_key(self, record_id: str, property_name: str, property_value: Any): self.delete_property_for_key(record_id, property_name) - self._get_database()[property_name].insert_one({ - ID: record_id, - VALUE: property_value - }) + self._get_database()[property_name].insert_one({ID: record_id, VALUE: property_value}) def delete_property_for_key(self, record_id: str, property_name: str): if self.property_exists(record_id, property_name): self._get_database()[property_name].delete_one({ID: record_id}) - def get_property_for_key( - self, - record_id: str, - property_name: str - ) -> Optional[any]: + def get_property_for_key(self, record_id: str, property_name: str) -> Optional[Any]: if self.property_exists(record_id, property_name): - return self._get_database()[property_name].find_one( - {ID: record_id})[VALUE] + return self._get_database()[property_name].find_one({ID: record_id})[VALUE] else: return None @@ -64,7 +53,7 @@ class MongoExperimentRepository(ExperimentRepository): def get_all_record_ids(self) -> Set[str]: records = set() for collection_name in self.get_all_properties(): - for record in self._get_database()[collection_name]: + for record in self._get_database()[collection_name].find(): records.add(record[ID]) return records diff --git a/sziszapangma/integration/task/asr_task.py b/sziszapangma/integration/task/asr_task.py index 3393121424db7ec06fa2a93e26a31cb8ed37fa28..88d104961a2336c0aa4d80abeb13c970a0137967 100644 --- a/sziszapangma/integration/task/asr_task.py +++ b/sziszapangma/integration/task/asr_task.py @@ -1,7 +1,8 @@ +from sziszapangma.core.alignment.word import Word from sziszapangma.integration.asr_processor import AsrProcessor +from sziszapangma.integration.mapper.word_mapper import WordMapper from sziszapangma.integration.record_path_provider import RecordPathProvider -from sziszapangma.integration.repository.experiment_repository import \ - ExperimentRepository +from sziszapangma.integration.repository.experiment_repository import ExperimentRepository from sziszapangma.integration.task.processing_task import ProcessingTask @@ -16,25 +17,26 @@ class AsrTask(ProcessingTask): record_path_provider: RecordPathProvider, asr_processor: AsrProcessor, asr_property_name: str, - require_update: bool + require_update: bool, ): super().__init__(task_name, require_update) self._record_path_provider = record_path_provider self._asr_processor = asr_processor self._asr_property_name = asr_property_name - def skip_for_record(self, record_id: str, - experiment_repository: ExperimentRepository) -> bool: - asr_value = experiment_repository \ - .get_property_for_key(record_id, self._asr_property_name) - return asr_value is not None and 'transcription' in asr_value + def skip_for_record(self, record_id: str, experiment_repository: ExperimentRepository) -> bool: + asr_value = experiment_repository.get_property_for_key(record_id, self._asr_property_name) + print(record_id, self._asr_property_name, asr_value) + return asr_value is not None and "transcription" in asr_value - def run_single_process(self, record_id: str, - experiment_repository: ExperimentRepository): + def run_single_process( + self, record_id: str, experiment_repository: ExperimentRepository + ) -> None: file_record_path = self._record_path_provider.get_path(record_id) + asr_result = self._asr_processor.call_recognise(file_record_path) + asr_result["transcription"] = [ + WordMapper.to_json_dict(Word.from_string(it)) for it in asr_result["transcription"] + ] experiment_repository.update_property_for_key( - record_id, - self._asr_property_name, - self._asr_processor - .call_recognise(file_record_path) + record_id, self._asr_property_name, asr_result ) diff --git a/sziszapangma/integration/task/classic_wer_metric_task.py b/sziszapangma/integration/task/classic_wer_metric_task.py index 4657a4aee4db1b723728ad5bd1ceaf76986af925..9a0b63b3b57d35e5823927fb5636e7c9ad48a355 100644 --- a/sziszapangma/integration/task/classic_wer_metric_task.py +++ b/sziszapangma/integration/task/classic_wer_metric_task.py @@ -1,17 +1,15 @@ -from typing import List, Dict +from typing import Any, Dict, List -from sziszapangma.core.alignment.alignment_classic_calculator import \ - AlignmentClassicCalculator +from sziszapangma.core.alignment.alignment_classic_calculator import AlignmentClassicCalculator from sziszapangma.core.alignment.alignment_step import AlignmentStep +from sziszapangma.core.alignment.word import Word from sziszapangma.core.wer.wer_calculator import WerCalculator -from sziszapangma.integration.mapper.alignment_step_mapper import \ - AlignmentStepMapper +from sziszapangma.integration.mapper.alignment_step_mapper import AlignmentStepMapper from sziszapangma.integration.mapper.word_mapper import WordMapper -from sziszapangma.integration.repository.experiment_repository import \ - ExperimentRepository +from sziszapangma.integration.repository.experiment_repository import ExperimentRepository from sziszapangma.integration.task.processing_task import ProcessingTask -_CLASSIC_WER = 'classic_wer' +_CLASSIC_WER = "classic_wer" class ClassicWerMetricTask(ProcessingTask): @@ -28,7 +26,7 @@ class ClassicWerMetricTask(ProcessingTask): asr_property_name: str, metrics_property_name: str, alignment_property_name: str, - require_update: bool + require_update: bool, ): super().__init__(task_name, require_update) self._gold_transcript_property_name = gold_transcript_property_name @@ -38,60 +36,43 @@ class ClassicWerMetricTask(ProcessingTask): self._alignment_classic_calculator = AlignmentClassicCalculator() self._wer_calculator = WerCalculator() - def skip_for_record( - self, - record_id: str, - experiment_repository: ExperimentRepository - ) -> bool: - return experiment_repository \ - .get_property_for_key(record_id, self._metrics_property_name) + def skip_for_record(self, record_id: str, experiment_repository: ExperimentRepository) -> bool: + return ( + experiment_repository.get_property_for_key(record_id, self._metrics_property_name) + is not None + ) - def run_single_process(self, record_id: str, - experiment_repository: ExperimentRepository): - gold_transcript = experiment_repository \ - .get_property_for_key(record_id, - self._gold_transcript_property_name) - asr_result = experiment_repository \ - .get_property_for_key(record_id, self._asr_property_name) - if 'transcription' in asr_result: - alignment_steps = self._get_alignment( - gold_transcript, asr_result['transcription'] - ) + def run_single_process(self, record_id: str, experiment_repository: ExperimentRepository): + gold_transcript = experiment_repository.get_property_for_key( + record_id, self._gold_transcript_property_name + ) + asr_result = experiment_repository.get_property_for_key(record_id, self._asr_property_name) + if gold_transcript is not None and asr_result is not None and "transcription" in asr_result: + alignment_steps = self._get_alignment(gold_transcript, asr_result["transcription"]) experiment_repository.update_property_for_key( record_id, self._alignment_property_name, - [AlignmentStepMapper.to_json_dict(it) - for it in alignment_steps] + [AlignmentStepMapper.to_json_dict(it) for it in alignment_steps], ) experiment_repository.update_property_for_key( - record_id, - self._metrics_property_name, - self.calculate_metrics(alignment_steps) + record_id, self._metrics_property_name, self.calculate_metrics(alignment_steps) ) def _get_alignment( - self, - gold_transcript: List[Dict[str, any]], - asr_result: List[Dict[str, any]] + self, gold_transcript: List[Dict[str, Any]], asr_result: List[Dict[str, Any]] ) -> List[AlignmentStep]: gold_transcript_words = [ - WordMapper.from_json_dict(word_dict) + # WordMapper.from_json_dict(word_dict) + Word(word_dict["id"], word_dict["word"]) for word_dict in gold_transcript ] - asr_words = [ - WordMapper.from_json_dict(word_dict) - for word_dict in asr_result - ] - return self._alignment_classic_calculator \ - .calculate_alignment(reference=gold_transcript_words, - hypothesis=asr_words) + asr_words = [WordMapper.from_json_dict(word_dict).to_lower() for word_dict in asr_result] + return self._alignment_classic_calculator.calculate_alignment( + reference=gold_transcript_words, hypothesis=asr_words + ) - def calculate_metrics( - self, - alignment_steps: List[AlignmentStep] - ) -> Dict[str, any]: + def calculate_metrics(self, alignment_steps: List[AlignmentStep]) -> Dict[str, Any]: """Calculate all metrics for data sample.""" metrics = dict() - metrics[_CLASSIC_WER] = self._wer_calculator.calculate_wer( - alignment_steps) + metrics[_CLASSIC_WER] = self._wer_calculator.calculate_wer(alignment_steps) return metrics diff --git a/sziszapangma/integration/task/embedding_wer_metrics_task.py b/sziszapangma/integration/task/embedding_wer_metrics_task.py index 3eb3476171ee99318a434c70ba06627300d645ac..81b5d92c729d9da62830a478293cedc4eb63f4d1 100644 --- a/sziszapangma/integration/task/embedding_wer_metrics_task.py +++ b/sziszapangma/integration/task/embedding_wer_metrics_task.py @@ -1,25 +1,19 @@ -from typing import List, Dict +from typing import Dict, List -from sziszapangma.core.alignment.alignment_embedding_calculator import \ - AlignmentEmbeddingCalculator -from sziszapangma.core.alignment.alignment_soft_calculator import \ - AlignmentSoftCalculator +from sziszapangma.core.alignment.alignment_embedding_calculator import AlignmentEmbeddingCalculator +from sziszapangma.core.alignment.alignment_soft_calculator import AlignmentSoftCalculator from sziszapangma.core.alignment.word import Word -from sziszapangma.core.transformer.cached_embedding_transformer import \ - CachedEmbeddingTransformer -from sziszapangma.core.transformer.embedding_transformer import \ - EmbeddingTransformer +from sziszapangma.core.transformer.cached_embedding_transformer import CachedEmbeddingTransformer +from sziszapangma.core.transformer.embedding_transformer import EmbeddingTransformer from sziszapangma.core.wer.wer_calculator import WerCalculator -from sziszapangma.integration.mapper.alignment_step_mapper import \ - AlignmentStepMapper +from sziszapangma.integration.mapper.alignment_step_mapper import AlignmentStepMapper from sziszapangma.integration.mapper.word_mapper import WordMapper -from sziszapangma.integration.repository.experiment_repository import \ - ExperimentRepository +from sziszapangma.integration.repository.experiment_repository import ExperimentRepository from sziszapangma.integration.task.processing_task import ProcessingTask -_SOFT_WER = 'soft_wer' -_EMBEDDING_WER = 'embedding_wer' -_WORD = 'word' +_SOFT_WER = "soft_wer" +_EMBEDDING_WER = "embedding_wer" +_WORD = "word" class EmbeddingWerMetricsTask(ProcessingTask): @@ -39,67 +33,72 @@ class EmbeddingWerMetricsTask(ProcessingTask): metrics_property_name: str, alignment_property_name: str, require_update: bool, - embedding_transformer: EmbeddingTransformer + embedding_transformer: EmbeddingTransformer, ): super().__init__(task_name, require_update) self._gold_transcript_property_name = gold_transcript_property_name self._asr_property_name = asr_property_name self._metrics_property_name = metrics_property_name - self._embedding_transformer = \ - CachedEmbeddingTransformer(embedding_transformer) - self._alignment_embedding_calculator = \ - AlignmentEmbeddingCalculator(self._embedding_transformer) - self._alignment_soft_calculator = \ - AlignmentSoftCalculator(self._embedding_transformer) + self._embedding_transformer = CachedEmbeddingTransformer(embedding_transformer) + self._alignment_embedding_calculator = AlignmentEmbeddingCalculator( + self._embedding_transformer + ) + self._alignment_soft_calculator = AlignmentSoftCalculator(self._embedding_transformer) self._wer_calculator = WerCalculator() self._alignment_property_name = alignment_property_name - def skip_for_record(self, record_id: str, - experiment_repository: ExperimentRepository) -> bool: - return experiment_repository \ - .get_property_for_key(record_id, self._metrics_property_name) + def skip_for_record(self, record_id: str, experiment_repository: ExperimentRepository) -> bool: + return ( + experiment_repository.get_property_for_key(record_id, self._metrics_property_name) + is not None + ) - def run_single_process(self, record_id: str, - experiment_repository: ExperimentRepository): - gold_transcript = experiment_repository \ - .get_property_for_key(record_id, - self._gold_transcript_property_name) - asr_result = experiment_repository \ - .get_property_for_key(record_id, self._asr_property_name) - if 'transcription' in asr_result: - gold_transcript_words = self._map_words_to_domain(gold_transcript) - asr_words = self._map_words_to_domain(asr_result['transcription']) + def run_single_process(self, record_id: str, experiment_repository: ExperimentRepository): + gold_transcript = experiment_repository.get_property_for_key( + record_id, self._gold_transcript_property_name + ) + asr_result = experiment_repository.get_property_for_key(record_id, self._asr_property_name) + if gold_transcript is not None and asr_result is not None and "transcription" in asr_result: + gold_transcript_words = self._map_words_to_domain_gold_transcript(gold_transcript) + asr_words = [ + it + for it in self._map_words_to_domain(asr_result["transcription"]) + if len(it.value) > 0 + ] - soft_alignment = self._alignment_soft_calculator \ - .calculate_alignment(gold_transcript_words, asr_words) - embedding_alignment = self._alignment_embedding_calculator \ - .calculate_alignment(gold_transcript_words, asr_words) + soft_alignment = self._alignment_soft_calculator.calculate_alignment( + gold_transcript_words, asr_words + ) + embedding_alignment = self._alignment_embedding_calculator.calculate_alignment( + gold_transcript_words, asr_words + ) soft_wer = self._wer_calculator.calculate_wer(soft_alignment) - embedding_wer = self._wer_calculator \ - .calculate_wer(embedding_alignment) + embedding_wer = self._wer_calculator.calculate_wer(embedding_alignment) alignment_results = { - 'soft_alignment': [AlignmentStepMapper.to_json_dict(it) - for it in soft_alignment], - 'embedding_alignment': [AlignmentStepMapper.to_json_dict(it) - for it in embedding_alignment], + "soft_alignment": [AlignmentStepMapper.to_json_dict(it) for it in soft_alignment], + "embedding_alignment": [ + AlignmentStepMapper.to_json_dict(it) for it in embedding_alignment + ], } - wer_results = {'soft_wer': soft_wer, - 'embedding_wer': embedding_wer} + wer_results = {"soft_wer": soft_wer, "embedding_wer": embedding_wer} experiment_repository.update_property_for_key( - record_id, self._alignment_property_name, alignment_results) + record_id, self._alignment_property_name, alignment_results + ) experiment_repository.update_property_for_key( - record_id, self._metrics_property_name, wer_results) + record_id, self._metrics_property_name, wer_results + ) self._embedding_transformer.clear() @staticmethod - def _map_words_to_domain( - input_json_dicts: List[Dict[str, str]] - ) -> List[Word]: + def _map_words_to_domain(input_json_dicts: List[Dict[str, str]]) -> List[Word]: + return [WordMapper.from_json_dict(word_dict).to_lower() for word_dict in input_json_dicts] + + @staticmethod + def _map_words_to_domain_gold_transcript(input_json_dicts: List[Dict[str, str]]) -> List[Word]: return [ - WordMapper.from_json_dict(word_dict) - for word_dict in input_json_dicts + Word(word_dict["id"], word_dict["word"]).to_lower() for word_dict in input_json_dicts ] diff --git a/sziszapangma/integration/task/gold_transcript_task.py b/sziszapangma/integration/task/gold_transcript_task.py index 0b407b4767d05c703a05475a58a113efbd247005..3d22795683838e0b869d0c73c73f03c2acc64dbc 100644 --- a/sziszapangma/integration/task/gold_transcript_task.py +++ b/sziszapangma/integration/task/gold_transcript_task.py @@ -1,7 +1,5 @@ -from sziszapangma.integration.gold_transcript_processor import \ - GoldTranscriptProcessor -from sziszapangma.integration.repository.experiment_repository \ - import ExperimentRepository +from sziszapangma.integration.gold_transcript_processor import GoldTranscriptProcessor +from sziszapangma.integration.repository.experiment_repository import ExperimentRepository from sziszapangma.integration.task.processing_task import ProcessingTask @@ -14,22 +12,23 @@ class GoldTranscriptTask(ProcessingTask): task_name: str, gold_transcript_processor: GoldTranscriptProcessor, gold_transcript_property_name: str, - require_update: bool + require_update: bool, ): super().__init__(task_name, require_update) self._gold_transcript_processor = gold_transcript_processor self._gold_transcript_property_name = gold_transcript_property_name - def skip_for_record(self, record_id: str, - experiment_repository: ExperimentRepository) -> bool: - return experiment_repository \ - .get_property_for_key(record_id, - self._gold_transcript_property_name) + def skip_for_record(self, record_id: str, experiment_repository: ExperimentRepository) -> bool: + return ( + experiment_repository.get_property_for_key( + record_id, self._gold_transcript_property_name + ) + is not None + ) - def run_single_process(self, record_id: str, - experiment_repository: ExperimentRepository): + def run_single_process(self, record_id: str, experiment_repository: ExperimentRepository): experiment_repository.update_property_for_key( record_id, self._gold_transcript_property_name, - self._gold_transcript_processor.get_gold_transcript(record_id) + self._gold_transcript_processor.get_gold_transcript(record_id), ) diff --git a/sziszapangma/integration/task/processing_task.py b/sziszapangma/integration/task/processing_task.py index c49485115fdfedc21f57d7c3b7a9f2887b099a89..29f55c1f3e93899c33484b9ccfbf14f55d4d993b 100644 --- a/sziszapangma/integration/task/processing_task.py +++ b/sziszapangma/integration/task/processing_task.py @@ -1,8 +1,8 @@ +import traceback from abc import ABC, abstractmethod from sziszapangma.integration.record_id_iterator import RecordIdIterator -from sziszapangma.integration.repository.experiment_repository import \ - ExperimentRepository +from sziszapangma.integration.repository.experiment_repository import ExperimentRepository class ProcessingTask(ABC): @@ -14,38 +14,30 @@ class ProcessingTask(ABC): self._task_name = task_name @abstractmethod - def run_single_process( - self, - record_id: str, - experiment_repository: ExperimentRepository - ): + def run_single_process(self, record_id: str, experiment_repository: ExperimentRepository): pass @abstractmethod - def skip_for_record( - self, - record_id: str, - experiment_repository: ExperimentRepository - ) -> bool: + def skip_for_record(self, record_id: str, experiment_repository: ExperimentRepository) -> bool: pass def process( - self, - record_id_iterator: RecordIdIterator, - experiment_repository: ExperimentRepository + self, record_id_iterator: RecordIdIterator, experiment_repository: ExperimentRepository ): records_ids = list(record_id_iterator.get_all_records()) for record_index in range(len(records_ids)): - part = f'{record_index + 1} / {len(records_ids)}' + part = f"{record_index + 1} / {len(records_ids)}" record_id = records_ids[record_index] skip = self.skip_for_record( - record_id=record_id, - experiment_repository=experiment_repository + record_id=record_id, experiment_repository=experiment_repository ) - base_log = f'{self._task_name} processing {part} {record_id}' + base_log = f"{self._task_name} processing {part} {record_id}" if not skip or self._require_update: print(base_log) - self.run_single_process(record_id, - experiment_repository) + try: + self.run_single_process(record_id, experiment_repository) + except Exception as err: + print("Handling run-time error:", err) + traceback.print_exc() else: - print(f'{base_log} -- skipped') + print(f"{base_log} -- skipped") diff --git a/tests/file_stored_embedding_transformer.py b/tests/file_stored_embedding_transformer.py index 2e329fa3d553daeed4c8533c8fc23e81140cbc79..42dfefa64483fbf1db005673805f0932f4416442 100644 --- a/tests/file_stored_embedding_transformer.py +++ b/tests/file_stored_embedding_transformer.py @@ -1,28 +1,21 @@ import json -from typing import List, Dict +from typing import Dict, List import numpy as np -from sziszapangma.core.transformer.embedding_transformer import \ - EmbeddingTransformer +from sziszapangma.core.transformer.embedding_transformer import EmbeddingTransformer class FileStoredEmbeddingTransformer(EmbeddingTransformer): - _cache: Dict[str, np.array] + _cache: Dict[str, np.ndarray] def __init__(self, file_path: str): - with open(file_path, 'r') as f: + with open(file_path, "r") as f: json_content = json.loads(f.read()) - self._cache = dict({ - key: np.array(json_content[key]) - for key in json_content.keys() - }) + self._cache = dict({key: np.array(json_content[key]) for key in json_content.keys()}) def get_embeddings(self, words: List[str]) -> Dict[str, np.ndarray]: - return dict({ - word: self._cache[word] - for word in words - }) + return dict({word: self._cache[word] for word in words}) def get_embedding(self, word: str) -> np.ndarray: return self._cache[word] diff --git a/tests/test_classic_wer.py b/tests/test_classic_wer.py index ff67ebd11117c13284f38efefe6a059bd5cd1c90..0766e9443fc37856522062f9f3db6c038107f3b3 100644 --- a/tests/test_classic_wer.py +++ b/tests/test_classic_wer.py @@ -2,8 +2,7 @@ from typing import List, Tuple import pytest -from sziszapangma.core.alignment.alignment_classic_calculator import \ - AlignmentClassicCalculator +from sziszapangma.core.alignment.alignment_classic_calculator import AlignmentClassicCalculator from sziszapangma.core.alignment.step_type import StepType from sziszapangma.core.alignment.step_words import StepWords from sziszapangma.core.alignment.word import Word @@ -15,16 +14,15 @@ def string_list_to_words(strings: List[str]) -> List[Word]: def get_sample_data() -> Tuple[List[Word], List[Word]]: - reference = ['This', 'great', 'machine', 'can', 'recognize', 'speech'] - hypothesis = ['This', 'machine', 'can', 'wreck', 'a', 'nice', 'beach'] + reference = ["This", "great", "machine", "can", "recognize", "speech"] + hypothesis = ["This", "machine", "can", "wreck", "a", "nice", "beach"] return string_list_to_words(reference), string_list_to_words(hypothesis) def test_classic_calculate_wer_value(): """Sample test for core calculate.""" reference, hypothesis = get_sample_data() - alignment = AlignmentClassicCalculator()\ - .calculate_alignment(reference, hypothesis) + alignment = AlignmentClassicCalculator().calculate_alignment(reference, hypothesis) wer_result = WerCalculator().calculate_wer(alignment) assert pytest.approx(wer_result) == 0.8333333 @@ -32,8 +30,7 @@ def test_classic_calculate_wer_value(): def test_classic_calculate_wer_steps(): """Sample test for core calculate.""" reference, hypothesis = get_sample_data() - alignment = AlignmentClassicCalculator().calculate_alignment( - reference, hypothesis) + alignment = AlignmentClassicCalculator().calculate_alignment(reference, hypothesis) reference_words = [ StepWords(reference[0], hypothesis[0]), @@ -43,11 +40,18 @@ def test_classic_calculate_wer_steps(): StepWords(None, hypothesis[3]), StepWords(None, hypothesis[4]), StepWords(reference[4], hypothesis[5]), - StepWords(reference[5], hypothesis[6])] + StepWords(reference[5], hypothesis[6]), + ] step_types = [ - StepType.CORRECT, StepType.DELETION, StepType.CORRECT, - StepType.CORRECT, StepType.INSERTION, StepType.INSERTION, - StepType.SUBSTITUTION, StepType.SUBSTITUTION] + StepType.CORRECT, + StepType.DELETION, + StepType.CORRECT, + StepType.CORRECT, + StepType.INSERTION, + StepType.INSERTION, + StepType.SUBSTITUTION, + StepType.SUBSTITUTION, + ] assert len(alignment) == 8 assert [it.step_type for it in alignment] == step_types diff --git a/tests/test_embedding_wer.py b/tests/test_embedding_wer.py index 4f7cd55c9d0fdb1f9abff24bb80c0c040ec92061..69fe11e91e4c65e90e58876000fe97eedac608ee 100644 --- a/tests/test_embedding_wer.py +++ b/tests/test_embedding_wer.py @@ -2,14 +2,11 @@ from typing import List, Tuple import pytest -from sziszapangma.core.alignment.alignment_calculator import \ - AlignmentCalculator -from sziszapangma.core.alignment.alignment_embedding_calculator import \ - AlignmentEmbeddingCalculator +from sziszapangma.core.alignment.alignment_calculator import AlignmentCalculator +from sziszapangma.core.alignment.alignment_embedding_calculator import AlignmentEmbeddingCalculator from sziszapangma.core.alignment.word import Word from sziszapangma.core.wer.wer_calculator import WerCalculator -from tests.file_stored_embedding_transformer import \ - FileStoredEmbeddingTransformer +from tests.file_stored_embedding_transformer import FileStoredEmbeddingTransformer def string_list_to_words(strings: List[str]) -> List[Word]: @@ -17,20 +14,18 @@ def string_list_to_words(strings: List[str]) -> List[Word]: def get_sample_data() -> Tuple[List[Word], List[Word]]: - reference = ['ala', 'ma', 'dobrego', 'wielkiego', 'psa', 'rasowego'] - hypothesis = ['alana', 'rego', 'kruchego', 'psa', 'rasowego'] + reference = ["ala", "ma", "dobrego", "wielkiego", "psa", "rasowego"] + hypothesis = ["alana", "rego", "kruchego", "psa", "rasowego"] return string_list_to_words(reference), string_list_to_words(hypothesis) def get_alignment_calculator() -> AlignmentCalculator: - return AlignmentEmbeddingCalculator( - FileStoredEmbeddingTransformer('tests/embeddings_pl.json')) + return AlignmentEmbeddingCalculator(FileStoredEmbeddingTransformer("tests/embeddings_pl.json")) def test_classic_calculate_wer_value(): """Sample test for core calculate.""" reference, hypothesis = get_sample_data() - alignment = get_alignment_calculator().calculate_alignment(reference, - hypothesis) + alignment = get_alignment_calculator().calculate_alignment(reference, hypothesis) wer_result = WerCalculator().calculate_wer(alignment) assert pytest.approx(wer_result) == 0.55879563 diff --git a/tests/test_soft_wer.py b/tests/test_soft_wer.py index 85a34338b831fff39525f557c69886911cb4e100..64703e3e0cefc958d3bd1cc405f2611573db0f70 100644 --- a/tests/test_soft_wer.py +++ b/tests/test_soft_wer.py @@ -2,14 +2,11 @@ from typing import List, Tuple import pytest -from sziszapangma.core.alignment.alignment_calculator import \ - AlignmentCalculator -from sziszapangma.core.alignment.alignment_soft_calculator import \ - AlignmentSoftCalculator +from sziszapangma.core.alignment.alignment_calculator import AlignmentCalculator +from sziszapangma.core.alignment.alignment_soft_calculator import AlignmentSoftCalculator from sziszapangma.core.alignment.word import Word from sziszapangma.core.wer.wer_calculator import WerCalculator -from tests.file_stored_embedding_transformer import \ - FileStoredEmbeddingTransformer +from tests.file_stored_embedding_transformer import FileStoredEmbeddingTransformer def string_list_to_words(strings: List[str]) -> List[Word]: @@ -17,21 +14,19 @@ def string_list_to_words(strings: List[str]) -> List[Word]: def get_sample_data() -> Tuple[List[Word], List[Word]]: - reference = ['ala', 'ma', 'dobrego', 'wielkiego', 'psa', 'rasowego'] - hypothesis = ['alana', 'rego', 'kruchego', 'psa', 'rasowego'] + reference = ["ala", "ma", "dobrego", "wielkiego", "psa", "rasowego"] + hypothesis = ["alana", "rego", "kruchego", "psa", "rasowego"] return string_list_to_words(reference), string_list_to_words(hypothesis) def get_alignment_calculator() -> AlignmentCalculator: - return AlignmentSoftCalculator( - FileStoredEmbeddingTransformer('tests/embeddings_pl.json')) + return AlignmentSoftCalculator(FileStoredEmbeddingTransformer("tests/embeddings_pl.json")) def test_classic_calculate_wer_value(): """Sample test for core calculate.""" reference, hypothesis = get_sample_data() - alignment = get_alignment_calculator().calculate_alignment( - reference, hypothesis) + alignment = get_alignment_calculator().calculate_alignment(reference, hypothesis) wer_result = WerCalculator().calculate_wer(alignment) print(wer_result) assert pytest.approx(wer_result) == 0.50186761 diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 7062dd226ee8550c29f47c9d907e6e32b5c2bac7..0000000000000000000000000000000000000000 --- a/tox.ini +++ /dev/null @@ -1,22 +0,0 @@ -[tox] -envlist = flake8,testenv -skipsdist = True - -[testenv:flake8] -basepython = python -deps = flake8 -commands = flake8 sziszapangma tests - -[testenv] -setenv = - PYTHONPATH = {toxinidir} -deps = - -r{toxinidir}/requirements.txt - -r{toxinidir}/requirements_dev.txt -; If you want to make tox run the tests with the same versions, create a -; requirements.txt with the pinned versions and uncomment the following line: -; -r{toxinidir}/requirements.txt -commands = - pytest -; pytest --basetemp={envtmpdir} -