From 7f9049609008bbbbfe51447719d2e86e1c3b79ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20W=C4=85troba?= <markowanga@gmail.com> Date: Tue, 10 Aug 2021 08:36:58 +0200 Subject: [PATCH] Add base ASR serice --- poetry.lock | 117 +++++++++++++++++- pyproject.toml | 1 + .../base_asr_service/asr_processor.py | 45 +++++++ .../base_asr_service/asr_result.py | 8 ++ .../base_asr_service/requirements.txt | 1 + 5 files changed, 169 insertions(+), 3 deletions(-) create mode 100644 sziszapangma/integration/base_asr_service/asr_processor.py create mode 100644 sziszapangma/integration/base_asr_service/asr_result.py create mode 100644 sziszapangma/integration/base_asr_service/requirements.txt diff --git a/poetry.lock b/poetry.lock index 6af56bf..c14ca3b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -73,7 +73,7 @@ unicode_backport = ["unicodedata2"] name = "click" version = "8.0.1" description = "Composable command line interface toolkit" -category = "dev" +category = "main" optional = false python-versions = ">=3.6" @@ -84,7 +84,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "colorama" version = "0.4.4" description = "Cross-platform colored terminal text." -category = "dev" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" @@ -100,6 +100,24 @@ python-versions = "*" numpy = "*" pybind11 = ">=2.2" +[[package]] +name = "flask" +version = "2.0.1" +description = "A simple framework for building complex web applications." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +click = ">=7.1.2" +itsdangerous = ">=2.0" +Jinja2 = ">=3.0" +Werkzeug = ">=2.0" + +[package.extras] +async = ["asgiref (>=3.2)"] +dotenv = ["python-dotenv"] + [[package]] name = "idna" version = "3.2" @@ -122,6 +140,36 @@ requirements_deprecated_finder = ["pipreqs", "pip-api"] colors = ["colorama (>=0.4.3,<0.5.0)"] plugins = ["setuptools"] +[[package]] +name = "itsdangerous" +version = "2.0.1" +description = "Safely pass data to untrusted environments and back." +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "jinja2" +version = "3.0.1" +description = "A very fast and expressive template engine." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + +[[package]] +name = "markupsafe" +version = "2.0.1" +description = "Safely add untrusted strings to HTML/XML markup." +category = "main" +optional = false +python-versions = ">=3.6" + [[package]] name = "more-itertools" version = "8.8.0" @@ -409,13 +457,24 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "werkzeug" +version = "2.0.1" +description = "The comprehensive WSGI web application library." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +watchdog = ["watchdog"] + [extras] developer = [] [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "78c43ede773a3caa4d3c131a3faf75975279c5d38ba6c9fe15b3653515e475ce" +content-hash = "339f3a61fd3a6005117dded8ba0727e8eab938a43ad3c1b492db4ac55b8b9009" [metadata.files] appdirs = [ @@ -452,6 +511,10 @@ colorama = [ fasttext = [ {file = "fasttext-0.9.2.tar.gz", hash = "sha256:665556f1f6dcb4fcbe25fa8ebcd4f71b18fa96a090de09d88d97a60cbd29dcb5"}, ] +flask = [ + {file = "Flask-2.0.1-py3-none-any.whl", hash = "sha256:a6209ca15eb63fc9385f38e452704113d679511d9574d09b2cf9183ae7d20dc9"}, + {file = "Flask-2.0.1.tar.gz", hash = "sha256:1c4c257b1892aec1398784c63791cbaa43062f1f7aeb555c4da961b20ee68f55"}, +] idna = [ {file = "idna-3.2-py3-none-any.whl", hash = "sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a"}, {file = "idna-3.2.tar.gz", hash = "sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3"}, @@ -460,6 +523,50 @@ isort = [ {file = "isort-5.9.3-py3-none-any.whl", hash = "sha256:e17d6e2b81095c9db0a03a8025a957f334d6ea30b26f9ec70805411e5c7c81f2"}, {file = "isort-5.9.3.tar.gz", hash = "sha256:9c2ea1e62d871267b78307fe511c0838ba0da28698c5732d54e2790bf3ba9899"}, ] +itsdangerous = [ + {file = "itsdangerous-2.0.1-py3-none-any.whl", hash = "sha256:5174094b9637652bdb841a3029700391451bd092ba3db90600dea710ba28e97c"}, + {file = "itsdangerous-2.0.1.tar.gz", hash = "sha256:9e724d68fc22902a1435351f84c3fb8623f303fffcc566a4cb952df8c572cff0"}, +] +jinja2 = [ + {file = "Jinja2-3.0.1-py3-none-any.whl", hash = "sha256:1f06f2da51e7b56b8f238affdd6b4e2c61e39598a378cc49345bc1bd42a978a4"}, + {file = "Jinja2-3.0.1.tar.gz", hash = "sha256:703f484b47a6af502e743c9122595cc812b0271f661722403114f71a79d0f5a4"}, +] +markupsafe = [ + {file = "MarkupSafe-2.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51"}, + {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff"}, + {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:0446679737af14f45767963a1a9ef7620189912317d095f2d9ffa183a4d25d2b"}, + {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:f826e31d18b516f653fe296d967d700fddad5901ae07c622bb3705955e1faa94"}, + {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:fa130dd50c57d53368c9d59395cb5526eda596d3ffe36666cd81a44d56e48872"}, + {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:905fec760bd2fa1388bb5b489ee8ee5f7291d692638ea5f67982d968366bef9f"}, + {file = "MarkupSafe-2.0.1-cp36-cp36m-win32.whl", hash = "sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d"}, + {file = "MarkupSafe-2.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9"}, + {file = "MarkupSafe-2.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567"}, + {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:6557b31b5e2c9ddf0de32a691f2312a32f77cd7681d8af66c2692efdbef84c18"}, + {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:49e3ceeabbfb9d66c3aef5af3a60cc43b85c33df25ce03d0031a608b0a8b2e3f"}, + {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:d7f9850398e85aba693bb640262d3611788b1f29a79f0c93c565694658f4071f"}, + {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:6a7fae0dd14cf60ad5ff42baa2e95727c3d81ded453457771d02b7d2b3f9c0c2"}, + {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:b7f2d075102dc8c794cbde1947378051c4e5180d52d276987b8d28a3bd58c17d"}, + {file = "MarkupSafe-2.0.1-cp37-cp37m-win32.whl", hash = "sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415"}, + {file = "MarkupSafe-2.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914"}, + {file = "MarkupSafe-2.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:be98f628055368795d818ebf93da628541e10b75b41c559fdf36d104c5787066"}, + {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1d609f577dc6e1aa17d746f8bd3c31aa4d258f4070d61b2aa5c4166c1539de35"}, + {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7d91275b0245b1da4d4cfa07e0faedd5b0812efc15b702576d103293e252af1b"}, + {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298"}, + {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:47ab1e7b91c098ab893b828deafa1203de86d0bc6ab587b160f78fe6c4011f75"}, + {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:97383d78eb34da7e1fa37dd273c20ad4320929af65d156e35a5e2d89566d9dfb"}, + {file = "MarkupSafe-2.0.1-cp38-cp38-win32.whl", hash = "sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64"}, + {file = "MarkupSafe-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833"}, + {file = "MarkupSafe-2.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26"}, + {file = "MarkupSafe-2.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3c112550557578c26af18a1ccc9e090bfe03832ae994343cfdacd287db6a6ae7"}, + {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux1_i686.whl", hash = "sha256:53edb4da6925ad13c07b6d26c2a852bd81e364f95301c66e930ab2aef5b5ddd8"}, + {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:f5653a225f31e113b152e56f154ccbe59eeb1c7487b39b9d9f9cdb58e6c79dc5"}, + {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:4efca8f86c54b22348a5467704e3fec767b2db12fc39c6d963168ab1d3fc9135"}, + {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:ab3ef638ace319fa26553db0624c4699e31a28bb2a835c5faca8f8acf6a5a902"}, + {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:f8ba0e8349a38d3001fae7eadded3f6606f0da5d748ee53cc1dab1d6527b9509"}, + {file = "MarkupSafe-2.0.1-cp39-cp39-win32.whl", hash = "sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74"}, + {file = "MarkupSafe-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8"}, + {file = "MarkupSafe-2.0.1.tar.gz", hash = "sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a"}, +] more-itertools = [ {file = "more-itertools-8.8.0.tar.gz", hash = "sha256:83f0308e05477c68f56ea3a888172c78ed5d5b3c282addb67508e7ba6c8f813a"}, {file = "more_itertools-8.8.0-py3-none-any.whl", hash = "sha256:2cf89ec599962f2ddc4d568a05defc40e0a587fbc10d5989713638864c36be4d"}, @@ -795,3 +902,7 @@ wcwidth = [ {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, ] +werkzeug = [ + {file = "Werkzeug-2.0.1-py3-none-any.whl", hash = "sha256:6c1ec500dcdba0baa27600f6a22f6333d8b662d22027ff9f6202e3367413caa8"}, + {file = "Werkzeug-2.0.1.tar.gz", hash = "sha256:1de1db30d010ff1af14a009224ec49ab2329ad2cde454c8a708130642d579c42"}, +] diff --git a/pyproject.toml b/pyproject.toml index fa3d0f3..e94b32d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ requests = "^2.25.1" pandas = "^1.2.4" fasttext = "^0.9.2" pymongo = "^3.11.4" +Flask = "^2.0.1" [tool.poetry.dev-dependencies] pytest = "^5.2" diff --git a/sziszapangma/integration/base_asr_service/asr_processor.py b/sziszapangma/integration/base_asr_service/asr_processor.py new file mode 100644 index 0000000..ef70212 --- /dev/null +++ b/sziszapangma/integration/base_asr_service/asr_processor.py @@ -0,0 +1,45 @@ +import os +import uuid +from abc import ABC, abstractmethod +from pathlib import Path + +from asr_result import AsrResult +from flask import Flask, Response, jsonify, request + +_TEMP_DIRECTORY = "asr_processing" + + +class AsrProcessor(ABC): + def __init__(self): + self.read_user_credentials = self.read_user_credentials() + self._cached_users_credentials = None + + @abstractmethod + def process_asr(self, audio_file_path: str) -> AsrResult: + """Method to call for ASR results.""" + + def process_request(self) -> Response: + file_tag = str(uuid.uuid4()) + f = request.files["file"] + if f is not None and f.filename is not None: + file_extension = f.filename.split(".")[-1] + file_name = f"{file_tag}.{file_extension}" + file_path = f"{_TEMP_DIRECTORY}/{file_name}" + f.save(file_path) + try: + transcription = self.process_asr(file_path) + os.remove(file_path) + result_object = jsonify( + {"transcription": transcription.words, "full_text": transcription.full_text} + ) + except: + result_object = jsonify({"error": "Error on asr processing"}) + else: + result_object = jsonify({"error": "Error on asr processing"}) + return result_object + + def start_processor(self): + app = Flask(__name__) + Path(_TEMP_DIRECTORY).mkdir(parents=True, exist_ok=True) + app.route("/process_asr", methods=["POST"])(self.process_request) + app.run(debug=True, host="0.0.0.0") diff --git a/sziszapangma/integration/base_asr_service/asr_result.py b/sziszapangma/integration/base_asr_service/asr_result.py new file mode 100644 index 0000000..cdf7c20 --- /dev/null +++ b/sziszapangma/integration/base_asr_service/asr_result.py @@ -0,0 +1,8 @@ +from dataclasses import dataclass +from typing import List + + +@dataclass(frozen=True) +class AsrResult: + words: List[str] + full_text: str diff --git a/sziszapangma/integration/base_asr_service/requirements.txt b/sziszapangma/integration/base_asr_service/requirements.txt new file mode 100644 index 0000000..7e10602 --- /dev/null +++ b/sziszapangma/integration/base_asr_service/requirements.txt @@ -0,0 +1 @@ +flask -- GitLab