From 31e935085b51da2ab69ceb2e713a2775d601e657 Mon Sep 17 00:00:00 2001 From: Kamil Kanclerz <kamil.kanclerz@e-science.pl> Date: Sun, 13 Dec 2020 23:30:17 +0100 Subject: [PATCH] Add worker files, Dockerfile, CI config and gitignore --- .gitignore | 5 +++++ .gitlab-ci.yml | 38 ++++++++++++++++++++++++++++++++++++ Dockerfile | 21 ++++++++++++++++++++ config.ini | 18 +++++++++++++++++ entrypoint.sh | 1 + main.py | 34 ++++++++++++++++++++++++++++++++ requirements.txt | 2 ++ src/speller2_worker.py | 41 +++++++++++++++++++++++++++++++++++++++ tox.ini | 44 ++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 204 insertions(+) create mode 100755 .gitignore create mode 100755 .gitlab-ci.yml create mode 100755 Dockerfile create mode 100755 config.ini create mode 100755 entrypoint.sh create mode 100755 main.py create mode 100755 requirements.txt create mode 100755 src/speller2_worker.py create mode 100755 tox.ini diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..9d9cd9a --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.idea/ + +# temp files +example_usage.py +example_text.txt \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100755 index 0000000..6949a7d --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,38 @@ +image: 'clarinpl/python:3.6' + +cache: + paths: + - .tox + +stages: + - check_style + - build + +before_script: + - pip install tox==2.9.1 + +pep8: + stage: check_style + script: + - tox -v -e pep8 + +docstyle: + stage: check_style + script: + - tox -v -e docstyle + +build_image: + stage: build + image: docker:18.09.7 + only: + - master + services: + - 'docker:18.09.7-dind' + before_script: + - '' + script: + - docker build -t clarinpl/speller2 . + - echo $DOCKER_PASSWORD > pass.txt + - cat pass.txt | docker login --username $DOCKER_USERNAME --password-stdin + - rm pass.txt + - docker push clarinpl/speller2 diff --git a/Dockerfile b/Dockerfile new file mode 100755 index 0000000..951dd5c --- /dev/null +++ b/Dockerfile @@ -0,0 +1,21 @@ +FROM clarinpl/python:3.6 + +WORKDIR /home/worker +COPY ./src ./src +COPY ./requirements.txt . +COPY ./models/autocorrect ./models/autocorrect +COPY ./entrypoint.sh . + +RUN apt update && apt install -y g++ gdb + +#RUN git clone https://github.com/facebookresearch/fastText.git && \ +# cd fastText && \ +# python3.6 -m pip install . && \ +# cd .. && \ +# rm -rf fastText + +RUN python3.6 -m pip install -r requirements.txt + +RUN ["chmod", "+x", "./entrypoint.sh"] + +CMD ["./entrypoint.sh"] diff --git a/config.ini b/config.ini new file mode 100755 index 0000000..ebc8991 --- /dev/null +++ b/config.ini @@ -0,0 +1,18 @@ +[service] +tool = speller2 + +root = /samba/requests/ +rabbit_host = rabbitmq +rabbit_user = test +rabbit_password = test +queue_prefix = nlp_ + +[tool] +workers_number = 1 + +[logging] +port = 9980 +local_log_level = INFO + +[logging_levels] +__main__ = INFO diff --git a/entrypoint.sh b/entrypoint.sh new file mode 100755 index 0000000..5899eb4 --- /dev/null +++ b/entrypoint.sh @@ -0,0 +1 @@ +python3.6 main.py service diff --git a/main.py b/main.py new file mode 100755 index 0000000..3751847 --- /dev/null +++ b/main.py @@ -0,0 +1,34 @@ +"""Implementation of text error correction service.""" +import argparse +import nlp_ws +from src.speller2_worker import Speller2Worker + + +def get_args(): + """Gets command line arguments.""" + parser = argparse.ArgumentParser(description="speller2") + + subparsers = parser.add_subparsers(dest="mode") + subparsers.required = True + + subparsers.add_parser( + "service", + help="Run as a service") + + return parser.parse_args() + + +def main(): + """Runs the program.""" + args = get_args() + + generators = { + "service": lambda: nlp_ws.NLPService.main(Speller2Worker), + } + + gen_fn = generators.get(args.mode, lambda: None) + gen_fn() + + +if __name__ == "__main__": + main() diff --git a/requirements.txt b/requirements.txt new file mode 100755 index 0000000..4cc9b5e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +autocorrect==2.1.0 +numpy==1.17.4 diff --git a/src/speller2_worker.py b/src/speller2_worker.py new file mode 100755 index 0000000..0b55c46 --- /dev/null +++ b/src/speller2_worker.py @@ -0,0 +1,41 @@ +"""Implementation of nlp_worker.""" +import io +import json +import logging + +import nlp_ws +from autocorrect import Speller + +_log = logging.getLogger(__name__) + + +class Speller2Worker(nlp_ws.NLPWorker): + """Implements nlp_worker for text error correction service.""" + + @classmethod + def static_init(cls, config): + """One time static initialisation.""" + _log.log(logging.INFO, "Worker started loading static models ") + cls._model = Speller('pl') + _log.log(logging.INFO, "Worker finished loading static models ") + + def process(self, input_file, task_options, output_file): + """Starting nlp process.""" + _log.info("Processing") + + model = self._model + + data = self._read_file(input_file) + + corrected_data = model(data) + + with io.open(output_file, 'w', encoding='utf-8') as f: + json.dump(corrected_data, f, ensure_ascii=False) + + @classmethod + def _read_file(cls, input_path): + """Reading text from input file""" + with open(input_path, 'r', encoding='utf-8') as f: + content = f.read() + + return content diff --git a/tox.ini b/tox.ini new file mode 100755 index 0000000..854603a --- /dev/null +++ b/tox.ini @@ -0,0 +1,44 @@ +[tox] +envlist = pep8,docstyle +skipsdist = True + +[testenv:pep8] +deps = + flake8 +basepython = python3 +commands = + flake8 {posargs} + +[testenv:docstyle] +deps = + pydocstyle +basepython = python3 +commands = + pydocstyle --verbose {posargs} + +[flake8] +# W504 skipped because it is overeager and unnecessary +ignore = W504 +show-source = True +exclude = .git,.venv,.tox,dist,doc,*egg,build,venv +import-order-style = pep8 +max-line-length = 80 + + +[pydocstyle] +# D104 Missing docstring in public package +# D203 1 blank line required before class docstring +# D213 Multi-line docstring summary should start at the second line +# D214 Section is over-indented +# D215 Section underline is over-indented +# D401 First line should be in imperative mood; try rephrasing +# D405 Section name should be properly capitalized +# D406 Section name should end with a newline +# D407 Missing dashed underline after section +# D408 Section underline should be in the line following the section’s name +# D409 Section underline should match the length of its name +# D410 Missing blank line after section +# D411 Missing blank line before section +ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411 +match-dir = ^(?!\.tox|venv).* +match = ^(?!setup).*\.py -- GitLab