Skip to content
Snippets Groups Projects
Commit 31e93508 authored by Kamil Kanclerz's avatar Kamil Kanclerz
Browse files

Add worker files, Dockerfile, CI config and gitignore

parent fc4d515c
No related merge requests found
Pipeline #2163 failed with stages
in 32 seconds
# IDE project directory (presumably JetBrains/PyCharm settings — confirm)
.idea/
# temp files
example_usage.py
example_text.txt
\ No newline at end of file
# GitLab CI pipeline: run the tox style checks, then build and publish the
# Docker image (master only).
image: 'clarinpl/python:3.6'

# Keep the tox environments between pipeline runs.
cache:
  paths:
    - .tox

stages:
  - check_style
  - build

# Default setup for every job that does not override before_script.
before_script:
  - pip install tox==2.9.1

pep8:
  stage: check_style
  script:
    - tox -v -e pep8

docstyle:
  stage: check_style
  script:
    - tox -v -e docstyle

build_image:
  stage: build
  image: docker:18.09.7
  only:
    - master
  services:
    - 'docker:18.09.7-dind'
  # Overrides the global before_script so tox is not installed in this job
  # (presumably because the docker image does not need/provide pip).
  before_script:
    - ''
  script:
    - docker build -t clarinpl/speller2 .
    # Pipe the password straight into docker login: no temporary file holding
    # the secret, and the quotes keep a password containing spaces or glob
    # characters intact.
    - echo "$DOCKER_PASSWORD" | docker login --username "$DOCKER_USERNAME" --password-stdin
    - docker push clarinpl/speller2
# Image for the speller2 text error correction worker.
FROM clarinpl/python:3.6

WORKDIR /home/worker

# OS packages first: they change least often, so this layer stays cached.
# apt-get (not apt) is the script-stable front end; the package lists are
# removed in the same layer so they never end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        g++ \
        gdb \
    && rm -rf /var/lib/apt/lists/*

#RUN git clone https://github.com/facebookresearch/fastText.git && \
# cd fastText && \
# python3.6 -m pip install . && \
# cd .. && \
# rm -rf fastText

# Install Python dependencies before copying the sources, so editing the code
# does not invalidate the (slow) dependency layer.
COPY ./requirements.txt .
RUN python3.6 -m pip install --no-cache-dir -r requirements.txt

COPY ./models/autocorrect ./models/autocorrect
COPY ./src ./src
# entrypoint.sh runs "python3.6 main.py service" from the working directory,
# so main.py has to be present in the image.
COPY ./main.py .
# NOTE(review): the service configuration file is not copied; presumably it is
# mounted at runtime — confirm.
COPY ./entrypoint.sh .
RUN ["chmod", "+x", "./entrypoint.sh"]

CMD ["./entrypoint.sh"]
# Worker configuration (presumably read by nlp_ws at start-up — confirm).
[service]
tool = speller2
root = /samba/requests/
rabbit_host = rabbitmq
# NOTE(review): credentials are committed in plain text; these look like
# test-only values — confirm they are never used in production.
rabbit_user = test
rabbit_password = test
queue_prefix = nlp_
[tool]
workers_number = 1
[logging]
port = 9980
local_log_level = INFO
[logging_levels]
__main__ = INFO
#!/bin/sh
# Container entry point: start the worker in service mode.
# The shebang is required because the image starts this script directly
# (exec-form CMD, no wrapping shell). "exec" replaces the shell with the
# Python process so it receives stop signals itself.
exec python3.6 main.py service
main.py 0 → 100755
"""Implementation of text error correction service."""
import argparse
import nlp_ws
from src.speller2_worker import Speller2Worker
def get_args():
    """Parse the command line and return the resulting namespace.

    The selected sub-command is stored in the ``mode`` attribute; a
    sub-command is mandatory and ``service`` is the only one defined.
    """
    arg_parser = argparse.ArgumentParser(description="speller2")
    modes = arg_parser.add_subparsers(dest="mode")
    # Set as an attribute rather than passed to add_subparsers().
    modes.required = True
    modes.add_parser("service", help="Run as a service")
    parsed = arg_parser.parse_args()
    return parsed
def main():
    """Dispatch to the handler registered for the chosen mode."""
    def run_service():
        return nlp_ws.NLPService.main(Speller2Worker)

    def do_nothing():
        return None

    cli_args = get_args()
    handlers = {
        "service": run_service,
    }
    # Unknown modes fall back to a no-op handler.
    handler = handlers.get(cli_args.mode, do_nothing)
    handler()


if __name__ == "__main__":
    main()
# Pinned Python dependencies, installed into the image with pip.
autocorrect==2.1.0
# NOTE(review): numpy is not imported by the visible sources; presumably
# needed indirectly — confirm.
numpy==1.17.4
"""Implementation of nlp_worker."""
import io
import json
import logging
import nlp_ws
from autocorrect import Speller
_log = logging.getLogger(__name__)
class Speller2Worker(nlp_ws.NLPWorker):
    """Implements nlp_worker for text error correction service."""

    @classmethod
    def static_init(cls, config):
        """Load the spelling model once and share it via the class.

        :param config: configuration supplied by the caller (unused here).
        """
        _log.info("Worker started loading static models ")
        cls._model = Speller('pl')
        _log.info("Worker finished loading static models ")

    def process(self, input_file, task_options, output_file):
        """Correct the text of input_file and store it in output_file.

        :param input_file: path of the UTF-8 text file to correct.
        :param task_options: options of the task (unused here).
        :param output_file: path the corrected text is written to as JSON.
        """
        _log.info("Processing")
        text = self._read_file(input_file)
        corrected_text = self._model(text)
        # ensure_ascii=False keeps non-ASCII characters readable in the file.
        with io.open(output_file, 'w', encoding='utf-8') as f:
            json.dump(corrected_text, f, ensure_ascii=False)

    @classmethod
    def _read_file(cls, input_path):
        """Read and return the whole content of a UTF-8 text file."""
        with io.open(input_path, 'r', encoding='utf-8') as f:
            return f.read()
tox.ini 0 → 100755
# tox configuration: two lint-only environments, no package build.
[tox]
envlist = pep8,docstyle
skipsdist = True
# Code style check with flake8.
[testenv:pep8]
deps =
flake8
basepython = python3
commands =
flake8 {posargs}
# Docstring style check with pydocstyle.
[testenv:docstyle]
deps =
pydocstyle
basepython = python3
commands =
pydocstyle --verbose {posargs}
# NOTE(review): import-order-style looks like a flake8-import-order option,
# but only flake8 is listed in the pep8 deps above — confirm it has an effect.
[flake8]
# W504 skipped because it is overeager and unnecessary
ignore = W504
show-source = True
exclude = .git,.venv,.tox,dist,doc,*egg,build,venv
import-order-style = pep8
max-line-length = 80
[pydocstyle]
# D104 Missing docstring in public package
# D203 1 blank line required before class docstring
# D213 Multi-line docstring summary should start at the second line
# D214 Section is over-indented
# D215 Section underline is over-indented
# D401 First line should be in imperative mood; try rephrasing
# D405 Section name should be properly capitalized
# D406 Section name should end with a newline
# D407 Missing dashed underline after section
# D408 Section underline should be in the line following the section’s name
# D409 Section underline should match the length of its name
# D410 Missing blank line after section
# D411 Missing blank line before section
ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411
match-dir = ^(?!\.tox|venv).*
match = ^(?!setup).*\.py
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment