Commit 90b0d778 authored by Bartłomiej Koptyra

Program skeleton.

parent dd9d97f6
FROM clarinpl/python:3.6
WORKDIR /home/worker

# Install the dependencies before copying the sources so that this layer is
# reused from the build cache until requirements.txt itself changes.
COPY ./requirements.txt .
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python3.6 -m pip install --no-cache-dir -r requirements.txt

# Application code changes most often, so it is copied last.
COPY ./src ./src
COPY ./main.py .

# Exec (JSON-array) form: the interpreter is started directly, without a shell.
CMD ["python3.6", "main.py", "service"]
\ No newline at end of file
# Runtime configuration mounted into the container as /home/worker/config.ini
# by the compose file. Presumably read by the nlp_ws framework that main.py
# starts -- TODO confirm the exact meaning of each key against nlp_ws.
[service]
tool = anonymizer
root = /samba/requests/
rabbit_host = rabbitmq
# NOTE(review): these look like placeholder test credentials; confirm they
# are overridden outside of local development.
rabbit_user = test
rabbit_password = test
queue_prefix = nlp_
[tool]
workers_number = 1
[logging]
port = 9998
local_log_level = INFO
# Log level per logger name; only the __main__ logger is configured here.
[logging_levels]
__main__ = INFO
version: '3'
services:
  # NOTE(review): the service key still says "tokenizer" although the
  # container runs the anonymizer. It is kept unchanged here because
  # renaming it would break existing invocations that address the service
  # by name.
  tokenizer:
    container_name: clarin_anonymizer
    build: ./
    working_dir: /home/worker
    entrypoint:
      - python3.6
      - main.py
      - service
    environment:
      # Python enables unbuffered output for ANY non-empty value, so the
      # previous "0" already switched it on; "1" states that intent clearly.
      - PYTHONUNBUFFERED=1
    volumes:
      - '/samba:/samba'
      # Configuration and sources are bind-mounted over the copies baked
      # into the image, so local edits take effect without a rebuild.
      - './config.ini:/home/worker/config.ini'
      - './src:/home/worker/src'
      - './main.py:/home/worker/main.py'
"""Implementation of tokenizer service."""
import argparse
import nlp_ws
from src.worker import Worker
def get_args(argv=None):
    """Parse the command line of the anonymizer service.

    Args:
        argv: Optional list of argument strings. When None (the default)
            argparse falls back to sys.argv[1:], so existing callers that
            pass nothing keep their behaviour.

    Returns:
        argparse.Namespace whose ``mode`` attribute names the selected
        sub-command; "service" is the only one defined.
    """
    parser = argparse.ArgumentParser(description="anonymizer")
    subparsers = parser.add_subparsers(dest="mode")
    # Set as an attribute because add_subparsers() only accepts the
    # ``required`` keyword from Python 3.7 on, and the image runs 3.6.
    subparsers.required = True
    subparsers.add_parser("service", help="Run as a service")
    return parser.parse_args(argv)
def main():
    """Start whichever mode was selected on the command line."""
    mode = get_args().mode
    # Mode name -> zero-argument callable that launches that mode.
    handlers = {
        "service": lambda: nlp_ws.NLPService.main(Worker),
    }
    handler = handlers.get(mode)
    # An unknown mode is silently ignored.
    if handler is not None:
        handler()


if __name__ == "__main__":
    main()
nlp-ws
\ No newline at end of file
"""Implementation of anonymizer functionality."""
import re
class Anonymizer:
    """Class used to edit sentences based on options."""

    def __init__(self, task_options):
        """Remember the anonymization method requested by the task.

        Args:
            task_options: Mapping of task options. Its 'method' entry is
                read and defaults to 'delete' when the key is absent.
        """
        self.method = task_options.get('method', 'delete')

    def process(self, ctag=None):
        """Placeholder for the anonymization step (not implemented yet).

        The skeleton compared an undefined name ``ctag`` inside an ``if``
        without a body, which made the whole module fail to import. The
        value is now an optional argument and the branch is a no-op.

        Args:
            ctag: Value compared against 'ign'. Presumably the tag of the
                token being inspected -- TODO confirm once the tagger
                output format is decided.

        Returns:
            None; nothing is modified yet.
        """
        if ctag == 'ign':
            # TODO: check whether this is a nickname, and then an e-mail.
            # TODO: check whether this is a proper name such as "mBank"?
            #       (undecided)
            pass
"""Implementation of nlp_worker."""
import logging
import nlp_ws
from src.anonymizer import Anonymizer
_log = logging.getLogger(__name__)
class Worker(nlp_ws.NLPWorker):
    """nlp_worker implementation backing the anonymizer service."""

    @classmethod
    def static_init(cls, config):
        """Run the one-off, class-level initialisation."""
        print("siema")

    def process(self, input_file, task_options, output_file):
        """Anonymize the text found in input_file.

        Both files are opened as UTF-8 text. At this stage the method only
        builds an Anonymizer, opens the input for reading and the output
        for writing, and prints a placeholder message.

        Options:
        method - 'delete'/'tag'/'pseudo' - 'delete' deletes selected tokens,
            'tag' replaces selected tokens with arbitrary tags, 'pseudo'
            replaces selected tokens with a random token (the selection
            criteria for that token are not specified yet).
        """
        anon = Anonymizer(task_options)
        # Separate handle names keep the path arguments from being
        # overwritten by the opened file objects.
        with open(input_file, 'rt', encoding='utf-8') as source, \
                open(output_file, 'wt', encoding='utf-8') as target:
            print("elo")
# Lint-only tox setup: one environment for flake8, one for pydocstyle.
[tox]
envlist = pep8,docstyle
# No package is built; the environments only run linters on the sources.
skipsdist = True

[testenv:pep8]
# Multi-line values must be indented to count as continuation lines.
deps =
    flake8
basepython = python3
commands =
    flake8 {posargs}

[testenv:docstyle]
deps =
    pydocstyle
basepython = python3
commands =
    pydocstyle --verbose {posargs}

[flake8]
# W504 skipped because it is overeager and unnecessary
ignore = W504
show-source = True
exclude = .git,.venv,.tox,dist,doc,*egg,build,venv
# NOTE(review): import-order-style is an option of the flake8-import-order
# plugin, which is not listed in the pep8 deps -- confirm whether the plugin
# should be installed for this setting to take effect.
import-order-style = pep8
max-line-length = 80

[pydocstyle]
# D104 Missing docstring in public package
# D203 1 blank line required before class docstring
# D213 Multi-line docstring summary should start at the second line
# D214 Section is over-indented
# D215 Section underline is over-indented
# D401 First line should be in imperative mood; try rephrasing
# D405 Section name should be properly capitalized
# D406 Section name should end with a newline
# D407 Missing dashed underline after section
# D408 Section underline should be in the line following the section's name
# D409 Section underline should match the length of its name
# D410 Missing blank line after section
# D411 Missing blank line before section
ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411
match-dir = ^(?!\.tox|venv).*
match = ^(?!setup).*\.py
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment