Commit 90b0d778 authored by Bartłomiej Koptyra

Program skeleton.

parent dd9d97f6
# Image for the CLARIN-PL anonymizer worker service.
FROM clarinpl/python:3.6

WORKDIR /home/worker

# Install dependencies before copying the sources so this layer stays cached
# until requirements.txt itself changes.
COPY ./requirements.txt .
RUN python3.6 -m pip install --no-cache-dir -r requirements.txt

# Copy the whole build context; this already includes ./src, so no separate
# COPY for it is needed.
COPY ./ .

# Exec form. The script path must not be empty, otherwise the interpreter
# exits immediately because it cannot open the file.
# NOTE(review): main.py is assumed to be the entry script that defines the
# "service" sub-command — confirm the file name.
CMD ["python3.6", "main.py", "service"]
\ No newline at end of file
# Configuration for the anonymizer worker service.
# NOTE(review): no [section] headers are visible in this view; INI parsers
# normally require them — confirm against the full file.
tool = anonymizer
root = /samba/requests/
rabbit_host = rabbitmq
rabbit_user = test
# NOTE(review): the password is stored in plain text; presumably a
# development default — confirm before deploying.
rabbit_password = test
queue_prefix = nlp_
workers_number = 1
port = 9998
# Presumably log levels (service-wide and for the __main__ logger) — TODO confirm.
local_log_level = INFO
__main__ = INFO
# Compose definition for the clarin_anonymizer container.
# NOTE(review): the parent keys (the services mapping and the keys that own
# the two lists below) are not visible in this view — confirm against the
# full file.
version: '3'
container_name: clarin_anonymizer
build: ./
working_dir: /home/worker
# Presumably the container start command (interpreter plus "service" mode) —
# TODO confirm which key owns this list.
- python3.6
- service
# host:container bind mounts — presumably listed under a volumes key.
- '/samba:/samba'
- './config.ini:/home/worker/config.ini'
- './src:/home/worker/src'
# NOTE(review): this entry looks truncated (no container path after the colon).
- './'
"""Implementation of anonymizer service."""
import argparse
import nlp_ws
from src.worker import Worker
def get_args():
    """Parse the command line arguments.

    Exactly one sub-command is required; its name is stored in ``mode``.
    ``service`` is the only sub-command defined.

    Returns:
        argparse.Namespace: parsed arguments with a ``mode`` attribute.
    """
    parser = argparse.ArgumentParser(description="anonymizer")
    subparsers = parser.add_subparsers(dest="mode")
    # Set ``required`` as an attribute: the ``required`` keyword of
    # add_subparsers() is not available on Python 3.6.
    subparsers.required = True
    subparsers.add_parser("service", help="Run as a service")
    return parser.parse_args()
def main():
    """Run the program in the mode selected on the command line."""
    args = get_args()
    # Dispatch table: sub-command name -> callable that starts that mode.
    generators = {
        "service": lambda: nlp_ws.NLPService.main(Worker),
    }
    # A mode without an entry falls back to a no-op.
    gen_fn = generators.get(args.mode, lambda: None)
    gen_fn()
# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
\ No newline at end of file
"""Implementation of anonymizer functionality."""
import re
# Anonymizer: edits sentences according to the supplied task options.
# NOTE(review): indentation and some lines appear to be missing from this
# view; `ctag` used in process() is not defined anywhere in the visible code.
class Anonymizer:
"""Class used to edit sentences based on options."""
def __init__(self, task_options):
# Anonymization method read from the task options; defaults to 'delete'.
self.method = task_options.get('method', 'delete')
def process(self):
if ctag == 'ign':
# check whether it is a nickname, and then whether it is an email
# check whether it is a proper name such as mBank? not sure
"""Implementation of nlp_worker."""
import logging
import nlp_ws
from src.anonymizer import Anonymizer
_log = logging.getLogger(__name__)
# Worker: nlp_ws.NLPWorker subclass for the anonymizer service.
# process() builds an Anonymizer from task_options, then opens input_file for
# reading and output_file for writing, both as UTF-8 text.
# NOTE(review): the process() docstring is never closed and the body of the
# innermost `with` is not visible — this view looks truncated; confirm
# against the full file.
# NOTE(review): the `as input_file` / `as output_file` targets rebind the
# parameters of the same name, so the original paths are no longer reachable
# inside the `with` blocks.
class Worker(nlp_ws.NLPWorker):
"""Implements nlp_worker for anonymizer service."""
def static_init(cls, config):
"""One time static initialisation."""
def process(self, input_file, task_options, output_file):
"""Anonymizes input text.
It is assumed input_file is encoded in UTF-8.
method - 'delete'/'tag'/'pseudo' - 'delete' deletes selected tokens,
'tag' replaces selected tokens with arbitrary tags, 'pseudo'
replaces selected tokens with a random token that
anon = Anonymizer(task_options)
with open(input_file, 'rt', encoding='utf-8') as input_file:
with open(output_file, 'wt', encoding='utf-8') as output_file:
# tox configuration with two environments: pep8 and docstyle.
# NOTE(review): no [section] headers are visible in this view; the repeated
# deps/basepython/commands/ignore keys presumably belong to separate
# sections — confirm against the full file.
envlist = pep8,docstyle
skipsdist = True
# Presumably the pep8 environment (runs flake8).
# NOTE(review): the deps value is not visible here.
deps =
basepython = python3
commands =
flake8 {posargs}
# Presumably the docstyle environment (runs pydocstyle).
# NOTE(review): the deps value is not visible here.
deps =
basepython = python3
commands =
pydocstyle --verbose {posargs}
# Presumably flake8 settings.
# W504 skipped because it is overeager and unnecessary
ignore = W504
show-source = True
exclude = .git,.venv,.tox,dist,doc,*egg,build,venv
import-order-style = pep8
max-line-length = 80
# Presumably pydocstyle settings; the codes below are the ignored checks.
# D104 Missing docstring in public package
# D203 1 blank line required before class docstring
# D213 Multi-line docstring summary should start at the second line
# D214 Section is over-indented
# D215 Section underline is over-indented
# D401 First line should be in imperative mood; try rephrasing
# D405 Section name should be properly capitalized
# D406 Section name should end with a newline
# D407 Missing dashed underline after section
# D408 Section underline should be in the line following the section’s name
# D409 Section underline should match the length of its name
# D410 Missing blank line after section
# D411 Missing blank line before section
ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411
match-dir = ^(?!\.tox|venv).*
match = ^(?!setup).*\.py
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment