Newer
Older
Bartlomiej Koptyra
committed
"""Implementation of nlp_worker."""
import logging
import nlp_ws
from src.anonymizers.polish_anonymizer import PolishAnonymizer
from src.anonymizers.english_anonymizer import EnglishAnonymizer
from src.anonymizers.russian_anonymizer import RussianAnonymizer
Bartlomiej Koptyra
committed
# Module-level logger, named after this module.
_log = logging.getLogger(__name__)
class Worker(nlp_ws.NLPWorker):
    """Implements nlp_worker for anonymizer service."""

    def process(self, input_file, task_options, output_file):
        """Anonymizes input text.

        It is assumed input_file is encoded in UTF-8.

        The anonymizer class is chosen from the 'language' option, built
        with the full task_options, and then asked to process input_file
        into output_file.

        Options:
        language - 'pl'/'en'/'ru' - selects the anonymizer. Defaults to
            'pl'. Any other value falls back to the Polish anonymizer and
            a warning is logged.
        method - 'delete'/'tag'/'pseudo' - 'delete' deletes selected tokens,
            'tag' replaces selected tokens with arbitrary tags, 'pseudo'
            replaces selected tokens with random substitute tokens.
            NOTE(review): this option is not read here; it is passed through
            to the anonymizer inside task_options - confirm the exact
            semantics against the anonymizer classes.
        """
        # Built per call so the module-level names are resolved at call time.
        anonymizers = {
            'pl': PolishAnonymizer,
            'en': EnglishAnonymizer,
            'ru': RussianAnonymizer,
        }
        lang = task_options.get('language', 'pl')
        anonymizer_cls = anonymizers.get(lang)
        if anonymizer_cls is None:
            # Keep the historical fallback to Polish, but make it visible
            # instead of silently ignoring the requested language.
            _log.warning(
                "Unsupported language %r, falling back to 'pl'.", lang
            )
            anonymizer_cls = PolishAnonymizer
        anon = anonymizer_cls(task_options)
        anon.process(input_file, output_file)