"""Implementation of nlp_worker.""" import logging import nlp_ws from src.anonymizers.polish_anonymizer import PolishAnonymizer from src.anonymizers.english_anonymizer import EnglishAnonymizer from src.anonymizers.russian_anonymizer import RussianAnonymizer _log = logging.getLogger(__name__) class Worker(nlp_ws.NLPWorker): """Implements nlp_worker for anonymizer service.""" def process(self, input_file, task_options, output_file): """Anonymizes input text. It is assumed input_file is encoded in UTF-8. Options: method - 'delete'/'tag'/'pseudo' - 'delete' deletes selected tokens, 'tag' replaces selected tokens with arbitrary tags, 'pseudo' replaces selected tokens with a random token that """ lang = task_options.get('language', 'pl') anonymizers = {'pl': PolishAnonymizer, 'en': EnglishAnonymizer, 'ru': RussianAnonymizer } anon = anonymizers.get(lang, PolishAnonymizer)(task_options) anon.process(input_file, output_file)