Skip to content
Snippets Groups Projects
worker.py 1.13 KiB
Newer Older
"""Implementation of nlp_worker."""
import logging

import nlp_ws


from src.anonymizers.polish_anonymizer import PolishAnonymizer
from src.anonymizers.english_anonymizer import EnglishAnonymizer
from src.anonymizers.russian_anonymizer import RussianAnonymizer

# Module-level logger, named after this module's import path.
_log = logging.getLogger(__name__)


class Worker(nlp_ws.NLPWorker):
    """Implements nlp_worker for anonymizer service.

    Picks a language-specific anonymizer based on the task options and
    delegates the actual processing to it.
    """

    def process(self, input_file, task_options, output_file):
        """Anonymizes input text.

        It is assumed input_file is encoded in UTF-8.

        Options:
        language - 'pl'/'en'/'ru' - selects the anonymizer to use; defaults
                to 'pl'. Any other value also falls back to the Polish
                anonymizer, and a warning is logged.
        method - 'delete'/'tag'/'pseudo' - 'delete' deletes selected tokens,
                'tag' replaces selected tokens with arbitrary tags, 'pseudo'
                replaces selected tokens with a random token.
                NOTE(review): the original description of 'pseudo' was cut
                off mid-sentence - confirm what the random token must
                satisfy. This option is not read here; the whole
                task_options mapping is handed to the anonymizer.

        Args:
            input_file: forwarded unchanged to the anonymizer's process().
            task_options: mapping of option names to values (see Options).
            output_file: forwarded unchanged to the anonymizer's process().
        """
        default_lang = 'pl'
        anonymizers = {
            'pl': PolishAnonymizer,
            'en': EnglishAnonymizer,
            'ru': RussianAnonymizer,
        }

        lang = task_options.get('language', default_lang)
        anonymizer_cls = anonymizers.get(lang)
        if anonymizer_cls is None:
            # Keep the historical best-effort fallback, but make it visible
            # so a misconfigured task does not go unnoticed.
            _log.warning(
                "Unsupported language %r, falling back to %r.",
                lang, default_lang,
            )
            anonymizer_cls = anonymizers[default_lang]

        anon = anonymizer_cls(task_options)
        anon.process(input_file, output_file)