# worker.py
"""Implementation of nlp_worker."""
import logging

import nlp_ws
# Hydra composes the configuration and instantiates the pipeline.
from hydra import initialize, compose
from hydra.utils import instantiate

_log = logging.getLogger(__name__)


class Worker(nlp_ws.NLPWorker):
    """Implements nlp_worker for anonymizer service."""

    def __init__(self) -> None:
        """Start with no cached pipeline; it is built on first use."""
        # Overrides tuple the cached pipeline was built from (None = no cache).
        self._last_config = None
        # Pipeline object instantiated from the Hydra config.
        self._pipeline = None
        super().__init__()

    def _prepare_pipeline(self, task_options):
        """Return a pipeline matching the task options, rebuilding if needed.

        The pipeline is cached and only re-instantiated when the requested
        configuration differs from the one used for the cached instance.

        Args:
            task_options: Mapping of task options. Reads ``language``
                (default ``"pl"``) and ``method`` (default ``"tag"``).

        Returns:
            The pipeline instantiated from the ``pipeline`` node of the
            composed Hydra config.
        """
        language = task_options.get("language", "pl")
        replace_method = task_options.get("method", "tag")

        overrides = [
            f"language={language}",
            f"replacers={replace_method}",
        ]

        # Compare the overrides themselves rather than their hash so that a
        # hash collision can never cause a stale pipeline to be reused.
        config_key = tuple(overrides)
        if self._last_config != config_key:
            with initialize(config_path="./config"):
                cfg = compose(config_name="config", overrides=overrides)
                self._pipeline = instantiate(cfg["pipeline"])
            # Record the key only after a successful build; previously it was
            # never stored, so the pipeline was rebuilt on every call.
            self._last_config = config_key

        return self._pipeline

    def process(self, input_file, task_options, output_file):
        """Anonymizes input text.

        It is assumed input_file is encoded in UTF-8.

        Options:
        method - 'delete'/'tag'/'pseudo' - 'delete' deletes selected tokens,
                'tag' replaces selected tokens with arbitrary tags, 'pseudo'
                replaces selected tokens with a random token.
        language - 'pl' - language of the input text. As of now only Polish
                is supported.
        """
        pipeline = self._prepare_pipeline(task_options)

        # Run the pipeline before opening the output file so that a failure
        # does not leave behind an empty or truncated result file.
        result = pipeline.run(input_file)

        with open(output_file, "w", encoding="utf-8") as f:
            f.write(result)