"""Implementation of nlp_worker.""" import logging import nlp_ws from hydra import initialize, compose from hydra.utils import instantiate _log = logging.getLogger(__name__) class Worker(nlp_ws.NLPWorker): """Implements nlp_worker for anonymizer service.""" def __init__(self) -> None: self._last_config = None self._pipeline = None super().__init__() def _prepare_pipeline(self, task_options): language = task_options.get("language", "pl") replace_method = task_options.get("method", "tag") overrides = [ "language=" + language, "replacers=" + replace_method, ] config_hash = hash(tuple(overrides)) if self._last_config != config_hash: with initialize(config_path="./config"): cfg = compose(config_name="config", overrides=overrides) self._pipeline = instantiate(cfg["pipeline"]) return self._pipeline def process(self, input_file, task_options, output_file): """Anonymizes input text. It is assumed input_file is encoded in UTF-8. Options: method - 'delete'/'tag'/'pseudo' - 'delete' deletes selected tokens, 'tag' replaces selected tokens with arbitrary tags, 'pseudo' replaces selected tokens with a random token that language - 'pl' - language of the input text. As of now only Polish is supported. """ pipeline = self._prepare_pipeline(task_options) with open(output_file, "w", encoding="utf-8") as f: result = pipeline.run(input_file) f.write(result)