"""Implementation of nlp_worker."""
import logging
import nlp_ws
from hydra import initialize, compose
from hydra.utils import instantiate
# Module-level logger, named after this module.
_log = logging.getLogger(__name__)
class Worker(nlp_ws.NLPWorker):
    """Implements nlp_worker for anonymizer service."""

    def __init__(self) -> None:
        """Create a worker with no pipeline; one is built lazily per task."""
        # Overrides used to build the currently cached pipeline (None until
        # the first task arrives).
        self._last_config = None
        self._pipeline = None
        super().__init__()

    def _prepare_pipeline(self, task_options):
        """Return the pipeline matching ``task_options``, rebuilding if needed.

        The pipeline is composed from the hydra configuration in ``./config``
        with the ``language`` and ``replacers`` groups overridden by the
        task's ``language`` (default ``"pl"``) and ``method`` (default
        ``"tag"``) options. The instantiated pipeline is cached and reused
        as long as consecutive tasks request the same overrides.
        """
        task_options = task_options or {}
        language = task_options.get("language", "pl")
        replace_method = task_options.get("method", "tag")
        overrides = (
            "language=" + language,
            "replacers=" + replace_method,
        )

        if self._pipeline is None or self._last_config != overrides:
            with initialize(config_path="./config"):
                cfg = compose(config_name="config", overrides=list(overrides))
                self._pipeline = instantiate(cfg["pipeline"])
            # Remember what the cached pipeline was built from; without this
            # the pipeline would be re-instantiated on every task.
            self._last_config = overrides

        return self._pipeline

    def process(self, input_file, task_options, output_file):
        """Anonymizes input text.

        It is assumed input_file is encoded in UTF-8.

        Options:
        method - 'delete'/'tag'/'pseudo' - 'delete' deletes selected tokens,
            'tag' replaces selected tokens with arbitrary tags, 'pseudo'
            replaces selected tokens with a random token.
        language - 'pl' - language of the input text. As of now only Polish
            is supported.
        """
        # NOTE(review): the original description of 'pseudo' was cut off
        # mid-sentence ("... a random token that"); confirm the full wording.
        pipeline = self._prepare_pipeline(task_options)

        # Run the pipeline before opening the output so that a failed run
        # does not leave behind an empty, truncated output file.
        result = pipeline.run(input_file)
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(result)