# speller2_worker.py

"""Implementation of nlp_worker."""
import logging

import nlp_ws
from autocorrect import Speller

_log = logging.getLogger(__name__)


class Speller2Worker(nlp_ws.NLPWorker):
    """Implements nlp_worker for text error correction service.

    One ``Speller`` instance per supported language is created once in
    :meth:`static_init` and stored on the class; :meth:`process` picks
    the instance matching the task's ``lang`` option and applies it to
    the input file line by line.
    """

    # Language codes for which a Speller is loaded in static_init.
    _LANGUAGES = ('pl', 'ru', 'en', 'uk')
    # Language used when the task options carry no 'lang' entry.
    _DEFAULT_LANGUAGE = 'pl'

    @classmethod
    def static_init(cls, config):
        """One time static initialisation.

        Builds a ``Speller`` for every code in ``_LANGUAGES`` and stores
        the mapping ``language code -> Speller`` in ``cls._model``.

        :param config: worker configuration; not used by this method.
        """
        _log.info("Worker started loading static models ")
        cls._model = {lang: Speller(lang) for lang in cls._LANGUAGES}
        _log.info("Worker finished loading static models ")

    def process(self, input_file, task_options, output_file):
        """Correct the text of ``input_file`` and write it to ``output_file``.

        :param input_file: path of the UTF-8 text file to correct.
        :param task_options: mapping of task options; the ``'lang'`` entry
            selects the language and defaults to ``'pl'`` when absent.
        :param output_file: path the corrected UTF-8 text is written to.
        :raises ValueError: if the requested language has no loaded model.
        """
        _log.info("Processing")
        language = task_options.get('lang', self._DEFAULT_LANGUAGE)
        model = self._model.get(language)
        if model is None:
            # Fail before touching any file, with a message that names the
            # offending option, instead of a later "'NoneType' object is
            # not callable" raised from inside the correction loop.
            raise ValueError(
                "Unsupported language {!r}; supported languages: {}".format(
                    language, ', '.join(sorted(self._model))))

        data = self._read_file(input_file)

        # The speller is applied per line and the lines are re-joined with
        # the same separator, so the line layout of the input is kept.
        corrected_data = [model(line) for line in data.split('\n')]

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write('\n'.join(corrected_data))

    @classmethod
    def _read_file(cls, input_path):
        """Return the whole content of ``input_path`` decoded as UTF-8.

        :param input_path: path of the text file to read.
        :return: file content as a single string.
        """
        with open(input_path, 'r', encoding='utf-8') as f:
            return f.read()