# speller2_worker.py

"""Implementation of nlp_worker."""
import io
import json
import logging

import nlp_ws
from autocorrect import Speller

_log = logging.getLogger(__name__)


class Speller2Worker(nlp_ws.NLPWorker):
    """Implements nlp_worker for text error correction service.

    The speller model is created once in ``static_init`` and stored on the
    class as ``_model``; ``process`` applies it to every line of the input.
    """

    @classmethod
    def static_init(cls, config):
        """One time static initialisation.

        Builds ``Speller('pl')`` and stores it as the class attribute
        ``_model`` so that ``process`` can reuse it.

        Args:
            config: Worker configuration; not read by this method.
        """
        _log.info("Worker started loading static models ")
        cls._model = Speller('pl')
        _log.info("Worker finished loading static models ")

    def process(self, input_file, task_options, output_file):
        """Correct the text in ``input_file`` and write it to ``output_file``.

        The input is split on ``'\\n'``, each piece is passed through the
        speller model separately, and the results are joined back with
        ``'\\n'`` so the line structure of the input is kept.

        Args:
            input_file: Path of the UTF-8 text file to correct.
            task_options: Task options; not read by this method.
            output_file: Path the corrected UTF-8 text is written to.
        """
        _log.info("Processing")

        model = self._model

        # Split once and reuse; split('\n') (not splitlines) keeps a trailing
        # empty piece so a final newline in the input survives the round trip.
        lines = self._read_file(input_file).split('\n')
        # Log only the size at debug level instead of printing the whole
        # document to stdout.
        _log.debug("Read %d line(s) from %s", len(lines), input_file)
        corrected_data = [model(line) for line in lines]

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write('\n'.join(corrected_data))

    @classmethod
    def _read_file(cls, input_path):
        """Return the whole content of ``input_path`` decoded as UTF-8.

        Args:
            input_path: Path of the text file to read.

        Returns:
            The file content as a single string.
        """
        with open(input_path, 'r', encoding='utf-8') as f:
            content = f.read()

        return content