Skip to content
Snippets Groups Projects
Select Git revision
  • 1aa27aea5a2817c3e529a18a022b93cd13ecb59b
  • master default protected
  • developer
3 results

speller2_worker.py

Blame
  • speller2_worker.py 2.18 KiB
    """Implementation of nlp_worker."""
    import logging
    
    import nlp_ws
    from autocorrect import Speller, Word
    
    _log = logging.getLogger(__name__)
    
    
    class SpellerFixed(Speller):
        """Speller that never runs the double-typo search on long words.

        The lookup chain in ``get_candidates`` is: the word itself, then its
        single typos, then (only for short words when ``fast`` is off) its
        double typos, and finally the unchanged word as a last resort.
        """

        # Words longer than this skip ``Word.double_typos()``.
        # NOTE(review): presumably chosen because double-typo generation gets
        # too expensive for long words -- confirm the exact threshold.
        _max_double_typo_len = 15

        def __init__(self, lang="en", *args, **kwargs):
            """Initialise the underlying speller.

            Args:
                lang: Language code passed to ``Speller``.
                *args: Further positional arguments forwarded unchanged to
                    ``Speller.__init__``.
                **kwargs: Further keyword arguments forwarded unchanged to
                    ``Speller.__init__``.
            """
            super().__init__(lang, *args, **kwargs)

        def get_candidates(self, word):
            """Return a list of ``(score, candidate)`` tuples for ``word``.

            Args:
                word: The word to find correction candidates for.

            Returns:
                One tuple per candidate; the score is the candidate's value
                in ``self.nlp_data`` (0 when the candidate is not a key).
            """
            w = Word(word, self.lang, self.only_replacements)
            skip_double_typos = (
                self.fast or len(word) > self._max_double_typo_len
            )

            # Each step is evaluated only when the previous one found nothing.
            candidates = self.existing([word]) or self.existing(w.typos())
            if not candidates and not skip_double_typos:
                candidates = self.existing(w.double_typos())
            if not candidates:
                # Nothing known matches: keep the word as its own candidate.
                candidates = [word]

            return [(self.nlp_data.get(c, 0), c) for c in candidates]
    
    
    class Speller2Worker(nlp_ws.NLPWorker):
        """Implements nlp_worker for text error correction service."""

        @classmethod
        def static_init(cls, config):
            """Build one ``SpellerFixed`` per supported language.

            The spellers are stored on the class in ``cls._model``, keyed by
            language code.

            Args:
                config: Worker configuration; not used by this method.
            """
            _log.info("Worker started loading static models ")
            cls._model = {lang: SpellerFixed(lang)
                          for lang in ('pl', 'ru', 'en', 'uk')}
            _log.info("Worker finished loading static models ")

        def process(self, input_file, task_options, output_file):
            """Correct ``input_file`` line by line and write ``output_file``.

            Args:
                input_file: Path of the UTF-8 text file to correct.
                task_options: Mapping of task options; ``'lang'`` selects the
                    speller and defaults to ``'pl'``.
                output_file: Path of the UTF-8 text file to write. Every
                    input line produces exactly one output line.

            Raises:
                ValueError: If no speller is loaded for the requested
                    language.
            """
            language = task_options.get('lang', 'pl')
            model = self._model.get(language)
            if model is None:
                # Fail before touching the output file, with a message that
                # names the problem instead of "'NoneType' is not callable".
                raise ValueError(
                    "Unsupported language %r; supported languages: %s"
                    % (language, ", ".join(sorted(self._model)))
                )

            with open(input_file, 'r', encoding='utf-8') as f, \
                    open(output_file, 'w', encoding='utf-8') as f_out:
                # Iterate lazily instead of loading every line up front.
                for line in f:
                    # Lines read from a file keep their trailing newline;
                    # drop it so the "\n" written below does not leave a
                    # blank line after every corrected line.
                    corrected_data = model(line.rstrip("\n"))
                    f_out.write(corrected_data)
                    f_out.write("\n")

        @classmethod
        def _read_file(cls, input_path):
            """Return the whole content of ``input_path`` decoded as UTF-8.

            Args:
                input_path: Path of the text file to read.
            """
            with open(input_path, 'r', encoding='utf-8') as f:
                content = f.read()

            return content