Skip to content
Snippets Groups Projects
Select Git revision
  • a87187dbaba8b841128972edd9a2cb7b175ce88f
  • main default protected
  • change_data_model
  • feature/add_auth_asr_service
  • fix/incorrect_import
  • feature/change_registry_clarin
  • feature/add_base_asr_service
  • feature/add_poetry
  • feature/add_word_ids
  • feature/add_sziszapangma
10 results

import_from_google_responses.py

Blame
  • speller2_worker.py 1.35 KiB
    """Implementation of nlp_worker."""
    import io
    import json
    import logging
    
    import nlp_ws
    from autocorrect import Speller
    
    # Module-level logger named after this module; used by the worker class below.
    _log = logging.getLogger(__name__)
    
    
    class Speller2Worker(nlp_ws.NLPWorker):
        """NLP worker that spell-corrects a text file line by line.

        The ``autocorrect`` speller is created in :meth:`static_init` and stored
        on the class as ``_model``; :meth:`process` reads it from there.
        """

        @classmethod
        def static_init(cls, config):
            """Create the speller for language code ``'pl'`` and store it on the class.

            Args:
                config: Worker configuration passed in by the caller; this
                    method does not read it.
            """
            _log.info("Worker started loading static models ")
            cls._model = Speller('pl')
            _log.info("Worker finished loading static models ")

        def process(self, input_file, task_options, output_file):
            """Spell-correct ``input_file`` and write the result to ``output_file``.

            The input is read as UTF-8, split on ``'\\n'``, each line is passed
            through the speller, and the corrected lines are joined with
            ``'\\n'`` and written as UTF-8.

            Args:
                input_file: Path of the text file to correct.
                task_options: Task options passed in by the caller; this
                    method does not read them.
                output_file: Path the corrected text is written to
                    (overwritten if it exists).
            """
            _log.info("Processing")

            model = self._model

            # Split on '\n' (not splitlines) so empty lines and a trailing
            # newline survive the split/join round trip unchanged.
            lines = self._read_file(input_file).split('\n')
            corrected_data = [model(line) for line in lines]

            with open(output_file, 'w', encoding='utf-8') as f:
                f.write('\n'.join(corrected_data))

        @classmethod
        def _read_file(cls, input_path):
            """Return the whole content of ``input_path`` decoded as UTF-8.

            Args:
                input_path: Path of the file to read.

            Returns:
                The file content as a single string.
            """
            with open(input_path, 'r', encoding='utf-8') as f:
                return f.read()