diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8c8379a75078fbeadc90e296909446af09255fa --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,32 @@ +image: 'clarinpl/python:3.6' +cache: + paths: + - .tox +stages: + - check_style + - build +before_script: + - pip install tox==2.9.1 +pep8: + stage: check_style + script: + - tox -v -e pep8 +docstyle: + stage: check_style + script: + - tox -v -e docstyle +build_image: + stage: build + image: 'docker:18.09.7' + only: + - master + services: + - 'docker:18.09.7-dind' + before_script: + - '' + script: + - docker build -t clarinpl/maca . + - echo $DOCKER_PASSWORD > pass.txt + - cat pass.txt | docker login --username $DOCKER_USERNAME --password-stdin + - rm pass.txt + - docker push clarinpl/maca diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..c035aa47bce7bc2a8fb9e8e1dfe7489096cfb221 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +FROM clarinpl/python:3.6 + +RUN apt-get update && apt-get install -y \ + toki \ + corpus2-python3.6 \ + morphanalyser + +WORKDIR /home/worker +COPY requirements.txt . +COPY main.py . +COPY ./src ./src +RUN python3.6 -m pip install -r requirements.txt +RUN cp -a /usr/share/maca/. /home/worker +CMD ["python", "main.py", "service"] diff --git a/config.ini b/config.ini index b1c1c1e72c7b30e8d967b7ba6f13e51a2afb05be..1a40909ec6ea71337bba8ddf6e985477fb62feb9 100644 --- a/config.ini +++ b/config.ini @@ -1,22 +1,17 @@ -; PLIK KONFIGURACYJNY WORKERA -; Plik zawiera konfigurację zarówno Api usługi sieciowej jak i narzędzia. 
-; -; Autor: Tomasz Walkowiak -; email: tomasz.walkowiak@pwr.edu.pl - -; --------- CZĘŚĆ DLA Serwisu --------- [service] -#root = /mnt2/requests/ -root = /samba/requests/ tool = maca -rabbit_host =10.17.0.85 -rabbit_user =clarin -rabbit_password =clarin123 -; --------- CZĘŚĆ DLA Narzedzia --------- +root = /samba/requests/ +rabbit_host = rabbitmq +rabbit_user = test +rabbit_password = test +queue_prefix =nlp_ + [tool] -workers_number = 1 +workers_number = 2 +config_path = /usr/share/maca/ [logging] port = 9995 local_log_level = INFO + diff --git a/maca_worker.py b/maca_worker.py deleted file mode 100644 index 5e39aaa43ca7dc7518c0054ea50768f253f9818e..0000000000000000000000000000000000000000 --- a/maca_worker.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - - -import nlp_ws -import logging - -import maca -import corpus2 - -def sentences(reader): - """Yields subsequent sentences from a reader.""" - while True: - sentence = reader.get_next_sentence() - if not sentence: - break - yield sentence - -def chunks(reader): - """Yields subsequent sentences from a reader.""" - while True: - chunk = reader.get_next_chunk() - if not chunk: - break - yield chunk - - -_log = logging.getLogger(__name__) - -class MacaWorker(nlp_ws.NLPWorker): - - @classmethod - def static_init(cls, config): - _log.info( "Worker started loading models %s","AS" ) - cls.configtool = config['tool']; - return - - def init(self): - _log.info( "Worker started loading models" ) - - def process(self, inputFile, taskOptions, outputFile): - maca_config='morfeusz2-nkjp' - if 'morfeusz2' in taskOptions: - if not taskOptions['morfeusz2']: - maca_config='morfeusz-nkjp-official' - - _log.info( "Config %s",maca_config) - reader = maca.PlainTextReader.create_file_reader(str(inputFile), maca_config) - writer = corpus2.TokenWriter.create_path_writer("ccl",str(outputFile),reader.tagset()) - for chunk in chunks(reader): - writer.write_chunk(chunk) - - -if __name__ == '__main__': - 
nlp_ws.NLPService.main(MacaWorker) - diff --git a/main.py b/main.py new file mode 100644 index 0000000000000000000000000000000000000000..490b3f80d76021a976f049120a41ebd547c1cae0 --- /dev/null +++ b/main.py @@ -0,0 +1,33 @@ +"""Implementation of maca service.""" +import argparse + +import nlp_ws + +from src.worker import Worker + + +def get_args(): + """Gets command line arguments.""" + parser = argparse.ArgumentParser(description="maca implementation") + + subparsers = parser.add_subparsers(dest="algorithm") + subparsers.required = True + + subparsers.add_parser("service", help="Run as a service") + return parser.parse_args() + + +def main(): + """Runs the program.""" + args = get_args() + + generators = { + "service": lambda: nlp_ws.NLPService.main(Worker), + } + + gen_fn = generators.get(args.algorithm, lambda: None) + gen_fn() + + +if __name__ == "__main__": + main() diff --git a/old/maca_service.py b/old/maca_service.py deleted file mode 100644 index 25ef62cc6ffc2ccbc2d62d08c5a1a8300ef99883..0000000000000000000000000000000000000000 --- a/old/maca_service.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -from nlp_service import Service, create_service_option_parser -import shutil -from subprocess import call - -import maca -import corpus2 - - -def sentences(reader): - """Yields subsequent sentences from a reader.""" - while True: - sentence = reader.get_next_sentence() - if not sentence: - break - yield sentence - -def chunks(reader): - """Yields subsequent sentences from a reader.""" - while True: - chunk = reader.get_next_chunk() - if not chunk: - break - yield chunk - - -class MacaService(Service): - def __init__(self, *args, **kwargs): - self.maca_config='morfeusz2-nkjp' - super(MacaService, self).__init__(*args, **kwargs) - - def process(self, inputFile, taskOptions, outputFile): - reader = maca.PlainTextReader.create_file_reader(inputFile, self.maca_config) - writer = 
corpus2.TokenWriter.create_path_writer("ccl",outputFile,reader.tagset()) - for chunk in chunks(reader): - writer.write_chunk(chunk) - #shutil.move(inputFile, outputFile) - - -def test(service): - service.process('test.txt',0,'out.ccl') - -if __name__ == '__main__': - parser = create_service_option_parser() - args = parser.parse_args() - config_path = args.config_path - logfile_path = args.log_file - logging_lvl = args.logging_lvl - run_as_daemon = args.daemon - - service = MacaService(config_path, logfile_path, logging_lvl, run_as_daemon) - #test(service); - service.run() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..dae0fc562943ab4df118a6eb757e6c333761c3fb --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +nlp-ws \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/worker.py b/src/worker.py new file mode 100644 index 0000000000000000000000000000000000000000..7dd8b2f943e3639ed91e1de3c3363f6ff50a3974 --- /dev/null +++ b/src/worker.py @@ -0,0 +1,64 @@ +"""Worker for the maca service.""" +import logging + +import corpus2 +import maca +import nlp_ws + + +def sentences(reader): + """Yields subsequent sentences from a reader.""" + while True: + sentence = reader.get_next_sentence() + if not sentence: + break + yield sentence + + +def chunks(reader): + """Yields subsequent chunks from a reader.""" + while True: + chunk = reader.get_next_chunk() + if not chunk: + break + yield chunk + + +_log = logging.getLogger(__name__) + + +class Worker(nlp_ws.NLPWorker): + """Implements nlp_worker for maca service.""" + + @classmethod + def static_init(cls, config): + """Static_init for Worker.""" + _log.info("Worker started loading models %s", "AS") + cls.configtool = config["tool"] + return + + def init(self): + """Initialize worker.""" + _log.info("Worker 
started loading models") + + def process(self, input_file, task_options, output_file): + """Runs a single nlp_task.""" + maca_config = "morfeusz2-nkjp" + if "morfeusz2" in task_options: + if not task_options["morfeusz2"]: + maca_config = "morfeusz-nkjp-official" + + _log.info("Config %s", maca_config) + reader = maca.PlainTextReader.create_file_reader( + str(input_file), maca_config + ) + writer = corpus2.TokenWriter.create_path_writer( + "ccl", str( + output_file), reader.tagset() + ) + for chunk in chunks(reader): + writer.write_chunk(chunk) + + +if __name__ == "__main__": + nlp_ws.NLPService.main(Worker) diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000000000000000000000000000000000000..5cedaca7f27b2c2ae0027a2a1f55eff9541a5dc8 --- /dev/null +++ b/tox.ini @@ -0,0 +1,45 @@ +[tox] +envlist = pep8,docstyle +skipsdist = True + +[testenv:pep8] +deps = + flake8 +basepython = python3 +commands = + flake8 {posargs} + +[testenv:docstyle] +deps = + pydocstyle +basepython = python3 +commands = + pydocstyle --verbose {posargs} + +[flake8] +# W503 line break before binary operator +# W504 skipped because it is overeager and unnecessary +ignore = W503,W504 +show-source = True +exclude = .git,.venv,.tox,dist,doc,*egg,build,venv +import-order-style = pep8 +max-line-length = 80 + + +[pydocstyle] +# D104 Missing docstring in public package +# D203 1 blank line required before class docstring +# D213 Multi-line docstring summary should start at the second line +# D214 Section is over-indented +# D215 Section underline is over-indented +# D401 First line should be in imperative mood; try rephrasing +# D405 Section name should be properly capitalized +# D406 Section name should end with a newline +# D407 Missing dashed underline after section +# D408 Section underline should be in the line following the section’s name +# D409 Section underline should match the length of its name +# D410 Missing blank line after section +# D411 Missing blank line before section +ignore 
= D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411 +match-dir = ^(?!\.tox|venv).* +match = ^(?!setup).*\.py