Merge branch 'dev' into 'master'

Refactored, added CI and a Dockerfile. See merge request !1

Merge branch 'dev' into 'master'
b335e78a · Mateusz Gniewkowski · 17188750 · 2291f739 · b335e78a · b335e78a
Commit b335e78a authored Sep 14, 2020 by Mateusz Gniewkowski
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
+# Default image for jobs that do not declare their own.
+image: 'clarinpl/python:3.6'
+# Keep the tox working directory between pipeline runs.
+cache:
+  paths:
+    - .tox
+# Style checks run first; the image build runs afterwards.
+stages:
+  - check_style
+  - build
+# Runs before each job's script unless the job overrides before_script.
+before_script:
+  - pip install tox==2.9.1
+# Runs the "pep8" tox environment (flake8, see tox.ini).
+pep8:
+  stage: check_style
+  script:
+    - tox -v -e pep8
+# Runs the "docstyle" tox environment (pydocstyle, see tox.ini).
+docstyle:
+  stage: check_style
+  script:
+    - tox -v -e docstyle
+# Builds the clarinpl/maca image and pushes it; runs only on master.
+build_image:
+  stage: build
+  image: 'docker:18.09.7'
+  only:
+    - master
+  services:
+    - 'docker:18.09.7-dind'
+  # Overrides the global before_script (the `pip install tox` step) for this
+  # job.
+  before_script:
+    - ''
+  script:
+    - docker build -t clarinpl/maca .
+    # Pipe the password straight into docker login so it is never written to
+    # a file in the workspace; the quotes stop the shell from word-splitting
+    # or glob-expanding the secret.
+    - echo "$DOCKER_PASSWORD" | docker login --username "$DOCKER_USERNAME" --password-stdin
+    - docker push clarinpl/maca
--- a/Dockerfile
+++ b/Dockerfile
+FROM clarinpl/python:3.6
+
+# Install the system packages the worker imports and remove the apt package
+# lists in the same layer so they do not end up in the image.
+RUN apt-get update && apt-get install -y \
+    toki \
+    corpus2-python3.6 \
+    morphanalyser \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /home/worker
+
+# Install Python dependencies before copying the sources so that a
+# source-only change reuses the cached dependency layer.
+COPY requirements.txt .
+RUN python3.6 -m pip install -r requirements.txt
+
+COPY main.py .
+COPY ./src ./src
+# Copy the contents of /usr/share/maca into the working directory.
+RUN cp -a /usr/share/maca/. /home/worker
+
+# Start with the same interpreter the dependencies were installed for.
+CMD ["python3.6", "main.py", "service"]
--- a/config.ini
+++ b/config.ini
-; PLIK KONFIGURACYJNY  WORKERA
-; Plik zawiera konfigurację zarówno Api usługi sieciowej jak i narzędzia.
-;
-;       Autor: Tomasz Walkowiak
-;       email: tomasz.walkowiak@pwr.edu.pl
-
-; --------- CZĘŚĆ DLA Serwisu ---------
 [service]
-#root = /mnt2/requests/
-root = /samba/requests/
 tool = maca
-rabbit_host =10.17.0.85
-rabbit_user =clarin
-rabbit_password =clarin123

-; --------- CZĘŚĆ DLA Narzedzia ---------
+; NOTE(review): the rabbit_* keys presumably configure the RabbitMQ
+; connection used by nlp_ws; test/test look like placeholder credentials -
+; confirm they are overridden in deployment.
+root = /samba/requests/
+rabbit_host = rabbitmq
+rabbit_user = test
+rabbit_password = test
+queue_prefix =nlp_
+
 [tool]
-workers_number = 1
+workers_number = 2
+; NOTE(review): config_path is not read by any code visible in this change
+; (the worker only stores the whole [tool] section); presumably nlp_ws or
+; maca consumes it - confirm.
+config_path = /usr/share/maca/

 [logging]
 port = 9995
 local_log_level = INFO
+
--- a/maca_worker.py
+++ b/maca_worker.py
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-
-import nlp_ws
-import logging
-
-import maca
-import corpus2
-
-def sentences(reader):
-	"""Yields subsequent sentences from a reader."""
-	while True:
-		sentence = reader.get_next_sentence()
-		if not sentence:
-			break
-		yield sentence
-
-def chunks(reader):
-	"""Yields subsequent sentences from a reader."""
-	while True:
-		chunk = reader.get_next_chunk()
-		if not chunk:
-			break
-		yield chunk
-
-
-_log = logging.getLogger(__name__)
-
-class MacaWorker(nlp_ws.NLPWorker):
-	
-	@classmethod
-	def static_init(cls, config):
-		_log.info( "Worker started loading models %s","AS" )
-		cls.configtool = config['tool'];
-		return
-
-	def init(self):
-		_log.info( "Worker started loading models" )
-			
-	def process(self, inputFile, taskOptions, outputFile):	
-		maca_config='morfeusz2-nkjp'
-		if 'morfeusz2' in taskOptions:
-			if not taskOptions['morfeusz2']:
-				maca_config='morfeusz-nkjp-official'
-			
-		_log.info( "Config %s",maca_config)
-		reader = maca.PlainTextReader.create_file_reader(str(inputFile), maca_config)
-		writer = corpus2.TokenWriter.create_path_writer("ccl",str(outputFile),reader.tagset())
-		for chunk in chunks(reader):
-			writer.write_chunk(chunk)
-			
-
-if __name__ == '__main__':
-	nlp_ws.NLPService.main(MacaWorker)
-	
--- a/main.py
+++ b/main.py
+"""Implementation of maca service."""
+import argparse
+
+import nlp_ws
+
+from src.worker import Worker
+
+
+def get_args():
+    """Parse the command line and return the resulting namespace.
+
+    A sub-command is mandatory and ``service`` is the only one defined. The
+    chosen sub-command name is stored in the ``algorithm`` attribute.
+    """
+    cli = argparse.ArgumentParser(description="maca implementation")
+
+    commands = cli.add_subparsers(dest="algorithm")
+    # Make argparse reject an invocation without a sub-command.
+    commands.required = True
+    commands.add_parser("service", help="Run as a service")
+
+    return cli.parse_args()
+
+
+def main():
+    """Dispatch to the sub-command chosen on the command line."""
+    chosen = get_args().algorithm
+
+    # One entry per sub-command; a callable is only invoked when selected.
+    handlers = {
+        "service": lambda: nlp_ws.NLPService.main(Worker),
+    }
+
+    handler = handlers.get(chosen)
+    # A name without a handler does nothing.
+    if handler is not None:
+        handler()
+
+
+if __name__ == "__main__":
+    main()
--- a/old/maca_service.py
+++ b/old/maca_service.py
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-from nlp_service import Service, create_service_option_parser
-import shutil
-from subprocess import call
-
-import maca
-import corpus2
-
-
-def sentences(reader):
-	"""Yields subsequent sentences from a reader."""
-	while True:
-		sentence = reader.get_next_sentence()
-		if not sentence:
-			break
-		yield sentence
-
-def chunks(reader):
-	"""Yields subsequent sentences from a reader."""
-	while True:
-		chunk = reader.get_next_chunk()
-		if not chunk:
-			break
-		yield chunk
-
-
-class MacaService(Service):
-	def __init__(self, *args, **kwargs):
-		self.maca_config='morfeusz2-nkjp'
-		super(MacaService, self).__init__(*args, **kwargs)
-
-	def process(self, inputFile, taskOptions, outputFile):
-		reader = maca.PlainTextReader.create_file_reader(inputFile, self.maca_config)
-		writer = corpus2.TokenWriter.create_path_writer("ccl",outputFile,reader.tagset())
-		for chunk in chunks(reader):
-			writer.write_chunk(chunk)
-		#shutil.move(inputFile, outputFile)
-	
-	
-def test(service):
-	service.process('test.txt',0,'out.ccl')
-	
-if __name__ == '__main__':
-	parser = create_service_option_parser()
-	args = parser.parse_args()
-	config_path = args.config_path
-	logfile_path = args.log_file
-	logging_lvl = args.logging_lvl
-	run_as_daemon = args.daemon
-
-	service = MacaService(config_path, logfile_path, logging_lvl, run_as_daemon)
-	#test(service);
-	service.run()
--- a/requirements.txt
+++ b/requirements.txt
+nlp-ws
\ No newline at end of file
--- a/src/__init__.py
+++ b/src/__init__.py
--- a/src/worker.py
+++ b/src/worker.py
+"""Worker for the maca service."""
+import logging
+
+import corpus2
+import maca
+import nlp_ws
+
+
+def sentences(reader):
+    """Yield sentences from ``reader`` until it returns a falsy value.
+
+    Each item comes from ``reader.get_next_sentence()``; the first falsy
+    result ends the iteration and is not yielded.
+    """
+    current = reader.get_next_sentence()
+    while current:
+        yield current
+        current = reader.get_next_sentence()
+
+
+def chunks(reader):
+    """Yield chunks from ``reader`` until it returns a falsy value.
+
+    Each item comes from ``reader.get_next_chunk()``; the first falsy result
+    ends the iteration and is not yielded.
+    """
+    while True:
+        current = reader.get_next_chunk()
+        if current:
+            yield current
+        else:
+            return
+
+
+_log = logging.getLogger(__name__)
+
+
+class Worker(nlp_ws.NLPWorker):
+    """Maca worker plugged into the nlp_ws service framework."""
+
+    @classmethod
+    def static_init(cls, config):
+        """Store the ``tool`` section of the configuration on the class."""
+        _log.info("Worker started loading models %s", "AS")
+        cls.configtool = config["tool"]
+
+    def init(self):
+        """Log the start-up message; no other setup is done."""
+        _log.info("Worker started loading models")
+
+    def process(self, input_file, task_options, output_file):
+        """Read a plain-text file through maca and write its chunks as CCL.
+
+        The ``morfeusz2-nkjp`` maca configuration is used unless
+        ``task_options`` contains a falsy ``morfeusz2`` entry, in which case
+        ``morfeusz-nkjp-official`` is used instead.
+        """
+        use_morfeusz2 = (
+            "morfeusz2" not in task_options or task_options["morfeusz2"]
+        )
+        if use_morfeusz2:
+            maca_config = "morfeusz2-nkjp"
+        else:
+            maca_config = "morfeusz-nkjp-official"
+        _log.info("Config %s", maca_config)
+
+        source = str(input_file)
+        reader = maca.PlainTextReader.create_file_reader(source, maca_config)
+        # The writer is created with the tagset reported by the reader.
+        writer = corpus2.TokenWriter.create_path_writer(
+            "ccl", str(output_file), reader.tagset()
+        )
+        for chunk in chunks(reader):
+            writer.write_chunk(chunk)
+
+
+# Start the service with this worker when the module is executed as a script.
+if __name__ == "__main__":
+    nlp_ws.NLPService.main(Worker)
--- a/tox.ini
+++ b/tox.ini
+[tox]
+# Environments run by a bare `tox` invocation.
+envlist = pep8,docstyle
+# Skip building a source distribution of the project.
+skipsdist = True
+
+# flake8 check; extra command-line arguments are forwarded via {posargs}.
+[testenv:pep8]
+deps =
+    flake8
+basepython = python3
+commands =
+    flake8 {posargs}
+
+# pydocstyle check; extra command-line arguments are forwarded via {posargs}.
+[testenv:docstyle]
+deps =
+    pydocstyle
+basepython = python3
+commands =
+    pydocstyle --verbose {posargs}
+
+[flake8]
+# W503 line break before binary operator
+# W504 skipped because it is overeager and unnecessary
+ignore = W503,W504
+show-source = True
+exclude = .git,.venv,.tox,dist,doc,*egg,build,venv
+# NOTE(review): import-order-style is an option of the flake8-import-order
+# plugin, which is not listed in the [testenv:pep8] deps - the setting
+# likely has no effect as configured; confirm.
+import-order-style = pep8
+max-line-length = 80
+
+
+[pydocstyle]
+# D104 Missing docstring in public package
+# D203 1 blank line required before class docstring
+# D213 Multi-line docstring summary should start at the second line
+# D214 Section is over-indented
+# D215 Section underline is over-indented
+# D401 First line should be in imperative mood; try rephrasing
+# D405 Section name should be properly capitalized
+# D406 Section name should end with a newline
+# D407 Missing dashed underline after section
+# D408 Section underline should be in the line following the section’s name
+# D409 Section underline should match the length of its name
+# D410 Missing blank line after section
+# D411 Missing blank line before section
+ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411
+# Check only directories whose name does not start with ".tox" or "venv".
+match-dir = ^(?!\.tox|venv).*
+# Check only .py files whose name does not start with "setup".
+match = ^(?!setup).*\.py