Commit b335e78a authored by Mateusz Gniewkowski's avatar Mateusz Gniewkowski

Merge branch 'dev' into 'master'

refactored, added CI, Dockerfile

See merge request !1
parents 17188750 2291f739
Pipeline #1951 passed with stages
in 11 minutes and 54 seconds
image: 'clarinpl/python:3.6'
cache:
paths:
- .tox
stages:
- check_style
- build
before_script:
- pip install tox==2.9.1
pep8:
stage: check_style
script:
- tox -v -e pep8
docstyle:
stage: check_style
script:
- tox -v -e docstyle
build_image:
  stage: build
  image: 'docker:18.09.7'
  only:
    - master
  services:
    - 'docker:18.09.7-dind'
  # Replaces the global before_script (pip install tox) with a no-op for
  # this job; presumably the docker image does not provide pip - TODO confirm.
  before_script:
    - ''
  script:
    - docker build -t clarinpl/maca .
    # Pipe the password straight into docker login so it is never written to
    # a file in the workspace; the quotes prevent word splitting and globbing.
    - echo "$DOCKER_PASSWORD" | docker login --username "$DOCKER_USERNAME" --password-stdin
    - docker push clarinpl/maca
FROM clarinpl/python:3.6
RUN apt-get update && apt-get install -y \
toki \
corpus2-python3.6 \
morphanalyser
WORKDIR /home/worker
COPY requirements.txt .
COPY main.py .
COPY ./src ./src
RUN python3.6 -m pip install -r requirements.txt
RUN cp -a /usr/share/maca/. /home/worker
CMD ["python", "main.py", "service"]
; PLIK KONFIGURACYJNY WORKERA
; Plik zawiera konfigurację zarówno API usługi sieciowej, jak i narzędzia.
;
; Autor: Tomasz Walkowiak
; email: tomasz.walkowiak@pwr.edu.pl
; --------- CZĘŚĆ DLA Serwisu ---------
[service]
#root = /mnt2/requests/
root = /samba/requests/
tool = maca
rabbit_host =10.17.0.85
rabbit_user =clarin
rabbit_password =clarin123
; --------- CZĘŚĆ DLA Narzędzia ---------
root = /samba/requests/
rabbit_host = rabbitmq
rabbit_user = test
rabbit_password = test
queue_prefix =nlp_
[tool]
workers_number = 1
workers_number = 2
config_path = /usr/share/maca/
[logging]
port = 9995
local_log_level = INFO
#!/usr/bin/python
# -*- coding: utf-8 -*-
import nlp_ws
import logging
import maca
import corpus2
def sentences(reader):
    """Iterate over the sentences produced by ``reader``.

    ``reader.get_next_sentence()`` is called repeatedly and every result is
    yielded until the call returns a falsy value.
    """
    current = reader.get_next_sentence()
    while current:
        yield current
        current = reader.get_next_sentence()
def chunks(reader):
    """Iterate over the chunks produced by ``reader``.

    ``reader.get_next_chunk()`` is called repeatedly and every result is
    yielded until the call returns a falsy value.
    """
    current = reader.get_next_chunk()
    while current:
        yield current
        current = reader.get_next_chunk()
# Module-level logger named after this module.
_log = logging.getLogger(__name__)
class MacaWorker(nlp_ws.NLPWorker):
    """Worker that reads plain text with maca and writes it out as CCL."""

    @classmethod
    def static_init(cls, config):
        """Keep the ``tool`` section of ``config`` on the class."""
        _log.info("Worker started loading models %s", "AS")
        cls.configtool = config['tool']

    def init(self):
        """Log the start-up message; no other state is prepared here."""
        _log.info("Worker started loading models")

    def process(self, inputFile, taskOptions, outputFile):
        """Convert the text in ``inputFile`` to a CCL file at ``outputFile``.

        The maca configuration is 'morfeusz2-nkjp' unless ``taskOptions``
        contains a falsy 'morfeusz2' entry, in which case
        'morfeusz-nkjp-official' is used instead.
        """
        use_official = (
            'morfeusz2' in taskOptions and not taskOptions['morfeusz2']
        )
        if use_official:
            maca_config = 'morfeusz-nkjp-official'
        else:
            maca_config = 'morfeusz2-nkjp'
        _log.info("Config %s", maca_config)

        reader = maca.PlainTextReader.create_file_reader(
            str(inputFile), maca_config
        )
        writer = corpus2.TokenWriter.create_path_writer(
            "ccl", str(outputFile), reader.tagset()
        )
        # Copy the reader's output to the writer one chunk at a time.
        for chunk in chunks(reader):
            writer.write_chunk(chunk)
# When run as a script, hand MacaWorker to the nlp_ws service entry point.
if __name__ == '__main__':
    nlp_ws.NLPService.main(MacaWorker)
"""Implementation of maca service."""
import argparse
import nlp_ws
from src.worker import Worker
def get_args():
    """Parse the command line and return the resulting namespace.

    A sub-command is mandatory; ``service`` is the only one registered, and
    the chosen name is stored in the ``algorithm`` attribute.
    """
    arg_parser = argparse.ArgumentParser(description="maca implementation")
    commands = arg_parser.add_subparsers(dest="algorithm")
    # Set as an attribute: the ``required`` keyword needs Python 3.7+.
    commands.required = True
    commands.add_parser("service", help="Run as a service")
    parsed = arg_parser.parse_args()
    return parsed
def main():
    """Run the action selected by the parsed sub-command.

    ``service`` starts the nlp_ws service with ``Worker``; any other value
    of ``algorithm`` results in no action.
    """
    selected = get_args().algorithm
    dispatch = {
        "service": lambda: nlp_ws.NLPService.main(Worker),
    }
    action = dispatch.get(selected)
    if action is not None:
        action()
# Script entry point: only runs main() when executed directly.
if __name__ == "__main__":
    main()
#!/usr/bin/python
# -*- coding: utf-8 -*-
from nlp_service import Service, create_service_option_parser
import shutil
from subprocess import call
import maca
import corpus2
def sentences(reader):
    """Iterate over the sentences produced by ``reader``.

    ``reader.get_next_sentence()`` is called repeatedly and every result is
    yielded until the call returns a falsy value.
    """
    current = reader.get_next_sentence()
    while current:
        yield current
        current = reader.get_next_sentence()
def chunks(reader):
    """Iterate over the chunks produced by ``reader``.

    ``reader.get_next_chunk()`` is called repeatedly and every result is
    yielded until the call returns a falsy value.
    """
    current = reader.get_next_chunk()
    while current:
        yield current
        current = reader.get_next_chunk()
class MacaService(Service):
    """Service that reads plain text with maca and writes it out as CCL."""

    def __init__(self, *args, **kwargs):
        """Pick the maca configuration, then initialise the base class."""
        # Assigned before the base initialiser runs, keeping the original
        # statement order.
        self.maca_config = 'morfeusz2-nkjp'
        super(MacaService, self).__init__(*args, **kwargs)

    def process(self, inputFile, taskOptions, outputFile):
        """Convert the text in ``inputFile`` to a CCL file at ``outputFile``.

        ``taskOptions`` is accepted but not used by this implementation.
        """
        reader = maca.PlainTextReader.create_file_reader(
            inputFile, self.maca_config
        )
        tagset = reader.tagset()
        writer = corpus2.TokenWriter.create_path_writer(
            "ccl", outputFile, tagset
        )
        # Copy the reader's output to the writer one chunk at a time.
        for chunk in chunks(reader):
            writer.write_chunk(chunk)
def test(service):
    """Call ``service.process`` with fixed sample file names."""
    source_path, target_path = 'test.txt', 'out.ccl'
    service.process(source_path, 0, target_path)
if __name__ == '__main__':
    # Build the service from the parsed command-line options and run it.
    cli = create_service_option_parser().parse_args()
    service = MacaService(
        cli.config_path, cli.log_file, cli.logging_lvl, cli.daemon
    )
    service.run()
nlp-ws
\ No newline at end of file
"""Worker for the maca service."""
import logging
import corpus2
import maca
import nlp_ws
def sentences(reader):
    """Iterate over the sentences produced by the reader.

    The reader's get_next_sentence() is called repeatedly and every result
    is yielded until the call returns a falsy value.
    """
    current = reader.get_next_sentence()
    while current:
        yield current
        current = reader.get_next_sentence()
def chunks(reader):
    """Iterate over the chunks produced by the reader.

    The reader's get_next_chunk() is called repeatedly and every result is
    yielded until the call returns a falsy value.
    """
    current = reader.get_next_chunk()
    while current:
        yield current
        current = reader.get_next_chunk()
# Module-level logger named after this module.
_log = logging.getLogger(__name__)
class Worker(nlp_ws.NLPWorker):
    """Worker that reads plain text with maca and writes it out as CCL."""

    @classmethod
    def static_init(cls, config):
        """Keep the "tool" section of the given config on the class."""
        _log.info("Worker started loading models %s", "AS")
        cls.configtool = config["tool"]

    def init(self):
        """Log the start-up message; no other state is prepared here."""
        _log.info("Worker started loading models")

    def process(self, input_file, task_options, output_file):
        """Convert the text in input_file to a CCL file at output_file.

        The maca configuration is "morfeusz2-nkjp" unless task_options
        contains a falsy "morfeusz2" entry, in which case
        "morfeusz-nkjp-official" is used instead.
        """
        use_official = (
            "morfeusz2" in task_options and not task_options["morfeusz2"]
        )
        if use_official:
            maca_config = "morfeusz-nkjp-official"
        else:
            maca_config = "morfeusz2-nkjp"
        _log.info("Config %s", maca_config)

        reader = maca.PlainTextReader.create_file_reader(
            str(input_file), maca_config
        )
        writer = corpus2.TokenWriter.create_path_writer(
            "ccl", str(output_file), reader.tagset()
        )
        # Copy the reader's output to the writer one chunk at a time.
        for chunk in chunks(reader):
            writer.write_chunk(chunk)
# When run as a script, hand Worker to the nlp_ws service entry point.
if __name__ == "__main__":
    nlp_ws.NLPService.main(Worker)
[tox]
envlist = pep8,docstyle
skipsdist = True
[testenv:pep8]
deps =
flake8
basepython = python3
commands =
flake8 {posargs}
[testenv:docstyle]
deps =
pydocstyle
basepython = python3
commands =
pydocstyle --verbose {posargs}
[flake8]
# W503 line break before binary operator
# W504 skipped because it is overeager and unnecessary
ignore = W503,W504
show-source = True
exclude = .git,.venv,.tox,dist,doc,*egg,build,venv
import-order-style = pep8
max-line-length = 80
[pydocstyle]
# D104 Missing docstring in public package
# D203 1 blank line required before class docstring
# D213 Multi-line docstring summary should start at the second line
# D214 Section is over-indented
# D215 Section underline is over-indented
# D401 First line should be in imperative mood; try rephrasing
# D405 Section name should be properly capitalized
# D406 Section name should end with a newline
# D407 Missing dashed underline after section
# D408 Section underline should be in the line following the section’s name
# D409 Section underline should match the length of its name
# D410 Missing blank line after section
# D411 Missing blank line before section
ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411
match-dir = ^(?!\.tox|venv).*
match = ^(?!setup).*\.py
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment