Commit b335e78a authored by Mateusz Gniewkowski

Merge branch 'dev' into 'master'

refactored, added CI, Dockerfile

See merge request !1
parents 17188750 2291f739
Pipeline #1951 passed
image: 'clarinpl/python:3.6'

cache:
  paths:
    - .tox

stages:
  - check_style
  - build

before_script:
  - pip install tox==2.9.1

pep8:
  stage: check_style
  script:
    - tox -v -e pep8

docstyle:
  stage: check_style
  script:
    - tox -v -e docstyle

build_image:
  stage: build
  image: 'docker:18.09.7'
  only:
    - master
  services:
    - 'docker:18.09.7-dind'
  before_script:
    - ''
  script:
    - docker build -t clarinpl/maca .
    - echo $DOCKER_PASSWORD > pass.txt
    - cat pass.txt | docker login --username $DOCKER_USERNAME --password-stdin
    - rm pass.txt
    - docker push clarinpl/maca
FROM clarinpl/python:3.6

RUN apt-get update && apt-get install -y \
    toki \
    corpus2-python3.6 \
    morphanalyser

WORKDIR /home/worker
COPY requirements.txt .
COPY main.py .
COPY ./src ./src

RUN python3.6 -m pip install -r requirements.txt
RUN cp -a /usr/share/maca/. /home/worker

CMD ["python", "main.py", "service"]
; WORKER CONFIGURATION FILE
; The file contains the configuration of both the web service API and the tool.
;
; Author: Tomasz Walkowiak
; email: tomasz.walkowiak@pwr.edu.pl
; --------- SERVICE PART ---------
[service]
#root = /mnt2/requests/
root = /samba/requests/
tool = maca
rabbit_host = 10.17.0.85
rabbit_user = clarin
rabbit_password = clarin123
; --------- TOOL PART ---------
root = /samba/requests/
rabbit_host = rabbitmq
rabbit_user = test
rabbit_password = test
queue_prefix = nlp_
[tool]
workers_number = 1
workers_number = 2
config_path = /usr/share/maca/
[logging]
port = 9995
local_log_level = INFO
#!/usr/bin/python
# -*- coding: utf-8 -*-
import nlp_ws
import logging
import maca
import corpus2


def sentences(reader):
    """Yields subsequent sentences from a reader."""
    while True:
        sentence = reader.get_next_sentence()
        if not sentence:
            break
        yield sentence


def chunks(reader):
    """Yields subsequent sentences from a reader."""
    while True:
        chunk = reader.get_next_chunk()
        if not chunk:
            break
        yield chunk


_log = logging.getLogger(__name__)


class MacaWorker(nlp_ws.NLPWorker):

    @classmethod
    def static_init(cls, config):
        _log.info("Worker started loading models %s", "AS")
        cls.configtool = config['tool']
        return

    def init(self):
        _log.info("Worker started loading models")

    def process(self, inputFile, taskOptions, outputFile):
        maca_config = 'morfeusz2-nkjp'
        if 'morfeusz2' in taskOptions:
            if not taskOptions['morfeusz2']:
                maca_config = 'morfeusz-nkjp-official'
        _log.info("Config %s", maca_config)
        reader = maca.PlainTextReader.create_file_reader(str(inputFile), maca_config)
        writer = corpus2.TokenWriter.create_path_writer("ccl", str(outputFile), reader.tagset())
        for chunk in chunks(reader):
            writer.write_chunk(chunk)


if __name__ == '__main__':
    nlp_ws.NLPService.main(MacaWorker)
main.py 0 → 100644
"""Implementation of maca service."""
import argparse
import nlp_ws
from src.worker import Worker
def get_args():
"""Gets command line arguments."""
parser = argparse.ArgumentParser(description="maca implementation")
subparsers = parser.add_subparsers(dest="algorithm")
subparsers.required = True
subparsers.add_parser("service", help="Run as a service")
return parser.parse_args()
def main():
"""Runs the program."""
args = get_args()
generators = {
"service": lambda: nlp_ws.NLPService.main(Worker),
}
gen_fn = generators.get(args.algorithm, lambda: None)
gen_fn()
if __name__ == "__main__":
main()
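The container's CMD ["python", "main.py", "service"] runs exactly this entry point. As a quick local sanity check of the argument handling, the parser can be exercised on its own; this is a sketch only, assuming the snippet is run from the repository root and that nlp_ws, maca and corpus2 are installed so the imports succeed.

# Sketch: exercise the argument parser without starting the service.
import sys

import main as maca_main

sys.argv = ["main.py", "service"]
args = maca_main.get_args()
assert args.algorithm == "service"  # the only registered subcommand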
#!/usr/bin/python
# -*- coding: utf-8 -*-
from nlp_service import Service, create_service_option_parser
import shutil
from subprocess import call
import maca
import corpus2


def sentences(reader):
    """Yields subsequent sentences from a reader."""
    while True:
        sentence = reader.get_next_sentence()
        if not sentence:
            break
        yield sentence


def chunks(reader):
    """Yields subsequent sentences from a reader."""
    while True:
        chunk = reader.get_next_chunk()
        if not chunk:
            break
        yield chunk


class MacaService(Service):

    def __init__(self, *args, **kwargs):
        self.maca_config = 'morfeusz2-nkjp'
        super(MacaService, self).__init__(*args, **kwargs)

    def process(self, inputFile, taskOptions, outputFile):
        reader = maca.PlainTextReader.create_file_reader(inputFile, self.maca_config)
        writer = corpus2.TokenWriter.create_path_writer("ccl", outputFile, reader.tagset())
        for chunk in chunks(reader):
            writer.write_chunk(chunk)
        # shutil.move(inputFile, outputFile)


def test(service):
    service.process('test.txt', 0, 'out.ccl')


if __name__ == '__main__':
    parser = create_service_option_parser()
    args = parser.parse_args()
    config_path = args.config_path
    logfile_path = args.log_file
    logging_lvl = args.logging_lvl
    run_as_daemon = args.daemon
    service = MacaService(config_path, logfile_path, logging_lvl, run_as_daemon)
    # test(service)
    service.run()
nlp-ws
\ No newline at end of file
"""Worker for the maca service."""
import logging
import corpus2
import maca
import nlp_ws
def sentences(reader):
"""Yields subsequent sentences from a reader."""
while True:
sentence = reader.get_next_sentence()
if not sentence:
break
yield sentence
def chunks(reader):
"""Yields subsequent sentences from a reader."""
while True:
chunk = reader.get_next_chunk()
if not chunk:
break
yield chunk
_log = logging.getLogger(__name__)
class Worker(nlp_ws.NLPWorker):
"""Implements nlp_worker for maca service."""
@classmethod
def static_init(cls, config):
"""Static_init for Worker."""
_log.info("Worker started loading models %s", "AS")
cls.configtool = config["tool"]
return
def init(self):
"""Initialize worker."""
_log.info("Worker started loading models")
def process(self, input_file, task_options, output_file):
"""Runs a single nlp_task."""
maca_config = "morfeusz2-nkjp"
if "morfeusz2" in task_options:
if not task_options["morfeusz2"]:
maca_config = "morfeusz-nkjp-official"
_log.info("Config %s", maca_config)
reader = maca.PlainTextReader.create_file_reader(
str(input_file), maca_config
)
writer = corpus2.TokenWriter.create_path_writer(
"ccl", str(
output_file), reader.tagset()
)
for chunk in chunks(reader):
writer.write_chunk(chunk)
if __name__ == "__main__":
nlp_ws.NLPService.main(Worker)
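The only task option the worker reacts to is "morfeusz2": when the option is present and falsy, the older "morfeusz-nkjp-official" configuration is selected instead of the default "morfeusz2-nkjp". Below is a standalone sketch of that mapping; select_maca_config is an illustrative name and not part of the worker code.

def select_maca_config(task_options):
    """Illustrative copy of the option handling in Worker.process()."""
    maca_config = "morfeusz2-nkjp"
    if "morfeusz2" in task_options and not task_options["morfeusz2"]:
        maca_config = "morfeusz-nkjp-official"
    return maca_config


assert select_maca_config({}) == "morfeusz2-nkjp"
assert select_maca_config({"morfeusz2": True}) == "morfeusz2-nkjp"
assert select_maca_config({"morfeusz2": False}) == "morfeusz-nkjp-official"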
tox.ini 0 → 100644
[tox]
envlist = pep8,docstyle
skipsdist = True

[testenv:pep8]
deps =
    flake8
basepython = python3
commands =
    flake8 {posargs}

[testenv:docstyle]
deps =
    pydocstyle
basepython = python3
commands =
    pydocstyle --verbose {posargs}

[flake8]
# W503 line break before binary operator
# W504 skipped because it is overeager and unnecessary
ignore = W503,W504
show-source = True
exclude = .git,.venv,.tox,dist,doc,*egg,build,venv
import-order-style = pep8
max-line-length = 80

[pydocstyle]
# D104 Missing docstring in public package
# D203 1 blank line required before class docstring
# D213 Multi-line docstring summary should start at the second line
# D214 Section is over-indented
# D215 Section underline is over-indented
# D401 First line should be in imperative mood; try rephrasing
# D405 Section name should be properly capitalized
# D406 Section name should end with a newline
# D407 Missing dashed underline after section
# D408 Section underline should be in the line following the section's name
# D409 Section underline should match the length of its name
# D410 Missing blank line after section
# D411 Missing blank line before section
ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411
match-dir = ^(?!\.tox|venv).*
match = ^(?!setup).*\.py