Skip to content
Snippets Groups Projects
Commit c287ab9d authored by Wiktor Walentynowicz's avatar Wiktor Walentynowicz :construction_worker_tone1:
Browse files

Develop

parent a9d2f718
2 merge requests!5Develop,!1Develop
__pycache__
.tox
/data
/venv
\ No newline at end of file
# GitLab CI pipeline: lint the code with tox, then (on master only) build the
# Docker image and publish it to Docker Hub and to the GitLab registry.
image: "clarinpl/python:3.8"

cache:
  paths:
    - .tox

stages:
  - check_style
  - build

# Runs before every job that does not override before_script.
before_script:
  - pip install tox==3.18.1

pep8:
  stage: check_style
  script:
    - tox -v -e pep8

build_image:
  stage: build
  image: 'docker:18.09.7'
  only:
    - master
  services:
    - 'docker:18.09.7-dind'
  variables:
    DOCKERHUB_NAME: clarinpl/$CI_PROJECT_NAME
  # Overrides the global before_script: the docker image does not need tox.
  before_script:
    - ''
  script:
    - docker build -t $DOCKERHUB_NAME .
    # Pipe the secrets straight into `docker login`: nothing is written to the
    # workspace and no password appears on a command line. Quoting keeps
    # passwords containing spaces or glob characters intact.
    - echo "$DOCKER_PASSWORD" | docker login --username "$DOCKER_USERNAME" --password-stdin
    - docker push $DOCKERHUB_NAME
    - echo "$CI_REGISTRY_PASSWORD" | docker login --username "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY"
    - docker image tag $DOCKERHUB_NAME $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
    - docker image tag $DOCKERHUB_NAME $CI_REGISTRY_IMAGE:latest
    # Push both tags explicitly; what an untagged `docker push` uploads
    # depends on the Docker version.
    - docker push $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
    - docker push $CI_REGISTRY_IMAGE:latest
# Image for the winer worker. The base tag needs its repository: a bare
# "11.7.0-cudnn8-runtime-ubuntu20.04" is parsed as an image *name*, not a tag.
FROM nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04

# DEBIAN_FRONTEND has to prefix `apt-get install` (the step that may prompt);
# a prefix on `apt-get update` alone does not carry across `&&`.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y gcc python3-dev python3-venv python3-pip \
    && rm -rf /var/lib/apt/lists/*

COPY requirements.txt requirements.txt
RUN python3 -m pip install -r requirements.txt && rm requirements.txt

WORKDIR /home/worker
COPY src src
COPY entrypoint.py entrypoint.py
COPY worker.py worker.py
COPY config.ini config.ini

# entrypoint.py syncs the models and then starts worker.py.
ENTRYPOINT [ "python3", "entrypoint.py"]
\ No newline at end of file
# Worker configuration. entrypoint.py and worker.py read the [deployment]
# section; the other sections are presumably consumed by nlp_ws (confirm).

# NOTE(review): the $RABBIT_* values look like placeholders that are
# substituted before or while the file is read; verify where that happens.
[service]
tool = winer
root = /samba/requests/
rabbit_host = $RABBIT_HOST
rabbit_user = $RABBIT_USER
rabbit_password = $RABBIT_PASSWORD
queue_prefix = nlp_
[tool]
workers_number=1
[logging]
port=9981
local_log_level=INFO
[deployment]
# Endpoint that entrypoint.py passes to `aws --endpoint-url`.
s3_endpoint = https://s3.clarin-pl.eu
# S3 location that entrypoint.py syncs the models from.
models_s3_location=s3://workers/winer/models
# Local directory the models are synced into; worker.py hands it to WinerWorker.
models_cache_dir=/home/worker/models
\ No newline at end of file
# Kubernetes Deployment that runs one replica of the winer worker.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: winer
  labels:
    app: winer
  namespace: nlpworkers
spec:
  replicas: 1
  selector:
    matchLabels:
      app: winer
  template:
    metadata:
      labels:
        app: winer
    spec:
      containers:
        - name: winer
          # NOTE(review): Kubernetes does not expand $CI_PROJECT_NAME itself;
          # presumably it is substituted before the manifest is applied - confirm.
          image: clarinpl/$CI_PROJECT_NAME:latest
          imagePullPolicy: Always
          volumeMounts:
            # Replaces the config.ini that the Dockerfile copies into /home/worker.
            - name: config
              mountPath: /home/worker/config.ini
              subPath: config.ini
            - name: samba
              mountPath: /samba
            # Same directory as models_cache_dir in config.ini.
            - name: models
              mountPath: /home/worker/models/
      volumes:
        - name: config
          configMap:
            name: winer-config-ini
        - name: samba
          hostPath:
            path: /samba
            type: ""
        # Host directory backing the models mount above.
        - name: models
          hostPath:
            path: /tmp/winer-models
            type: DirectoryOrCreate
#!/usr/bin/python3
"""Container entrypoint: sync the models from S3, then start worker.py."""
from subprocess import run
import configparser
import sys

parser = configparser.ConfigParser()
parser.read("config.ini")

# `fallback` is also used when config.ini or its [deployment] section is
# missing, so the defaults below really are defaults instead of a KeyError.
s3_endpoint = parser.get(
    "deployment", "s3_endpoint", fallback="https://s3.clarin-pl.eu"
)
s3_location = parser.get(
    "deployment", "models_s3_location", fallback="s3://workers/winer/models"
)
# Same default as worker.py, so both agree on the directory when the option
# is absent.
local_models_location = parser.get(
    "deployment", "models_cache_dir", fallback="/home/worker/models"
)

# Argument list instead of a shell string: the configured values reach aws
# verbatim and are never interpreted by a shell.
cmd = [
    "aws",
    "--no-sign-request",
    "--endpoint-url",
    s3_endpoint,
    "s3",
    "sync",
    "--delete",
    s3_location,
    local_models_location,
]

# The sync stays best-effort (a failure does not stop the worker from
# starting), but it is now reported instead of passing silently.
try:
    sync_status = run(cmd).returncode
except OSError as err:  # e.g. the aws executable is not installed
    sync_status = err
if sync_status != 0:
    print(
        f"Model sync failed ({sync_status}); starting the worker anyway.",
        file=sys.stderr,
    )

# Reuse the interpreter running this script: the image only provides
# `python3`, so a bare `python` may not exist.
worker = run([sys.executable or "python3", "worker.py"] + sys.argv[1:])

# Let the container report the worker's exit status rather than always 0.
sys.exit(worker.returncode)
--index-url https://pypi.clarin-pl.eu/simple/
nlp_ws
winer==0.2.0
\ No newline at end of file
"""Implementation of the winer worker"""
from winer.datafiles import read_clarin_json, write_clarin_json
from winer.document import create_document_from_clarin_json, \
create_entities_from_hf_outputs
from winer.winer import Winer
import logging
class WinerWorker:
    """Runs a Winer model over a CLARIN JSON file and writes the result back."""

    # Name of the model sub-directory used under ``models_location``.
    DEFAULT_MODEL = "dummy"

    def __init__(self, models_location: str):
        """Create the Winer model from ``<models_location>/<DEFAULT_MODEL>``.

        Args:
            models_location: Directory that contains the model directories.
        """
        logging.info("Loading models...")
        model_path = f'{models_location}/{self.DEFAULT_MODEL}'
        self.active_model = Winer(model_path)

    def process(
        self, input_path: str, task_options: dict, output_path: str
    ) -> None:
        """Annotate the document at ``input_path`` and save it to ``output_path``.

        Args:
            input_path: Path of the CLARIN JSON file to read.
            task_options: Accepted as part of the signature; not used here.
            output_path: Path the annotated CLARIN JSON is written to.
        """
        clarin_json = read_clarin_json(input_path)
        documents = [create_document_from_clarin_json(clarin_json)]

        texts = [doc.get_pretokenized_text() for doc in documents]
        outputs = self.active_model.predict(texts)

        # outputs[i] is matched with documents[i].
        for position, doc in enumerate(documents):
            entities = create_entities_from_hf_outputs(outputs[position])
            doc.add_entites(entities)

        # The list holds exactly one document, which is the one to save.
        write_clarin_json(documents[0].as_clarin_json(), output_path)
tox.ini 0 → 100644
# tox configuration; the CI job "pep8" runs `tox -v -e pep8`.
# NOTE(review): envlist names "unittest", but no [testenv:unittest] section
# is visible in this file - confirm whether one is intended.
[tox]
envlist = unittest,pep8
skipsdist = True
# Settings flake8 uses when the pep8 environment below runs it.
[flake8]
exclude =
    venv,
    .tox,
    .git,
    __pycache__,
    docs/source/conf.py,
    build,
    dist,
    tests/fixtures/*,
    *.pyc,
    *.egg-info,
    .cache,
    .eggs
    data
    generated
max-complexity = 10
min_python_version = 3.8
max-line-length = 88
select = I,C,E,F,W,B,B950,TYP,T
ignore = E231, W503
# Style check environment: installs flake8 with two plugins and runs it.
[testenv:pep8]
deps =
    flake8
    flake8-type-annotations
    flake8-typing-imports
basepython = python
commands =
    flake8 {posargs}
\ No newline at end of file
"""nlp_ws entry point of the winer worker"""
import configparser
import nlp_ws
from src.winer_worker import WinerWorker
class Worker(nlp_ws.NLPWorker):
    """nlp_ws worker that delegates every task to a WinerWorker."""

    def init(self):
        """Read config.ini and create the WinerWorker.

        Presumably called by nlp_ws when the worker starts - confirm against
        the nlp_ws documentation.
        """
        config = configparser.ConfigParser()
        config.read("config.ini")
        # `fallback` is also used when the [deployment] section (or the whole
        # file) is missing; indexing config["deployment"] raised KeyError
        # there even though a default for the option was supplied.
        models_cache_dir = config.get(
            "deployment", "models_cache_dir", fallback="/home/worker/models"
        )
        self.winer = WinerWorker(models_cache_dir)

    def process(self, input_path: str, task_options: dict, output_path: str) -> None:
        """Forward one task to WinerWorker.process.

        Args:
            input_path: Path of the input file for the task.
            task_options: Options of the task, passed through unchanged.
            output_path: Path the result is written to.
        """
        self.winer.process(input_path, task_options, output_path)
# When run as a script, pass the Worker class to nlp_ws.NLPService.main.
if __name__ == "__main__":
    nlp_ws.NLPService.main(Worker)
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment