From e6429c583a190c9436b799a106fc4a37dbc21457 Mon Sep 17 00:00:00 2001 From: bwalkow <bwalkow@e-science.pl> Date: Fri, 16 Dec 2022 10:48:48 +0100 Subject: [PATCH] Winer CPU and GPU --- .gitlab-ci.yml | 58 ++++++++++++++++++++++++++++++-------------- Dockerfile | 4 +-- Dockerfile.gpu | 15 ++++++++++++ config.ini | 4 +-- entrypoint.py | 6 ++--- requirements-gpu.txt | 4 +++ requirements.txt | 2 +- src/winer_worker.py | 3 +-- worker.py | 2 +- 9 files changed, 68 insertions(+), 30 deletions(-) create mode 100644 Dockerfile.gpu create mode 100644 requirements-gpu.txt diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ea1c47a..7eb8988 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,34 +6,56 @@ cache: stages: - check_style - - build - -before_script: - - pip install tox==3.18.1 + - build_develop + - build_master pep8: stage: check_style + before_script: + - pip install tox==3.18.1 script: - tox -v -e pep8 -build_image: - stage: build +build_develop: + only: + variables: + - $CI_COMMIT_MESSAGE =~ /build-cpu/ + stage: build_develop + image: docker:18.09.7 + services: + - 'docker:18.09.7-dind' + script: + - docker build -t $CI_REGISTRY_IMAGE:develop . + - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY + - docker push $CI_REGISTRY_IMAGE:develop + +build_develop_both: + only: + variables: + - $CI_COMMIT_MESSAGE =~ /build-both/ + stage: build_develop + image: docker:18.09.7 + services: + - 'docker:18.09.7-dind' + script: + - docker build -t $CI_REGISTRY_IMAGE:develop . + - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY + - docker push $CI_REGISTRY_IMAGE:develop + - docker build -t $CI_REGISTRY_IMAGE:gpu -f Dockerfile.gpu . + - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY + - docker push $CI_REGISTRY_IMAGE:gpu + +build_master: + stage: build_master image: 'docker:18.09.7' only: - master services: - 'docker:18.09.7-dind' - variables: - DOCKERHUB_NAME: clarinpl/$CI_PROJECT_NAME - before_script: - - '' script: - - docker build -t $DOCKERHUB_NAME . - - echo $DOCKER_PASSWORD > pass.txt - - cat pass.txt | docker login --username $DOCKER_USERNAME --password-stdin - - rm pass.txt - - docker push $DOCKERHUB_NAME + - docker build -t $CI_REGISTRY_IMAGE:latest . + - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY + - docker push $CI_REGISTRY_IMAGE:latest + - docker build -t $CI_REGISTRY_IMAGE:gpu -f Dockerfile.gpu . - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY - - docker image tag $DOCKERHUB_NAME $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG - - docker image tag $DOCKERHUB_NAME $CI_REGISTRY_IMAGE:latest - - docker push $CI_REGISTRY_IMAGE + - docker push $CI_REGISTRY_IMAGE:gpu diff --git a/Dockerfile b/Dockerfile index d704403..446cfc1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,4 @@ -FROM 11.7.0-cudnn8-runtime-ubuntu20.04 - -RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y gcc python3-dev python3-venv python3-pip +FROM clarinpl/python:3.8 COPY requirements.txt requirements.txt RUN python3 -m pip install -r requirements.txt && rm requirements.txt diff --git a/Dockerfile.gpu b/Dockerfile.gpu new file mode 100644 index 0000000..2a53004 --- /dev/null +++ b/Dockerfile.gpu @@ -0,0 +1,15 @@ +FROM nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04 + +RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y gcc python3-dev python3-venv python3-pip + +COPY requirements-gpu.txt requirements.txt +RUN python3 -m pip install -r requirements.txt && rm requirements.txt + +WORKDIR /home/worker + +COPY src src +COPY entrypoint.py entrypoint.py +COPY worker.py worker.py +COPY config.ini config.ini + +ENTRYPOINT [ "python3", "entrypoint.py"] \ No newline at end of file diff --git a/config.ini b/config.ini index 3cad108..283d637 100644 --- a/config.ini +++ b/config.ini @@ -15,5 +15,5 @@ local_log_level=INFO [deployment] s3_endpoint = https://s3.clarin-pl.eu -models_s3_location=s3://workers/winer/models -models_cache_dir=/home/worker/models \ No newline at end of file +models_s3_location=s3://workers/winer/models/base +models_cache_dir=/home/worker/models/base \ No newline at end of file diff --git a/entrypoint.py b/entrypoint.py index 0cbaaa5..024b565 100644 --- a/entrypoint.py +++ b/entrypoint.py @@ -9,9 +9,9 @@ parser.read("config.ini") s3_endpoint = parser["deployment"].get("s3_endpoint", "https://s3.clarin-pl.eu") s3_location = parser["deployment"].get( - "models_s3_location", "s3://workers/winer/models" + "models_s3_location", "s3://workers/winer/models/base" ) -local_models_location = parser["deployment"].get("models_cache_dir", "/tmp/models") +local_models_location = parser["deployment"].get("models_cache_dir", "/home/worker/models/base") cmd = ( f'aws --no-sign-request --endpoint-url "{s3_endpoint}" s3 sync --delete' @@ -19,4 +19,4 @@ cmd = ( ) run(cmd, shell=True) -run(["python", "worker.py"] + sys.argv[1:]) +run(["python3", "worker.py"] + sys.argv[1:]) diff --git a/requirements-gpu.txt b/requirements-gpu.txt new file mode 100644 index 0000000..cca044f --- /dev/null +++ b/requirements-gpu.txt @@ -0,0 +1,4 @@ +--index-url https://pypi.clarin-pl.eu/simple/ +nlp_ws +winer==0.2.0 +awscli==1.22.57 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 9c67cc3..02e500a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ --index-url https://pypi.clarin-pl.eu/simple/ nlp_ws -winer==0.2.0 \ No newline at end of file +awscli==1.22.57 \ No newline at end of file diff --git a/src/winer_worker.py b/src/winer_worker.py index 526352a..7dae252 100644 --- a/src/winer_worker.py +++ b/src/winer_worker.py @@ -9,7 +9,6 @@ import logging class WinerWorker: - DEFAULT_MODEL = "dummy" def __init__( self, @@ -17,7 +16,7 @@ class WinerWorker: ): logging.info("Loading models...") - self.active_model = Winer(f'{models_location}/{self.DEFAULT_MODEL}') + self.active_model = Winer(models_location) def process( self, diff --git a/worker.py b/worker.py index 23abb49..da36947 100644 --- a/worker.py +++ b/worker.py @@ -13,7 +13,7 @@ class Worker(nlp_ws.NLPWorker): config.read("config.ini") config = config["deployment"] - models_cache_dir = config.get("models_cache_dir", "/home/worker/models") + models_cache_dir = config.get("models_cache_dir", "/home/worker/models/base") self.winer = WinerWorker(models_cache_dir) def process(self, input_path: str, task_options: dict, output_path: str) -> None: -- GitLab