From e6429c583a190c9436b799a106fc4a37dbc21457 Mon Sep 17 00:00:00 2001
From: bwalkow <bwalkow@e-science.pl>
Date: Fri, 16 Dec 2022 10:48:48 +0100
Subject: [PATCH] Winer: add separate CPU and GPU image builds

---
 .gitlab-ci.yml       | 58 ++++++++++++++++++++++++++++++--------------
 Dockerfile           |  4 +--
 Dockerfile.gpu       | 15 ++++++++++++
 config.ini           |  4 +--
 entrypoint.py        |  6 ++---
 requirements-gpu.txt |  4 +++
 requirements.txt     |  2 +-
 src/winer_worker.py  |  3 +--
 worker.py            |  2 +-
 9 files changed, 68 insertions(+), 30 deletions(-)
 create mode 100644 Dockerfile.gpu
 create mode 100644 requirements-gpu.txt

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index ea1c47a..7eb8988 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -6,34 +6,56 @@ cache:
 
 stages:
   - check_style
-  - build
-
-before_script:
-  - pip install tox==3.18.1
+  - build_develop
+  - build_master
 
 pep8:
   stage: check_style
+  before_script:
+    - pip install tox==3.18.1
   script:
     - tox -v -e pep8
 
-build_image:
-  stage: build
+build_develop:
+  only:
+    variables:
+      - $CI_COMMIT_MESSAGE =~ /build-cpu/
+  stage: build_develop
+  image: docker:18.09.7
+  services:
+    - 'docker:18.09.7-dind'
+  script:  # the password is piped on stdin so it never appears on the docker command line
+    - docker build -t $CI_REGISTRY_IMAGE:develop .
+    - echo "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY"
+    - docker push $CI_REGISTRY_IMAGE:develop
+
+build_develop_both:
+  only:
+    variables:
+      - $CI_COMMIT_MESSAGE =~ /build-both/
+  stage: build_develop
+  image: docker:18.09.7
+  services:
+    - 'docker:18.09.7-dind'
+  script:
+    # Log in once, with the password on stdin; the same session serves both pushes below.
+    - echo "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY"
+    - docker build -t $CI_REGISTRY_IMAGE:develop .
+    - docker push $CI_REGISTRY_IMAGE:develop
+    - docker build -t $CI_REGISTRY_IMAGE:gpu -f Dockerfile.gpu .
+    - docker push $CI_REGISTRY_IMAGE:gpu
+  
+build_master:
+  stage: build_master
   image: 'docker:18.09.7'
   only:
     - master
   services:
     - 'docker:18.09.7-dind'
-  variables:
-    DOCKERHUB_NAME: clarinpl/$CI_PROJECT_NAME
-  before_script:
-    - ''
   script:
-    - docker build -t $DOCKERHUB_NAME .
-    - echo $DOCKER_PASSWORD > pass.txt
-    - cat pass.txt | docker login --username $DOCKER_USERNAME --password-stdin
-    - rm pass.txt
-    - docker push $DOCKERHUB_NAME
+    - docker build -t $CI_REGISTRY_IMAGE:latest .
+    - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
+    - docker push $CI_REGISTRY_IMAGE:latest
+    - docker build -t $CI_REGISTRY_IMAGE:gpu -f Dockerfile.gpu .
     - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
-    - docker image tag $DOCKERHUB_NAME $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
-    - docker image tag $DOCKERHUB_NAME $CI_REGISTRY_IMAGE:latest
-    - docker push $CI_REGISTRY_IMAGE
+    - docker push $CI_REGISTRY_IMAGE:gpu
diff --git a/Dockerfile b/Dockerfile
index d704403..446cfc1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,4 @@
-FROM 11.7.0-cudnn8-runtime-ubuntu20.04
-
-RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y gcc python3-dev python3-venv python3-pip
+FROM clarinpl/python:3.8
 
 COPY requirements.txt requirements.txt
 RUN python3 -m pip install -r requirements.txt && rm requirements.txt
diff --git a/Dockerfile.gpu b/Dockerfile.gpu
new file mode 100644
index 0000000..2a53004
--- /dev/null
+++ b/Dockerfile.gpu
@@ -0,0 +1,15 @@
+FROM nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04
+# DEBIAN_FRONTEND sits on `apt-get install`: an env prefix only applies to the command it precedes, not to the rest of an `&&` chain.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y gcc python3-dev python3-venv python3-pip
+
+COPY requirements-gpu.txt requirements.txt
+RUN python3 -m pip install -r requirements.txt && rm requirements.txt
+
+WORKDIR /home/worker
+
+COPY src src
+COPY entrypoint.py entrypoint.py
+COPY worker.py worker.py
+COPY config.ini config.ini
+
+ENTRYPOINT [ "python3", "entrypoint.py"]
\ No newline at end of file
diff --git a/config.ini b/config.ini
index 3cad108..283d637 100644
--- a/config.ini
+++ b/config.ini
@@ -15,5 +15,5 @@ local_log_level=INFO
 
 [deployment]
 s3_endpoint = https://s3.clarin-pl.eu
-models_s3_location=s3://workers/winer/models
-models_cache_dir=/home/worker/models
\ No newline at end of file
+models_s3_location=s3://workers/winer/models/base
+models_cache_dir=/home/worker/models/base
\ No newline at end of file
diff --git a/entrypoint.py b/entrypoint.py
index 0cbaaa5..024b565 100644
--- a/entrypoint.py
+++ b/entrypoint.py
@@ -9,9 +9,9 @@ parser.read("config.ini")
 
 s3_endpoint = parser["deployment"].get("s3_endpoint", "https://s3.clarin-pl.eu")
 s3_location = parser["deployment"].get(
-    "models_s3_location", "s3://workers/winer/models"
+    "models_s3_location", "s3://workers/winer/models/base"
 )
-local_models_location = parser["deployment"].get("models_cache_dir", "/tmp/models")
+local_models_location = parser["deployment"].get("models_cache_dir", "/home/worker/models/base")
 
 cmd = (
     f'aws --no-sign-request --endpoint-url "{s3_endpoint}" s3 sync --delete'
@@ -19,4 +19,4 @@ cmd = (
 )
 run(cmd, shell=True)
 
-run(["python", "worker.py"] + sys.argv[1:])
+run(["python3", "worker.py"] + sys.argv[1:])
diff --git a/requirements-gpu.txt b/requirements-gpu.txt
new file mode 100644
index 0000000..cca044f
--- /dev/null
+++ b/requirements-gpu.txt
@@ -0,0 +1,4 @@
+--index-url https://pypi.clarin-pl.eu/simple/ 
+nlp_ws
+winer==0.2.0
+awscli==1.22.57
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 9c67cc3..02e500a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 --index-url https://pypi.clarin-pl.eu/simple/ 
 nlp_ws
-winer==0.2.0
\ No newline at end of file
+awscli==1.22.57
\ No newline at end of file
diff --git a/src/winer_worker.py b/src/winer_worker.py
index 526352a..7dae252 100644
--- a/src/winer_worker.py
+++ b/src/winer_worker.py
@@ -9,7 +9,6 @@ import logging
 
 
 class WinerWorker:
-    DEFAULT_MODEL = "dummy"
 
     def __init__(
         self,
@@ -17,7 +16,7 @@ class WinerWorker:
     ):
 
         logging.info("Loading models...")
-        self.active_model = Winer(f'{models_location}/{self.DEFAULT_MODEL}')
+        self.active_model = Winer(models_location)
 
     def process(
         self,
diff --git a/worker.py b/worker.py
index 23abb49..da36947 100644
--- a/worker.py
+++ b/worker.py
@@ -13,7 +13,7 @@ class Worker(nlp_ws.NLPWorker):
         config.read("config.ini")
         config = config["deployment"]
 
-        models_cache_dir = config.get("models_cache_dir", "/home/worker/models")
+        models_cache_dir = config.get("models_cache_dir", "/home/worker/models/base")
         self.winer = WinerWorker(models_cache_dir)
 
     def process(self, input_path: str, task_options: dict, output_path: str) -> None:
-- 
GitLab