diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4d608510e92abaf9cb192f7fc3d70ef0ee68b9e0..a858b46314dbf367530e11bd11ddd8a976734b24 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -10,7 +10,7 @@ stages: - build before_script: - - pip install tox==3.19.0 + - pip install tox==3.18.1 pep8: stage: check_style diff --git a/Dockerfile b/Dockerfile index 2b3b6a3852dc79edf55914ce01f983c6a2f9b98c..ce5211b591294192613e08e020dd93879257f810 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,7 +10,6 @@ RUN pip3 install -r requirements.txt && rm requirements.txt COPY src ./src COPY config.ini . COPY worker.py . - -RUN pip3 freeze +COPY entrypoint.sh . -ENTRYPOINT [ "./worker.py" ] \ No newline at end of file +ENTRYPOINT [ "./entrypoint.sh" ] \ No newline at end of file diff --git a/config.ini b/config.ini index 9e5854734140c5933f8f9dcd97bb152cdcd9c5fd..93819e52ab6958cf2e7a936a1acfa92e097c09fb 100644 --- a/config.ini +++ b/config.ini @@ -14,8 +14,8 @@ port = 9981 local_log_level = INFO [deployment] -device = "cpu" +device = cpu chunk_size = 500 threshold = 0.9 -model = "deploy/model" -base_model = "dkleczek/bert-base-polish-cased-v1" \ No newline at end of file +model = deploy/model +base_model = dkleczek/bert-base-polish-cased-v1 \ No newline at end of file diff --git a/entrypoint.sh b/entrypoint.sh index e548dca6707b76c9ba68b7c4709dfd2151b5b34a..a6e06ed1888303023e935d1a1661dd86858ef546 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -1,8 +1,8 @@ #!/bin/bash -if test -f "./deploy/model"; then +if ! test -f "./deploy/model"; then mkdir -p ./deploy wget https://minio.clarin-pl.eu/public/models/punctuation/0-190000.model -O deploy/model fi -python3 worker.py \ No newline at end of file +python worker.py \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 17154b987755ad39634351de67de2f3166cbe8be..4e2d19ab860508eab855751c232b65756f57736d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -58,4 +58,5 @@ transformers==3.0.2 typing-extensions==3.7.4.2 unattended-upgrades==0.1 urllib3==1.25.10 -zict==2.0.0 \ No newline at end of file +zict==2.0.0 +git+https://gitlab.clarin-pl.eu/nlpworkers/nlp_ws.git@fa5f09a2f1447cac2c411c9d9e3d927ecd815ddc#egg=nlp_ws \ No newline at end of file diff --git a/src/pipelines/actions_based/utils.py b/src/pipelines/actions_based/utils.py index 4f62b611c4d1e41e53148cd8a77fa36e5a819add..a8728e111b3665092231211e3a0e1714af2f23ea 100644 --- a/src/pipelines/actions_based/utils.py +++ b/src/pipelines/actions_based/utils.py @@ -6,12 +6,12 @@ import torch.nn as nn from transformers import BertForTokenClassification, BertTokenizerFast, PretrainedConfig from src.pipelines.actions_based.processing import ( + ACTIONS_KEYS, action_vector, last_stop_label, recover_text, token_labels_to_word_labels, ) -from src.processing import ACTIONS_KEYS def load_model( diff --git a/worker.py b/worker.py index 5bf6e0c88a423ec27cb04f0728b5eeeac71406e6..c7132f543ca808d54075a0193837ae70a458b54a 100755 --- a/worker.py +++ b/worker.py @@ -4,8 +4,7 @@ import configparser import nlp_ws -from src.pipelines.actions_based.processing import apply_actions_punctuation -from src.pipelines.actions_based.utils import load_model +from src.pipelines.actions_based.utils import apply_actions_punctuation, load_model from src.utils import preprocess @@ -16,8 +15,8 @@ class Worker(nlp_ws.NLPWorker): self.config = configparser.ConfigParser() self.config.read("config.ini") - self.threshold = self.config["deployment"]["threshold"] - self.chunk_size = self.config["deployment"]["chunk_size"] + self.threshold = float(self.config["deployment"]["threshold"]) + self.chunk_size = int(self.config["deployment"]["chunk_size"]) self.tokenizer, self.model = load_model( self.config["deployment"]["model"], self.config["deployment"]["base_model"],