From 8993b9cd8641b15471bd77ece8a693733cc6611a Mon Sep 17 00:00:00 2001
From: Michal Pogoda <michalpogoda@hotmail.com>
Date: Thu, 3 Sep 2020 13:26:04 +0200
Subject: [PATCH 1/4] Better support for CUDA devices

---
 src/models/actions_model_base.py       | 18 ++++++++++++------
 src/models/actions_model_mixed.py      | 18 ++++++++++--------
 src/models/actions_model_restricted.py | 19 +++++++++++++------
 src/models/model_factory.py            |  3 +--
 src/utils.py                           | 15 +++++++++++++++
 worker.py                              |  9 ++++++++-
 6 files changed, 59 insertions(+), 23 deletions(-)

diff --git a/src/models/actions_model_base.py b/src/models/actions_model_base.py
index d503f08..339b981 100644
--- a/src/models/actions_model_base.py
+++ b/src/models/actions_model_base.py
@@ -20,7 +20,13 @@ from src.pipelines.actions_based.processing import (
     token_labels_to_word_labels,
 )
 from src.pipelines.actions_based.utils import max_suppression
-from src.utils import pickle_read, pickle_save, prepare_folder, yaml_serializable
+from src.utils import (
+    get_device,
+    pickle_read,
+    pickle_save,
+    prepare_folder,
+    yaml_serializable,
+)
 
 
 @dataclass
@@ -111,8 +117,10 @@ class ActionsModelBase(ActionsModel):
     def predict(self, text: str) -> str:
         text = text.strip()
 
+        device = get_device(self)
+
         tokenizer = self.tokenizer()
-        tokens = tokenizer(text, return_tensors="pt")["input_ids"]
+        tokens = tokenizer(text, return_tensors="pt")["input_ids"].to(device)
         output = None
 
         index_start = 0
@@ -120,12 +128,10 @@ class ActionsModelBase(ActionsModel):
             index_end = min(index_start + self.runtime.chunksize, len(tokens[0]))
 
             tokens_chunk = tokens[:, index_start:index_end]
+            attention_mask = torch.ones_like(tokens_chunk).to(device)
 
             actions = (
-                self.predict_raw(tokens_chunk, torch.ones_like(tokens_chunk))
-                .detach()
-                .cpu()
-                .numpy()
+                self.predict_raw(tokens_chunk, attention_mask).detach().cpu().numpy()
             )
             actions_suppresed = max_suppression(actions, self.runtime.threshold)[0]
 
diff --git a/src/models/actions_model_mixed.py b/src/models/actions_model_mixed.py
index e8f9a50..e09c0fa 100644
--- a/src/models/actions_model_mixed.py
+++ b/src/models/actions_model_mixed.py
@@ -16,7 +16,13 @@ from src.pipelines.actions_based.processing import (
     recover_text,
     token_labels_to_word_labels,
 )
-from src.utils import pickle_read, pickle_save, prepare_folder, yaml_serializable
+from src.utils import (
+    get_device,
+    pickle_read,
+    pickle_save,
+    prepare_folder,
+    yaml_serializable,
+)
 
 
 @dataclass
@@ -83,7 +89,6 @@ class ActionsModelMixed(PunctuationModel):
         self._tokenizer = None
 
         self.num_labels = params.num_labels
-        self.device = "cpu"
 
         # Word embedder
         self.word_embedding = nn.Embedding(params.vocab_size, params.embedding_size)
@@ -160,11 +165,6 @@ class ActionsModelMixed(PunctuationModel):
 
         return self.to_labels(z)
 
-    def to(self, device):
-        self.device = device
-
-        super(ActionsModelMixed, self).to(device)
-
     def tokenizer(self) -> BertTokenizerFast:
         if self._tokenizer is None:
             self._tokenizer = BertTokenizerFast.from_pretrained(
@@ -173,12 +173,14 @@ class ActionsModelMixed(PunctuationModel):
         return self._tokenizer
 
     def predict(self, text: str) -> str:
+        # TODO: Optimize for speed
+
         inputs = [action_vector(["upper_case"])]
 
         tokenizer = self.tokenizer()
         text_tokenized = tokenizer(text, return_tensors="pt")
 
-        target_device = self.device
+        target_device = get_device(self)
 
         max_cond_len = self.runtime.max_cond_len
         if max_cond_len is None:
diff --git a/src/models/actions_model_restricted.py b/src/models/actions_model_restricted.py
index 9239e66..eb7f859 100644
--- a/src/models/actions_model_restricted.py
+++ b/src/models/actions_model_restricted.py
@@ -19,7 +19,13 @@ from src.pipelines.actions_based.processing import (
     token_labels_to_word_labels,
 )
 from src.pipelines.actions_based.utils import max_suppression
-from src.utils import pickle_read, pickle_save, prepare_folder, yaml_serializable
+from src.utils import (
+    get_device,
+    pickle_read,
+    pickle_save,
+    prepare_folder,
+    yaml_serializable,
+)
 
 
 @dataclass
@@ -131,10 +137,12 @@ class ActionsModelRestricted(ActionsModel):
         chunk_size = self.runtime.chunksize
         threshold = self.runtime.threshold
 
+        device = get_device(self)
+
         text = text.strip()
 
         tokenizer = self.tokenizer()
-        tokens = tokenizer(text, return_tensors="pt")["input_ids"]
+        tokens = tokenizer(text, return_tensors="pt")["input_ids"].to(device)
         output = None
 
         index_start = 0
@@ -143,11 +151,10 @@ class ActionsModelRestricted(ActionsModel):
 
             tokens_chunk = tokens[:, index_start:index_end]
 
+            attention_mask = torch.ones_like(tokens_chunk).to(device)
+
             actions = (
-                self.predict_raw(tokens_chunk, torch.ones_like(tokens_chunk))
-                .detach()
-                .cpu()
-                .numpy()
+                self.predict_raw(tokens_chunk, attention_mask).detach().cpu().numpy()
             )
             actions_suppresed = max_suppression(actions, threshold)[0]
 
diff --git a/src/models/model_factory.py b/src/models/model_factory.py
index 3d4abcc..5e4a9fc 100644
--- a/src/models/model_factory.py
+++ b/src/models/model_factory.py
@@ -1,7 +1,6 @@
+from src.models.actions_model_base import ActionsModelBase
 from src.models.actions_model_mixed import ActionsModelMixed
 from src.models.actions_model_restricted import ActionsModelRestricted
-from src.models.actions_model_base import ActionsModelBase
-
 
 MODELS_MAP = {
     "actions_base": ActionsModelBase,
diff --git a/src/utils.py b/src/utils.py
index 90a69f5..41c0be0 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -14,6 +14,7 @@ import pandas as pd
 import torch
 import torch.nn as nn
 import yaml
+from torch import mode
 from torch.optim import Optimizer
 
 from src.batch_loading import get_batches, get_ordered_dataframe_len
@@ -604,3 +605,17 @@ def yaml_serializable(cls):
     setattr(cls, "load_yaml", load_yaml)
 
     return cls
+
+
+def get_device(model: nn.Module) -> torch.device:
+    """Get the device on which the module resides. Meaningful only if
+    all parameters reside on a single device.
+
+    Args:
+        model (nn.Module): Module to check
+
+    Returns:
+        torch.device: Device of the first parameter yielded by the module
+    """
+    # next() has no default, so a parameterless module raises StopIteration
+    return next(model.parameters()).device
diff --git a/worker.py b/worker.py
index 92d4fe0..98e5a75 100755
--- a/worker.py
+++ b/worker.py
@@ -1,10 +1,12 @@
 #!/usr/bin/python
 
 import configparser
+import logging
 from src.models.model_factory import MODELS_MAP
 from typing import List
 
 import nlp_ws
+import torch
 
 from src.utils import input_preprocess, output_preprocess
 
@@ -16,10 +18,12 @@ class Worker(nlp_ws.NLPWorker):
         self.config = configparser.ConfigParser()
         self.config.read("config.ini")
 
-        self.device = self.config["deployment"]["device"]
+        self.device = torch.device(self.config["deployment"]["device"])
         self.models_dir = self.config["deployment"]["models_dir"]
         self.models = {}
 
+        self._log = logging.getLogger(__name__)
+
         models_enabled = self.config["deployment"]["models_enabled"]
         models_enabled = models_enabled.split(",")
 
@@ -51,6 +55,9 @@ class Worker(nlp_ws.NLPWorker):
         with open(output_file, "w") as f:
             f.write(result)
 
+        if self.device.type != "cpu":
+            torch.cuda.empty_cache()
+
 
 if __name__ == "__main__":
     nlp_ws.NLPService.main(Worker)
-- 
GitLab


From a417070628d36a017130edb5c2c4383378c2aa8f Mon Sep 17 00:00:00 2001
From: Michal Pogoda <michalpogoda@hotmail.com>
Date: Thu, 3 Sep 2020 14:02:08 +0200
Subject: [PATCH 2/4] Use torch 1.4.0 with cuda 10.0 support

---
 requirements.txt | 3 ++-
 src/utils.py     | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 4a63c40..068c99e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 --index-url https://pypi.clarin-pl.eu/simple/
+--find-links https://download.pytorch.org/whl/torch_stable.html
 attrs==19.3.0
 bokeh==2.1.1
 certifi==2020.6.20
@@ -50,7 +51,7 @@ tblib==1.7.0
 tokenizers==0.8.1rc1
 toml==0.10.1
 toolz==0.10.0
-torch==1.5.1
+torch==1.4.0+cu100
 tornado==6.0.4
 tqdm==4.48.2
 transformers==3.0.2
diff --git a/src/utils.py b/src/utils.py
index 41c0be0..0bb292f 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -14,7 +14,6 @@ import pandas as pd
 import torch
 import torch.nn as nn
 import yaml
-from torch import mode
 from torch.optim import Optimizer
 
 from src.batch_loading import get_batches, get_ordered_dataframe_len
-- 
GitLab


From 7d71f4283a60997c7324d31e89b3a482c81b1200 Mon Sep 17 00:00:00 2001
From: Michal Pogoda <michalpogoda@hotmail.com>
Date: Thu, 3 Sep 2020 14:06:59 +0200
Subject: [PATCH 3/4] Update readme

---
 README.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/README.md b/README.md
index e8f9b06..3eb10aa 100644
--- a/README.md
+++ b/README.md
@@ -46,5 +46,19 @@ Eg. if you place your model named "production" at `punctuator/checkpoints/action
 python3 punctuate.py -a mixed -d /deploy/actions_mixed -i test_data/text.txt -m production -dv cuda:0
 ```
 
+## Config
+```ini
+[deployment]
+device = cpu ; Device on which inference will be run (e.g. cpu, cuda:0, etc.)
+models_dir = deploy ; Relative path to the directory where models will be placed
+models_enabled = actions_base,actions_mixed,actions_restricted ; Which models are available
+```
+
+## LPMN
+```
+filedir(/users/michal.pogoda)|any2txt|punctuator_test({"model":"model_name"})
+```
+where model_name is one of models specified in models_enabled. If no model is provided or requested model is unavailable, actions_base will be used.
+
 ## Mountpoints
 Directory where the model will be downloaded (~500Mb) needs to be mounted at /punctuator/deploy
-- 
GitLab


From 51c49ff5f0bb7ad35dca391ee7bc56b7c5a98289 Mon Sep 17 00:00:00 2001
From: Michal Pogoda <michalpogoda@hotmail.com>
Date: Thu, 3 Sep 2020 14:08:19 +0200
Subject: [PATCH 4/4] Added simple LPMN example

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index 3eb10aa..ffead89 100644
--- a/README.md
+++ b/README.md
@@ -56,6 +56,10 @@ models_enabled = actions_base,actions_mixed,actions_restricted ; which models ar
 
 ## LPMN
 ```
+filedir(/users/michal.pogoda)|any2txt|punctuator_test
+```
+or
+```
 filedir(/users/michal.pogoda)|any2txt|punctuator_test({"model":"model_name"})
 ```
 where model_name is one of models specified in models_enabled. If no model is provided or requested model is unavailable, actions_base will be used.
-- 
GitLab