From 0daab175246a1b36cbde870e47429456d28f3f8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bart=C5=82omiej=20Bojanowski?=
 <bartlomiej.bojanowski@pwr.edu.pl>
Date: Wed, 30 Sep 2020 13:25:37 +0200
Subject: [PATCH] Add MultifitWorker

---
 .gitignore             |  4 ++++
 .gitlab-ci.yml         | 17 ++++++++++++++
 main.py                |  6 +++++
 src/Multifit_worker.py | 53 ++++++++++++++++++++++++++++++++++++++++++
 tox.ini                | 10 ++++++++
 5 files changed, 90 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .gitlab-ci.yml
 create mode 100644 main.py
 create mode 100644 src/Multifit_worker.py
 create mode 100644 tox.ini

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1cf6708
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+.idea
+# NOTE(review): '/idea' only matches an entry named 'idea' in the repo
+# root; confirm it is not a typo of '.idea'.
+/idea
+log.txt
+__pycache__
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000..796f62b
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,17 @@
+image: clarinpl/python:3.8
+
+cache:
+  paths:
+    - .tox  # keep tox environments between pipeline runs
+
+stages:
+  - check_style
+  - build  # NOTE(review): no job in this file uses this stage
+
+before_script:
+  - pip install tox==2.9.1  # runs before every job's script
+
+pep8:
+  stage: check_style
+  script:
+    - tox -v -e pep8  # runs the pep8 env from tox.ini (flake8)
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..2e3d817
--- /dev/null
+++ b/main.py
@@ -0,0 +1,6 @@
+import nlp_ws
+from src.Multifit_worker import MultifitWorker
+
+
+if __name__ == '__main__':
+    nlp_ws.NLPService.main(MultifitWorker)  # pass the worker class to nlp_ws
diff --git a/src/Multifit_worker.py b/src/Multifit_worker.py
new file mode 100644
index 0000000..f486174
--- /dev/null
+++ b/src/Multifit_worker.py
@@ -0,0 +1,105 @@
+"""nlp_ws worker that serves fastai text classifiers."""
+import json
+import logging
+import re
+
+import nlp_ws
+from fastai.text import load_learner
+from fastai.torch_core import to_np
+
+log = logging.getLogger(__name__)
+
+# Separates the text from the language code in the input file.
+LANG_MARKER = '__label__'
+# The language code becomes part of a model file name, so only allow
+# characters that cannot form a path.
+LANG_PATTERN = re.compile(r'[A-Za-z0-9_-]+')
+
+
+class MultifitWorker(nlp_ws.NLPWorker):
+    """Worker that classifies a text file and stores the scores as JSON."""
+
+    @classmethod
+    def static_init(cls, config):
+        """Store the service configuration on the class.
+
+        :param config: configuration object passed in by the caller.
+        """
+        cls.config = config
+        log.debug("static_init(%s)", config)
+
+    def init(self):
+        """Create the classifier used by this worker instance."""
+        log.debug("init()")
+        self._classifier = MultifitClassifier()
+
+    def process(self, input_path, task_options, output_path):
+        """Classify the text in input_path and write JSON to output_path.
+
+        The input file must contain ``<text>__label__<lang>``.
+
+        :param input_path: path of the UTF-8 text file to classify.
+        :param task_options: mapping; its ``"type"`` value is passed to
+            MultifitClassifier.predict as the task.
+        :param output_path: path of the JSON file to write; it receives
+            a list of ``[probability, label]`` pairs.
+        :raises ValueError: if the input has no ``__label__`` marker.
+        """
+        task = (task_options or {}).get("type")
+        with open(input_path, "r", encoding="utf-8") as f:
+            parts = f.read().split(LANG_MARKER)
+        if len(parts) < 2:
+            raise ValueError(
+                "%s: missing '%s<lang>' suffix" % (input_path, LANG_MARKER))
+        # Strip so a trailing newline does not end up in the model file name.
+        text, lang = parts[0], parts[1].strip()
+        result = self._classifier.predict(text, lang=lang, task_options=task)
+        log.debug("result: %s", result)
+        with open(output_path, "w", encoding="utf-8") as f:
+            json.dump(result, f, indent=4)
+
+
+class MultifitClassifier(object):
+    """Runs exported fastai learners stored in the ``models`` directory."""
+
+    def __init__(self):
+        """Set up the label lists and an empty cache of loaded learners."""
+        # NOTE(review): the last label of each list starts with three
+        # underscores, unlike the others; kept as-is, confirm it is intended.
+        self.labels_text = ["__label__meta_amb", "__label__meta_minus_m",
+                            "__label__meta_plus_m", "___label__meta_zero"]
+        self.labels_sen = ["__label__z_amb", "__label__z_minus_m",
+                           "__label__z_plus_m", "___label__z_zero"]
+        # Loaded learners keyed by model file name, so each model is read
+        # from disk only once per classifier instance.
+        self._learners = {}
+
+    def predict(self, ccl, lang=None, task_options=None):
+        """Return the class probabilities for a text.
+
+        :param ccl: text to classify (converted with ``str``).
+        :param lang: language code used as the model file name prefix.
+        :param task_options: ``"sentence"`` selects ``<lang>-sent-sen.pkl``
+            and the sentence labels; any other value selects
+            ``<lang>-sent.pkl`` and the text labels.
+        :return: list of ``(probability, label)`` tuples, each probability
+            formatted as a string.
+        :raises ValueError: if lang is empty or contains other characters
+            than letters, digits, ``_`` and ``-``.
+        """
+        if not lang or not LANG_PATTERN.fullmatch(lang):
+            raise ValueError("invalid language code: %r" % (lang,))
+        if task_options == "sentence":
+            path = lang + "-sent-sen.pkl"
+            labels = self.labels_sen
+        else:
+            path = lang + "-sent.pkl"
+            labels = self.labels_text
+        learner = self._learners.get(path)
+        if learner is None:
+            learner = self._learners[path] = load_learner("models", path)
+        results = learner.predict("xxbos " + str(ccl))
+        probabilities = [str(x) for x in to_np(results[2])]
+        # Return a list rather than a lazy zip so that the result can be
+        # logged, iterated more than once and serialized with json.dump.
+        return list(zip(probabilities, labels))
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..7e68f0e
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,13 @@
+[tox]
+envlist = pep8
+# Skip building an sdist of the project before running the environments.
+skipsdist = True
+
+# Style check environment, invoked as `tox -e pep8`.
+[testenv:pep8]
+deps =
+    flake8
+basepython = python3.8
+# {posargs} forwards extra command-line arguments to flake8.
+commands =
+    flake8 {posargs}
-- 
GitLab