Commit 82f23dd1 authored by Bartłomiej Bojanowski

Add XlmRoberta worker

parent af3fc319
Merge requests: !2 Developer, !1 Add XlmRoberta worker
Pipeline #1917 passed
.gitignore
.idea
/idea
log.txt
__pycache__
.gitlab-ci.yml
image: clarinpl/python:3.8

cache:
  paths:
    - .tox

stages:
  - check_style
  - build

before_script:
  - pip install tox==2.9.1

pep8:
  stage: check_style
  script:
    - tox -v -e pep8
Dockerfile
FROM clarinpl/python:3.8
WORKDIR /home/worker

COPY ./main.py .
COPY ./requirements.txt .
COPY ./config.ini .
COPY ./src ./src

RUN apt-get update && apt-get install -y build-essential libffi-dev
RUN pip install --index-url https://pypi.clarin-pl.eu/simple/ -r requirements.txt
RUN pip install torch==1.6.0+cpu torchvision==0.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html

CMD ["python3.8", "main.py"]
config.ini
[service]
tool = xlmroberta
root = /samba/requests/
rabbit_host = 10.17.0.85
rabbit_user = clarin
rabbit_password = clarin123

[tool]
workers_number = 1

[logging]
port = 9981
local_log_level = INFO

[model]
de_sent = {"file": "src/models/de-sent"}
en_sent = {"file": "src/models/en-sent"}
it_sent = {"file": "src/models/it-sent"}
ru_sent = {"file": "src/models/ru-sent"}
zh_sent = {"file": "src/models/zh-sent"}
ja_sent = {"file": "src/models/ja-sent"}
fr_sent = {"file": "src/models/fr-sent"}
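The classifier below looks a model up by concatenating the request language with a task suffix ("_sent" for whole texts, "_sent_sen" for the "sentence" task), so the key names in the [model] section have to follow that pattern. A minimal sketch of that lookup, assuming this config.ini is on disk; the model_key helper is only illustrative:

import configparser
import json

config = configparser.ConfigParser()
config.read("config.ini")

# Every value in [model] is a JSON object holding the model directory.
model_paths = {key: json.loads(value)["file"]
               for key, value in config["model"].items()}


def model_key(lang, task_type):
    # Mirrors the suffix choice made in XlmRobertaClassifier.predict.
    suffix = "_sent_sen" if task_type == "sentence" else "_sent"
    return lang + suffix


print(model_paths[model_key("de", None)])   # src/models/de-sent
# model_key("de", "sentence") would require a de_sent_sen entry,
# which this configuration does not define yet.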
main.py
import nlp_ws

from src.XlmRoberta_Worker import XlmRobertaWorker


if __name__ == '__main__':
    nlp_ws.NLPService.main(XlmRobertaWorker)
src/XlmRoberta_Worker.py
import json
import logging

import nlp_ws
from simpletransformers.classification import ClassificationModel

log = logging.getLogger(__name__)


class XlmRobertaWorker(nlp_ws.NLPWorker):
    @classmethod
    def static_init(cls, config):
        cls.config = config
        log.debug("static_init(%s)", config)

    def init(self):
        log.debug("init()")
        # Each entry in the [model] section of config.ini is a JSON object
        # whose "file" field points at a fine-tuned XLM-RoBERTa model
        # directory; load every one as a CPU-only ClassificationModel.
        models = dict()
        list_models = dict()
        for key in self.config["model"]:
            models[key] = json.loads(self.config["model"][key])
        for key, value in models.items():
            list_models[key] = ClassificationModel("xlmroberta",
                                                   value["file"],
                                                   num_labels=4,
                                                   use_cuda=False)
            log.debug("loaded model %s from %s", key, value["file"])
        self._classifier = XlmRobertaClassifier(list_models)

    def process(self, input_path, task_options, output_path):
        task = task_options.get("type", None)
        # The request file holds the raw text followed by a language marker,
        # e.g. "some text __label__de".
        with open(input_path, "r") as f:
            text = f.read()
        lang = text.split('__label__')[1].strip()
        text = text.split('__label__')[0]
        result = self._classifier.predict(text, lang, task)
        with open(output_path, "w") as f:
            json.dump(result, f, indent=4)
class XlmRobertaClassifier(object):
    def __init__(self, models):
        self.models = models
        self.labels_text = ["__label__meta_amb", "__label__meta_minus_m",
                            "__label__meta_plus_m", "__label__meta_zero"]
        self.labels_sen = ["__label__z_amb", "__label__z_minus_m",
                           "__label__z_plus_m", "__label__z_zero"]

    def predict(self, ccl, lang, task_options):
        # "sentence" requests use the sentence-level models and labels,
        # everything else falls back to the text-level ones.
        if task_options == "sentence":
            task = "_sent_sen"
            labels = self.labels_sen
        else:
            task = "_sent"
            labels = self.labels_text
        model = self.models[lang + task]
        decision, raw = model.predict([ccl])
        log.debug("raw outputs %s for labels %s", raw, labels)
        # Cast the raw outputs to plain floats so the result stays
        # JSON-serialisable.
        result = dict(zip(labels, (float(score) for score in raw[0])))
        result['decision'] = labels[decision[0]]
        result['lang'] = lang
        return result
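For reference, a sketch of the request contract implied by process(): the input file holds the raw text followed by a __label__<lang> marker, and the output file receives a JSON object with one raw score per label plus the chosen label and the language. The file name and scores here are illustrative, not taken from the repository:

# Write a request file in the format the worker expects (illustrative name).
with open("request.txt", "w") as f:
    f.write("The hotel was excellent. __label__en")

# After processing, the output file holds a JSON object of this shape
# (scores are made up):
# {
#     "__label__meta_amb": -1.2,
#     "__label__meta_minus_m": -0.8,
#     "__label__meta_plus_m": 3.4,
#     "__label__meta_zero": -0.5,
#     "decision": "__label__meta_plus_m",
#     "lang": "en"
# }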
tox.ini 0 → 100644
[tox]
envlist = pep8
skipsdist = True

[testenv:pep8]
deps =
    flake8
basepython = python3.8
commands =
    flake8 {posargs}