Commit d9582cc0 authored by Tomasz Walkowiak

Initial commit

parents
# Build stage: supplies the prebuilt corpus2, wccl and corpus2mwe trees under /install.
# NOTE(review): clarinpl/builder carries no tag, so it resolves to :latest —
# pin a tag or digest once a known-good one is identified.
FROM clarinpl/builder AS builder

# Runtime stage.
FROM clarinpl/python:2.7

# System libraries for the copied artifacts; libxml2-utils ships the xmllint
# binary that mwe_worker.py invokes. Recommended packages are skipped and the
# apt index is removed in the same layer so neither ends up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libboost-all-dev \
        libffi-dev \
        libicu-dev \
        libloki-dev \
        libssl-dev \
        libxml++2.6-dev \
        libxml2-utils \
    && rm -rf /var/lib/apt/lists/*

# Overlay the artifacts produced by the builder stage onto the root filesystem.
COPY --from=builder /install/corpus2 /
COPY --from=builder /install/wccl /
COPY --from=builder /install/corpus2mwe /

ENV PYTHONPATH=/usr/local/lib/python2.7/dist-packages:$PYTHONPATH

# Refresh the dynamic linker cache so the freshly copied shared libraries resolve.
RUN ldconfig

# Python dependencies; --no-cache-dir keeps pip's download cache out of the layer.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Directory that docker-compose uses as working_dir and bind-mount target.
# NOTE(review): no USER is set, so the container runs as root — confirm the
# permissions needed on the mounted /samba share before switching to a
# non-root user.
RUN mkdir /home/worker
# Worker configuration; docker-compose bind-mounts this file to
# /home/worker/config.ini.
[service]
tool = mwe
root = /samba/requests/
rabbit_host = rabbit.clarin.ws
rabbit_user = clarin
# NOTE(review): plaintext credential committed to version control — consider
# injecting it at deploy time and rotating this value.
rabbit_password = clarin123
[tool]
workers_number = 5
[logging]
port = 0
local_log_level = INFO
[logging_levels]
__main__ = INFO
# NOTE(review): "wsd_worker" looks like a leftover from another service (the
# script here is mwe_worker.py) — confirm the intended logger name.
wsd_worker = INFO
version: '3'

services:
  mwe:
    container_name: clarin_mwe
    # Image is built from the Dockerfile in this directory.
    build: ./
    working_dir: /home/worker
    # Runs the bind-mounted worker script with the Python 2 interpreter.
    entrypoint: ["python2", "mwe_worker.py"]
    volumes:
      # Shared request storage (config.ini points its root at /samba/requests/).
      - /samba:/samba
      # Configuration and worker source are mounted from the host, not baked into the image.
      - ./config.ini:/home/worker/config.ini
      - ./mwe_worker.py:/home/worker/mwe_worker.py
    restart: always
\ No newline at end of file
#!/usr/bin/python
# -*- coding: utf-8 -*-
import nlp_ws
from corpus_ccl import cclutils as ccl
import corpus2mwe as mwe
import subprocess
import logging
_log = logging.getLogger(__name__)
class MWEWorker(nlp_ws.NLPWorker):
    """Worker that passes a CCL file through corpus2mwe's ``CclMWEReader``.

    Each request validates the input XML, reads it with a reader that is
    created on first use and reused afterwards, and writes the resulting
    document to the output path.
    """

    @classmethod
    def static_init(cls, config):
        """Log that static initialisation ran; ``config`` is not used here."""
        _log.info("Static init")

    def init(self):
        """Load the ``nkjp`` tagset and leave the reader unset until first use."""
        _log.info("Worker started loading models %s", "AS")
        self.tagset = ccl.get_tagset('nkjp')
        self.reader = None

    def process(self, inputFile, taskOptions, outputFile):
        """Validate ``inputFile``, read it, and write the result to ``outputFile``.

        ``taskOptions`` is accepted but not consulted. Raises whatever
        ``_validate_xml`` raises when the input is rejected by xmllint.
        """
        _validate_xml(inputFile)

        if self.reader:
            # Reader already exists: just point it at the new input file.
            self.reader.set_files(inputFile)
        else:
            # First request: build the reader and switch annotations off.
            self.reader = mwe.CclMWEReader(inputFile, self.tagset)
            self.reader.use_annotations(False)

        document = self.reader.read()
        ccl.write_ccl(document, str(outputFile))
class _InvalidXMLInRequest(Exception):
    """Raised when xmllint rejects the XML file supplied with a request."""
    pass


def _validate_xml(xmlfile):
    """
    Check ``xmlfile`` with ``xmllint --nonet --noout``.

    Does nothing if xmllint exits with status 0. Otherwise raises
    ``_InvalidXMLInRequest``; the message starts with "Wrong XML in input
    data" and, when xmllint wrote anything to stderr, is followed by that
    diagnostic text so the failure can be traced from logs.
    """
    lint_call = subprocess.Popen(
        ('xmllint', '--nonet', '--noout', xmlfile),
        stderr=subprocess.PIPE,
    )
    xml_err = lint_call.communicate()[1]
    if lint_call.returncode == 0:
        return

    details = xml_err or ''
    # Python 3 hands back bytes; Python 2 already yields a native str, which
    # is kept as-is so str(exception) cannot hit an implicit ASCII encode.
    if not isinstance(details, str):
        details = details.decode('utf-8', 'replace')
    details = details.strip()

    message = "Wrong XML in input data"
    if details:
        message = "%s: %s" % (message, details)
    raise _InvalidXMLInRequest(message)
if __name__ == "__main__":
    # Script entry point: hand the worker class to the nlp_ws service runner.
    nlp_ws.NLPService.main(MWEWorker)
# NOTE(review): both dependencies are unpinned, so each image build may pull
# different versions — pin them (name==version) once known-good versions are
# confirmed.
corpus-ccl
nlp-ws
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment