Commit cb57fc45 authored by Tomasz Walkowiak's avatar Tomasz Walkowiak

Merge branch 'mewex_to_rancher' into 'master'

Mewex to rancher

See merge request !1
parents eabb2901 7b6ee1b6
Pipeline #1767 passed with stages
in 16 minutes and 15 seconds
# CI pipeline: style checks run first, then the Docker image is built and
# pushed (master only).
image: clarinpl/python:3.6

# Keep the tox environments between pipeline runs.
cache:
  paths:
    - .tox

stages:
  - check_style
  - build

# Default setup for every job that does not define its own before_script.
before_script:
  - pip install tox==2.9.1

pep8:
  stage: check_style
  script:
    - tox -v -e pep8

docstyle:
  stage: check_style
  script:
    - tox -v -e docstyle

build_image:
  stage: build
  image: 'docker:18.09.7'
  only:
    - master
  services:
    - 'docker:18.09.7-dind'
  # Overrides the global before_script so tox is not installed in this job.
  before_script:
    - ''
  script:
    - docker build -t clarinpl/mewex .
    # Feed the password to docker login over stdin directly: it is never
    # written to the job workspace, and quoting keeps the shell from
    # word-splitting or glob-expanding the credential values.
    - echo "$DOCKER_PASSWORD" | docker login --username "$DOCKER_USERNAME" --password-stdin
    - docker push clarinpl/mewex
FROM ubuntu:xenial
# Install prerequisites
RUN apt-get update && apt-get -y upgrade
RUN apt-get -y install unzip software-properties-common && \
add-apt-repository ppa:jonathonf/python-3.6 && \
apt-get update && \
apt-get -y install libxml2-dev libxslt-dev build-essential python3.6 python3.6-dev python3-pip python3.6-venv python3-setuptools && \
apt-get -y install libboost-all-dev libicu-dev git-core wget cmake libantlr-dev libloki-dev python-dev swig libsigc++-2.0-dev libglibmm-2.4-dev libxml++2.6-dev && \
python3.6 -m pip install pip --upgrade && \
python3.6 -m pip install --no-cache-dir Cython
#newest cmake
RUN wget https://cmake.org/files/v3.12/cmake-3.12.0-Linux-x86_64.sh
RUN sh cmake-3.12.0-Linux-x86_64.sh --prefix=/usr/local --exclude-subdir
#download tools
RUN mkdir /build
WORKDIR /build
RUN git clone http://nlp.pwr.wroc.pl/corpus2.git
RUN git clone http://nlp.pwr.edu.pl/wccl.git
RUN wget -O morfeusz2-2.0.0-Linux-amd64.deb https://nextcloud.clarin-pl.eu/index.php/s/VVIvx4w20azcWbp/download
RUN dpkg -i morfeusz2-2.0.0-Linux-amd64.deb
# corpus2
RUN cd corpus2
RUN mkdir bin
WORKDIR /build/corpus2/bin
RUN cmake ..
RUN make -j
RUN make -j
RUN make install
RUN ldconfig
# wccl
RUN mkdir /build/wccl/bin
WORKDIR /build/wccl/bin
RUN cmake ..
RUN make -j
RUN make -j
RUN make install
RUN ldconfig
FROM clarinpl/python:3.6
RUN apt-get update && apt-get install -y --no-install-recommends \
software-properties-common \
build-essential \
pwrutils \
corpus2-python3.6 \
wccl-python3.6 \
python3.6 \
python3.6-dev \
default-jdk \
libicu-dev \
libboost1.58-all-dev
RUN wget -O morf 'https://nextcloud.clarin-pl.eu/index.php/s/VVIvx4w20azcWbp/download' && \
dpkg -i ./morf
# Install Java
RUN apt-get update && \
......@@ -48,59 +22,39 @@ RUN apt-get update && \
apt-get -y install openjdk-8-jre && \
apt-get -y install locales locales-all
# Set locales
RUN sed -i 's/# pl_PL.UTF-8 UTF-8/pl_PL.UTF-8 UTF-8/' /etc/locale.gen
RUN locale-gen pl_PL.UTF-8
ENV LANG pl_PL.UTF-8
ENV LANGUAGE pl_PL
ENV LC_ALL pl_PL.UTF-8
RUN dpkg-reconfigure --frontend noninteractive locales
# Set environment
ENV JAVA_HOME /opt/jdk
ENV PATH ${PATH}:${JAVA_HOME}/bin
WORKDIR /home/worker
COPY ./requirements.txt .
RUN python3.6 -m pip install -r requirements.txt
# installing polem
RUN cd .
RUN rm /usr/bin/python3
RUN ln -s /usr/bin/python3.6 /usr/bin/python3
RUN alias python3='/usr/bin/python3.6'
RUN apt-get -y install default-jdk
WORKDIR /build/
RUN git clone https://github.com/gkubon/Polem
RUN mkdir -p /build/Polem/build
COPY . /build/Polem
RUN rm /build/Polem/build -rf
WORKDIR /build/Polem/build
RUN cmake ..
RUN make -j
RUN make install
RUN ldconfig
RUN mkdir /data
RUN mv /build/Polem/corpus /data
WORKDIR /app
#installing nlp_ws
RUN pip3 install --extra-index-url https://pypi.clarin-pl.eu/ nlp_ws
# Download lxml
RUN python3.6 -m pip install lxml
RUN python3.6 -m pip install configparser
RUN pip3 install ujson
RUN git clone https://github.com/gkubon/Polem &&\
mkdir Polem/build &&\
cd Polem/build &&\
cmake .. &&\
make &&\
make install
#install mewex
WORKDIR /build
RUN git clone https://github.com/MGniew/MeWeX.git
RUN cd MeWeX/mwextractor/mwextractor && \
WORKDIR /build/
RUN git clone https://gitlab.clarin-pl.eu/team-semantics/mewex && \
cd mewex/mwextractor/mwextractor && \
mkdir build && \
cd build && \
cmake .. && \
make install && \
ldconfig
RUN cd MeWeX/mewexlib/ && \
RUN cd mewex && \
mkdir mewexlib/mewexlib/data/relations/ontology &&\
mv mewexlib/mewexlib/data/relations/*.ccl mewexlib/mewexlib/data/relations/ontology
RUN cd mewex/mewexlib/ && \
python3.6 setup.py install
WORKDIR /home/worker
COPY ./src ./src
COPY ./main.py .
CMD ["python3.6", "main.py", "service"]
......@@ -2,9 +2,10 @@
tool = mewex1
root = /samba/requests/
rabbit_host = 10.17.0.85
rabbit_user = clarin
rabbit_password = clarin123
rabbit_host = rabbitmq
rabbit_user = test
rabbit_password = test
queue_prefix =nlp_
[tool]
workers_number = 4
......
# Compose definition for the mewex worker. The request directory, the
# configuration file and the Python sources are bind-mounted from the
# directory that holds this file.
version: '3'
services:
  mewex:
    build: ./
    container_name: clarin_mewex
    restart: always
    working_dir: /home/worker
    # Same command line as the three-item list form: python3.6 main.py service
    entrypoint: ["python3.6", "main.py", "service"]
    volumes:
      - ./samba:/samba
      - ./config.ini:/home/worker/config.ini
      - ./src:/home/worker/src
      - ./main.py:/home/worker/main.py
\ No newline at end of file
"""Implementation of tfidf worker."""
import nlp_ws
import argparse
from src.mewex import MewexWorker
def get_args():
    """Parse the command line and return the resulting namespace.

    Exactly one sub-command must be given; its name is stored in the
    ``mode`` attribute of the returned namespace. The only sub-command
    defined here is ``service``.
    """
    cli = argparse.ArgumentParser(description="mewex")
    modes = cli.add_subparsers(dest="mode")
    # Setting the attribute makes the sub-command mandatory.
    modes.required = True
    modes.add_parser("service", help="Run as a service")
    return cli.parse_args()
def main():
    """Start the worker in the mode selected on the command line.

    ``service`` hands MewexWorker to ``nlp_ws.NLPService.main``; any other
    mode value does nothing.
    """
    mode = get_args().mode
    if mode == "service":
        nlp_ws.NLPService.main(MewexWorker)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
#!/usr/bin/python3
"""Implementation of MeWex Worker."""
# !/usr/bin/python3
import os
import re
import io
......@@ -6,14 +7,18 @@ import io
import mewexlib as mwl
import WrapLem
from nlp_ws import NLPWorker, NLPService
from nlp_ws import NLPWorker
class MewexWorker(NLPWorker):
"""Implements mewex worker."""
def init(self):
"""Initialize worker."""
self._lemmatizer = WrapLem.CascadeLemmatizer.assembleLemmatizer()
def process(self, input_path, task_options, output_path):
"""Running nlp process."""
args = _parse_mewex_options(task_options.get('mewex_options') or {})
args['input_files'] = (
[os.path.join(input_path, f) for f in os.listdir(input_path)]
......@@ -22,13 +27,16 @@ class MewexWorker(NLPWorker):
)
if not os.path.exists(output_path):
os.makedirs(output_path)
args['output_file'] = output_path+"/mewex.csv"
args['output_file'] = output_path + "/mewex.csv"
mwl.call_mewex(**args)
self.lemmatize(output_path+"/mewex.csv",output_path+"/mewexlemmatized.csv")
self.cut_lines(output_path+"/mewexlemmatized.csv",output_path+"/mewexshort.csv",1000)
self.lemmatize(output_path + "/mewex.csv",
output_path + "/mewexlemmatized.csv")
self.cut_lines(output_path + "/mewexlemmatized.csv",
output_path + "/mewexshort.csv", 1000)
def cut_lines(self,inf,outf,lines):
def cut_lines(self, inf, outf, lines):
"""."""
f = open(inf, "r")
copy = open(outf, "w")
n = 0
......@@ -41,10 +49,13 @@ class MewexWorker(NLPWorker):
copy.close()
def lemmatize(self, inf, outf):
"""."""
input_file = io.open(inf, "r", encoding="utf-8")
output_file = open(outf, "w")
next(input_file); next(input_file) # First two rows are header rows, so just skip them
output_file.write("Rank\tQuantity\tRealtion\tBase form\tLemmatized form\tAll forms\n")
next(input_file)
next(input_file) # First two rows are header rows, so just skip them
output_file.write(
"Rank\tQuantity\tRealtion\tBase form\tLemmatized form\tAll forms\n")
orthreg = re.compile(r'[0-9]+:([^(]+)\(([^)]+)\).*')
basereg = re.compile(r'[^:]+:([^ ]+)')
for line in input_file:
......@@ -55,7 +66,7 @@ class MewexWorker(NLPWorker):
orth = orthtuple[0][0].strip()
tag = orthtuple[0][1]
result = self._lemmatizer.lemmatizeS(orth, base, tag, False)
splited.insert(4,result)
splited.insert(4, result)
output_file.write('\t'.join(splited) + '\n')
input_file.close()
output_file.close()
......@@ -80,7 +91,3 @@ def _parse_mewex_options(opts):
args[key] = val
return args
if __name__ == '__main__':
NLPService.main(MewexWorker, pause_at_exit=True)
# Lint-only tox setup: nothing is packaged (skipsdist), the two environments
# just run flake8 and pydocstyle under Python 3.6.
[tox]
envlist = pep8,docstyle
skipsdist = True

[testenv:pep8]
basepython = python3.6
deps = flake8
commands = flake8 {posargs}

[testenv:docstyle]
basepython = python3.6
deps = pydocstyle
commands = pydocstyle --verbose {posargs}

[flake8]
# W504 skipped because it is overeager and unnecessary
ignore = W504
show-source = True
exclude = .git,.venv,.tox,dist,doc,*egg,build,venv
import-order-style = pep8
max-line-length = 80

[pydocstyle]
# Ignored checks:
# D104 Missing docstring in public package
# D203 1 blank line required before class docstring
# D213 Multi-line docstring summary should start at the second line
# D214 Section is over-indented
# D215 Section underline is over-indented
# D401 First line should be in imperative mood; try rephrasing
# D405 Section name should be properly capitalized
# D406 Section name should end with a newline
# D407 Missing dashed underline after section
# D408 Section underline should be in the line following the section's name
# D409 Section underline should match the length of its name
# D410 Missing blank line after section
# D411 Missing blank line before section
ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411
match-dir = ^(?!\.tox|venv).*
match = ^(?!setup).*\.py
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment