Skip to content
Snippets Groups Projects
Commit 7b6ee1b6 authored by Leszek Szymczak's avatar Leszek Szymczak Committed by Tomasz Walkowiak
Browse files

Mewex to rancher

parent eabb2901
No related branches found
No related tags found
No related merge requests found
# CI pipeline: style checks on every push, image build and publish on master.
image: clarinpl/python:3.6

cache:
  paths:
    - .tox

stages:
  - check_style
  - build

# Default for every job that does not override it.
before_script:
  - pip install tox==2.9.1

pep8:
  stage: check_style
  script:
    - tox -v -e pep8

docstyle:
  stage: check_style
  script:
    - tox -v -e docstyle

build_image:
  stage: build
  image: 'docker:18.09.7'
  only:
    - master
  services:
    - 'docker:18.09.7-dind'
  # Job-level before_script replaces the global one, so tox is not installed
  # in this job.
  before_script:
    - ''
  script:
    - docker build -t clarinpl/mewex .
    # Feed the password to docker login through a pipe: it is never written
    # to the workspace, so nothing is left behind when the login fails, and
    # the quotes keep passwords containing spaces or glob characters intact.
    - echo "$DOCKER_PASSWORD" | docker login --username "$DOCKER_USERNAME" --password-stdin
    - docker push clarinpl/mewex
FROM ubuntu:xenial
# Install prerequisites
RUN apt-get update && apt-get -y upgrade
RUN apt-get -y install unzip software-properties-common && \
add-apt-repository ppa:jonathonf/python-3.6 && \
apt-get update && \
apt-get -y install libxml2-dev libxslt-dev build-essential python3.6 python3.6-dev python3-pip python3.6-venv python3-setuptools && \
apt-get -y install libboost-all-dev libicu-dev git-core wget cmake libantlr-dev libloki-dev python-dev swig libsigc++-2.0-dev libglibmm-2.4-dev libxml++2.6-dev && \
python3.6 -m pip install pip --upgrade && \
python3.6 -m pip install --no-cache-dir Cython
#newest cmake
RUN wget https://cmake.org/files/v3.12/cmake-3.12.0-Linux-x86_64.sh
RUN sh cmake-3.12.0-Linux-x86_64.sh --prefix=/usr/local --exclude-subdir
#download tools
RUN mkdir /build
WORKDIR /build
RUN git clone http://nlp.pwr.wroc.pl/corpus2.git
RUN git clone http://nlp.pwr.edu.pl/wccl.git
RUN wget -O morfeusz2-2.0.0-Linux-amd64.deb https://nextcloud.clarin-pl.eu/index.php/s/VVIvx4w20azcWbp/download
RUN dpkg -i morfeusz2-2.0.0-Linux-amd64.deb
# corpus2
RUN cd corpus2
RUN mkdir bin
WORKDIR /build/corpus2/bin
RUN cmake ..
RUN make -j
RUN make -j
RUN make install
RUN ldconfig
# wccl
RUN mkdir /build/wccl/bin
WORKDIR /build/wccl/bin
RUN cmake ..
RUN make -j
RUN make -j
RUN make install
RUN ldconfig
FROM clarinpl/python:3.6
# System packages, one per line and sorted for easier diffs. The apt package
# lists are deleted in the same layer so they do not end up in the image;
# later apt steps must run `apt-get update` themselves.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        corpus2-python3.6 \
        default-jdk \
        libboost1.58-all-dev \
        libicu-dev \
        pwrutils \
        python3.6 \
        python3.6-dev \
        software-properties-common \
        wccl-python3.6 \
    && rm -rf /var/lib/apt/lists/*
# Download the Morfeusz 2 .deb, install it and delete the package file in the
# same layer so the download does not stay in the image.
# NOTE(review): the download is not checksum-verified — consider pinning a
# sha256 once the expected value is known.
RUN wget -O morf 'https://nextcloud.clarin-pl.eu/index.php/s/VVIvx4w20azcWbp/download' && \
    dpkg -i ./morf && \
    rm ./morf
# Install Java
RUN apt-get update && \
......@@ -48,59 +22,39 @@ RUN apt-get update && \
apt-get -y install openjdk-8-jre && \
apt-get -y install locales locales-all
# Set locales: enable the Polish UTF-8 entry in locale.gen and generate it.
RUN sed -i 's/# pl_PL.UTF-8 UTF-8/pl_PL.UTF-8 UTF-8/' /etc/locale.gen && \
    locale-gen pl_PL.UTF-8
# key=value form: the space-separated "ENV key value" syntax is deprecated.
ENV LANG=pl_PL.UTF-8 \
    LANGUAGE=pl_PL \
    LC_ALL=pl_PL.UTF-8
RUN dpkg-reconfigure --frontend noninteractive locales
# Set environment
# NOTE(review): the step that would populate /opt/jdk is not visible here —
# confirm that this path exists in the final image.
ENV JAVA_HOME=/opt/jdk
# Kept as a separate instruction: ${JAVA_HOME} is only expanded in
# instructions that follow the one defining it.
ENV PATH=${PATH}:${JAVA_HOME}/bin
# Install the Python dependencies. Only requirements.txt is copied at this
# point, so this layer stays cached until the dependency list itself changes.
WORKDIR /home/worker
COPY ./requirements.txt .
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python3.6 -m pip install --no-cache-dir -r requirements.txt
# installing polem
RUN cd .
RUN rm /usr/bin/python3
RUN ln -s /usr/bin/python3.6 /usr/bin/python3
RUN alias python3='/usr/bin/python3.6'
RUN apt-get -y install default-jdk
WORKDIR /build/
RUN git clone https://github.com/gkubon/Polem
RUN mkdir -p /build/Polem/build
COPY . /build/Polem
RUN rm /build/Polem/build -rf
WORKDIR /build/Polem/build
RUN cmake ..
RUN make -j
RUN make install
RUN ldconfig
RUN mkdir /data
RUN mv /build/Polem/corpus /data
WORKDIR /app
#installing nlp_ws
RUN pip3 install --extra-index-url https://pypi.clarin-pl.eu/ nlp_ws
# Download lxml
RUN python3.6 -m pip install lxml
RUN python3.6 -m pip install configparser
RUN pip3 install ujson
# Clone Polem into the current working directory, configure it with CMake in
# an out-of-source Polem/build directory, then compile and install it.
# NOTE(review): the clone is not pinned to a tag or commit, so rebuilding the
# image can pick up a different Polem revision.
RUN git clone https://github.com/gkubon/Polem &&\
mkdir Polem/build &&\
cd Polem/build &&\
cmake .. &&\
make &&\
make install
#install mewex
WORKDIR /build
RUN git clone https://github.com/MGniew/MeWeX.git
RUN cd MeWeX/mwextractor/mwextractor && \
# Fetch the mewex sources into /build, configure mwextractor with CMake in a
# fresh build directory and install it; ldconfig then refreshes the dynamic
# linker cache.
# NOTE(review): the clone is not pinned to a tag or commit.
WORKDIR /build/
RUN git clone https://gitlab.clarin-pl.eu/team-semantics/mewex && \
cd mewex/mwextractor/mwextractor && \
mkdir build && \
cd build && \
cmake .. && \
make install && \
ldconfig
RUN cd MeWeX/mewexlib/ && \
# Move the relation definitions (*.ccl) into an "ontology" subdirectory of the
# mewexlib data tree. Paths are relative to the current WORKDIR.
RUN relations=mewex/mewexlib/mewexlib/data/relations && \
    mkdir "${relations}/ontology" && \
    mv "${relations}"/*.ccl "${relations}/ontology"
# Install the mewexlib Python package; setup.py is run from its own directory.
RUN cd mewex/mewexlib/ && \
    python3.6 setup.py install
# Runtime layout: the worker sources live in /home/worker.
WORKDIR /home/worker
COPY ./src ./src
COPY ./main.py .
# Exec form, so Python is started directly rather than through /bin/sh -c.
# "service" is the sub-command defined by main.py.
CMD ["python3.6", "main.py", "service"]
......@@ -2,9 +2,10 @@
tool = mewex1
root = /samba/requests/
rabbit_host = 10.17.0.85
rabbit_user = clarin
rabbit_password = clarin123
rabbit_host = rabbitmq
rabbit_user = test
rabbit_password = test
queue_prefix =nlp_
[tool]
workers_number = 4
......
# Compose definition for the mewex worker. Sources and config are bind-mounted
# over the copies inside the image.
version: "3"

services:
  mewex:
    container_name: clarin_mewex
    build: ./
    working_dir: /home/worker
    entrypoint: ["python3.6", "main.py", "service"]
    volumes:
      - ./samba:/samba
      - ./config.ini:/home/worker/config.ini
      - ./src:/home/worker/src
      - ./main.py:/home/worker/main.py
    restart: always
\ No newline at end of file
main.py 0 → 100644
"""Entry point of the mewex worker."""
import nlp_ws
import argparse
from src.mewex import MewexWorker
def get_args():
    """Parse command line arguments.

    The selected sub-command is stored in the ``mode`` attribute of the
    returned namespace; ``service`` is the only sub-command defined.
    """
    cli = argparse.ArgumentParser(description="mewex")
    modes = cli.add_subparsers(dest="mode")
    # Sub-commands are optional unless marked required; make one mandatory.
    modes.required = True
    modes.add_parser("service", help="Run as a service")
    return cli.parse_args()
def main():
    """Run the mode selected on the command line."""
    mode = get_args().mode
    # "service" is the only mode with an action; any other value is a no-op.
    if mode == "service":
        nlp_ws.NLPService.main(MewexWorker)


if __name__ == "__main__":
    main()
cmake
Cython
nlp_ws
lxml
ujson
configparser
\ No newline at end of file
File added
"""Implementation of MeWex Worker."""
# !/usr/bin/python3
import os
import re
......@@ -6,14 +7,18 @@ import io
import mewexlib as mwl
import WrapLem
from nlp_ws import NLPWorker, NLPService
from nlp_ws import NLPWorker
class MewexWorker(NLPWorker):
"""Implements mewex worker."""
def init(self):
    """Initialize worker."""
    # Build the WrapLem cascade lemmatizer and keep it on the instance.
    self._lemmatizer = WrapLem.CascadeLemmatizer.assembleLemmatizer()
def process(self, input_path, task_options, output_path):
"""Running nlp process."""
args = _parse_mewex_options(task_options.get('mewex_options') or {})
args['input_files'] = (
[os.path.join(input_path, f) for f in os.listdir(input_path)]
......@@ -25,10 +30,13 @@ class MewexWorker(NLPWorker):
args['output_file'] = output_path + "/mewex.csv"
mwl.call_mewex(**args)
self.lemmatize(output_path+"/mewex.csv",output_path+"/mewexlemmatized.csv")
self.cut_lines(output_path+"/mewexlemmatized.csv",output_path+"/mewexshort.csv",1000)
self.lemmatize(output_path + "/mewex.csv",
output_path + "/mewexlemmatized.csv")
self.cut_lines(output_path + "/mewexlemmatized.csv",
output_path + "/mewexshort.csv", 1000)
def cut_lines(self, inf, outf, lines):
"""."""
f = open(inf, "r")
copy = open(outf, "w")
n = 0
......@@ -41,10 +49,13 @@ class MewexWorker(NLPWorker):
copy.close()
def lemmatize(self, inf, outf):
"""."""
input_file = io.open(inf, "r", encoding="utf-8")
output_file = open(outf, "w")
next(input_file); next(input_file) # First two rows are header rows, so just skip them
output_file.write("Rank\tQuantity\tRealtion\tBase form\tLemmatized form\tAll forms\n")
next(input_file)
next(input_file) # First two rows are header rows, so just skip them
output_file.write(
"Rank\tQuantity\tRealtion\tBase form\tLemmatized form\tAll forms\n")
orthreg = re.compile(r'[0-9]+:([^(]+)\(([^)]+)\).*')
basereg = re.compile(r'[^:]+:([^ ]+)')
for line in input_file:
......@@ -80,7 +91,3 @@ def _parse_mewex_options(opts):
args[key] = val
return args
if __name__ == '__main__':
NLPService.main(MewexWorker, pause_at_exit=True)
tox.ini 0 → 100644
[tox]
envlist = pep8,docstyle
skipsdist = True
[testenv:pep8]
deps =
flake8
basepython = python3.6
commands =
flake8 {posargs}
[testenv:docstyle]
deps =
pydocstyle
basepython = python3.6
commands =
pydocstyle --verbose {posargs}
[flake8]
# W504 skipped because it is overeager and unnecessary
ignore = W504
show-source = True
exclude = .git,.venv,.tox,dist,doc,*egg,build,venv
import-order-style = pep8
max-line-length = 80
[pydocstyle]
# D104 Missing docstring in public package
# D203 1 blank line required before class docstring
# D213 Multi-line docstring summary should start at the second line
# D214 Section is over-indented
# D215 Section underline is over-indented
# D401 First line should be in imperative mood; try rephrasing
# D405 Section name should be properly capitalized
# D406 Section name should end with a newline
# D407 Missing dashed underline after section
# D408 Section underline should be in the line following the section’s name
# D409 Section underline should match the length of its name
# D410 Missing blank line after section
# D411 Missing blank line before section
ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411
match-dir = ^(?!\.tox|venv).*
match = ^(?!setup).*\.py
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment