Commit 80d63344 authored by leszeks's avatar leszeks

Initial commit

parent eabb2901
Pipeline #1724 failed with stage
in 30 seconds
# Default image for every job that does not declare its own.
image: clarinpl/python:3.6

# Keep the tox environments between pipeline runs.
cache:
  paths:
    - .tox

stages:
  - check_style
  - build

# Runs before the script of every job that does not override before_script.
before_script:
  - pip install tox==2.9.1
# Style check: runs the "pep8" tox environment.
pep8:
  stage: check_style
  script:
    - tox -v -e pep8
# Docstring check: runs the "docstyle" tox environment.
docstyle:
  stage: check_style
  script:
    - tox -v -e docstyle
# Builds the clarinpl/mewex image and pushes it; runs on master only.
build_image:
  stage: build
  image: 'docker:18.09.7'
  only:
    - master
  services:
    - 'docker:18.09.7-dind'
  # Overrides the global before_script so tox is not installed in this job.
  before_script:
    - ''
  script:
    - docker build -t clarinpl/mewex .
    # Pipe the password straight into docker login instead of writing it to a
    # file in the job workspace; quote the variables so they are passed intact.
    - echo "$DOCKER_PASSWORD" | docker login --username "$DOCKER_USERNAME" --password-stdin
    - docker push clarinpl/mewex
FROM ubuntu:xenial
# Install prerequisites
# Refresh the package index in the same layer as the installs that use it, so
# a cached "apt-get update" layer can never serve a stale index to them.
# The second "apt-get update" picks up the PPA that was just added.
RUN apt-get update && \
    apt-get -y upgrade && \
    apt-get -y install unzip software-properties-common && \
    add-apt-repository ppa:jonathonf/python-3.6 && \
    apt-get update && \
    apt-get -y install libxml2-dev libxslt-dev build-essential python3.6 python3.6-dev python3-pip python3.6-venv python3-setuptools && \
    apt-get -y install libboost-all-dev libicu-dev git-core wget cmake libantlr-dev libloki-dev python-dev swig libsigc++-2.0-dev libglibmm-2.4-dev libxml++2.6-dev && \
    python3.6 -m pip install pip --upgrade && \
    python3.6 -m pip install --no-cache-dir Cython
# Install cmake 3.12.0 into /usr/local.
# Download, run and delete the installer in one layer so the installer script
# is not kept in the image.
RUN wget https://cmake.org/files/v3.12/cmake-3.12.0-Linux-x86_64.sh && \
    sh cmake-3.12.0-Linux-x86_64.sh --prefix=/usr/local --exclude-subdir && \
    rm cmake-3.12.0-Linux-x86_64.sh
# Download tools.
# WORKDIR creates /build when it does not exist, so no separate mkdir is needed.
WORKDIR /build
RUN git clone http://nlp.pwr.wroc.pl/corpus2.git
RUN git clone http://nlp.pwr.edu.pl/wccl.git
# Fetch the morfeusz2 package, install it and delete the archive in the same
# layer so the .deb file does not stay in the image.
RUN wget -O morfeusz2-2.0.0-Linux-amd64.deb https://nextcloud.clarin-pl.eu/index.php/s/VVIvx4w20azcWbp/download && \
    dpkg -i morfeusz2-2.0.0-Linux-amd64.deb && \
    rm morfeusz2-2.0.0-Linux-amd64.deb
FROM clarinpl/python:3.6
# corpus2
# Build out-of-tree in /build/corpus2/bin. WORKDIR creates the directory; a
# "RUN cd ..." does not carry over to later instructions, so the build
# directory is selected with WORKDIR only.
WORKDIR /build/corpus2/bin
# make is invoked twice, exactly as before.
# NOTE(review): presumably a workaround for a flaky parallel build - confirm.
RUN cmake .. && \
    make -j && \
    make -j && \
    make install && \
    ldconfig
# Install the packages in one layer and drop the apt index afterwards so it
# does not add to the image size. Packages are listed alphabetically.
RUN apt-get update && apt-get install -y --no-install-recommends \
    cmake \
    corpus2-python3.6 \
    default-jdk \
    libboost1.58-all-dev \
    libicu-dev \
    wccl-python3.6 \
    && rm -rf /var/lib/apt/lists/*
# wccl: configure, compile and install from a dedicated build directory.
RUN mkdir /build/wccl/bin
WORKDIR /build/wccl/bin
# Same steps as before (make is still run twice), chained in a single layer.
RUN cmake .. && \
    make -j && \
    make -j && \
    make install && \
    ldconfig
# Download the package to ./morf, install it and remove the downloaded file in
# the same layer so it is not kept in the image.
RUN wget -O morf 'https://nextcloud.clarin-pl.eu/index.php/s/VVIvx4w20azcWbp/download' && \
    dpkg -i ./morf && \
    rm ./morf
# Install Java
RUN apt-get update && \
......@@ -48,49 +18,34 @@ RUN apt-get update && \
apt-get -y install openjdk-8-jre && \
apt-get -y install locales locales-all
# Set locales: enable pl_PL.UTF-8 in locale.gen, generate it and make it the
# default for every later instruction and for the running container.
RUN sed -i 's/# pl_PL.UTF-8 UTF-8/pl_PL.UTF-8 UTF-8/' /etc/locale.gen
RUN locale-gen pl_PL.UTF-8
# key=value form; the space-separated "ENV key value" form is legacy syntax.
ENV LANG=pl_PL.UTF-8 \
    LANGUAGE=pl_PL \
    LC_ALL=pl_PL.UTF-8
RUN dpkg-reconfigure --frontend noninteractive locales
# Set environment.
# Two separate ENV instructions on purpose: PATH refers to JAVA_HOME, and a
# variable set in an ENV instruction is only visible to later instructions.
ENV JAVA_HOME=/opt/jdk
ENV PATH=${PATH}:${JAVA_HOME}/bin
# installing polem
# Make /usr/bin/python3 point at python3.6. "ln -sf" replaces an existing link
# and also works when none exists. "RUN cd" and "RUN alias" only affect the
# shell of their own RUN instruction, so they are not needed.
RUN ln -sf /usr/bin/python3.6 /usr/bin/python3
# Refresh the package index in the same layer as the install that uses it.
RUN apt-get update && apt-get -y install default-jdk
# RUN cd .
# RUN rm /usr/bin/python3
# RUN ln -s /usr/bin/python3.6 /usr/bin/python3
# RUN alias python3='/usr/bin/python3.6'
#RUN apt-get -y install default-jdk
WORKDIR /build/
# Clone Polem, then copy the build context over the cloned tree.
RUN git clone https://github.com/gkubon/Polem
RUN mkdir -p /build/Polem/build
COPY . /build/Polem
# Remove the build directory (including anything COPY put there); the WORKDIR
# below creates it again, empty.
RUN rm /build/Polem/build -rf
WORKDIR /build/Polem/build
# Configure, compile and install from the build directory, then refresh the
# shared-library cache.
RUN cmake ..
RUN make -j
RUN make install
RUN ldconfig
# Move the corpus directory from the source tree to /data/corpus.
RUN mkdir /data
RUN mv /build/Polem/corpus /data
WORKDIR /app
# Install nlp_ws; the extra index URL is passed in addition to the default index.
# Every package is installed through "python3.6 -m pip" so it lands in the
# interpreter that runs the service, whatever "pip3" happens to point at.
RUN python3.6 -m pip install --no-cache-dir --extra-index-url https://pypi.clarin-pl.eu/ nlp_ws
# Remaining Python dependencies.
RUN python3.6 -m pip install --no-cache-dir lxml
RUN python3.6 -m pip install --no-cache-dir configparser
RUN python3.6 -m pip install --no-cache-dir ujson
# Clone into an explicitly named lowercase "polem" directory. Without the
# target argument git creates "Polem", and the lowercase paths used below do
# not exist on a case-sensitive filesystem.
RUN git clone https://github.com/gkubon/Polem polem && \
    mkdir polem/cpp/build && \
    cd polem/cpp/build && \
    cmake .. && \
    make && \
    make install
# RUN mkdir -p /build/Polem/build
# COPY . /build/Polem
# RUN rm /build/Polem/build -rf
# WORKDIR /build/Polem/build
# RUN cmake ..
# RUN make -j
# RUN make install
# RUN ldconfig
# RUN mkdir /data
# RUN mv /build/Polem/corpus /data
# WORKDIR /app
#install mewex
WORKDIR /build
......@@ -103,4 +58,11 @@ RUN cd MeWeX/mwextractor/mwextractor && \
ldconfig
RUN cd MeWeX/mewexlib/ && \
python3.6 setup.py install
WORKDIR /home/worker
# Install the Python dependencies before copying the sources: the pip layer is
# then rebuilt only when requirements.txt changes, not on every source edit.
COPY ./requirements.txt .
RUN python3.6 -m pip install --no-cache-dir -r requirements.txt
COPY ./src ./src
COPY ./main.py .
# Start the worker in "service" mode.
CMD ["python3.6", "main.py", "service"]
......@@ -2,9 +2,11 @@
tool = mewex1
root = /samba/requests/
rabbit_host = 10.17.0.85
rabbit_user = clarin
rabbit_password = clarin123
rabbit_host = rabbitmq
rabbit_user = test
rabbit_password = test
queue_prefix =nlp_
[tool]
workers_number = 4
......
version: '3'
services:
  # mewex worker, built from the Dockerfile in this directory.
  mewex:
    container_name: clarin_mewex
    build: ./
    working_dir: /home/worker
    entrypoint:
      - python3.6
      - main.py
      - service
    volumes:
      - '/samba:/samba'
      # Configuration and sources are mounted from the host over the paths
      # used inside the container.
      - './config.ini:/home/worker/config.ini'
      - './src:/home/worker/src'
      - './main.py:/home/worker/main.py'
    restart: always
\ No newline at end of file
"""Implementation of mewex worker."""
import nlp_ws
import argparse
from src.mewex import MewexWorker
def get_args(argv=None):
    """Gets command line arguments.

    :param argv: Optional list of argument strings to parse. When ``None``
        (the default) argparse reads the arguments from ``sys.argv[1:]``.
    :return: Parsed namespace; ``mode`` holds the chosen sub-command.
    :raises SystemExit: Raised by argparse when no or an unknown sub-command
        is given.
    """
    parser = argparse.ArgumentParser(description="mewex")
    subparsers = parser.add_subparsers(dest="mode")
    # Set as an attribute rather than a keyword argument: the ``required``
    # keyword of add_subparsers is not available on Python 3.6.
    subparsers.required = True

    subparsers.add_parser(
        "service",
        help="Run as a service")

    return parser.parse_args(argv)
def main():
    """Runs the program.

    Parses the command line and starts the handler registered for the
    selected mode. A mode without a registered handler does nothing.
    """
    args = get_args()

    # Maps each sub-command name to the callable that runs it.
    generators = {
        "service": lambda: nlp_ws.NLPService.main(MewexWorker),
    }

    gen_fn = generators.get(args.mode, lambda: None)
    gen_fn()


if __name__ == "__main__":
    main()
nlp_ws
lxml
ujson
configparser
\ No newline at end of file
......@@ -80,7 +80,3 @@ def _parse_mewex_options(opts):
args[key] = val
return args
if __name__ == '__main__':
NLPService.main(MewexWorker, pause_at_exit=True)
# Environments run by a plain "tox" invocation.
[tox]
envlist = pep8,docstyle
# Do not build a source distribution of this project for the environments.
skipsdist = True
# Style check with flake8; extra command-line arguments are passed through.
# Values on the line after "key =" must be indented to be read as part of
# that key.
[testenv:pep8]
deps =
    flake8
basepython = python3.6
commands =
    flake8 {posargs}
# Docstring check with pydocstyle; extra command-line arguments are passed
# through. Values on the line after "key =" must be indented to be read as
# part of that key.
[testenv:docstyle]
deps =
    pydocstyle
basepython = python3.6
commands =
    pydocstyle --verbose {posargs}
[flake8]
# W504 skipped because it is overeager and unnecessary
# NOTE(review): "ignore" replaces flake8's default ignore list instead of
# extending it - confirm that enabling the other default-ignored checks
# (e.g. W503) is intended.
ignore = W504
show-source = True
exclude = .git,.venv,.tox,dist,doc,*egg,build,venv
# NOTE(review): presumably read by the flake8-import-order plugin, which is
# not listed in the pep8 environment's deps - confirm.
import-order-style = pep8
max-line-length = 80
[pydocstyle]
# D104 Missing docstring in public package
# D203 1 blank line required before class docstring
# D213 Multi-line docstring summary should start at the second line
# D214 Section is over-indented
# D215 Section underline is over-indented
# D401 First line should be in imperative mood; try rephrasing
# D405 Section name should be properly capitalized
# D406 Section name should end with a newline
# D407 Missing dashed underline after section
# D408 Section underline should be in the line following the section’s name
# D409 Section underline should match the length of its name
# D410 Missing blank line after section
# D411 Missing blank line before section
ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411
match-dir = ^(?!\.tox|venv).*
match = ^(?!setup).*\.py
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment