Modified Dockerfile, refactored code (pep8-naming) and tested Docker

parent 8df8a6d7
Pipeline #204 passed with stage in 40 seconds
@@ -11,6 +11,5 @@ services:
volumes:
- /samba:/samba
- ./config.ini:/home/worker/config.ini
- ./main.py:/home/worker/main.py
- ./cluto.py:/home/worker/cluto.py
restart: always
\ No newline at end of file
FROM clarinpl/python:2.7
-COPY requirements.txt .
-RUN pip install -r requirements.txt
RUN mkdir /home/worker && \
cd /home/worker && \
wget http://glaros.dtc.umn.edu/gkhome/fetch/sw/cluto/cluto-2.1.2a.tar.gz && \
tar -xvf cluto-2.1.2a.tar.gz
-RUN apt-get -y update && apt-get -y install imagemagick
-RUN sed -i 's/\(<policy domain="coder" rights=\)"none" \(pattern="PS" \/>\)/\1"read|write"\2/g' /etc/ImageMagick-6/policy.xml
-RUN less /etc/ImageM*/policy.xml
+RUN apt-get -y update && apt-get -y install imagemagick && \
+sed -i 's/\(<policy domain="coder" rights=\)"none" \(pattern="PS" \/>\)/\1"read|write"\2/g' /etc/ImageMagick-6/policy.xml && \
+less /etc/ImageM*/policy.xml
WORKDIR /home/worker
COPY ./src ./src
COPY ./main.py .
+COPY requirements.txt .
+RUN pip install -r requirements.txt
CMD ["python","main.py"]
\ No newline at end of file
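The ImageMagick policy edit above is what lets the worker rasterise CLUTO's PostScript output: run_convert shells out to `convert` to turn cluto.ps into result.png, and the stock Debian policy.xml ships with the PS coder disabled. Below is a minimal sanity-check sketch for that policy change; the script name and its use as a self-test are assumptions, only the policy path and the expected "read|write" rights come from the Dockerfile.

```python
# policy_check.py -- hypothetical helper, not part of this commit.
# Verifies that the sed call in the Dockerfile re-enabled the PostScript
# coder that "convert cluto.ps png:result.png" needs.
import sys
import xml.etree.ElementTree as ET

POLICY = '/etc/ImageMagick-6/policy.xml'   # path used in the Dockerfile


def ps_rights(policy_path=POLICY):
    """Return the rights string of the PS coder policy, or None if absent."""
    root = ET.parse(policy_path).getroot()
    for policy in root.iter('policy'):
        if policy.get('domain') == 'coder' and policy.get('pattern') == 'PS':
            return policy.get('rights')
    return None


if __name__ == '__main__':
    rights = ps_rights()
    print('PS coder rights: %r' % rights)
    # After the sed edit we expect "read|write"; exit non-zero otherwise.
    sys.exit(0 if rights == 'read|write' else 1)
```

Running it inside the image should exit 0 once the sed line has been applied.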
[service]
root = /samba/requests/
tool = cluto
-rabbit_host =rabbit.clarin.ws
-rabbit_user =clarin
-rabbit_password =clarin123
+rabbit_host = rabbit.clarin.ws
+rabbit_user = clarin
+rabbit_password = clarin123
[tool]
workers_number = 4
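For reference, ConfigParser treats `key =value` and `key = value` identically, so the whitespace change above is purely cosmetic. A minimal sketch of reading this file follows; in the real worker the parsing is handled by nlp_ws, so the snippet is illustrative only.

```python
# Illustrative only -- the real worker lets nlp_ws consume config.ini.
try:                                     # Python 3
    from configparser import ConfigParser
except ImportError:                      # Python 2.7 (clarinpl/python:2.7 base)
    from ConfigParser import ConfigParser

config = ConfigParser()
config.read('config.ini')

root_dir = config.get('service', 'root')             # /samba/requests/
rabbit_host = config.get('service', 'rabbit_host')   # rabbit.clarin.ws
workers = config.getint('tool', 'workers_number')    # 4

print('%s %s %d' % (rabbit_host, root_dir, workers))
```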
@@ -14,9 +14,9 @@ class ClutoWorker(nlp_ws.NLPWorker):
# self.logger.log(INFO, "Iobber model loaded form "+
# self.config['model-dir'])
-def process(self, inputFile, taskOptions, outputFile):
+def process(self, input_file, task_options, output_file):
"""Starting process."""
-cluto.run(inputFile, outputFile, taskOptions)
+cluto.run(input_file, output_file, task_options)
if __name__ == '__main__':
@@ -18,17 +18,17 @@ import xlsxwriter
verbose = False
-def load_data(inputFile):
+def load_data(input_file):
"""Loading data."""
-with open(inputFile) as json_ifs:
-jsonVal = json.load(json_ifs)
-rowlabels = _np.asarray(jsonVal["rowlabels"])
-data = _np.asarray(jsonVal["arr"])
-jsonVal["arr"] = None
+with open(input_file) as json_ifs:
+json_val = json.load(json_ifs)
+rowlabels = _np.asarray(json_val["rowlabels"])
+data = _np.asarray(json_val["arr"])
+json_val["arr"] = None
return data, rowlabels
-def save_XLSX(names, clustering_path, outfile):
+def save_xlsx(names, clustering_path, outfile):
"""Saving to XLSX."""
srow = 3
scol = 4
@@ -217,9 +217,9 @@ def run_convert(cl_out_file, out_file, options, rowlabels):
call(['convert', '-density', density, cl_out_file, 'png:' + out_file])
-def run(inputFile, outputFile, options):
+def run(input_file, output_file, options):
"""Running cluto worker."""
-data, rowlabels = load_data(inputFile + "/similarity.json")
+data, rowlabels = load_data(input_file + "/similarity.json")
if "analysis_type" not in options:
options["analysis_type"] = "plottree"
no_clusters = number_of_clusters(options, rowlabels)
@@ -232,7 +232,7 @@ def run(inputFile, outputFile, options):
rlabel_path = os.path.join(temp_folder, 'documents_ids.txt')
cluto_out_path = os.path.join(temp_folder, 'cluto.ps')
-shutil.copy2(os.path.join(inputFile, 'matrix.txt'),
+shutil.copy2(os.path.join(input_file, 'matrix.txt'),
os.path.join(temp_folder, 'matrix.txt'))
with io.open(rlabel_path, 'w') as rlabel_ofs:
for lab in rowlabels:
@@ -241,29 +241,29 @@ def run(inputFile, outputFile, options):
run_cluto(options, no_clusters, cluto_path, rlabel_path,
cluto_out_path, os.path.join(temp_folder, 'clutoout.txt'))
-if not os.path.exists(outputFile):
-os.mkdir(outputFile)
+if not os.path.exists(output_file):
+os.mkdir(output_file)
shutil.copyfile(os.path.join(temp_folder, 'clutoout.txt'),
-os.path.join(outputFile, 'clutoout.txt'))
+os.path.join(output_file, 'clutoout.txt'))
run_convert2json(os.path.join(temp_folder, 'matrix.txt.tree'),
-os.path.join(outputFile, 'result.json'), rowlabels,
+os.path.join(output_file, 'result.json'), rowlabels,
os.path.join(temp_folder, 'matrix.txt.clustering.' +
str(no_clusters)))
-run_convert(cluto_out_path, os.path.join(outputFile, 'result.png'),
+run_convert(cluto_out_path, os.path.join(output_file, 'result.png'),
options, rowlabels)
# for heatmap
to_heat_map_json(cluto_path, os.path.join(temp_folder,
'matrix.txt.clustering.' +
str(no_clusters)), rowlabels,
-outputFile + "/data.json")
+output_file + "/data.json")
# Check if they are required by any tool
shutil.copyfile(os.path.join(temp_folder, 'matrix.txt.clustering.' +
str(no_clusters)),
-os.path.join(outputFile, 'result.clustering'))
-shutil.copyfile(cluto_path, os.path.join(outputFile, 'matrix.txt'))
-joblib.dump(rowlabels, outputFile + "/rowlabels.pkl")
+os.path.join(output_file, 'result.clustering'))
+shutil.copyfile(cluto_path, os.path.join(output_file, 'matrix.txt'))
+joblib.dump(rowlabels, output_file + "/rowlabels.pkl")
# Results in JSON:
with open(os.path.join(temp_folder, 'matrix.txt.clustering.' +
@@ -272,32 +272,32 @@ def run(inputFile, outputFile, options):
# to be deleted, but now required by visualisation
res = {"clusters": clusters, "rowlabels": rowlabels.tolist()}
-with open(os.path.join(outputFile, 'clusters.json'), 'w') as outfile:
+with open(os.path.join(output_file, 'clusters.json'), 'w') as outfile:
json.dump(res, outfile)
labels = get_lables_from_names(rowlabels)
labels["groupnames"]["clusters"] = list(set(clusters))
labels["groups"]["clusters"] = clusters
-with open(os.path.join(outputFile, 'labels.json'), 'w') as outfile:
+with open(os.path.join(output_file, 'labels.json'), 'w') as outfile:
json.dump(labels, outfile)
# results in XLSX
-save_XLSX(rowlabels, os.path.join(temp_folder, 'matrix.txt.clustering.' +
+save_xlsx(rowlabels, os.path.join(temp_folder, 'matrix.txt.clustering.' +
str(no_clusters)),
-os.path.join(outputFile, 'result.xlsx'))
+os.path.join(output_file, 'result.xlsx'))
# Coping results for next tools
# for visulisation (mds)
# similarity matrix
-shutil.copyfile(os.path.join(inputFile, 'similarity.json'),
-os.path.join(outputFile, 'similarity.json'))
-shutil.copyfile(os.path.join(inputFile, 'distance.json'),
-os.path.join(outputFile, 'distance.json'))
+shutil.copyfile(os.path.join(input_file, 'similarity.json'),
+os.path.join(output_file, 'similarity.json'))
+shutil.copyfile(os.path.join(input_file, 'distance.json'),
+os.path.join(output_file, 'distance.json'))
# for featsel
# matrix after selection and weighting
-shutil.copyfile(os.path.join(inputFile, 'weighted.json'),
-os.path.join(outputFile, 'weighted.json'))
+shutil.copyfile(os.path.join(input_file, 'weighted.json'),
+os.path.join(output_file, 'weighted.json'))
# remove temp_folder
shutil.rmtree(temp_folder)
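Taken together, the renamed run() expects input_file to be a directory holding similarity.json, matrix.txt, distance.json and weighted.json, and fills output_file with the PNG, JSON and XLSX results plus copies of the input matrices. A hedged usage sketch follows; the directory paths and the plain `import cluto` are placeholders (the module may live under src/ after this refactor), and only the analysis_type option appears in this excerpt.

```python
# Hypothetical driver; paths and the import are placeholders.
import cluto

input_dir = '/samba/requests/example_task'        # needs similarity.json,
                                                  # matrix.txt, distance.json,
                                                  # weighted.json
output_dir = '/samba/requests/example_task_out'   # created by run() if missing

options = {'analysis_type': 'plottree'}           # the default run() falls back to

# Writes result.png, result.json, clusters.json, labels.json, data.json,
# result.xlsx, result.clustering, rowlabels.pkl and copies of the input
# matrices into output_dir, then removes its temp folder.
cluto.run(input_dir, output_dir, options)
```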
@@ -5,6 +5,7 @@ skipsdist = True
[testenv:pep8]
deps =
flake8
+pep8-naming
basepython = python2.7
commands =
flake8 {posargs}
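The new pep8-naming dependency is what drives the inputFile → input_file renames above: it extends flake8 with N8xx naming checks (for example N803, "argument name should be lowercase"), so `tox -e pep8` now fails on camelCase parameters. A tiny illustration, not taken from the repository:

```python
# Illustrative only: the kind of finding pep8-naming (N8xx) adds to flake8.


def load_data(inputFile):          # N803: argument name should be lowercase
    return inputFile


def load_data_fixed(input_file):   # passes both flake8 and pep8-naming
    return input_file
```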