diff --git a/src/cluto.py b/src/cluto.py index c10bfe5b8515b36edfe3e719c87052d08368be70..e83f6951e35ea1db309a865a3b2b54b62aa26eb6 100644 --- a/src/cluto.py +++ b/src/cluto.py @@ -6,20 +6,19 @@ from __future__ import print_function import json import re import io - -import numpy as _np import os import shutil import tempfile - from subprocess import call + +import numpy as _np from sklearn.externals import joblib import xlsxwriter verbose = False -def loadData(inputFile): +def load_data(inputFile): """Loading data.""" with open(inputFile) as json_ifs: jsonVal = json.load(json_ifs) @@ -29,7 +28,7 @@ def loadData(inputFile): return data, rowlabels -def saveXLSX(names, clustering_path, outfile): +def save_XLSX(names, clustering_path, outfile): """Saving to XLSX.""" srow = 3 scol = 4 @@ -49,7 +48,7 @@ def saveXLSX(names, clustering_path, outfile): workbook.close() -def toHeatMapJSON(cluto_path, clustering_path, names, outfile): +def to_heat_map_json(cluto_path, clustering_path, names, outfile): """Saving to JSON.""" with open(clustering_path) as f: groups = f.readlines() @@ -105,8 +104,8 @@ def number_of_clusters(options, rowlabels): return no_clusters -def save_clutofiles(mat, rlabels, clabels, cluto_path, rlabel_path, - clabel_path): +def save_cluto_files(mat, rlabels, clabels, cluto_path, rlabel_path, + clabel_path): """Saving cluto file.""" with open(cluto_path, 'w') as cluto_ofs: # Print header: @@ -156,12 +155,12 @@ def write_node(node_id, tree_dict, name2group): name2group)) if len(child_node_strings) == 0: node_str = '{"id":"node_' + node_id + '", "group":' + \ - str(name2group[node_id]) +\ + str(name2group[node_id]) + \ ', "name":"' + \ node_id + \ '", "data":{}, "children":[' else: - node_str = '{"id":"node_' + node_id + '", "name":"' + node_id +\ + node_str = '{"id":"node_' + node_id + '", "name":"' + node_id + \ '", "data":{}, "children":[' node_str += ', '.join(child_node_strings) node_str += ']}' @@ -209,10 +208,10 @@ def run_convert(cl_out_file, out_file, options, rowlabels): if len(rowlabels) < 25: density = '50' -# if options['analysis_type'] == 'plottree': -# resize = '50%' -# else: -# resize = '100%' + # if options['analysis_type'] == 'plottree': + # resize = '50%' + # else: + # resize = '100%' # print density call(['convert', '-density', density, cl_out_file, 'png:' + out_file]) @@ -220,7 +219,7 @@ def run_convert(cl_out_file, out_file, options, rowlabels): def run(inputFile, outputFile, options): """Running cluto worker.""" - data, rowlabels = loadData(inputFile + "/similarity.json") + data, rowlabels = load_data(inputFile + "/similarity.json") if "analysis_type" not in options: options["analysis_type"] = "plottree" no_clusters = number_of_clusters(options, rowlabels) @@ -254,10 +253,10 @@ def run(inputFile, outputFile, options): options, rowlabels) # for heatmap - toHeatMapJSON(cluto_path, os.path.join(temp_folder, - 'matrix.txt.clustering.' + - str(no_clusters)), rowlabels, - outputFile + "/data.json") + to_heat_map_json(cluto_path, os.path.join(temp_folder, + 'matrix.txt.clustering.' + + str(no_clusters)), rowlabels, + outputFile + "/data.json") # Check if they are required by any tool shutil.copyfile(os.path.join(temp_folder, 'matrix.txt.clustering.' + @@ -276,16 +275,16 @@ def run(inputFile, outputFile, options): with open(os.path.join(outputFile, 'clusters.json'), 'w') as outfile: json.dump(res, outfile) - labels = getLablesFromNames(rowlabels) + labels = get_lables_from_names(rowlabels) labels["groupnames"]["clusters"] = list(set(clusters)) labels["groups"]["clusters"] = clusters with open(os.path.join(outputFile, 'labels.json'), 'w') as outfile: json.dump(labels, outfile) # results in XLSX - saveXLSX(rowlabels, os.path.join(temp_folder, 'matrix.txt.clustering.' + - str(no_clusters)), - os.path.join(outputFile, 'result.xlsx')) + save_XLSX(rowlabels, os.path.join(temp_folder, 'matrix.txt.clustering.' + + str(no_clusters)), + os.path.join(outputFile, 'result.xlsx')) # Coping results for next tools # for visulisation (mds) @@ -304,7 +303,7 @@ def run(inputFile, outputFile, options): shutil.rmtree(temp_folder) -def getLablesFromNames(row_labels): +def get_lables_from_names(row_labels): """Getting labels from names.""" # data, data_cleaned,shortest_row_len, row_labels = get_data(row) shortest_row_len = 10000000 diff --git a/tox.ini b/tox.ini index 517bd1ca93ee3b53de05c3ae39fa2a7de47bdec0..cf2f52af562c033cdbdc67a581912d4332165268 100644 --- a/tox.ini +++ b/tox.ini @@ -39,7 +39,7 @@ max-line-length = 80 # D409 Section underline should match the length of its name # D410 Missing blank line after section # D411 Missing blank line before section -ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411 +ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411,D100 match-dir = ^(?!\.tox|venv).* match = ^(?!setup).*\.py