Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
cluto
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
nlpworkers
cluto
Commits
8df8a6d7
Commit
8df8a6d7
authored
5 years ago
by
bbojanowski
Browse files
Options
Downloads
Patches
Plain Diff
Adding to tox.ini D100
parent
6013eddc
Branches
Branches containing commit
2 merge requests
!2
Resolve "Refactor"
,
!1
WIP: Resolve "Refactor"
Pipeline
#165
passed with stage
in 34 seconds
Changes
2
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/cluto.py
+23
-24
23 additions, 24 deletions
src/cluto.py
tox.ini
+1
-1
1 addition, 1 deletion
tox.ini
with
24 additions
and
25 deletions
src/cluto.py
+
23
−
24
View file @
8df8a6d7
...
...
@@ -6,20 +6,19 @@ from __future__ import print_function
import
json
import
re
import
io
import
numpy
as
_np
import
os
import
shutil
import
tempfile
from
subprocess
import
call
import
numpy
as
_np
from
sklearn.externals
import
joblib
import
xlsxwriter
verbose
=
False
def
load
D
ata
(
inputFile
):
def
load
_d
ata
(
inputFile
):
"""
Loading data.
"""
with
open
(
inputFile
)
as
json_ifs
:
jsonVal
=
json
.
load
(
json_ifs
)
...
...
@@ -29,7 +28,7 @@ def loadData(inputFile):
return
data
,
rowlabels
def
saveXLSX
(
names
,
clustering_path
,
outfile
):
def
save
_
XLSX
(
names
,
clustering_path
,
outfile
):
"""
Saving to XLSX.
"""
srow
=
3
scol
=
4
...
...
@@ -49,7 +48,7 @@ def saveXLSX(names, clustering_path, outfile):
workbook
.
close
()
def
to
H
eat
MapJSON
(
cluto_path
,
clustering_path
,
names
,
outfile
):
def
to
_h
eat
_map_json
(
cluto_path
,
clustering_path
,
names
,
outfile
):
"""
Saving to JSON.
"""
with
open
(
clustering_path
)
as
f
:
groups
=
f
.
readlines
()
...
...
@@ -105,8 +104,8 @@ def number_of_clusters(options, rowlabels):
return
no_clusters
def
save_clutofiles
(
mat
,
rlabels
,
clabels
,
cluto_path
,
rlabel_path
,
clabel_path
):
def
save_cluto
_
files
(
mat
,
rlabels
,
clabels
,
cluto_path
,
rlabel_path
,
clabel_path
):
"""
Saving cluto file.
"""
with
open
(
cluto_path
,
'
w
'
)
as
cluto_ofs
:
# Print header:
...
...
@@ -156,12 +155,12 @@ def write_node(node_id, tree_dict, name2group):
name2group
))
if
len
(
child_node_strings
)
==
0
:
node_str
=
'
{
"
id
"
:
"
node_
'
+
node_id
+
'"
,
"
group
"
:
'
+
\
str
(
name2group
[
node_id
])
+
\
str
(
name2group
[
node_id
])
+
\
'
,
"
name
"
:
"'
+
\
node_id
+
\
'"
,
"
data
"
:{},
"
children
"
:[
'
else
:
node_str
=
'
{
"
id
"
:
"
node_
'
+
node_id
+
'"
,
"
name
"
:
"'
+
node_id
+
\
node_str
=
'
{
"
id
"
:
"
node_
'
+
node_id
+
'"
,
"
name
"
:
"'
+
node_id
+
\
'"
,
"
data
"
:{},
"
children
"
:[
'
node_str
+=
'
,
'
.
join
(
child_node_strings
)
node_str
+=
'
]}
'
...
...
@@ -209,10 +208,10 @@ def run_convert(cl_out_file, out_file, options, rowlabels):
if
len
(
rowlabels
)
<
25
:
density
=
'
50
'
# if options['analysis_type'] == 'plottree':
# resize = '50%'
# else:
# resize = '100%'
# if options['analysis_type'] == 'plottree':
# resize = '50%'
# else:
# resize = '100%'
# print density
call
([
'
convert
'
,
'
-density
'
,
density
,
cl_out_file
,
'
png:
'
+
out_file
])
...
...
@@ -220,7 +219,7 @@ def run_convert(cl_out_file, out_file, options, rowlabels):
def
run
(
inputFile
,
outputFile
,
options
):
"""
Running cluto worker.
"""
data
,
rowlabels
=
load
D
ata
(
inputFile
+
"
/similarity.json
"
)
data
,
rowlabels
=
load
_d
ata
(
inputFile
+
"
/similarity.json
"
)
if
"
analysis_type
"
not
in
options
:
options
[
"
analysis_type
"
]
=
"
plottree
"
no_clusters
=
number_of_clusters
(
options
,
rowlabels
)
...
...
@@ -254,10 +253,10 @@ def run(inputFile, outputFile, options):
options
,
rowlabels
)
# for heatmap
to
H
eat
MapJSON
(
cluto_path
,
os
.
path
.
join
(
temp_folder
,
'
matrix.txt.clustering.
'
+
str
(
no_clusters
)),
rowlabels
,
outputFile
+
"
/data.json
"
)
to
_h
eat
_map_json
(
cluto_path
,
os
.
path
.
join
(
temp_folder
,
'
matrix.txt.clustering.
'
+
str
(
no_clusters
)),
rowlabels
,
outputFile
+
"
/data.json
"
)
# Check if they are required by any tool
shutil
.
copyfile
(
os
.
path
.
join
(
temp_folder
,
'
matrix.txt.clustering.
'
+
...
...
@@ -276,16 +275,16 @@ def run(inputFile, outputFile, options):
with
open
(
os
.
path
.
join
(
outputFile
,
'
clusters.json
'
),
'
w
'
)
as
outfile
:
json
.
dump
(
res
,
outfile
)
labels
=
get
L
ables
F
rom
N
ames
(
rowlabels
)
labels
=
get
_l
ables
_f
rom
_n
ames
(
rowlabels
)
labels
[
"
groupnames
"
][
"
clusters
"
]
=
list
(
set
(
clusters
))
labels
[
"
groups
"
][
"
clusters
"
]
=
clusters
with
open
(
os
.
path
.
join
(
outputFile
,
'
labels.json
'
),
'
w
'
)
as
outfile
:
json
.
dump
(
labels
,
outfile
)
# results in XLSX
saveXLSX
(
rowlabels
,
os
.
path
.
join
(
temp_folder
,
'
matrix.txt.clustering.
'
+
str
(
no_clusters
)),
os
.
path
.
join
(
outputFile
,
'
result.xlsx
'
))
save
_
XLSX
(
rowlabels
,
os
.
path
.
join
(
temp_folder
,
'
matrix.txt.clustering.
'
+
str
(
no_clusters
)),
os
.
path
.
join
(
outputFile
,
'
result.xlsx
'
))
# Copying results for next tools
# for visualisation (mds)
...
...
@@ -304,7 +303,7 @@ def run(inputFile, outputFile, options):
shutil
.
rmtree
(
temp_folder
)
def
get
L
ables
F
rom
N
ames
(
row_labels
):
def
get
_l
ables
_f
rom
_n
ames
(
row_labels
):
"""
Getting labels from names.
"""
# data, data_cleaned,shortest_row_len, row_labels = get_data(row)
shortest_row_len
=
10000000
...
...
This diff is collapsed.
Click to expand it.
tox.ini
+
1
−
1
View file @
8df8a6d7
...
...
@@ -39,7 +39,7 @@ max-line-length = 80
# D409 Section underline should match the length of its name
# D410 Missing blank line after section
# D411 Missing blank line before section
ignore
=
D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411
ignore
=
D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411
,D100
match-dir
=
^(?!
\.
tox|venv).*
match
=
^(?!setup).*
\.
py
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment