Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
corpus2
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Analysers
corpus2
Commits
d55d81ca
Commit
d55d81ca
authored
Aug 23, 2012
by
jezozwierzak
Browse files
Options
Downloads
Patches
Plain Diff
Changed CSVWriter to CSVTable
parent
493c914d
No related branches found
No related tags found
No related merge requests found
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
utils/CSVColumn.py
+166
-0
166 additions, 0 deletions
utils/CSVColumn.py
utils/CSVTable.py
+178
-0
178 additions, 0 deletions
utils/CSVTable.py
utils/CSVWriter.py
+0
-192
0 additions, 192 deletions
utils/CSVWriter.py
utils/chunk_eval.py
+60
-35
60 additions, 35 deletions
utils/chunk_eval.py
with
404 additions
and
227 deletions
utils/CSVColumn.py
0 → 100755
+
166
−
0
View file @
d55d81ca
#!/usr/bin/python
#-*- coding: utf-8 -*-
'''
Created on 09-08-2012
@author: Adam Pawlaczek
'''
# Copyright (C) 2012 Adam Pawlaczek.
# This program is free software; you can redistribute and/or modify it
# under the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.
#
# See the LICENCE and COPYING files for more details
class CSVColumn:
    '''
    A single column of a CSVTable.

    A column is either a leaf, whose ``content`` list holds one value per
    table row, or a group, whose ``content`` list holds nested CSVColumn
    objects (sub-columns). ``width`` is the number of characters reserved
    for the column when the table is rendered.
    '''

    # Column type names whose values are text and therefore cannot be summed.
    _TEXT_TYPES = ('string', 'str', 'unicode')

    def __init__(self, parent, name, separator, type):
        '''
        parent    -- owning table, or the enclosing CSVColumn for a sub-column
        name      -- header text; its length is the initial column width
        separator -- field separator, taken into account when sizing columns
        type      -- name of the value type ('int', 'float', 'dict', ...)
        '''
        self.parent = parent
        self.name = name
        self.type = type
        self.width = len(name)
        self.content = []
        self.separator = separator

    @staticmethod
    def _textLength(value):
        '''Return the rendered length of value (floats use 4 decimals).'''
        if type(value) is float:
            return len("%.4f" % value)
        return len(str(value))

    def addSubColumn(self, name, type=''):
        '''
        Append a sub-column and turn this column into a 'dict' group.

        Only allowed while the column is empty or already a group.
        '''
        assert len(self.content) == 0 or self.hasSubColumns()
        self.type = 'dict'
        self.content.append(CSVColumn(self, name, self.separator, type))
        self.recountWidths()

    def insertValue(self, row, data, subColumn=''):
        '''
        Overwrite the value stored at index row.

        For a group column data is either a dict keyed by sub-column name
        (every sub-column is updated) or a single value for the sub-column
        named by subColumn. Widths grow when the new value needs more room.
        '''
        if self.hasSubColumns():
            if isinstance(data, dict):
                for sub in self.content:
                    # The argument order is (row, data); the previous code
                    # passed them swapped, so this branch could never work.
                    sub.insertValue(row, data[sub.name])
                return
            for sub in self.content:
                if sub.name == subColumn:
                    sub.content[row] = data
                    sub.width = max(sub.width, self._textLength(data))
                    break
        elif subColumn == '':
            self.content[row] = data
            self.width = max(self.width, self._textLength(data))

    def increment(self, row, subColumn=''):
        '''Add 1 to the value at index row (optionally inside subColumn).'''
        if subColumn != '' and self.hasSubColumns():
            for sub in self.content:
                if sub.name != subColumn:
                    continue
                sub.content[row] += 1
                if sub.type == "float":
                    text = "%.4f" % sub.content[row]
                else:
                    text = str(sub.content[row])
                if len(text) > sub.width:
                    # Widen the group by the same amount as the sub-column.
                    self.width += len(text) - sub.width
                    sub.width = len(text)
                break
        else:
            self.content[row] += 1

    def getValue(self, row, subColumn=''):
        '''
        Return the value at index row.

        When subColumn names no existing sub-column, None is returned.
        '''
        if subColumn != '' and self.hasSubColumns():
            for sub in self.content:
                if sub.name == subColumn:
                    return sub.content[row]
            return None
        return self.content[row]

    def addValue(self, data):
        '''
        Append the value for a new row.

        A group column expects a dict keyed by sub-column name. The first
        value appended to a leaf column determines the column type.
        '''
        if self.hasSubColumns():
            rendered = 0
            for sub in self.content:
                sub.addValue(data[sub.name])
                rendered += len(str(data[sub.name]) + self.separator)
            if rendered > self.width:
                self.recountWidths()
            return

        assert type(data).__name__ != 'dict', 'You added dict to column with no subcolumns'
        if len(self.content) == 0:
            self.type = type(data).__name__
        self.content.append(data)
        text = str(data)
        if len(text) > self.width and self.isSubColumn():
            self.parent.recountWidths()
        elif len(text + self.separator) > self.width:
            self.width = len(text)

    def countSum(self):
        '''
        Return the sum of the column.

        A group column yields a dict of per-sub-column sums. A text column
        cannot be summed and yields '' (previously this raised because the
        result was never assigned).
        '''
        if self.hasSubColumns():
            return dict((sub.name, sub.countSum()) for sub in self.content)
        if self.type in self._TEXT_TYPES:
            return ''
        total = 0
        for value in self.content:
            total += value
        return total

    def recountWidths(self):
        '''
        Recompute the widths of this group column and of its sub-columns.

        If the sub-column headers alone are wider than the column, every
        width is derived from the headers. Otherwise, once the table has
        rows, each sub-column grows to fit its widest rendered value and the
        last sub-column takes whatever space is left.
        '''
        headers = ''
        for sub in self.content:
            headers += sub.name + self.separator

        if len(headers) > self.width:
            self.width = len(headers)
            for sub in self.content:
                sub.width = len(sub.name + self.separator)
        elif self.parent.rows > 0:
            used = 0
            last = len(self.content) - 1
            for index, sub in enumerate(self.content):  # for each sub-column
                if index == last:
                    if self.width - used > 0:
                        sub.width = self.width - used
                    elif sub.content:
                        # Guarded: a freshly added sub-column has no rows yet.
                        sub.width = len(self.parent.ptr(sub.content[0], 1))
                else:
                    for row in range(0, self.parent.rows):  # for each row
                        needed = len(self.parent.ptr(sub.content[row], 1))
                        if needed > sub.width:
                            sub.width = needed
                used += sub.width
            if used > self.width:
                self.width = used

    def hasSubColumns(self):
        '''Return True when this column groups nested CSVColumn objects.'''
        # isinstance works for old- and new-style classes alike; comparing the
        # type name with "instance" only ever matched Python 2 old-style
        # instances and silently disabled sub-columns everywhere else.
        return len(self.content) > 0 and isinstance(self.content[0], CSVColumn)

    def isSubColumn(self):
        '''Return True when the parent of this column is another CSVColumn.'''
        return isinstance(self.parent, CSVColumn)

    def fillZeros(self, rows):
        '''
        Append rows neutral values (0.0, 0 or '' depending on the type) to
        the column, or to each of its sub-columns.
        '''
        if self.hasSubColumns():
            for sub in self.content:
                sub.fillZeros(rows)
            return
        if self.type == "float":
            filler = 0.0
        elif self.type == "int":
            filler = 0
        else:
            filler = ''
        self.content.extend([filler] * rows)
\ No newline at end of file
This diff is collapsed.
Click to expand it.
utils/CSVTable.py
0 → 100755
+
178
−
0
View file @
d55d81ca
#!/usr/bin/python
#-*- coding: utf-8 -*-
'''
Created on 09-08-2012
@author: Adam Pawlaczek
TODO: Possibly use float when computing AVG instead of casting to the type of the column.
'''
# Copyright (C) 2012 Adam Pawlaczek.
# This program is free software; you can redistribute and/or modify it
# under the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.
#
# See the LICENCE and COPYING files for more details
from
CSVColumn
import
CSVColumn
from
operator
import
itemgetter
class CSVTable:
    '''
    A small in-memory table that renders itself as separator-delimited text.

    ``content`` holds the CSVColumn objects, ``rows`` the number of data rows
    and ``columns`` the number of rendered fields per row. A column named
    "Nr" is treated as an automatically incremented row number.
    '''

    def __init__(self, separator=';'):
        '''separator -- string written after every rendered field.'''
        self.widths = []
        self.content = []
        self.rows = 0
        self.columns = 0
        self.separator = separator

    def _findColumn(self, name):
        '''Return the column called name, or None when there is none.'''
        for column in self.content:
            if column.name == name:
                return column
        return None

    def _appendNextNr(self, column):
        '''Append the next running number to the "Nr" column.'''
        if self.rows > 0:
            column.addValue(column.content[self.rows - 1] + 1)
        else:
            column.addValue(1)

    def addColumn(self, name, type=''):
        '''
        Append a new column; existing rows are padded with neutral values.

        Column names must be unique. Always returns True.
        '''
        for column in self.content:
            assert column.name != name, 'Column with name: ' + name + ' already exists'
        column = CSVColumn(self, name, self.separator, type)
        self.content.append(column)
        if self.rows > 0 and type != 'dict':
            column.fillZeros(self.rows)
        self.columns += 1
        return True

    def addSubColumn(self, parentName, name, type=''):
        '''
        Add a sub-column to the column called parentName.

        Returns True on success and False when the parent does not exist.
        '''
        column = self._findColumn(parentName)
        if column is None:
            return False
        column.addSubColumn(name, type)
        # The first sub-column reuses the field already counted for its parent.
        if len(column.content) > 1:
            self.columns += 1
        if self.rows > 0:
            column.content[-1].fillZeros(self.rows)
        return True

    def addRow(self, row=None):
        '''
        Append a row given as a dict keyed by column name.

        The "Nr" column is numbered automatically unless row provides it.
        A missing key for any other column raises KeyError.
        '''
        if row is None:
            row = {}
        for column in self.content:
            if column.name != 'Nr' or 'Nr' in row:
                column.addValue(row[column.name])
            else:
                self._appendNextNr(column)
        self.rows += 1

    def addEmptyRow(self):
        '''Append a row of neutral values (and the next "Nr", if present).'''
        for column in self.content:
            if column.name == "Nr":
                self._appendNextNr(column)
            else:
                column.fillZeros(1)
        self.rows += 1

    def insertInColumn(self, columnName, row, data, subColumn=''):
        '''Overwrite the value at index row of the given column.'''
        assert row < self.rows
        column = self._findColumn(columnName)
        if column is not None:
            column.insertValue(row, data, subColumn)

    def increment(self, columnName, row, subColumn=''):
        '''Add 1 to the value at index row of the given column.'''
        column = self._findColumn(columnName)
        if column is not None:
            column.increment(row, subColumn)

    def getValue(self, columnName, row, subColumn=''):
        '''Return the value at index row, or None for an unknown column.'''
        assert row < self.rows
        column = self._findColumn(columnName)
        if column is None:
            return None
        return column.getValue(row, subColumn)

    def hasSubColumns(self):
        '''Return True when at least one column has sub-columns.'''
        for column in self.content:
            if column.hasSubColumns():
                return True
        return False

    def hasNrColumn(self):
        '''Return True when the table has a "Nr" column.'''
        return self.hasColumn("Nr")

    def hasColumn(self, name):
        '''Return True when a column called name exists.'''
        return self._findColumn(name) is not None

    def countAvg(self):
        '''Append a row holding the average of every numeric column.'''
        results = {}
        for column in self.content:
            total = column.countSum()
            if column.type == 'int' or column.type == 'float':
                total = total / self.rows
            elif column.type == 'dict':
                for subName in list(total):
                    total[subName] = total[subName] / self.rows
            results[column.name] = total
        if self.hasNrColumn():
            results['Nr'] = 'AVG'
        self.addRow(results)

    def countSum(self):
        '''Append a row holding the sum of every column.'''
        results = {}
        for column in self.content:
            results[column.name] = column.countSum()
        if self.hasNrColumn():
            results['Nr'] = 'SUM'
        self.addRow(results)

    # Helping functions

    def __repeat_to_length(self, string_to_expand, length):
        '''Repeat string_to_expand until it is exactly length characters.'''
        # Floor division keeps the repeat count an int on Python 3 as well.
        return (string_to_expand * ((length // len(string_to_expand)) + 1))[:length]

    def ptr(self, data, width):
        '''
        Render one field padded to width and followed by the separator.

        Ints use the 'd' format, floats four decimals, anything else 's'.
        '''
        # Exact type checks on purpose: a bool must not be rendered as a number.
        if type(data) is int:
            base = 'd'
        elif type(data) is float:
            base = '.4f'
        else:
            base = 's'
        return '{0:{width}{base}}'.format(data, base=base, width=width) + self.separator

    def __str__(self):
        '''Render the header line, the sub-header line and all data rows.'''
        parts = []

        # Headers: a grouped column is followed by one extra separator per
        # additional sub-column so that the fields below stay aligned.
        for column in self.content:
            parts.append(self.ptr(column.name, column.width))
            if column.hasSubColumns():
                parts.append(self.separator * (len(column.content) - 1))
        parts.append('\n')

        # Sub-headers: only filled in when some column is grouped.
        if self.hasSubColumns():
            for column in self.content:
                if not column.hasSubColumns():
                    parts.append(self.ptr('', column.width))
                    continue
                single = len(column.content) <= 1
                for subColumn in column.content:
                    width = column.width if single else subColumn.width
                    parts.append(self.ptr(subColumn.name, width))
        parts.append('\n')

        # Data
        for i in range(0, self.rows):
            for column in self.content:
                if column.hasSubColumns():
                    for subColumn in column.content:
                        parts.append(self.ptr(subColumn.content[i], subColumn.width))
                else:
                    parts.append(self.ptr(column.content[i], column.width))
            parts.append('\n')
        return ''.join(parts)
\ No newline at end of file
This diff is collapsed.
Click to expand it.
utils/CSVWriter.py
deleted
100644 → 0
+
0
−
192
View file @
493c914d
#!/usr/bin/python
#-*- coding: utf-8 -*-
'''
Created on 03-08-2012
@author: jezozwierzak
'''
class CSVWriter:
    '''
    Column-oriented table rendered as separator-delimited text.

    ``list`` holds one list per column: the header name followed by the row
    values. A column with sub-columns instead holds the header name followed
    by one ``[sub_name, value, value, ...]`` list per sub-column.
    ``widths`` holds the rendered width of every top-level column.
    '''

    def __init__(self, separator=';'):
        '''separator -- string written after every rendered field.'''
        self.widths = []
        self.list = []
        self.rows = 0
        self.columns = 0
        self.separator = separator

    def addSubColumn(self, parentIndex, name):
        '''Append the sub-column name to the column at parentIndex.'''
        parentColumn = self.list[parentIndex]
        parentColumn.append([name])
        headers = ''
        for sub in parentColumn[1:]:
            headers += sub[0] + self.separator
        if len(headers) > self.widths[parentIndex]:
            self.widths[parentIndex] = len(headers) - 1
        # The first sub-column reuses the field already counted for its parent.
        if len(parentColumn[1:]) > 1:
            self.columns += 1

    def addSubColumnByName(self, parentName, name):
        '''Append the sub-column name to the column called parentName.'''
        assert self.rows == 0, 'You have to add all Column names before adding rows'
        parentIndex = self.columnIndex(parentName)
        self.addSubColumn(parentIndex, name)

    def addSubColumnsByName(self, parentName, names=None):
        '''Append several sub-columns to the column called parentName.'''
        for name in (names or []):
            self.addSubColumnByName(parentName, name)

    def addSubColumns(self, parentIndex, names=None):
        '''Append several sub-columns to the column at parentIndex.'''
        for name in (names or []):
            self.addSubColumn(parentIndex, name)

    def addColumn(self, name):
        '''Append a top-level column; only allowed before any row is added.'''
        assert self.rows == 0, 'You have to add all Column names before adding rows'
        self.list.append([name])
        self.widths.append(len(name))
        self.columns += 1

    def addColumns(self, names=None):
        '''Append several top-level columns.'''
        for name in (names or []):
            self.addColumn(name)

    def addRow(self, row=None):
        '''
        Append one row: one entry per top-level column, where the entry for a
        column with sub-columns is itself a list with one value per
        sub-column. Column widths grow to fit the new values.
        '''
        if row is None:
            row = []
        assert len(row) == len(self.list), 'Wrong number of columns in row'
        for i, column in enumerate(self.list):
            if self.hasColumnSubColumns(i):
                # Adding data to sub-columns
                assert len(row[i]) == len(column) - 1, 'Wrong number of subColumns in column' + column[0]
                rendered = ''
                for j, value in enumerate(row[i]):
                    column[j + 1].append(value)
                    rendered += '{0:{base}}'.format(value, base='.4f') + self.separator + ' '
                if len(rendered) > self.widths[i]:
                    self.widths[i] = len(rendered) - 1
            else:
                # Adding data to a plain column
                column.append(row[i])
                if len(str(row[i])) > self.widths[i]:
                    self.widths[i] = len(str(row[i]))
        self.rows += 1

    def allWidth(self):
        '''Return the total width of all top-level columns.'''
        # Previously returned the last column width instead of the total.
        total = 0
        for width in self.widths:
            total += width
        return total

    def columnIndex(self, name):
        '''Return the index of the column called name, or None if absent.'''
        for index, column in enumerate(self.list):
            if column[0] == name:
                return index
        return None

    def hasSubColumns(self):
        '''Return True when at least one column has sub-columns.'''
        for index in range(0, len(self.list)):
            if self.hasColumnSubColumns(index):
                return True
        return False

    def hasColumnSubColumns(self, index):
        '''Return True when the column at index has sub-columns.'''
        column = self.list[index]
        return len(column) > 1 and isinstance(column[1], list)

    def repeat_to_length(self, string_to_expand, length):
        '''Repeat string_to_expand until it is exactly length characters.'''
        # Floor division keeps the repeat count an int on Python 3 as well.
        return (string_to_expand * ((length // len(string_to_expand)) + 1))[:length]

    def _average(self, values):
        '''Return the average of the first self.rows data values of a column.'''
        total = 0
        for j in range(1, 1 + self.rows):  # iterate over rows
            total += values[j]
        return total / self.rows

    def count_avg(self,):
        '''
        Append a row with the per-column averages; the first column of that
        row holds the label 'AVG'.
        '''
        results = ['AVG']
        # Column 0 only carries the label, so its values are never averaged.
        for i in range(1, len(self.list)):  # iterate over columns
            if self.hasColumnSubColumns(i):
                results.append([self._average(sub) for sub in self.list[i][1:]])
            else:
                results.append(self._average(self.list[i]))
        self.addRow(results)

    def _cell(self, value, width):
        '''
        Render one field padded to width and followed by the separator.

        Ints use the 'd' format, floats four decimals, anything else 's'.
        '''
        # Exact type checks on purpose: a bool must not be rendered as a number.
        if type(value) is int:
            base = 'd'
        elif type(value) is float:
            base = '.4f'
        else:
            base = 's'
        return '{0:{width}{base}}'.format(value, base=base, width=width) + self.separator

    def __str__(self):
        '''Render the header line(s) followed by all data rows.'''
        parts = []
        if not self.hasSubColumns():
            # Index 0 of every column is its header, so it is printed as a row.
            for j in range(0, 1 + self.rows):  # iterate over rows
                for i in range(0, len(self.list)):  # iterate over columns
                    parts.append(self._cell(self.list[i][j], self.widths[i]))
                parts.append('\n')
            return ''.join(parts)

        # Printing the header line
        for i, column in enumerate(self.list):
            if self.hasColumnSubColumns(i):
                numberOfColumns = len(column[1:])
                parts.append('{0:{width}{base}}'.format(
                    column[0], base='s',
                    width=self.widths[i] - numberOfColumns + 1) + self.separator)
                parts.append(self.separator * (numberOfColumns - 1))
            else:
                parts.append('{0:{width}{base}}'.format(
                    column[0], base='s', width=self.widths[i]) + self.separator)
        parts.append('\n')

        # Printing the sub-header line
        for i, column in enumerate(self.list):
            if self.hasColumnSubColumns(i):
                subWidth = self.widths[i] // len(column[1:])
                for subColumn in column[1:]:
                    parts.append('{0:{width}{base}}'.format(
                        subColumn[0], base='s', width=subWidth) + self.separator)
            else:
                parts.append('{0:{width}{base}}'.format(
                    '', base='s', width=self.widths[i]) + self.separator)
        parts.append('\n')

        # Printing the data
        for j in range(1, 1 + self.rows):  # iterate over rows
            for i, column in enumerate(self.list):  # iterate over columns
                if self.hasColumnSubColumns(i):
                    # Width is derived from this column's own sub-column count
                    # rather than a value left over from the sub-header loop.
                    subWidth = self.widths[i] // len(column[1:])
                    for subColumn in column[1:]:
                        parts.append(self._cell(subColumn[j], subWidth))
                else:
                    parts.append(self._cell(column[j], self.widths[i]))
            parts.append('\n')
        return ''.join(parts)
This diff is collapsed.
Click to expand it.
utils/chunk_eval.py
+
60
−
35
View file @
d55d81ca
...
...
@@ -2,6 +2,8 @@
#-*- coding: utf-8 -*-
'''
Created on 01-08-2012
@author: Adam Pawlaczek
'''
# Copyright (C) 2012 Adam Pawlaczek.
# This program is free software; you can redistribute and/or modify it
...
...
@@ -15,6 +17,12 @@ Created on 01-08-2012
#
# See the LICENCE and COPYING files for more details
from
optparse
import
OptionParser
import
corpus2
import
sys
,
os
from
CSVTable
import
CSVTable
import
codecs
descr
=
"""
%prog [options] CHUNKED REF
Reads the two chunk-annotated corpora: CHUNKED (chunker output) and REF
...
...
@@ -26,10 +34,6 @@ for the following settings:
NOTE: this script treats discontinuous chunks as whole annotations.
"""
from
optparse
import
OptionParser
import
corpus2
import
sys
,
os
from
CSVWriter
import
CSVWriter
class
Stats
:
def
__init__
(
self
):
...
...
@@ -58,13 +62,17 @@ class Stats:
self
.
head_hits
+=
len
(
ch
.
intersection
(
ref
))
def
getPRF
(
self
,
hits
):
result
=
{}
p
=
0.0
if
self
.
ch_chunks
==
0
else
100.0
*
hits
/
self
.
ch_chunks
r
=
0.0
if
self
.
ref_chunks
==
0
else
100.0
*
hits
/
self
.
ref_chunks
f
=
0.0
if
p
+
r
==
0.0
else
2.0
*
p
*
r
/
(
p
+
r
)
return
[
p
,
r
,
f
]
result
[
'
P
'
]
=
p
result
[
'
R
'
]
=
r
result
[
'
F
'
]
=
f
return
result
def
getStats
(
self
):
return
[
self
.
getPRF
(
self
.
chunk_hits
)
]
return
self
.
getPRF
(
self
.
chunk_hits
)
def
get_annots
(
sent
,
chan_name
):
# wrap the sentence as an AnnotatedSentence
...
...
@@ -106,12 +114,18 @@ def go():
ch_path
,
ref_path
=
args
main
(
ch_path
,
ref_path
,
options
.
chunk_names
,
options
.
input_format
,
options
.
out_path
,
options
.
tagset
,
options
.
verbose
,
options
.
folds
)
def
main
(
ch_path
,
ref_path
,
chan_name
,
input_format
,
out_path
,
tagset
,
verbose
,
folds
):
def
main
(
ch_path
,
ref_path
,
chan_name
s
,
input_format
,
out_path
,
tagset
,
verbose
,
folds
):
c
svWriter
=
CSVWriter
(
"
,
"
)
c
han_names
=
chan_names
.
split
(
"
,
"
)
csvWriter
.
addColumns
([
"
Nr
"
,
"
Chunk
"
])
csvWriter
.
addSubColumnsByName
(
"
Chunk
"
,
[
"
P
"
,
"
R
"
,
"
F
"
])
csvTable
=
CSVTable
(
"
;
"
)
csvTable
.
addColumn
(
'
Nr
'
)
for
chan_name
in
chan_names
:
csvTable
.
addColumn
(
chan_name
)
csvTable
.
addSubColumn
(
chan_name
,
"
P
"
,
type
=
"
float
"
)
csvTable
.
addSubColumn
(
chan_name
,
"
R
"
,
type
=
"
float
"
)
csvTable
.
addSubColumn
(
chan_name
,
"
F
"
,
type
=
"
float
"
)
tagset
=
corpus2
.
get_named_tagset
(
tagset
)
...
...
@@ -123,6 +137,10 @@ def main(ch_path, ref_path, chan_name, input_format, out_path, tagset, verbose,
ch_path_fold
=
ch_path
ref_path_fold
=
ref_path
results
=
{}
for
chan_name
in
chan_names
:
ch_rdr
=
corpus2
.
TokenReader
.
create_path_reader
(
input_format
,
tagset
,
ch_path_fold
)
ref_rdr
=
corpus2
.
TokenReader
.
create_path_reader
(
...
...
@@ -146,11 +164,18 @@ def main(ch_path, ref_path, chan_name, input_format, out_path, tagset, verbose,
ref_annots
=
get_annots
(
ref_sent
,
chan_name
)
stats
.
update
(
ch_annots
,
ref_annots
)
results
=
stats
.
getStats
()
results
[:
0
]
=
[
fold
]
csvWriter
.
addRow
(
results
)
csvWriter
.
count_avg
()
print
csvWriter
results
[
chan_name
]
=
stats
.
getStats
()
csvTable
.
addRow
(
results
)
if
folds
>
1
:
csvTable
.
countAvg
()
if
out_path
!=
''
:
out
=
codecs
.
open
(
out_path
,
"
w
"
,
"
utf-8
"
)
out
.
close
()
else
:
print
csvTable
if
__name__
==
'
__main__
'
:
go
()
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment