Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
corpus2
Manage
Activity
Members
Labels
Plan
Issues
4
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Analysers
corpus2
Commits
5b9d1cbc
Commit
5b9d1cbc
authored
12 years ago
by
Pawel Orlowicz
Browse files
Options
Downloads
Patches
Plain Diff
relation_eval refactored
parent
924f54e5
Branches
Branches containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
utils/relation_eval.py
+37
-35
37 additions, 35 deletions
utils/relation_eval.py
with
37 additions
and
35 deletions
utils/relation_eval.py
+
37
−
35
View file @
5b9d1cbc
#!/usr/bin/python
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2012 Paweł Orłowicz.
# Copyright (C) 2012 Paweł Orłowicz.
# This program is free software; you can redistribute and/or modify it
# This program is free software; you can redistribute and/or modify it
...
@@ -31,7 +32,7 @@ from optparse import OptionParser
...
@@ -31,7 +32,7 @@ from optparse import OptionParser
import
sys
import
sys
import
corpus2
import
corpus2
class
RelStats
:
class
RelStats
:
def
__init__
(
self
):
def
__init__
(
self
):
self
.
both_hits
=
0
self
.
both_hits
=
0
self
.
head_hits
=
0
self
.
head_hits
=
0
...
@@ -39,7 +40,7 @@ class RelStats :
...
@@ -39,7 +40,7 @@ class RelStats :
self
.
any_hits
=
0
self
.
any_hits
=
0
#helper method to get annotation vector from annotated sentence
#helper method to get annotation vector from annotated sentence
def
get_channel_annotations
(
self
,
ann_sent
,
dir_point
)
:
def
get_channel_annotations
(
self
,
ann_sent
,
dir_point
)
:
chann_name
=
dir_point
.
channel_name
()
chann_name
=
dir_point
.
channel_name
()
annotation_number
=
dir_point
.
annotation_number
()
-
1
annotation_number
=
dir_point
.
annotation_number
()
-
1
channel
=
ann_sent
.
get_channel
(
chann_name
)
channel
=
ann_sent
.
get_channel
(
chann_name
)
...
@@ -47,7 +48,7 @@ class RelStats :
...
@@ -47,7 +48,7 @@ class RelStats :
return
ann_vec
[
annotation_number
]
return
ann_vec
[
annotation_number
]
#helper method to get list of tokens' indices
#helper method to get list of tokens' indices
def
get_indices
(
self
,
annotated_sentence
,
direction_point
)
:
def
get_indices
(
self
,
annotated_sentence
,
direction_point
):
ann_chann
=
self
.
get_channel_annotations
(
annotated_sentence
,
direction_point
)
ann_chann
=
self
.
get_channel_annotations
(
annotated_sentence
,
direction_point
)
indices
=
ann_chann
.
indices
indices
=
ann_chann
.
indices
#loop to unwrap Integer objects from ann_chann.indices
#loop to unwrap Integer objects from ann_chann.indices
...
@@ -58,34 +59,38 @@ class RelStats :
...
@@ -58,34 +59,38 @@ class RelStats :
return
inds
return
inds
#helper to get index of the chunk's head
#helper to get index of the chunk's head
def
get_head_index
(
self
,
annotated_sentence
,
direction_point
)
:
def
get_head_index
(
self
,
annotated_sentence
,
direction_point
)
:
ann_chann
=
self
.
get_channel_annotations
(
annotated_sentence
,
direction_point
)
ann_chann
=
self
.
get_channel_annotations
(
annotated_sentence
,
direction_point
)
head_index
=
ann_chann
.
head_index
head_index
=
ann_chann
.
head_index
return
head_index
return
head_index
#returns values of hits from one direction point of relation
#returns values of hits from one direction point of relation
def
verify_relation
(
self
,
ref_ann_sent
,
dir_point_ref
,
target_ann_sent
,
dir_point_target
)
:
def
verify_relation
(
self
,
ref_ann_sent
,
dir_point_ref
,
target_ann_sent
,
dir_point_target
)
:
both
,
head
,
chun
=
0
,
0
,
0
both
,
head
,
chun
=
0
,
0
,
0
#if indices from ref chunk and target chunks equals (tokens are the same) then chun hits
#if indices from ref chunk and target chunks equals (tokens are the same) then chun hits
if
self
.
get_indices
(
ref_ann_sent
,
dir_point_ref
)
==
self
.
get_indices
(
target_ann_sent
,
dir_point_target
)
:
if
self
.
get_indices
(
ref_ann_sent
,
dir_point_ref
)
==
self
.
get_indices
(
target_ann_sent
,
dir_point_target
)
:
chun
+
=
1
chun
=
1
#
if chun hits and head indices match then head hits
#
if chun hits and head indices match then head hits
if
self
.
get_head_index
(
ref_ann_sent
,
dir_point_ref
)
==
self
.
get_head_index
(
target_ann_sent
,
dir_point_target
)
:
#
if self.get_head_index(ref_ann_sent, dir_point_ref) == self.get_head_index(target_ann_sent, dir_point_target):
head
+
=
1
#
head =1
#if indices are different (chunks consists of different sets of words) but heads match then head hits
#if indices are different (chunks consists of different sets of words) but heads match then head hits
elif
self
.
get_head_index
(
ref_ann_sent
,
dir_point_ref
)
==
self
.
get_head_index
(
target_ann_sent
,
dir_point_target
)
:
if
self
.
get_head_index
(
ref_ann_sent
,
dir_point_ref
)
==
self
.
get_head_index
(
target_ann_sent
,
dir_point_target
):
head
+=
1
head
=
1
if
chun
==
1
and
head
==
1
:
both
=
1
return
both
,
chun
,
head
return
both
,
chun
,
head
#if there was a hit on both sides of relation (dir_from, dir_to) then update counters
#if there was a hit on both sides of relation (dir_from, dir_to) then update counters
def
update_stats
(
self
,
both
,
chun
,
head
)
:
def
update_stats
(
self
,
both
,
chun
,
head
)
:
if
chun
==
2
:
if
chun
==
2
:
self
.
chun_hits
+=
1
self
.
chun_hits
+=
1
if
head
==
2
:
if
head
==
2
:
self
.
head_hits
+=
1
self
.
head_hits
+=
1
if
chun
==
2
and
head
==
2
:
if
chun
==
2
and
head
==
2
:
self
.
both_hits
+=
1
self
.
both_hits
+=
1
if
chun
==
2
or
head
==
2
:
if
both
>
0
and
chun
+
head
>
2
:
self
.
any_hits
+=
1
if
both
==
0
and
chun
+
head
>
1
:
self
.
any_hits
+=
1
self
.
any_hits
+=
1
def
print_stats
(
self
,
ref_rels_count
,
target_rels_count
,
stat_mode
):
def
print_stats
(
self
,
ref_rels_count
,
target_rels_count
,
stat_mode
):
...
@@ -114,35 +119,35 @@ class RelStats :
...
@@ -114,35 +119,35 @@ class RelStats :
print
(
'
Head match:
\t
'
)
print
(
'
Head match:
\t
'
)
print
'
%.2f
\t
%.2f
\t
%.2f
'
%
(
p
,
r
,
f
)
print
'
%.2f
\t
%.2f
\t
%.2f
'
%
(
p
,
r
,
f
)
def
compare
(
rel1
,
rel2
)
:
def
compare
(
rel1
,
rel2
)
:
dp1_from
=
rel1
.
rel_from
()
dp1_from
=
rel1
.
rel_from
()
dp2_from
=
rel2
.
rel_from
()
dp2_from
=
rel2
.
rel_from
()
dp1_to
=
rel1
.
rel_to
()
dp1_to
=
rel1
.
rel_to
()
dp2_to
=
rel2
.
rel_to
()
dp2_to
=
rel2
.
rel_to
()
if
cmp
(
dp1_from
.
sentence_id
(),
dp2_from
.
sentence_id
())
<
0
:
if
cmp
(
dp1_from
.
sentence_id
(),
dp2_from
.
sentence_id
())
<
0
:
return
-
1
return
-
1
elif
cmp
(
dp1_from
.
sentence_id
(),
dp2_from
.
sentence_id
())
>
0
:
elif
cmp
(
dp1_from
.
sentence_id
(),
dp2_from
.
sentence_id
())
>
0
:
return
1
return
1
if
cmp
(
dp1_from
.
channel_name
(),
dp2_from
.
channel_name
())
<
0
:
if
cmp
(
dp1_from
.
channel_name
(),
dp2_from
.
channel_name
())
<
0
:
return
-
1
return
-
1
elif
cmp
(
dp1_from
.
channel_name
(),
dp2_from
.
channel_name
())
>
0
:
elif
cmp
(
dp1_from
.
channel_name
(),
dp2_from
.
channel_name
())
>
0
:
return
1
return
1
if
cmp
(
dp1_from
.
annotation_number
(),
dp2_from
.
annotation_number
())
<
0
:
if
cmp
(
dp1_from
.
annotation_number
(),
dp2_from
.
annotation_number
())
<
0
:
return
-
1
return
-
1
elif
cmp
(
dp1_from
.
annotation_number
(),
dp2_from
.
annotation_number
())
>
0
:
elif
cmp
(
dp1_from
.
annotation_number
(),
dp2_from
.
annotation_number
())
>
0
:
return
1
return
1
if
cmp
(
dp1_to
.
sentence_id
(),
dp2_to
.
sentence_id
())
<
0
:
if
cmp
(
dp1_to
.
sentence_id
(),
dp2_to
.
sentence_id
())
<
0
:
return
-
1
return
-
1
elif
cmp
(
dp1_to
.
sentence_id
(),
dp2_to
.
sentence_id
())
>
0
:
elif
cmp
(
dp1_to
.
sentence_id
(),
dp2_to
.
sentence_id
())
>
0
:
return
1
return
1
if
cmp
(
dp1_to
.
channel_name
(),
dp2_to
.
channel_name
())
<
0
:
if
cmp
(
dp1_to
.
channel_name
(),
dp2_to
.
channel_name
())
<
0
:
return
-
1
return
-
1
elif
cmp
(
dp1_to
.
channel_name
(),
dp2_to
.
channel_name
())
>
0
:
elif
cmp
(
dp1_to
.
channel_name
(),
dp2_to
.
channel_name
())
>
0
:
return
1
return
1
if
cmp
(
dp1_to
.
annotation_number
(),
dp2_to
.
annotation_number
())
<
0
:
if
cmp
(
dp1_to
.
annotation_number
(),
dp2_to
.
annotation_number
())
<
0
:
return
-
1
return
-
1
elif
cmp
(
dp1_to
.
annotation_number
(),
dp2_to
.
annotation_number
())
>
0
:
elif
cmp
(
dp1_to
.
annotation_number
(),
dp2_to
.
annotation_number
())
>
0
:
return
1
return
1
if
rel1
.
rel_name
()
<
rel2
.
rel_name
():
if
rel1
.
rel_name
()
<
rel2
.
rel_name
():
...
@@ -169,7 +174,6 @@ def go():
...
@@ -169,7 +174,6 @@ def go():
sys
.
exit
(
1
)
sys
.
exit
(
1
)
batch_ref
,
batch_target
,
rel_name
=
args
batch_ref
,
batch_target
,
rel_name
=
args
rel_stats
=
RelStats
()
rel_stats
=
RelStats
()
corpus_type
=
"
document
"
corpus_type
=
"
document
"
...
@@ -182,7 +186,7 @@ def go():
...
@@ -182,7 +186,7 @@ def go():
target_file
=
open
(
batch_target
,
"
r
"
)
target_file
=
open
(
batch_target
,
"
r
"
)
line_ref
=
ref_file
.
readline
()
line_ref
=
ref_file
.
readline
()
line_target
=
target_file
.
readline
()
line_target
=
target_file
.
readline
()
while
line_ref
and
line_target
:
while
line_ref
and
line_target
:
line_ref
=
line_ref
.
strip
()
line_ref
=
line_ref
.
strip
()
ref_ccl_filename
,
ref_rel_filename
=
line_ref
.
split
(
"
;
"
)
ref_ccl_filename
,
ref_rel_filename
=
line_ref
.
split
(
"
;
"
)
...
@@ -190,7 +194,6 @@ def go():
...
@@ -190,7 +194,6 @@ def go():
line_target
=
line_target
.
strip
()
line_target
=
line_target
.
strip
()
target_ccl_filename
,
target_rel_filename
=
line_target
.
split
(
"
;
"
)
target_ccl_filename
,
target_rel_filename
=
line_target
.
split
(
"
;
"
)
ref_ccl_rdr
=
corpus2
.
CclRelReader
(
tagset
,
ref_ccl_filename
,
ref_rel_filename
)
ref_ccl_rdr
=
corpus2
.
CclRelReader
(
tagset
,
ref_ccl_filename
,
ref_rel_filename
)
target_ccl_rdr
=
corpus2
.
CclRelReader
(
tagset
,
target_ccl_filename
,
target_rel_filename
)
target_ccl_rdr
=
corpus2
.
CclRelReader
(
tagset
,
target_ccl_filename
,
target_rel_filename
)
...
@@ -205,15 +208,14 @@ def go():
...
@@ -205,15 +208,14 @@ def go():
ref_sents
=
dict
([
(
s
.
id
(),
corpus2
.
AnnotatedSentence
.
wrap_sentence
(
s
))
for
c
in
ref_doc
.
paragraphs
()
for
s
in
c
.
sentences
()])
ref_sents
=
dict
([
(
s
.
id
(),
corpus2
.
AnnotatedSentence
.
wrap_sentence
(
s
))
for
c
in
ref_doc
.
paragraphs
()
for
s
in
c
.
sentences
()])
target_sents
=
dict
([
(
s
.
id
(),
corpus2
.
AnnotatedSentence
.
wrap_sentence
(
s
))
for
c
in
target_doc
.
paragraphs
()
for
s
in
c
.
sentences
()])
target_sents
=
dict
([
(
s
.
id
(),
corpus2
.
AnnotatedSentence
.
wrap_sentence
(
s
))
for
c
in
target_doc
.
paragraphs
()
for
s
in
c
.
sentences
()])
for
pattern
in
ref_rels
:
for
pattern
in
ref_rels
:
t
=
filter
(
lambda
x
:
(
compare
(
x
,
pattern
)
==
0
)
,
target_rels
)
t
=
filter
(
lambda
x
:
(
compare
(
x
,
pattern
)
==
0
)
,
target_rels
)
if
len
(
t
)
>
0
:
if
len
(
t
)
>
0
:
t
=
t
[
0
]
t
=
t
[
0
]
r
=
pattern
r
=
pattern
both
,
chun
,
head
=
0
,
0
,
0
both
,
chun
,
head
=
0
,
0
,
0
for
dir_point_ref
,
dir_point_target
in
zip
([
r
.
rel_from
(),
r
.
rel_to
()],
[
t
.
rel_from
(),
t
.
rel_to
()])
:
for
dir_point_ref
,
dir_point_target
in
zip
([
r
.
rel_from
(),
r
.
rel_to
()],
[
t
.
rel_from
(),
t
.
rel_to
()])
:
ref_ann_sent
=
ref_sents
[
dir_point_ref
.
sentence_id
()]
ref_ann_sent
=
ref_sents
[
dir_point_ref
.
sentence_id
()]
target_ann_sent
=
target_sents
[
dir_point_target
.
sentence_id
()]
target_ann_sent
=
target_sents
[
dir_point_target
.
sentence_id
()]
b
,
c
,
h
=
rel_stats
.
verify_relation
(
ref_ann_sent
,
dir_point_ref
,
target_ann_sent
,
dir_point_target
)
b
,
c
,
h
=
rel_stats
.
verify_relation
(
ref_ann_sent
,
dir_point_ref
,
target_ann_sent
,
dir_point_target
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment