Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
corpus2
Manage
Activity
Members
Labels
Plan
Issues
4
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Analysers
corpus2
Commits
9a030bad
Commit
9a030bad
authored
13 years ago
by
Adam Radziszewski
Browse files
Options
Downloads
Patches
Plain Diff
tagger eval script: fixes
parent
90e4ea15
Branches
Branches containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
utils/tagger-eval.py
+12
-3
12 additions, 3 deletions
utils/tagger-eval.py
with
12 additions
and
3 deletions
utils/tagger-eval.py
+
12
−
3
View file @
9a030bad
...
@@ -57,7 +57,7 @@ def tok_seqs(rdr_here, rdr_there, respect_spaces, verbose_mode, debug_mode):
...
@@ -57,7 +57,7 @@ def tok_seqs(rdr_here, rdr_there, respect_spaces, verbose_mode, debug_mode):
buff_here
=
[
tok_here
]
buff_here
=
[
tok_here
]
buff_there
=
[
tok_there
]
buff_there
=
[
tok_there
]
LIMIT
=
2
0
LIMIT
=
3
0
num_iter
=
0
num_iter
=
0
...
@@ -124,12 +124,15 @@ class TokComp:
...
@@ -124,12 +124,15 @@ class TokComp:
non-punct token.
non-punct token.
punc_tag is a string representation of tag used for punctuation.
punc_tag is a string representation of tag used for punctuation.
unk_tag is a string representation of tag used for unknown words.
Set expand_optional to True if ommission of optional attribute
Set expand_optional to True if ommission of optional attribute
values should be treated as multiple tags, each with a different
values should be treated as multiple tags, each with a different
variant of the value.
"""
variant of the value.
"""
def
__init__
(
self
,
tagset
,
punc_tag
,
expand_optional
,
debug
=
False
):
def
__init__
(
self
,
tagset
,
punc_tag
,
unk_tag
,
expand_optional
,
debug
=
False
):
self
.
tagset
=
tagset
self
.
tagset
=
tagset
self
.
punc_tag
=
punc_tag
self
.
punc_tag
=
punc_tag
self
.
unk_tag
=
unk_tag
self
.
expand_optional
=
expand_optional
self
.
expand_optional
=
expand_optional
self
.
debug
=
debug
self
.
debug
=
debug
self
.
ref_toks
=
0
# all tokens in ref corpus
self
.
ref_toks
=
0
# all tokens in ref corpus
...
@@ -158,6 +161,10 @@ class TokComp:
...
@@ -158,6 +161,10 @@ class TokComp:
tok_tags
=
set
([
self
.
tagset
.
tag_to_string
(
lex
.
tag
())
for
lex
in
tok
.
lexemes
()
if
lex
.
is_disamb
()])
tok_tags
=
set
([
self
.
tagset
.
tag_to_string
(
lex
.
tag
())
for
lex
in
tok
.
lexemes
()
if
lex
.
is_disamb
()])
return
tok_tags
==
set
([
self
.
punc_tag
])
return
tok_tags
==
set
([
self
.
punc_tag
])
def
is_unknown
(
self
,
tok
):
tok_tags
=
[
self
.
tagset
.
tag_to_string
(
lex
.
tag
())
for
lex
in
tok
.
lexemes
()]
return
unk_tag
in
tok_tags
def
tagstrings_of_token
(
self
,
tok
):
def
tagstrings_of_token
(
self
,
tok
):
"""
Returns a set of strings, corresponding to disamb tags
"""
Returns a set of strings, corresponding to disamb tags
found in the token. If expand_optional, multiple tags may be
found in the token. If expand_optional, multiple tags may be
...
@@ -205,6 +212,8 @@ class TokComp:
...
@@ -205,6 +212,8 @@ class TokComp:
self
.
tag_toks
+=
len
(
tag_seq
)
self
.
tag_toks
+=
len
(
tag_seq
)
self
.
ref_toks
+=
len
(
ref_seq
)
self
.
ref_toks
+=
len
(
ref_seq
)
unk_tokens
=
sum
(
self
.
is_unk
(
ref_tok
)
for
ref_tok
in
# first variant: no segmentation mess
# first variant: no segmentation mess
if
len
(
tag_seq
)
==
1
and
len
(
ref_seq
)
==
1
:
if
len
(
tag_seq
)
==
1
and
len
(
ref_seq
)
==
1
:
tagval
,
posval
=
self
.
cmp_toks
(
tag_seq
[
0
],
ref_seq
[
0
])
tagval
,
posval
=
self
.
cmp_toks
(
tag_seq
[
0
],
ref_seq
[
0
])
...
@@ -356,7 +365,7 @@ def go():
...
@@ -356,7 +365,7 @@ def go():
parser
.
add_option
(
'
-d
'
,
'
--debug
'
,
action
=
'
store_true
'
,
dest
=
'
debug_mode
'
)
parser
.
add_option
(
'
-d
'
,
'
--debug
'
,
action
=
'
store_true
'
,
dest
=
'
debug_mode
'
)
(
options
,
args
)
=
parser
.
parse_args
()
(
options
,
args
)
=
parser
.
parse_args
()
if
len
(
args
)
<
2
and
len
(
args
)
%
2
!=
0
:
if
len
(
args
)
<
2
or
len
(
args
)
%
2
!=
0
:
print
'
You need to provide a series of tagged folds and a coresponding
'
print
'
You need to provide a series of tagged folds and a coresponding
'
print
'
series of reference folds.
'
print
'
series of reference folds.
'
print
'
See --help for details.
'
print
'
See --help for details.
'
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment