Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
W
WCCL
Manage
Activity
Members
Labels
Plan
Issues
4
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Analysers
WCCL
Commits
af5d164e
Commit
af5d164e
authored
3 years ago
by
Jarema Radom
Browse files
Options
Downloads
Patches
Plain Diff
Migrated wccl-rules from py2 to py3
parent
17d9424d
Branches
wccl-rules-migration
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/doc/wccl-rules.py
+20
-28
20 additions, 28 deletions
src/doc/wccl-rules.py
with
20 additions
and
28 deletions
src/doc/wccl-rules.py
+
20
−
28
View file @
af5d164e
#!/usr/bin/python
# -*- coding: utf-8 -*-
import
sys
from
optparse
import
OptionParser
...
...
@@ -69,12 +68,12 @@ def process_sent(asent, wccl_file, shall_print_ann_info):
match_rules
=
wccl_file
.
get_match_rules_ptr
()
match_rules
.
apply_all
(
asent
)
if
shall_print_ann_info
:
print
'
####
'
,
'
'
.
join
(
tok
.
orth_utf8
()
for
tok
in
asent
.
tokens
())
print
asent
.
annotation_info
()
print
(
'
####
'
,
'
'
.
join
(
tok
.
orth_utf8
()
for
tok
in
asent
.
tokens
())
)
print
(
asent
.
annotation_info
()
)
for
chan_name
in
asent
.
all_channels
():
chan
=
asent
.
get_channel
(
chan_name
)
# get the internal representation -- annotation id vector
print
chan_name
,
'
\t
'
,
chan
.
dump_segments
()
print
(
chan_name
,
'
\t
'
,
chan
.
dump_segments
()
)
# if you want to get it as an int vector, use chan.segments()
# or to get the value at a single idx -- chan.get_segment_at(idx)
# 0 means no annotation there, positive values == annotation indices
...
...
@@ -84,7 +83,7 @@ def process_sent(asent, wccl_file, shall_print_ann_info):
for
ann
in
ann_vec
:
idx_text
=
'
,
'
.
join
(
str
(
idx
)
for
idx
in
ann
.
indices
)
orth_text
=
'
'
.
join
(
asent
.
tokens
()[
idx
].
orth_utf8
()
for
idx
in
ann
.
indices
)
print
'
\t
%s
\t
%s
\t
(%s)
'
%
(
chan_name
,
idx_text
,
orth_text
)
print
(
'
\t
%s
\t
%s
\t
(%s)
'
%
(
chan_name
,
idx_text
,
orth_text
)
)
def
go
():
...
...
@@ -147,32 +146,25 @@ def go():
writer
=
get_writer
(
options
.
out_path
,
tagset
,
options
.
output_format
)
# processing paragraph-by-paragraph
if
options
.
preserve_chunks
:
while
True
:
chunk
=
reader
.
get_next_chunk
()
if
not
chunk
:
break
# end of input
# process each sentence separately
for
sent
in
chunk
.
sentences
():
# wrap the sentence as an AnnotatedSentence
asent
=
corpus2
.
AnnotatedSentence
.
wrap_sentence
(
sent
)
process_sent
(
sent
,
wccl_file
,
options
.
ann_info
)
# save processed chunk
# NOTE: if the input sent was not AnnotatedSentence, the changes
# will be discarded
writer
.
write_chunk
(
chunk
)
else
:
while
True
:
sent
=
reader
.
get_next_sentence
()
if
not
sent
:
break
# end of input
while
True
:
chunk
=
reader
.
get_next_chunk
()
if
not
chunk
:
break
# end of input
# process each sentence separately
for
sent
in
chunk
.
sentences
():
# wrap the sentence as an AnnotatedSentence
asent
=
corpus2
.
AnnotatedSentence
.
wrap_sentence
(
sent
)
process_sent
(
asent
,
wccl_file
,
options
.
ann_info
)
# save processed sentence (safe)
# NOTE: if the input sent was not AnnotatedSentence, the changes
# will be discarded
writer
.
write_sentence
(
sent
)
# save processed chunk
# NOTE: if the input sent was not AnnotatedSentence, the changes
# will be discarded
if
writer
:
if
options
.
preserve_chunks
:
writer
.
write_chunk
(
chunk
)
else
:
writer
.
write_sentence
(
sent
)
writer
.
finish
()
if
__name__
==
'
__main__
'
:
go
()
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment