Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
I
Iobber
Manage
Activity
Members
Labels
Plan
Issues
4
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Syntactic Tools
Chunking
Iobber
Commits
a4479f71
Commit
a4479f71
authored
12 years ago
by
Adam Radziszewski
Browse files
Options
Downloads
Patches
Plain Diff
Update iobber_txt: add default configs and models, and an option to run only the tagger (no chunking)
parent
9fae38f0
Branches
Branches containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
iobber/iobber_txt.py
+15
-10
15 additions, 10 deletions
iobber/iobber_txt.py
with
15 additions
and
10 deletions
iobber/iobber_txt.py
+
15
−
10
View file @
a4479f71
...
...
@@ -64,17 +64,20 @@ def go():
parser
.
add_option
(
'
-O
'
,
'
--output-file
'
,
type
=
'
string
'
,
action
=
'
store
'
,
dest
=
'
out_path
'
,
default
=
''
,
help
=
'
set output filename (do not write to stdout)
'
)
parser
.
add_option
(
'
--no-chunk
'
,
action
=
'
store_false
'
,
dest
=
'
shall_chunk
'
,
default
=
True
,
help
=
'
don
\'
t run the chunker, only the tagger
'
)
parser
.
add_option
(
'
-c
'
,
'
--chunker-config
'
,
type
=
'
string
'
,
action
=
'
store
'
,
dest
=
'
chunker_config
'
,
default
=
'
kpwr.ini
'
,
help
=
'
use given chunker config (default: kpwr.ini)
'
)
parser
.
add_option
(
'
-C
'
,
'
--chunker-model
'
,
type
=
'
string
'
,
action
=
'
store
'
,
dest
=
'
chunker_dir
'
,
default
=
''
,
dest
=
'
chunker_dir
'
,
default
=
'
model-kpwr04
'
,
help
=
'
read chunker trained model from the given dir
'
)
parser
.
add_option
(
'
-w
'
,
'
--tagger-config
'
,
type
=
'
string
'
,
action
=
'
store
'
,
dest
=
'
tagger_config
'
,
default
=
'
nkjp.ini
'
,
help
=
'
use given tagger (wcrft) config (default: nkjp.ini)
'
)
parser
.
add_option
(
'
-W
'
,
'
--tagger-model
'
,
type
=
'
string
'
,
action
=
'
store
'
,
dest
=
'
tagger_dir
'
,
default
=
''
,
dest
=
'
tagger_dir
'
,
default
=
'
model_nkjp10_wcrft
'
,
help
=
'
read tagger (wcrft) trained model from the given dir
'
)
parser
.
add_option
(
'
-m
'
,
'
--maca-config
'
,
type
=
'
string
'
,
action
=
'
store
'
,
dest
=
'
maca_config
'
,
default
=
''
,
...
...
@@ -88,13 +91,13 @@ def go():
files
=
args
chunkr
=
chunker
.
Chunker
(
options
.
chunker_config
,
options
.
chunker_dir
)
tagr
=
tagger
.
Tagger
(
options
.
tagger_config
,
options
.
tagger_dir
)
if
options
.
shall_chunk
:
chunkr
=
chunker
.
Chunker
(
options
.
chunker_config
,
options
.
chunker_dir
)
if
options
.
maca_config
!=
''
:
tagr
.
maca_config
=
options
.
maca_config
# TODO option not to use chunker
# tag and chunk
inputs
=
[]
outputs
=
[]
...
...
@@ -114,11 +117,12 @@ def go():
outputs
=
[
path
+
'
.tag
'
for
path
in
inputs
]
if
inputs
:
tagr
.
load_model
()
chunkr
.
load_model
()
assert
(
tagr
.
tagset
.
name
()
==
chunkr
.
tagset
.
name
()),
(
'
Tagger and chunker config must
'
+
'
operate on the same tagset: %s v. %s
'
%
(
tagr
.
tagset
.
name
(),
chunkr
.
tagset
.
name
()))
if
options
.
shall_chunk
:
chunkr
.
load_model
()
assert
(
tagr
.
tagset
.
name
()
==
chunkr
.
tagset
.
name
()),
(
'
Tagger and chunker config must
'
+
'
operate on the same tagset: %s v. %s
'
%
(
tagr
.
tagset
.
name
(),
chunkr
.
tagset
.
name
()))
for
in_path
,
out_path
in
zip
(
inputs
,
outputs
):
if
in_path
and
options
.
verbose
:
sys
.
stderr
.
write
(
'
Processing %s...
\n
'
%
in_path
)
...
...
@@ -142,7 +146,8 @@ def go():
new_sent
=
corpus2
.
AnnotatedSentence
.
cast_as_sentence
(
new_asent
)
# preserve_ambiguity = False
tagr
.
disambiguate_sentence
(
new_sent
)
chunkr
.
tag_sentence
(
new_sent
)
if
options
.
shall_chunk
:
chunkr
.
tag_sentence
(
new_sent
)
# create a new paragraph with the new sentence
new_par
.
append
(
new_sent
)
# save tagged paragraph
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment