Skip to content
Snippets Groups Projects
Commit a4479f71 authored by Adam Radziszewski
Browse files

Update iobber_txt: add default configs and models, and an option to run the tagger only (no chunking)

parent 9fae38f0
Branches
No related merge requests found
......@@ -64,17 +64,20 @@ def go():
parser.add_option('-O', '--output-file', type='string', action='store',
dest='out_path', default='',
help='set output filename (do not write to stdout)')
parser.add_option('--no-chunk', action='store_false',
dest='shall_chunk', default=True,
help='don\'t run the chunker, only the tagger')
parser.add_option('-c', '--chunker-config', type='string', action='store',
dest='chunker_config', default='kpwr.ini',
help='use given chunker config (default: kpwr.ini)')
parser.add_option('-C', '--chunker-model', type='string', action='store',
dest='chunker_dir', default='',
dest='chunker_dir', default='model-kpwr04',
help='read chunker trained model from the given dir')
parser.add_option('-w', '--tagger-config', type='string', action='store',
dest='tagger_config', default='nkjp.ini',
help='use given tagger (wcrft) config (default: nkjp.ini)')
parser.add_option('-W', '--tagger-model', type='string', action='store',
dest='tagger_dir', default='',
dest='tagger_dir', default='model_nkjp10_wcrft',
help='read tagger (wcrft) trained model from the given dir')
parser.add_option('-m', '--maca-config', type='string', action='store',
dest='maca_config', default='',
......@@ -88,13 +91,13 @@ def go():
files = args
chunkr = chunker.Chunker(options.chunker_config, options.chunker_dir)
tagr = tagger.Tagger(options.tagger_config, options.tagger_dir)
if options.shall_chunk:
chunkr = chunker.Chunker(options.chunker_config, options.chunker_dir)
if options.maca_config != '':
tagr.maca_config = options.maca_config
# TODO option not to use chunker
# tag and chunk
inputs = []
outputs = []
......@@ -114,11 +117,12 @@ def go():
outputs = [path + '.tag' for path in inputs]
if inputs:
tagr.load_model()
chunkr.load_model()
assert (tagr.tagset.name()
== chunkr.tagset.name()), ('Tagger and chunker config must'
+ 'operate on the same tagset: %s v. %s' % (tagr.tagset.name(),
chunkr.tagset.name()))
if options.shall_chunk:
chunkr.load_model()
assert (tagr.tagset.name()
== chunkr.tagset.name()), ('Tagger and chunker config must'
+ 'operate on the same tagset: %s v. %s' % (tagr.tagset.name(),
chunkr.tagset.name()))
for in_path, out_path in zip(inputs, outputs):
if in_path and options.verbose:
sys.stderr.write('Processing %s...\n' % in_path)
......@@ -142,7 +146,8 @@ def go():
new_sent = corpus2.AnnotatedSentence.cast_as_sentence(new_asent)
# preserve_ambiguity = False
tagr.disambiguate_sentence(new_sent)
chunkr.tag_sentence(new_sent)
if options.shall_chunk:
chunkr.tag_sentence(new_sent)
# create a new paragraph with the new sentence
new_par.append(new_sent)
# save tagged paragraph
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment