diff --git a/iobber/iobber_txt.py b/iobber/iobber_txt.py index 19346d80be3718e12f410bb4d19950969b99d169..bfb4f4ea5bb50a84329a1eb358c9b2026bfb113d 100755 --- a/iobber/iobber_txt.py +++ b/iobber/iobber_txt.py @@ -64,17 +64,20 @@ def go(): parser.add_option('-O', '--output-file', type='string', action='store', dest='out_path', default='', help='set output filename (do not write to stdout)') + parser.add_option('--no-chunk', action='store_false', + dest='shall_chunk', default=True, + help='don\'t run the chunker, only the tagger') parser.add_option('-c', '--chunker-config', type='string', action='store', dest='chunker_config', default='kpwr.ini', help='use given chunker config (default: kpwr.ini)') parser.add_option('-C', '--chunker-model', type='string', action='store', - dest='chunker_dir', default='', + dest='chunker_dir', default='model-kpwr04', help='read chunker trained model from the given dir') parser.add_option('-w', '--tagger-config', type='string', action='store', dest='tagger_config', default='nkjp.ini', help='use given tagger (wcrft) config (default: nkjp.ini)') parser.add_option('-W', '--tagger-model', type='string', action='store', - dest='tagger_dir', default='', + dest='tagger_dir', default='model_nkjp10_wcrft', help='read tagger (wcrft) trained model from the given dir') parser.add_option('-m', '--maca-config', type='string', action='store', dest='maca_config', default='', @@ -88,13 +91,13 @@ def go(): files = args - chunkr = chunker.Chunker(options.chunker_config, options.chunker_dir) tagr = tagger.Tagger(options.tagger_config, options.tagger_dir) + if options.shall_chunk: + chunkr = chunker.Chunker(options.chunker_config, options.chunker_dir) if options.maca_config != '': tagr.maca_config = options.maca_config - # TODO option not to use chunker # tag and chunk inputs = [] outputs = [] @@ -114,11 +117,12 @@ def go(): outputs = [path + '.tag' for path in inputs] if inputs: tagr.load_model() - chunkr.load_model() - assert (tagr.tagset.name() - == chunkr.tagset.name()), ('Tagger and chunker config must' - + 'operate on the same tagset: %s v. %s' % (tagr.tagset.name(), - chunkr.tagset.name())) + if options.shall_chunk: + chunkr.load_model() + assert (tagr.tagset.name() + == chunkr.tagset.name()), ('Tagger and chunker config must' + + 'operate on the same tagset: %s v. %s' % (tagr.tagset.name(), + chunkr.tagset.name())) for in_path, out_path in zip(inputs, outputs): if in_path and options.verbose: sys.stderr.write('Processing %s...\n' % in_path) @@ -142,7 +146,8 @@ def go(): new_sent = corpus2.AnnotatedSentence.cast_as_sentence(new_asent) # preserve_ambiguity = False tagr.disambiguate_sentence(new_sent) - chunkr.tag_sentence(new_sent) + if options.shall_chunk: + chunkr.tag_sentence(new_sent) # create a new paragraph with the new sentence new_par.append(new_sent) # save tagged paragraph