From aefc58ff4d156b7ce838db035e5486711df3ed55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Warzocha?= <radoslaw.warzocha@gmail.com> Date: Tue, 7 Oct 2014 11:46:12 +0200 Subject: [PATCH] Making Iobber work with Wcrft2 --- iobber/iobber_txt.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/iobber/iobber_txt.py b/iobber/iobber_txt.py index b7fd003..41fea07 100755 --- a/iobber/iobber_txt.py +++ b/iobber/iobber_txt.py @@ -20,9 +20,7 @@ import sys from optparse import OptionParser import corpus2 - -from wcrft import tagger -from wcrft import corpio as tagger_io +import wcrft2 import chunker @@ -111,7 +109,10 @@ def main(files, tagger_config, tagger_dir, shall_chunk, """Create a Tagger (WCRFT) and a Chunker (IOBBER) object and get all the input parts processed according to function args.""" - tagr = tagger.Tagger(tagger_config, tagger_dir) + tagr = wcrft2.Tagger(tagger_config, tagger_dir) + tagset = tagr.get_tagset() + maca_conf = tagr.get_maca_config() + if shall_chunk: chunkr = chunker.Chunker(chunker_config, chunker_dir) @@ -139,16 +140,13 @@ def main(files, tagger_config, tagger_dir, shall_chunk, tagr.load_model() if shall_chunk: chunkr.load_model() - assert (tagr.tagset.name() - == chunkr.tagset.name()), ('Tagger and chunker config must' - + 'operate on the same tagset: %s v. %s' % (tagr.tagset.name(), - chunkr.tagset.name())) + assert (tagset.name() == chunkr.tagset.name()), ('Tagger and chunker config must' + + 'operate on the same tagset: %s v. %s' % (tagset.name(), chunkr.tagset.name())) for in_path, out_path in zip(inputs, outputs): if in_path and verbose: sys.stderr.write('Processing %s...\n' % in_path) - reader = tagger_io.get_reader( - in_path, tagr.tagset, input_format, tagr.maca_config) - writer = tagger_io.get_writer(out_path, tagr.tagset, output_format) + reader = wcrft2.get_reader(in_path, input_format, tagset, maca_conf) + writer = wcrft2.get_writer(out_path, output_format, tagset) while True: par = reader.get_next_chunk() # here `chunk' denotes paragraph if not par: -- GitLab