Skip to content
Snippets Groups Projects
Commit aefc58ff authored by Radosław Warzocha
Browse files

Making Iobber work with Wcrft2

parent 78267450
No related merge requests found
......@@ -20,9 +20,7 @@ import sys
from optparse import OptionParser
import corpus2
from wcrft import tagger
from wcrft import corpio as tagger_io
import wcrft2
import chunker
......@@ -111,7 +109,10 @@ def main(files, tagger_config, tagger_dir, shall_chunk,
"""Create a Tagger (WCRFT) and a Chunker (IOBBER) object
and get all the input parts processed according to function args."""
tagr = tagger.Tagger(tagger_config, tagger_dir)
tagr = wcrft2.Tagger(tagger_config, tagger_dir)
tagset = tagr.get_tagset()
maca_conf = tagr.get_maca_config()
if shall_chunk:
chunkr = chunker.Chunker(chunker_config, chunker_dir)
......@@ -139,16 +140,13 @@ def main(files, tagger_config, tagger_dir, shall_chunk,
tagr.load_model()
if shall_chunk:
chunkr.load_model()
assert (tagr.tagset.name()
== chunkr.tagset.name()), ('Tagger and chunker config must'
+ 'operate on the same tagset: %s v. %s' % (tagr.tagset.name(),
chunkr.tagset.name()))
assert (tagset.name() == chunkr.tagset.name()), ('Tagger and chunker config must'
+ 'operate on the same tagset: %s v. %s' % (tagset.name(), chunkr.tagset.name()))
for in_path, out_path in zip(inputs, outputs):
if in_path and verbose:
sys.stderr.write('Processing %s...\n' % in_path)
reader = tagger_io.get_reader(
in_path, tagr.tagset, input_format, tagr.maca_config)
writer = tagger_io.get_writer(out_path, tagr.tagset, output_format)
reader = wcrft2.get_reader(in_path, input_format, tagset, maca_conf)
writer = wcrft2.get_writer(out_path, output_format, tagset)
while True:
par = reader.get_next_chunk() # here `chunk' denotes paragraph
if not par:
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment