Skip to content
Snippets Groups Projects
Commit aefc58ff authored by Radosław Warzocha
Browse files

Making Iobber work with Wcrft2

parent 78267450
No related branches found
No related tags found
No related merge requests found
...@@ -20,9 +20,7 @@ import sys ...@@ -20,9 +20,7 @@ import sys
from optparse import OptionParser from optparse import OptionParser
import corpus2 import corpus2
import wcrft2
from wcrft import tagger
from wcrft import corpio as tagger_io
import chunker import chunker
...@@ -111,7 +109,10 @@ def main(files, tagger_config, tagger_dir, shall_chunk, ...@@ -111,7 +109,10 @@ def main(files, tagger_config, tagger_dir, shall_chunk,
"""Create a Tagger (WCRFT) and a Chunker (IOBBER) object """Create a Tagger (WCRFT) and a Chunker (IOBBER) object
and get all the input parts processed according to function args.""" and get all the input parts processed according to function args."""
tagr = tagger.Tagger(tagger_config, tagger_dir) tagr = wcrft2.Tagger(tagger_config, tagger_dir)
tagset = tagr.get_tagset()
maca_conf = tagr.get_maca_config()
if shall_chunk: if shall_chunk:
chunkr = chunker.Chunker(chunker_config, chunker_dir) chunkr = chunker.Chunker(chunker_config, chunker_dir)
...@@ -139,16 +140,13 @@ def main(files, tagger_config, tagger_dir, shall_chunk, ...@@ -139,16 +140,13 @@ def main(files, tagger_config, tagger_dir, shall_chunk,
tagr.load_model() tagr.load_model()
if shall_chunk: if shall_chunk:
chunkr.load_model() chunkr.load_model()
assert (tagr.tagset.name() assert (tagset.name() == chunkr.tagset.name()), ('Tagger and chunker config must'
== chunkr.tagset.name()), ('Tagger and chunker config must' + 'operate on the same tagset: %s v. %s' % (tagset.name(), chunkr.tagset.name()))
+ 'operate on the same tagset: %s v. %s' % (tagr.tagset.name(),
chunkr.tagset.name()))
for in_path, out_path in zip(inputs, outputs): for in_path, out_path in zip(inputs, outputs):
if in_path and verbose: if in_path and verbose:
sys.stderr.write('Processing %s...\n' % in_path) sys.stderr.write('Processing %s...\n' % in_path)
reader = tagger_io.get_reader( reader = wcrft2.get_reader(in_path, input_format, tagset, maca_conf)
in_path, tagr.tagset, input_format, tagr.maca_config) writer = wcrft2.get_writer(out_path, output_format, tagset)
writer = tagger_io.get_writer(out_path, tagr.tagset, output_format)
while True: while True:
par = reader.get_next_chunk() # here `chunk' denotes paragraph par = reader.get_next_chunk() # here `chunk' denotes paragraph
if not par: if not par:
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment