Skip to content
Snippets Groups Projects
Commit 0727e5d9 authored by jezozwierzak
Browse files

some naming fixes

parent a21dc6c0
No related merge requests found
......@@ -18,7 +18,7 @@ This will install the python modules (iobber package), the iobber executable and
To use the trained model, issue the following (for more details please consult README and the output of iobber -h):
iobber kpwr.ini -d model-kpwr03/ my_xces_input.xml -i xces -O ccl_chunked_output.xml
iobber kpwr.ini -d model-kpwr04/ my_xces_input.xml -i xces -O ccl_chunked_output.xml
NOTE: the kpwr.ini configuration assumes that the input is morphosyntactically tagged.
......@@ -110,7 +110,7 @@ class Chunker:
self.conf, self.model_name, self.data_dir, layer)
def train_and_save(self, in_path, input_format):
"""Trains the tagger and stores the model to files beginning with
"""Trains the chunker and stores the model to files beginning with
model_name."""
self.layer_models = None # forget any previously trained model
if self.verbose:
......@@ -158,9 +158,8 @@ class Chunker:
chan = asent.get_channel(chan_name)
there_iob = corpus2.to_string(chan.get_iob_at(tok_idx))
if there_iob != 'O':
if non_O_chan is not None:
sys.stderr.write(
'WARNING: overlapping phrases in sentence %s\n' % unicode(asent.id()))
if non_O_chan is not None and self.verbose:
sys.stderr.write('WARNING: overlapping phrases in sentence %s\n' % unicode(asent.id()))
else:
non_O_chan = chan_name
non_O_tag = there_iob
......
......@@ -20,11 +20,11 @@ import config, corpio
DATA_SEP = '\t'
def open_tr_files(model_name, data_dir, channels):
def open_tr_files(model_name, data_dir, layers):
tr_files = {}
for chan in channels:
tr_files[chan] = codecs.open(corpio.f_name(model_name, data_dir,
config.EXT_DATA, chan), 'wb', 'utf-8')
for layer in layers:
tr_files[layer] = codecs.open(corpio.f_name(model_name, data_dir,
config.EXT_DATA, layer), 'wb', 'utf-8')
return tr_files
def close_tr_files(tr_files):
......
......@@ -14,7 +14,7 @@ setup(name='iobber',
package_data={
'iobber': [
'data/*.ini', 'data/*.ccl', 'data/*.txt', 'data/*.lex',
'data/model-kpwr03/*.cr', 'data/model-kpwr03/*.lex']
'data/model-kpwr04/*.cr', 'data/model-kpwr04/*.lex']
# TODO: pre-trained models as well
},
license='LGPL',
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment