From 0727e5d9b6ffb8f66c1ac44ea70addb3f530c821 Mon Sep 17 00:00:00 2001 From: jezozwierzak <jezozwierzak@gmail.com> Date: Fri, 5 Oct 2012 11:20:52 +0200 Subject: [PATCH] some naming fixes --- INSTALL | 2 +- iobber/chunker.py | 7 +++---- iobber/classify.py | 8 ++++---- setup.py | 2 +- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/INSTALL b/INSTALL index cf575a5..1e3e134 100644 --- a/INSTALL +++ b/INSTALL @@ -18,7 +18,7 @@ This will install the python modules (iobber package), the iobber executable and To use the trained model, issue the following (for more details please consult README and the output of iobber -h): -iobber kpwr.ini -d model-kpwr03/ my_xces_input.xml -i xces -O ccl_chunked_output.xml +iobber kpwr.ini -d model-kpwr04/ my_xces_input.xml -i xces -O ccl_chunked_output.xml NOTE: the kpwr.ini configuration assumes that the input is morphosyntactically tagged. diff --git a/iobber/chunker.py b/iobber/chunker.py index 0dde3d6..d872499 100644 --- a/iobber/chunker.py +++ b/iobber/chunker.py @@ -110,7 +110,7 @@ class Chunker: self.conf, self.model_name, self.data_dir, layer) def train_and_save(self, in_path, input_format): - """Trains the tagger and stores the model to files beginning with + """Trains the chunker and stores the model to files beginning with model_name.""" self.layer_models = None # forget any previously trained model if self.verbose: @@ -158,9 +158,8 @@ class Chunker: chan = asent.get_channel(chan_name) there_iob = corpus2.to_string(chan.get_iob_at(tok_idx)) if there_iob != 'O': - if non_O_chan is not None: - sys.stderr.write( - 'WARNING: overlapping phrases in sentence %s\n' % unicode(asent.id())) + if non_O_chan is not None and self.verbose: + sys.stderr.write('WARNING: overlapping phrases in sentence %s\n' % unicode(asent.id())) else: non_O_chan = chan_name non_O_tag = there_iob diff --git a/iobber/classify.py b/iobber/classify.py index bb09915..a90ef1f 100644 --- a/iobber/classify.py +++ b/iobber/classify.py @@ -20,11 +20,11 @@ import config, corpio DATA_SEP = '\t' -def open_tr_files(model_name, data_dir, channels): +def open_tr_files(model_name, data_dir, layers): tr_files = {} - for chan in channels: - tr_files[chan] = codecs.open(corpio.f_name(model_name, data_dir, - config.EXT_DATA, chan), 'wb', 'utf-8') + for layer in layers: + tr_files[layer] = codecs.open(corpio.f_name(model_name, data_dir, + config.EXT_DATA, layer), 'wb', 'utf-8') return tr_files def close_tr_files(tr_files): diff --git a/setup.py b/setup.py index 0e8d976..104fc2e 100755 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ setup(name='iobber', package_data={ 'iobber': [ 'data/*.ini', 'data/*.ccl', 'data/*.txt', 'data/*.lex', - 'data/model-kpwr03/*.cr', 'data/model-kpwr03/*.lex'] + 'data/model-kpwr04/*.cr', 'data/model-kpwr04/*.lex'] # TODO: pre-trained models as well }, license='LGPL', -- GitLab