From 5629772b660e8a43971e34879899cc3a6d62f153 Mon Sep 17 00:00:00 2001 From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl> Date: Mon, 23 Apr 2012 09:44:43 +0200 Subject: [PATCH] iobber: read configs and models from install site, mv data to package data subdir --- iobber/chunker.py | 7 ++++--- iobber/classify.py | 2 +- iobber/corpio.py | 16 ++++++++++++++++ {config => iobber/data}/kpwr-layer1.txt | 0 {config => iobber/data}/kpwr-layer2.txt | 0 {config => iobber/data}/kpwr.ccl | 0 {config => iobber/data}/kpwr.ini | 0 iobber/iobber.py | 8 ++++---- 8 files changed, 25 insertions(+), 8 deletions(-) rename {config => iobber/data}/kpwr-layer1.txt (100%) rename {config => iobber/data}/kpwr-layer2.txt (100%) rename {config => iobber/data}/kpwr.ccl (100%) rename {config => iobber/data}/kpwr.ini (100%) diff --git a/iobber/chunker.py b/iobber/chunker.py index 50aeb26..7c9d404 100644 --- a/iobber/chunker.py +++ b/iobber/chunker.py @@ -75,13 +75,14 @@ class Chunker: accompanying file named config.ccl. Trained chunker model is sought in (or written to when training) data_dir. The model is basically a trained CRF classifier.""" - self.conf_dir, conf_fname = os.path.split(config_path) + found_config_path = corpio.get_data(config_path) + self.conf_dir, conf_fname = os.path.split(found_config_path) # models (trained classifiers) self.model_name, dummy = os.path.splitext(conf_fname) - self.data_dir = data_dir + self.data_dir = corpio.get_data(data_dir) self.verbose = verbose # load the config file - with open(config_path) as config_file: + with open(found_config_path) as config_file: self.conf = ConfigParser.RawConfigParser() self.conf.readfp(config_file) self.tagset = corpio.get_tagset(self.conf) diff --git a/iobber/classify.py b/iobber/classify.py index bce6a1d..9c3ee89 100644 --- a/iobber/classify.py +++ b/iobber/classify.py @@ -64,7 +64,7 @@ def load(conf, model_name, data_dir, chan_name): cr_fname = corpio.f_name(model_name, data_dir, config.EXT_CR, chan_name) if os.path.isfile(cr_fname): return CRFPP.Tagger('-m %s' % cr_fname) - return None + raise IOError('can\'t open classifier from %s' % cr_fname) def open_sent(crf_obj): """ diff --git a/iobber/corpio.py b/iobber/corpio.py index 7c17f27..7481414 100644 --- a/iobber/corpio.py +++ b/iobber/corpio.py @@ -23,15 +23,28 @@ import corpus2, wccl import config import codecs, os +_ROOT = os.path.abspath(os.path.dirname(__file__)) + format_help = """ Available input formats: """ + ' '.join(corpus2.TokenReader.available_reader_types()) + """ """ + ' '.join(corpus2.TokenReader.available_reader_types_help()) + """ Available output formats: """ + ' '.join(corpus2.TokenWriter.available_writer_types()) + """ """ + ' '.join(corpus2.TokenWriter.available_writer_types_help()) +def get_data(path): + """Tries to resolve path to the given subdir, trying the path locally + and then in the install site.""" + if os.path.exists(path): + return path + in_data = os.path.join(_ROOT, 'data', path) + if os.path.exists(in_data): + return in_data + raise IOError('can\'t locate %s, tried locally and %s' % (path, in_data)) + def f_name(model_name, subdir, ext, suff = ''): """Gets the filename based on model_name having the given extension. Optionally, you can specify name suffix.""" + base = (model_name + '-' + suff + '.' + ext) if suff else (model_name + '.' + ext) return os.path.join(subdir, base) @@ -44,6 +57,9 @@ def get_reader(in_path, tagset, input_format, read_disamb_only): 'disamb' lexemes/interpretations.""" if read_disamb_only: fixd_format = input_format + ',disamb_only' + # force casting sentences as AnnotatedSentences + # required to get XCES input right + fixd_format += ',ann' if in_path: return corpus2.TokenReader.create_path_reader( diff --git a/config/kpwr-layer1.txt b/iobber/data/kpwr-layer1.txt similarity index 100% rename from config/kpwr-layer1.txt rename to iobber/data/kpwr-layer1.txt diff --git a/config/kpwr-layer2.txt b/iobber/data/kpwr-layer2.txt similarity index 100% rename from config/kpwr-layer2.txt rename to iobber/data/kpwr-layer2.txt diff --git a/config/kpwr.ccl b/iobber/data/kpwr.ccl similarity index 100% rename from config/kpwr.ccl rename to iobber/data/kpwr.ccl diff --git a/config/kpwr.ini b/iobber/data/kpwr.ini similarity index 100% rename from config/kpwr.ini rename to iobber/data/kpwr.ini diff --git a/iobber/iobber.py b/iobber/iobber.py index e0f707c..7b4e397 100755 --- a/iobber/iobber.py +++ b/iobber/iobber.py @@ -57,7 +57,7 @@ def go(): help='set output filename (do not write to stdout)') parser.add_option('-d', '--data-dir', type='string', action='store', dest='data_dir', default='', - help='assume WCCL and trained model to sit in the given dir') + help='use the given directory to look for the trained model (or save the model when training)') parser.add_option('--sent-only', action='store_false', dest='preserve_chunks', default=True, help='process sentences, ignoring division into paragraphs') @@ -72,12 +72,12 @@ def go(): if len(args) < 1: sys.stderr.write('You need to provide a config file and specify input.\n') - sys.stderr.write('See %s --help\n' % sys.argv[0]) + sys.stderr.write('See --help for details.\n') sys.exit(1) - config_path = args[0] + config_fname = args[0] files = args[1:] - tagr = chunker.Chunker(config_path, options.data_dir, + tagr = chunker.Chunker(config_fname, options.data_dir, verbose = options.verbose) if options.is_training: -- GitLab