From 5629772b660e8a43971e34879899cc3a6d62f153 Mon Sep 17 00:00:00 2001
From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl>
Date: Mon, 23 Apr 2012 09:44:43 +0200
Subject: [PATCH] iobber: read configs and models from install site, mv data to
 package data subdir

---
 iobber/chunker.py                       |  7 ++++---
 iobber/classify.py                      |  2 +-
 iobber/corpio.py                        | 16 ++++++++++++++++
 {config => iobber/data}/kpwr-layer1.txt |  0
 {config => iobber/data}/kpwr-layer2.txt |  0
 {config => iobber/data}/kpwr.ccl        |  0
 {config => iobber/data}/kpwr.ini        |  0
 iobber/iobber.py                        |  8 ++++----
 8 files changed, 25 insertions(+), 8 deletions(-)
 rename {config => iobber/data}/kpwr-layer1.txt (100%)
 rename {config => iobber/data}/kpwr-layer2.txt (100%)
 rename {config => iobber/data}/kpwr.ccl (100%)
 rename {config => iobber/data}/kpwr.ini (100%)

diff --git a/iobber/chunker.py b/iobber/chunker.py
index 50aeb26..7c9d404 100644
--- a/iobber/chunker.py
+++ b/iobber/chunker.py
@@ -75,13 +75,14 @@ class Chunker:
 		accompanying file named config.ccl. Trained chunker model is sought in
 		(or written to when training) data_dir. The model is basically a
 		trained CRF classifier."""
-		self.conf_dir, conf_fname = os.path.split(config_path)
+		found_config_path = corpio.get_data(config_path)
+		self.conf_dir, conf_fname = os.path.split(found_config_path)
 		# models (trained classifiers)
 		self.model_name, dummy = os.path.splitext(conf_fname)
-		self.data_dir = data_dir
+		self.data_dir = corpio.get_data(data_dir)
 		self.verbose = verbose
 		# load the config file
-		with open(config_path) as config_file:
+		with open(found_config_path) as config_file:
 			self.conf = ConfigParser.RawConfigParser()
 			self.conf.readfp(config_file)
 		self.tagset = corpio.get_tagset(self.conf)
diff --git a/iobber/classify.py b/iobber/classify.py
index bce6a1d..9c3ee89 100644
--- a/iobber/classify.py
+++ b/iobber/classify.py
@@ -64,7 +64,7 @@ def load(conf, model_name, data_dir, chan_name):
 	cr_fname = corpio.f_name(model_name, data_dir, config.EXT_CR, chan_name)
 	if os.path.isfile(cr_fname):
 		return CRFPP.Tagger('-m %s' % cr_fname)
-	return None
+	raise IOError('can\'t open classifier from %s' % cr_fname)
 
 def open_sent(crf_obj):
 	"""
diff --git a/iobber/corpio.py b/iobber/corpio.py
index 7c17f27..7481414 100644
--- a/iobber/corpio.py
+++ b/iobber/corpio.py
@@ -23,15 +23,28 @@ import corpus2, wccl
 import config
 import codecs, os
 
+_ROOT = os.path.abspath(os.path.dirname(__file__))
+
 format_help = """
 Available input formats: """ + ' '.join(corpus2.TokenReader.available_reader_types()) + """
 """ + ' '.join(corpus2.TokenReader.available_reader_types_help()) + """
 Available output formats: """ + ' '.join(corpus2.TokenWriter.available_writer_types()) + """
 """ + ' '.join(corpus2.TokenWriter.available_writer_types_help())
 
+def get_data(path):
+	"""Resolves path: returns it unchanged if it exists locally, otherwise the
+	same path under the package's 'data' subdir. Raises IOError if neither exists."""
+	if os.path.exists(path):
+		return path
+	in_data = os.path.join(_ROOT, 'data', path)
+	if os.path.exists(in_data):
+		return in_data
+	raise IOError('can\'t locate %s, tried locally and %s' % (path, in_data))
+
 def f_name(model_name, subdir, ext, suff = ''):
 	"""Gets the filename based on model_name having the given
 	extension. Optionally, you can specify name suffix."""
+	
 	base = (model_name + '-' + suff + '.' + ext) if suff else (model_name + '.' + ext)
 	return os.path.join(subdir, base)
 
@@ -44,6 +57,9 @@ def get_reader(in_path, tagset, input_format, read_disamb_only):
 	'disamb' lexemes/interpretations."""
 	if read_disamb_only:
 		fixd_format = input_format + ',disamb_only'
+	# Force casting sentences as AnnotatedSentences (required to get XCES input right).
+	# NOTE(review): fixd_format looks unset here when read_disamb_only is False -- confirm.
+	fixd_format += ',ann'
 	
 	if in_path:
 		return corpus2.TokenReader.create_path_reader(
diff --git a/config/kpwr-layer1.txt b/iobber/data/kpwr-layer1.txt
similarity index 100%
rename from config/kpwr-layer1.txt
rename to iobber/data/kpwr-layer1.txt
diff --git a/config/kpwr-layer2.txt b/iobber/data/kpwr-layer2.txt
similarity index 100%
rename from config/kpwr-layer2.txt
rename to iobber/data/kpwr-layer2.txt
diff --git a/config/kpwr.ccl b/iobber/data/kpwr.ccl
similarity index 100%
rename from config/kpwr.ccl
rename to iobber/data/kpwr.ccl
diff --git a/config/kpwr.ini b/iobber/data/kpwr.ini
similarity index 100%
rename from config/kpwr.ini
rename to iobber/data/kpwr.ini
diff --git a/iobber/iobber.py b/iobber/iobber.py
index e0f707c..7b4e397 100755
--- a/iobber/iobber.py
+++ b/iobber/iobber.py
@@ -57,7 +57,7 @@ def go():
 		help='set output filename (do not write to stdout)')
 	parser.add_option('-d', '--data-dir', type='string', action='store',
 		dest='data_dir', default='',
-		help='assume WCCL and trained model to sit in the given dir')
+		help='use the given directory to look for the trained model (or save the model when training)')
 	parser.add_option('--sent-only', action='store_false',
 		dest='preserve_chunks', default=True,
 		help='process sentences, ignoring division into paragraphs')
@@ -72,12 +72,12 @@ def go():
 	
 	if len(args) < 1:
 		sys.stderr.write('You need to provide a config file and specify input.\n')
-		sys.stderr.write('See %s --help\n' % sys.argv[0])
+		sys.stderr.write('See --help for details.\n')
 		sys.exit(1)
-	config_path = args[0]
+	config_fname = args[0]
 	files = args[1:]
 	
-	tagr = chunker.Chunker(config_path, options.data_dir,
+	tagr = chunker.Chunker(config_fname, options.data_dir,
 		verbose = options.verbose)
 	
 	if options.is_training:
-- 
GitLab