From f91daedfd63c61d37ff059a65443f3bc8280ba92 Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Thu, 5 May 2011 15:08:06 +0200
Subject: [PATCH] lexicon path searching in wcclfile parsing, use in wccl-run
 (-P)

---
 libwccl/lexicon/lexiconparser.cpp |  8 +++++---
 libwccl/lexicon/lexiconparser.h   |  4 +++-
 libwccl/parser/Parser.cpp         |  9 +++++----
 libwccl/parser/Parser.h           |  6 ++++--
 libwccl/parser/grammar.g          |  5 +++--
 libwccl/wcclfile.h                | 15 ++++++++++++---
 wccl-apps/wccl-run.cpp            | 17 +++++++++++++++--
 7 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/libwccl/lexicon/lexiconparser.cpp b/libwccl/lexicon/lexiconparser.cpp
index 77f7561..acb54cc 100644
--- a/libwccl/lexicon/lexiconparser.cpp
+++ b/libwccl/lexicon/lexiconparser.cpp
@@ -10,12 +10,14 @@
 namespace Wccl {
 
 boost::shared_ptr<Lexicon> LexiconParser::parse_lexicon(
+	const PwrNlp::PathSearcherBase& search_path,
 	const std::string& lexicon_name,
-	const std::string& path)
+	const std::string& filename)
 {
-	std::ifstream is(path.c_str());
+	std::ifstream is;
+	search_path.open_stream_or_throw(filename, is, "lexicon");
 	if (!is.good()) {
-		throw Wccl::FileNotFound(path, "", __FUNCTION__);
+		throw Wccl::FileNotFound(filename, "", __FUNCTION__);
 	}
 
 	ANTLRLexiconLexer lexer(is);
diff --git a/libwccl/lexicon/lexiconparser.h b/libwccl/lexicon/lexiconparser.h
index 521d19a..51cf73a 100644
--- a/libwccl/lexicon/lexiconparser.h
+++ b/libwccl/lexicon/lexiconparser.h
@@ -2,6 +2,7 @@
 #define LIBWCCL_LEXICON_LEXICONPARSER_H
 
 #include <boost/shared_ptr.hpp>
+#include <libpwrutils/pathsearch.h>
 
 namespace Wccl {
 
@@ -11,8 +12,9 @@ class LexiconParser
 {
 public:
 	static boost::shared_ptr<Lexicon> parse_lexicon(
+		const PwrNlp::PathSearcherBase& search_path,
 		const std::string& lexicon_name,
-		const std::string& path);
+		const std::string& filename);
 };
 } /* end ns Wccl */
 
diff --git a/libwccl/parser/Parser.cpp b/libwccl/parser/Parser.cpp
index 7a5703c..8cf342d 100644
--- a/libwccl/parser/Parser.cpp
+++ b/libwccl/parser/Parser.cpp
@@ -563,13 +563,13 @@ boost::shared_ptr<MatchRule> Parser::parseMatchRule(std::istream& istr) const
  * @return the parsed file via a shared pointer
  */
 boost::shared_ptr<WcclFile> Parser::parseWcclFile(
-		const std::string& str) const
+		const std::string& str, const std::string& search_path /*= "."*/) const
 {
 	std::stringstream ss (std::stringstream::in | std::stringstream::out);
 	ss << str;
 
 	try {
-		return this->parseWcclFile(ss);
+		return this->parseWcclFile(ss, search_path);
 	}
 	catch (ParserException&) {
 		throw;
@@ -581,14 +581,15 @@ boost::shared_ptr<WcclFile> Parser::parseWcclFile(
  * @arg istr input stream with writed rule
  * @return the parsed file via a shared pointer
  */
-boost::shared_ptr<WcclFile> Parser::parseWcclFile(std::istream& istr) const
+boost::shared_ptr<WcclFile> Parser::parseWcclFile(std::istream& istr,
+		const std::string& search_path /*= "."*/) const
 {
 	ANTLRLexer lexer(istr);
 	ANTLRParser parser(lexer);
 	boost::shared_ptr<WcclFile> res;
 
 	try {
-		res = parser.parse_wccl_file(tagset_);
+		res = parser.parse_wccl_file(tagset_, search_path);
 	} catch (antlr::MismatchedTokenException &e) {
 		throw ParserException(
 				e.getFileLineColumnString() + " " + e.getMessage()
diff --git a/libwccl/parser/Parser.h b/libwccl/parser/Parser.h
index 6873365..3d642ea 100644
--- a/libwccl/parser/Parser.h
+++ b/libwccl/parser/Parser.h
@@ -94,9 +94,11 @@ public:
 	// ---------------------------------------------------------------------------
 	// WCCL file parsing
 	boost::shared_ptr<WcclFile>
-			parseWcclFile(const std::string& file_contents_string) const;
+			parseWcclFile(const std::string& file_contents_string,
+				const std::string& search_path = ".") const;
 	boost::shared_ptr<WcclFile>
-			parseWcclFile(std::istream& is) const;
+			parseWcclFile(std::istream& is,
+				const std::string& search_path = ".") const;
 
 	// ---------------------------------------------------------------------------
 	const Corpus2::Tagset& tagset() const {
diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g
index 5071cda..074020e 100644
--- a/libwccl/parser/grammar.g
+++ b/libwccl/parser/grammar.g
@@ -291,10 +291,10 @@ parse_match_rule
 // ----------------------------------------------------------------------------
 // Rule for parsing wccl files
 parse_wccl_file
-	[const Corpus2::Tagset& tagset]
+	[const Corpus2::Tagset& tagset, const std::string& search_path]
 	returns [boost::shared_ptr<WcclFile> wccl_file]
 {
-	wccl_file = boost::make_shared<WcclFile>(tagset);
+	wccl_file = boost::make_shared<WcclFile>(tagset, search_path);
 	boost::shared_ptr<TagRuleSequence> rule_seq;
 }
 	: (imports_section [*wccl_file])?
@@ -1894,6 +1894,7 @@ import [WcclFile& wccl_file]
 	: "import" LPAREN file_path : STRING COMMA lexicon_name : STRING RPAREN {
 		wccl_file.import_lexicon(
 			LexiconParser::parse_lexicon(
+				wccl_file.path(),
 				token_ref_to_std_string(lexicon_name),
 				token_ref_to_std_string(file_path)));
 	}
diff --git a/libwccl/wcclfile.h b/libwccl/wcclfile.h
index c77d5be..2919b49 100644
--- a/libwccl/wcclfile.h
+++ b/libwccl/wcclfile.h
@@ -9,6 +9,8 @@
 #include <libwccl/wcclfileopsections.h>
 #include <libwccl/ops/tagrulesequence.h>
 #include <libwccl/lexicon/lexicons.h>
+#include <libwccl/exception.h>
+#include <libpwrutils/pathsearch.h>
 
 namespace Wccl {
 
@@ -21,7 +23,7 @@ class WcclFile
 	  WcclFileOpSections<OpSequence<Match> >
 {
 public:
-	explicit WcclFile(const Corpus2::Tagset& tagset);
+	WcclFile(const Corpus2::Tagset& tagset, const std::string& search_path);
 
 	const std::vector<boost::shared_ptr<UntypedOpSequence> >& untyped_sections();
 	template<class T>
@@ -102,12 +104,16 @@ public:
 	std::string to_string() const;
 
 	const Corpus2::Tagset& tagset() const;
+	const PwrNlp::PathSearcher<Wccl::FileNotFound>& path() const { return path_; }
+	PwrNlp::PathSearcher<Wccl::FileNotFound>& path() { return path_; }
+
 private:
 	std::ostream& write_to(std::ostream& ostream) const;
 	std::vector<boost::shared_ptr<FunctionalOpSequence> > all_sections_;
 	boost::shared_ptr<TagRuleSequence> tag_rules_;
 	boost::shared_ptr<Lexicons> lexicons_;
 	const Corpus2::Tagset& tagset_;
+	PwrNlp::PathSearcher<Wccl::FileNotFound> path_;
 };
 
 } /* end ns Wccl */
@@ -118,9 +124,12 @@ private:
 namespace Wccl {
 
 inline
-WcclFile::WcclFile(const Corpus2::Tagset& tagset)
-  : lexicons_(boost::make_shared<Lexicons>()), tagset_(tagset)
+WcclFile::WcclFile(const Corpus2::Tagset& tagset, const std::string& search_path)
+  : lexicons_(boost::make_shared<Lexicons>()), tagset_(tagset),
+    path_(":")
 {
+	path_.set_search_path(search_path);
+	path_.set_verbose(true);
 }
 
 inline
diff --git a/wccl-apps/wccl-run.cpp b/wccl-apps/wccl-run.cpp
index 4b23e30..2a20c37 100644
--- a/wccl-apps/wccl-run.cpp
+++ b/wccl-apps/wccl-run.cpp
@@ -44,7 +44,8 @@ class Runner
 {
 public:
 	Runner(const Corpus2::Tagset& tagset)
-	 : tagset_(tagset), parser_(tagset_), token_idx(0), progress_(false)
+	 : tagset_(tagset), parser_(tagset_), token_idx(0), progress_(false),
+	   search_path_(".")
 	{
 	}
 
@@ -69,6 +70,10 @@ public:
 
 	void output_tabular(const std::vector< std::vector< UnicodeString > > outputs);
 
+	void set_search_path(const std::string& path) {
+		search_path_ = path;
+	}
+
 private:
 	const Corpus2::Tagset& tagset_;
 	Wccl::Parser parser_;
@@ -76,6 +81,7 @@ private:
 	std::vector< std::string > op_names_;
 	int token_idx;
 	bool progress_;
+	std::string search_path_;
 };
 
 bool Runner::load_more_operators(const std::string& filename)
@@ -87,7 +93,7 @@ bool Runner::load_more_operators(const std::string& filename)
 			throw Wccl::FileNotFound(filename, "", __FUNCTION__);
 		}
 
-		retOp = parser_.parseWcclFile(is);
+		retOp = parser_.parseWcclFile(is, search_path_);
 		if (retOp) {
 			boost::filesystem::path p(filename);
 			std::string prefix = p.stem() + ":";
@@ -249,6 +255,7 @@ int main(int argc, char** argv)
 	std::string tagset_load = "kipi";
 	bool first = false, progress = false;
 	std::string input_format;
+	std::string search_path;
 	std::vector<std::string> corpora_files, files, operator_strings;
 	bool corpus_stdin = false;
 	using boost::program_options::value;
@@ -265,6 +272,8 @@ int main(int argc, char** argv)
 			 "CCL operator file or string")
 			("files,f", value(&files),
 			 "Files to load, looking at the extension to determine type")
+			("search-path,P", value(&search_path),
+			 "WCCL resources (lexicons) search path")
 			("corpus-from-stdin,I", value(&corpus_stdin)->zero_tokens(),
 			 "Read corpus from stdin")
 			("quiet,q", value(&quiet)->zero_tokens(),
@@ -329,6 +338,9 @@ int main(int argc, char** argv)
 		const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_load);
 		Runner runner(tagset);
 		runner.use_progress(progress);
+		if (!search_path.empty()) {
+			runner.set_search_path(search_path);
+		}
 		foreach (const std::string& f, operator_strings) {
 			if (boost::algorithm::ends_with(f, ".ccl")) {
 				size_t sz = runner.operators().size();
-- 
GitLab