diff --git a/libwccl/lexicon/lexiconparser.cpp b/libwccl/lexicon/lexiconparser.cpp index 77f75611309587d2b7c0d8e1274c76244f4d8bb7..acb54cc41e804bc9e1afffaca47b02ebfa2a492a 100644 --- a/libwccl/lexicon/lexiconparser.cpp +++ b/libwccl/lexicon/lexiconparser.cpp @@ -10,12 +10,14 @@ namespace Wccl { boost::shared_ptr<Lexicon> LexiconParser::parse_lexicon( + const PwrNlp::PathSearcherBase& search_path, const std::string& lexicon_name, - const std::string& path) + const std::string& filename) { - std::ifstream is(path.c_str()); + std::ifstream is; + search_path.open_stream_or_throw(filename, is, "lexicon"); if (!is.good()) { - throw Wccl::FileNotFound(path, "", __FUNCTION__); + throw Wccl::FileNotFound(filename, "", __FUNCTION__); } ANTLRLexiconLexer lexer(is); diff --git a/libwccl/lexicon/lexiconparser.h b/libwccl/lexicon/lexiconparser.h index 521d19a6c794d45456961ad872dbd74219feab6d..51cf73afff49f0f57f230b1f35cf8379149c0887 100644 --- a/libwccl/lexicon/lexiconparser.h +++ b/libwccl/lexicon/lexiconparser.h @@ -2,6 +2,7 @@ #define LIBWCCL_LEXICON_LEXICONPARSER_H #include <boost/shared_ptr.hpp> +#include <libpwrutils/pathsearch.h> namespace Wccl { @@ -11,8 +12,9 @@ class LexiconParser { public: static boost::shared_ptr<Lexicon> parse_lexicon( + const PwrNlp::PathSearcherBase& search_path, const std::string& lexicon_name, - const std::string& path); + const std::string& filename); }; } /* end ns Wccl */ diff --git a/libwccl/parser/Parser.cpp b/libwccl/parser/Parser.cpp index 7a5703c8d0e2e8c58a173bd9c184e0a9113cb00f..8cf342df0c1751ae9b9d9d33fe05f8ed64fbe2bd 100644 --- a/libwccl/parser/Parser.cpp +++ b/libwccl/parser/Parser.cpp @@ -563,13 +563,13 @@ boost::shared_ptr<MatchRule> Parser::parseMatchRule(std::istream& istr) const * @return the parsed file via a shared pointer */ boost::shared_ptr<WcclFile> Parser::parseWcclFile( - const std::string& str) const + const std::string& str, const std::string& search_path /*= "."*/) const { std::stringstream ss (std::stringstream::in | std::stringstream::out); ss << str; try { - return this->parseWcclFile(ss); + return this->parseWcclFile(ss, search_path); } catch (ParserException&) { throw; @@ -581,14 +581,15 @@ boost::shared_ptr<WcclFile> Parser::parseWcclFile( * @arg istr input stream with writed rule * @return the parsed file via a shared pointer */ -boost::shared_ptr<WcclFile> Parser::parseWcclFile(std::istream& istr) const +boost::shared_ptr<WcclFile> Parser::parseWcclFile(std::istream& istr, + const std::string& search_path /*= "."*/) const { ANTLRLexer lexer(istr); ANTLRParser parser(lexer); boost::shared_ptr<WcclFile> res; try { - res = parser.parse_wccl_file(tagset_); + res = parser.parse_wccl_file(tagset_, search_path); } catch (antlr::MismatchedTokenException &e) { throw ParserException( e.getFileLineColumnString() + " " + e.getMessage() diff --git a/libwccl/parser/Parser.h b/libwccl/parser/Parser.h index 6873365caf4a787bb1ca46000f7c9256821411ba..3d642eaf63984341be306e977c47a557c802fbfd 100644 --- a/libwccl/parser/Parser.h +++ b/libwccl/parser/Parser.h @@ -94,9 +94,11 @@ public: // --------------------------------------------------------------------------- // WCCL file parsing boost::shared_ptr<WcclFile> - parseWcclFile(const std::string& file_contents_string) const; + parseWcclFile(const std::string& file_contents_string, + const std::string& search_path = ".") const; boost::shared_ptr<WcclFile> - parseWcclFile(std::istream& is) const; + parseWcclFile(std::istream& is, + const std::string& search_path = ".") const; // --------------------------------------------------------------------------- const Corpus2::Tagset& tagset() const { diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index 5071cdaaf59693d4b94483b6215a60e0e6a00190..074020eeaefa442a181ce8036d414c1fc02e3f42 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -291,10 +291,10 @@ parse_match_rule // ---------------------------------------------------------------------------- // Rule for parsing wccl files parse_wccl_file - [const Corpus2::Tagset& tagset] + [const Corpus2::Tagset& tagset, const std::string search_path] returns [boost::shared_ptr<WcclFile> wccl_file] { - wccl_file = boost::make_shared<WcclFile>(tagset); + wccl_file = boost::make_shared<WcclFile>(tagset, search_path); boost::shared_ptr<TagRuleSequence> rule_seq; } : (imports_section [*wccl_file])? @@ -1894,6 +1894,7 @@ import [WcclFile& wccl_file] : "import" LPAREN file_path : STRING COMMA lexicon_name : STRING RPAREN { wccl_file.import_lexicon( LexiconParser::parse_lexicon( + wccl_file.path(), token_ref_to_std_string(lexicon_name), token_ref_to_std_string(file_path))); } diff --git a/libwccl/wcclfile.h b/libwccl/wcclfile.h index c77d5be7b5274a30bcf8abc46899d8bbf9a39cda..2919b49c5ff142fc56846cf422727c10c0b7cd18 100644 --- a/libwccl/wcclfile.h +++ b/libwccl/wcclfile.h @@ -9,6 +9,8 @@ #include <libwccl/wcclfileopsections.h> #include <libwccl/ops/tagrulesequence.h> #include <libwccl/lexicon/lexicons.h> +#include <libwccl/exception.h> +#include <libpwrutils/pathsearch.h> namespace Wccl { @@ -21,7 +23,7 @@ class WcclFile WcclFileOpSections<OpSequence<Match> > { public: - explicit WcclFile(const Corpus2::Tagset& tagset); + WcclFile(const Corpus2::Tagset& tagset, const std::string& search_path); const std::vector<boost::shared_ptr<UntypedOpSequence> >& untyped_sections(); template<class T> @@ -102,12 +104,16 @@ public: std::string to_string() const; const Corpus2::Tagset& tagset() const; + const PwrNlp::PathSearcher<Wccl::FileNotFound> path() const { return path_; } + PwrNlp::PathSearcher<Wccl::FileNotFound> path() { return path_; } + private: std::ostream& write_to(std::ostream& ostream) const; std::vector<boost::shared_ptr<FunctionalOpSequence> > all_sections_; boost::shared_ptr<TagRuleSequence> tag_rules_; boost::shared_ptr<Lexicons> lexicons_; const Corpus2::Tagset& tagset_; + PwrNlp::PathSearcher<Wccl::FileNotFound> path_; }; } /* end ns Wccl */ @@ -118,9 +124,12 @@ private: namespace Wccl { inline -WcclFile::WcclFile(const Corpus2::Tagset& tagset) - : lexicons_(boost::make_shared<Lexicons>()), tagset_(tagset) +WcclFile::WcclFile(const Corpus2::Tagset& tagset, const std::string& search_path) + : lexicons_(boost::make_shared<Lexicons>()), tagset_(tagset), + path_(":") { + path_.set_search_path(search_path); + path_.set_verbose(true); } inline diff --git a/wccl-apps/wccl-run.cpp b/wccl-apps/wccl-run.cpp index 4b23e30c5888665b3e0be7da5f2ce7d35724085e..2a20c375fef806607f0075bd3cd7911c43f15766 100644 --- a/wccl-apps/wccl-run.cpp +++ b/wccl-apps/wccl-run.cpp @@ -44,7 +44,8 @@ class Runner { public: Runner(const Corpus2::Tagset& tagset) - : tagset_(tagset), parser_(tagset_), token_idx(0), progress_(false) + : tagset_(tagset), parser_(tagset_), token_idx(0), progress_(false), + search_path_(".") { } @@ -69,6 +70,10 @@ public: void output_tabular(const std::vector< std::vector< UnicodeString > > outputs); + void set_search_path(const std::string& path) { + search_path_ = path; + } + private: const Corpus2::Tagset& tagset_; Wccl::Parser parser_; @@ -76,6 +81,7 @@ private: std::vector< std::string > op_names_; int token_idx; bool progress_; + std::string search_path_; }; bool Runner::load_more_operators(const std::string& filename) @@ -87,7 +93,7 @@ bool Runner::load_more_operators(const std::string& filename) throw Wccl::FileNotFound(filename, "", __FUNCTION__); } - retOp = parser_.parseWcclFile(is); + retOp = parser_.parseWcclFile(is, search_path_); if (retOp) { boost::filesystem::path p(filename); std::string prefix = p.stem() + ":"; @@ -144,6 +150,7 @@ bool Runner::load_operator_string(const std::string& op_string) void Runner::do_sentence(const boost::shared_ptr<Corpus2::Sentence>& sentence) { + std::cerr << "dos"; Wccl::SentenceContext sc(sentence); std::vector< std::vector< UnicodeString > > outputs; @@ -249,6 +256,7 @@ int main(int argc, char** argv) std::string tagset_load = "kipi"; bool first = false, progress = false; std::string input_format; + std::string search_path; std::vector<std::string> corpora_files, files, operator_strings; bool corpus_stdin = false; using boost::program_options::value; @@ -265,6 +273,8 @@ int main(int argc, char** argv) "CCL operator file or string") ("files,f", value(&files), "Files to load, looking at the extension to determine type") + ("search-path,P", value(&search_path), + "WCCL resources (lexicons) search path") ("corpus-from-stdin,I", value(&corpus_stdin)->zero_tokens(), "Read corpus from stdin") ("quiet,q", value(&quiet)->zero_tokens(), @@ -329,6 +339,9 @@ int main(int argc, char** argv) const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_load); Runner runner(tagset); runner.use_progress(progress); + if (!search_path.empty()) { + runner.set_search_path(search_path); + } foreach (const std::string& f, operator_strings) { if (boost::algorithm::ends_with(f, ".ccl")) { size_t sz = runner.operators().size();