From d08f1bc7d17dda9b826240397bc60db2e59dd21b Mon Sep 17 00:00:00 2001 From: ilor <kailoran@gmail.com> Date: Thu, 25 Nov 2010 14:54:08 +0100 Subject: [PATCH] add any-parsing to Parser class, use it in wcclparser's new main --- libwccl/ops/functions.h | 2 +- libwccl/parser/ANTLRParserResult.h | 32 +++++- libwccl/parser/Parser.cpp | 62 +++++++++++ libwccl/parser/Parser.h | 6 ++ wcclparser/CMakeLists.txt | 12 +++ wcclparser/main.cpp | 151 +++++++++++++++++++++++++++++ 6 files changed, 260 insertions(+), 5 deletions(-) create mode 100644 wcclparser/main.cpp diff --git a/libwccl/ops/functions.h b/libwccl/ops/functions.h index ea3b55d..f2e4842 100644 --- a/libwccl/ops/functions.h +++ b/libwccl/ops/functions.h @@ -30,7 +30,7 @@ public: * @note May be incomplete and/or contain internal info. */ virtual std::string raw_name() const = 0; -protected: +//protected: /** * Base type returned after application of function * (shared pointer to a const Value) diff --git a/libwccl/parser/ANTLRParserResult.h b/libwccl/parser/ANTLRParserResult.h index c60f43b..7845f86 100644 --- a/libwccl/parser/ANTLRParserResult.h +++ b/libwccl/parser/ANTLRParserResult.h @@ -6,16 +6,40 @@ #include <libwccl/variables.h> #include <libwccl/ops/functions.h> + template<class T> -class ANTLRParserResult +class ANTLRParserResult; + +/// Type-independent part of a parse result: variables plus the operator as FunctionBase. +class ANTLRParserResultBase { public: - ANTLRParserResult() { - this->variables.reset(new Wccl::Variables()); + ANTLRParserResultBase() + : variables(new Wccl::Variables()) + { + } + + virtual ~ANTLRParserResultBase() {} + + boost::shared_ptr<Wccl::Variables> variables; + + virtual boost::shared_ptr<Wccl::FunctionBase> get_op_base() const = 0; +}; + +template<class T> +class ANTLRParserResult : public ANTLRParserResultBase +{ +public: + ANTLRParserResult() + { } - boost::shared_ptr<Wccl::Variables> variables; boost::shared_ptr<Wccl::Function<T> > op; + + boost::shared_ptr<Wccl::FunctionBase> get_op_base() const + { + return op; + } }; #endif // ANTLRPARSERRESULT_H diff --git a/libwccl/parser/Parser.cpp 
b/libwccl/parser/Parser.cpp index 35b5591..6f69c9b 100644 --- a/libwccl/parser/Parser.cpp +++ b/libwccl/parser/Parser.cpp @@ -108,3 +108,65 @@ boost::shared_ptr<ANTLRParserResult<Wccl::TSet> > Parser::parseSymSetOperator( return parser.parse_sym_set_operator(this->tagset); } + +// ---------------------------------------------------------------------------- +/** + * @desc Parse any operator contained in a std::string. Converts the string + * to a stream and calls parseAnyOperator with it. + * @arg str operator string + * @return the parsed operator via a shared pointer + */ +boost::shared_ptr<ANTLRParserResultBase> Parser::parseAnyOperator( + const std::string& str) const +{ + std::stringstream ss (std::stringstream::in | std::stringstream::out); + ss << str; + + return this->parseAnyOperator(ss); +} + +/** + * @desc Parse any operator. Runs parse_*_operator rules in sequence + * in the parser grammar until one succeeds, or all fail. Rethrows + * the exception returned by the last parse_* attempt. 
+ * @arg istr input stream with the operator + * @return the parsed operator via a shared pointer + */ +boost::shared_ptr<ANTLRParserResultBase> Parser::parseAnyOperator( + std::istream& istr) const +{ + // A failed attempt may already have consumed part of the input, so the + // text is buffered once and every attempt gets a fresh lexer and parser. + std::stringstream buffer; + buffer << istr.rdbuf(); + const std::string text = buffer.str(); + + boost::shared_ptr<ANTLRParserResultBase> result; + try { + std::stringstream input(text); + ANTLRLexer lexer(input); + ANTLRParser parser(lexer); + result = parser.parse_sym_set_operator(this->tagset); + } catch (antlr::ANTLRException&) { + // ignore, try another type + } + if (!result) { + try { + std::stringstream input(text); + ANTLRLexer lexer(input); + ANTLRParser parser(lexer); + result = parser.parse_string_operator(this->tagset); + } catch (antlr::ANTLRException&) { + // ignore, try another type + } + } + if (!result) { + // Last attempt: its exception propagates to the caller. + std::stringstream input(text); + ANTLRLexer lexer(input); + ANTLRParser parser(lexer); + result = parser.parse_predicates(this->tagset); + } + assert(result); + return result; +} diff --git a/libwccl/parser/Parser.h b/libwccl/parser/Parser.h index 10709d4..872adec 100644 --- a/libwccl/parser/Parser.h +++ b/libwccl/parser/Parser.h @@ -45,6 +45,12 @@ public: boost::shared_ptr<ANTLRParserResult<Wccl::TSet> > parseSymSetOperator(std::istream&) const; + // --------------------------------------------------------------------------- + // methods for parsing any operators + boost::shared_ptr<ANTLRParserResultBase> + parseAnyOperator(const std::string&) const; + boost::shared_ptr<ANTLRParserResultBase> + parseAnyOperator(std::istream&) const; private: const Corpus2::Tagset &tagset; }; diff --git a/wcclparser/CMakeLists.txt b/wcclparser/CMakeLists.txt index 8aa7e3f..4918095 100644 --- a/wcclparser/CMakeLists.txt +++ b/wcclparser/CMakeLists.txt @@ -1,9 +1,21 @@ PROJECT( parser ) +find_package(Libedit) +if (Libedit_FOUND) + message(STATUS "Building with libedit") + add_definitions( -DHAVE_LIBEDIT ) + set(LIBS ${LIBS} ${Libedit_LIBRARIES}) +endif (Libedit_FOUND) + include_directories( ${CMAKE_SOURCE_DIR} ) add_definitions(-DLIBWCCL_WCCLPARSER_DATA_DIR="${PROJECT_SOURCE_DIR}/") +add_executable(wcclparser + main.cpp +) +target_link_libraries (wcclparser wccl ${Boost_LIBRARIES} antlr 
${LIBS}) + # String operator add_executable(parser-strop strop_main.cpp diff --git a/wcclparser/main.cpp b/wcclparser/main.cpp new file mode 100644 index 0000000..37c910c --- /dev/null +++ b/wcclparser/main.cpp @@ -0,0 +1,151 @@ +#include <cstdlib> +#include <iostream> +#include <string> + +#include <libwccl/values/strset.h> +#include <libwccl/parser/Parser.h> + +#include <boost/bind.hpp> +#include <boost/function.hpp> + +#include <antlr/MismatchedTokenException.hpp> +// ---------------------------------------------------------------------------- + +#ifdef HAVE_LIBEDIT +#include <histedit.h> +#endif + +/** + * @desc A simple command line tester for testing operators + */ + + +namespace { + const char* _prompt = "Enter any operator expression: "; +} + +/// Feed lines from std::cin to line_cb until it returns true or cin fails. +void std_read_loop(boost::function<bool (const std::string&)>& line_cb) +{ + while (std::cin.good()) { + std::string s; + getline(std::cin, s); + if (line_cb(s)) { + return; + } + } +} + +#ifdef HAVE_LIBEDIT +const char* query_prompt(EditLine*) { + return _prompt; +} + +/// Like std_read_loop, but reads through libedit (prompt and history). +void libedit_read_loop(boost::function<bool (const std::string&)>& line_cb) +{ + EditLine *el = el_init("wccl-parser", stdin, stdout, stderr); + el_set(el, EL_PROMPT, &query_prompt); + el_set(el, EL_EDITOR, "emacs"); + History* myhistory = history_init(); + if (myhistory == NULL) { + std::cerr << "EditLine history init error\n"; + el_end(el); + std_read_loop(line_cb); + return; + } + HistEvent ev; + history(myhistory, &ev, H_SETSIZE, 1024); + el_set(el, EL_HIST, history, myhistory); + + bool more = true; + while (more) { + int count; + const char *line = el_gets(el, &count); // line gets a trailing \n + if (line == NULL || line[0] == 0) { + more = false; + } else { + std::string s(line); + // strip the line terminator only when it is really there + if (s[s.size() - 1] == '\n') { + s.erase(s.size() - 1); + } + if (line_cb(s)) { + more = false; + } else { + history(myhistory, &ev, H_ENTER, line); + } + } + } + + history_end(myhistory); + el_end(el); +} +#endif + +/// Handle one input line; returns true when the read loop should stop. +bool process_line(const std::string& line, Parser& parser) +{ + if (line.empty() || line == "exit" || line == "quit") { + return true; + } else if (line == 
"clear" || line == "cls") { + if (system("clear")) {} + return false; + } + + boost::shared_ptr<const Wccl::Value> retVal; + boost::shared_ptr<ANTLRParserResultBase> retOp; + boost::shared_ptr<Corpus2::Sentence> sentence; + Wccl::SentenceContext sc(sentence); + + try { + retOp = parser.parseAnyOperator(line); + if (retOp) { + Wccl::FunExecContext cx(sc, retOp->variables); + retVal = retOp->get_op_base()->apply_internal(cx); + if (retVal) { + std::cerr << "Parsed expression: " << retVal->to_raw_string() + << std::endl; + } else { + std::cerr << "Problem while parsing -- " + << "retVal is NULL!" << std::endl; + } + } else { + std::cerr << "Problem while parsing -- " + << "parser returned NULL!" << std::endl; + } + } catch (antlr::MismatchedTokenException &e) { + std::cerr << e.getMessage() << std::endl; + } catch (Wccl::InvalidVariableName &e) { + std::cerr << "Wccl::InvalidVariableName " << e.info() << std::endl; + } catch (Wccl::VariableTypeMismatch &e) { + std::cerr << "Wccl::VariableTypeMismatch " << e.info() << std::endl; + } catch (Wccl::WcclError& e) { + std::cerr << "Wccl::WcclError " << e.info() << std::endl; + } catch (PwrNlp::PwrNlpError& e) { + std::cerr << "PwrNlp::PwrNlpError " << e.info() << std::endl; + } catch (antlr::ANTLRException& e) { + std::cerr << "Antlr error " << e.getMessage() << std::endl; + } + return false; +} + +int main() +{ + Corpus2::Tagset tagset; + Parser parser(tagset); + + if (system("clear")) { + // a failed clear is harmless, ignore it + } + + boost::function<bool (const std::string&)> f; + f = boost::bind(&process_line, _1, boost::ref(parser)); +#ifdef HAVE_LIBEDIT + libedit_read_loop(f); +#else + std_read_loop(f); +#endif + + return 0; +} -- GitLab