diff --git a/libwccl/parser/Parser.cpp b/libwccl/parser/Parser.cpp index 86389a32f0ec313a00b416f233ff695bd3a21767..41914ca0afdba93638829ab4d25b958f751a5077 100644 --- a/libwccl/parser/Parser.cpp +++ b/libwccl/parser/Parser.cpp @@ -30,7 +30,7 @@ Parser::~Parser() * @arg str operator string * @return the parsed operator via a shared pointer */ -boost::shared_ptr<ANTLRParserResult<StrSet> > Parser::parseStringOperator( +boost::shared_ptr<Operator<StrSet> > Parser::parseStringOperator( const std::string& str) const { std::stringstream ss (std::stringstream::in | std::stringstream::out); @@ -45,7 +45,7 @@ boost::shared_ptr<ANTLRParserResult<StrSet> > Parser::parseStringOperator( * @arg istr input stream with the operator * @return the parsed operator via a shared pointer */ -boost::shared_ptr<ANTLRParserResult<StrSet> > Parser::parseStringOperator( +boost::shared_ptr<Operator<StrSet> > Parser::parseStringOperator( std::istream& istr) const { ANTLRLexer lexer(istr); @@ -54,20 +54,19 @@ boost::shared_ptr<ANTLRParserResult<StrSet> > Parser::parseStringOperator( return parser.parse_strset_operator(tagset_); } -// ---------------------------------------------------------------------------- /** * @desc Parse predicates contained in a std::string. Converts the string * to a stream and callis parsePredicate with it * @arg str operator string * @return the parsed operator via a shared pointer */ -boost::shared_ptr<ANTLRParserResult<Bool> > Parser::parsePredicate( +boost::shared_ptr<Operator<Bool> > Parser::parseBoolOperator( const std::string& str) const { std::stringstream ss (std::stringstream::in | std::stringstream::out); ss << str; - return this->parsePredicate(ss); + return this->parseBoolOperator(ss); } /** @@ -75,7 +74,7 @@ boost::shared_ptr<ANTLRParserResult<Bool> > Parser::parsePredicate( * @arg istr input stream with writed predicate * @return the parsed operator via a shared pointer */ -boost::shared_ptr<ANTLRParserResult<Bool> > Parser::parsePredicate( +boost::shared_ptr<Operator<Bool> > Parser::parseBoolOperator( std::istream& istr) const { ANTLRLexer lexer(istr); @@ -91,7 +90,7 @@ boost::shared_ptr<ANTLRParserResult<Bool> > Parser::parsePredicate( * @arg str operator string * @return the parsed operator via a shared pointer */ -boost::shared_ptr<ANTLRParserResult<TSet> > Parser::parseSymSetOperator( +boost::shared_ptr<Operator<TSet> > Parser::parseSymSetOperator( const std::string& str) const { std::stringstream ss (std::stringstream::in | std::stringstream::out); @@ -106,7 +105,7 @@ boost::shared_ptr<ANTLRParserResult<TSet> > Parser::parseSymSetOperator( * @arg istr input stream with the operator * @return the parsed operator via a shared pointer */ -boost::shared_ptr<ANTLRParserResult<TSet> > Parser::parseSymSetOperator( +boost::shared_ptr<Operator<TSet> > Parser::parseSymSetOperator( std::istream& istr) const { ANTLRLexer lexer(istr); @@ -121,7 +120,7 @@ boost::shared_ptr<ANTLRParserResult<TSet> > Parser::parseSymSetOperator( * @arg str operator string * @return the parsed operator via a shared pointer */ -boost::shared_ptr<ANTLRParserResult<Position> > Parser::parsePositionOperator( +boost::shared_ptr<Operator<Position> > Parser::parsePositionOperator( const std::string& str) const { std::stringstream ss (std::stringstream::in | std::stringstream::out); @@ -136,7 +135,7 @@ boost::shared_ptr<ANTLRParserResult<Position> > Parser::parsePositionOperator( * @arg istr input stream with the operator * @return the parsed operator via a shared pointer */ -boost::shared_ptr<ANTLRParserResult<Position> > Parser::parsePositionOperator( +boost::shared_ptr<Operator<Position> > Parser::parsePositionOperator( std::istream& istr) const { ANTLRLexer lexer(istr); @@ -144,14 +143,13 @@ boost::shared_ptr<ANTLRParserResult<Position> > Parser::parsePositionOperator( return parser.parse_position_operator(tagset_); } -// ---------------------------------------------------------------------------- /** * @desc Parse any operator contained in a std::string. Converts the string to * to a stream and calls parseAnyOperator with it. * @arg str operator string * @return the parsed operator via a shared pointer */ -boost::shared_ptr<ANTLRParserResultBase> Parser::parseAnyOperator( +boost::shared_ptr<FunctionalOperator> Parser::parseAnyOperator( const std::string& str) const { std::stringstream ss (std::stringstream::in | std::stringstream::out); @@ -167,13 +165,13 @@ boost::shared_ptr<ANTLRParserResultBase> Parser::parseAnyOperator( * @arg istr input stream with the operator * @return the parsed operator via a shared pointer */ -boost::shared_ptr<ANTLRParserResultBase> Parser::parseAnyOperator( +boost::shared_ptr<FunctionalOperator> Parser::parseAnyOperator( std::istream& istr) const { std::stringstream ss; ss << istr.rdbuf(); std::stringstream errors; - boost::shared_ptr<ANTLRParserResultBase> result; + boost::shared_ptr<FunctionalOperator> result; if (!result) { ss.clear(); ss.seekg(0, std::ios::beg); @@ -292,4 +290,6 @@ boost::shared_ptr<ANTLRParserResultBase> Parser::parseAnyOperator( return result; } +// ---------------------------------------------------------------------------- + } // end Wccl ns diff --git a/libwccl/parser/Parser.h b/libwccl/parser/Parser.h index 988ad730551c0c08f73ec493148a1fff93ea7fa8..b41892986c604addfeeac7dfcd0cfb281dd535d6 100644 --- a/libwccl/parser/Parser.h +++ b/libwccl/parser/Parser.h @@ -12,10 +12,10 @@ // ---------------------------------------------------------------------------- #include <libwccl/variables.h> #include <libwccl/values/strset.h> +#include <libwccl/ops/operator.h> // exceptions #include <libwccl/parser/ParserException.h> -#include <libwccl/parser/ANTLRParserResult.h> // ---------------------------------------------------------------------------- @@ -28,38 +28,39 @@ public: // --------------------------------------------------------------------------- // methods for parsing string operators - boost::shared_ptr<ANTLRParserResult<StrSet> > + boost::shared_ptr<Operator<StrSet> > parseStringOperator(const std::string&) const; - boost::shared_ptr<ANTLRParserResult<StrSet> > + boost::shared_ptr<Operator<StrSet> > parseStringOperator(std::istream&) const; // --------------------------------------------------------------------------- // methods for parsing bool operators - boost::shared_ptr<ANTLRParserResult<Bool> > - parsePredicate(const std::string&) const; - boost::shared_ptr<ANTLRParserResult<Bool> > - parsePredicate(std::istream&) const; + boost::shared_ptr<Operator<Bool> > + parseBoolOperator(const std::string&) const; + boost::shared_ptr<Operator<Bool> > + parseBoolOperator(std::istream&) const; // --------------------------------------------------------------------------- // methods for parsing bool operators - boost::shared_ptr<ANTLRParserResult<TSet> > + boost::shared_ptr<Operator<TSet> > parseSymSetOperator(const std::string&) const; - boost::shared_ptr<ANTLRParserResult<TSet> > + boost::shared_ptr<Operator<TSet> > parseSymSetOperator(std::istream&) const; // --------------------------------------------------------------------------- // methods for parsing position operators - boost::shared_ptr<ANTLRParserResult<Position> > + boost::shared_ptr<Operator<Position> > parsePositionOperator(const std::string&) const; - boost::shared_ptr<ANTLRParserResult<Position> > + boost::shared_ptr<Operator<Position> > parsePositionOperator(std::istream&) const; // --------------------------------------------------------------------------- // methods for parsing any operators - boost::shared_ptr<ANTLRParserResultBase> + boost::shared_ptr<FunctionalOperator> parseAnyOperator(const std::string&) const; - boost::shared_ptr<ANTLRParserResultBase> + boost::shared_ptr<FunctionalOperator> parseAnyOperator(std::istream&) const; + // --------------------------------------------------------------------------- const Corpus2::Tagset& tagset() const { return tagset_; diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index d4ee973a0ad697c0d99c78ee1c0491fd9f4480ca..cbf43f49d2ecd41f58f7beebc5eaadb66b22750e 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -19,6 +19,8 @@ header { #include <libwccl/sentencecontext.h> // operators + #include <libwccl/ops/operator.h> + #include <libwccl/ops/functions/constant.h> #include <libwccl/ops/functions/vargetter.h> #include <libwccl/ops/functions/conditional.h> @@ -60,9 +62,6 @@ header { #include <unicode/uniset.h> #include <unicode/unistr.h> - // - #include <libwccl/parser/ANTLRParserResult.h> - // start our namespace again ANTLR_BEGIN_NAMESPACE(Wccl) } @@ -136,64 +135,64 @@ private: // ---------------------------------------------------------------------------- // Rule for parsing string set operator with scope. -// Returns boost::shared_ptr<Function<StrSet> > -parse_strset_operator +// Returns boost::shared_ptr<Operator<StrSet> > +parse_strset_operator [const Corpus2::Tagset &tagset] - returns [boost::shared_ptr<ANTLRParserResult<StrSet> > res] + returns [boost::shared_ptr<Operator<StrSet> > res] { - res.reset(new ANTLRParserResult<StrSet>()); - boost::shared_ptr<Function<StrSet> > op; + Variables vars; + boost::shared_ptr<Function<StrSet> > body; } - : op = strset_operator [tagset, *res->variables] { - res->op = op; + : body = strset_operator [tagset, vars] { + res.reset(new Operator<StrSet>(body, vars)); } EOF ; // ---------------------------------------------------------------------------- // Rule for parsing bool operator with scope. -// Returns boost::shared_ptr<Function<Bool> > +// Returns boost::shared_ptr<Operator<Bool> > parse_bool_operator [const Corpus2::Tagset &tagset] - returns [boost::shared_ptr<ANTLRParserResult<Bool> > res] + returns [boost::shared_ptr<Operator<Bool> > res] { - res.reset(new ANTLRParserResult<Bool>()); - boost::shared_ptr<Function<Bool> > op; + Variables vars; + boost::shared_ptr<Function<Bool> > body; } - : op = bool_operator [tagset, *res->variables] { - res->op = op; + : body = bool_operator [tagset, vars] { + res.reset(new Operator<Bool>(body, vars)); } EOF ; // ---------------------------------------------------------------------------- // Rule for parsing symbol set operator with scope. -// Returns boost::shared_ptr<Function<TSet> > -parse_symset_operator +// Returns boost::shared_ptr<Operator<TSet> > +parse_symset_operator [const Corpus2::Tagset &tagset] - returns [boost::shared_ptr<ANTLRParserResult<TSet> > res] + returns [boost::shared_ptr<Operator<TSet> > res] { - res.reset(new ANTLRParserResult<TSet>()); - boost::shared_ptr<Function<TSet> > op; + Variables vars; + boost::shared_ptr<Function<TSet> > body; } - : op = symset_operator [tagset, *res->variables] { - res->op = op; + : body = symset_operator [tagset, vars] { + res.reset(new Operator<TSet>(body, vars)); } EOF ; // ---------------------------------------------------------------------------- // Rule for parsing position operator with scope. -// Returns boost::shared_ptr<Function<Position> > -parse_position_operator +// Returns boost::shared_ptr<Operator<Position> > +parse_position_operator [const Corpus2::Tagset &tagset] - returns [boost::shared_ptr<ANTLRParserResult<Position> > res] + returns [boost::shared_ptr<Operator<Position> > res] { - res.reset(new ANTLRParserResult<Position>()); - boost::shared_ptr<Function<Position> > op; + Variables vars; + boost::shared_ptr<Function<Position> > body; } - : op = position_operator [tagset, *res->variables] { - res->op = op; + : body = position_operator [tagset, vars] { + res.reset(new Operator<Position>(body, vars)); } EOF ; diff --git a/tests/datadriven.cpp b/tests/datadriven.cpp index 8de4828eb497a4adf26e72186e52e11b30681a84..142ba205e333210e07756f6de0ea4d71d04f9278 100644 --- a/tests/datadriven.cpp +++ b/tests/datadriven.cpp @@ -69,6 +69,16 @@ void test_one_item_actual(const compare_test& c) sentence.reset(reader.get_next_sentence()); BOOST_REQUIRE(sentence); } + else { + // + Corpus2::Token* the_token = new Corpus2::Token(".", PwrNlp::Whitespace::ManySpaces); + Corpus2::Tag t1(Corpus2::mask_t(0)); + Corpus2::Lexeme l1("aaa", t1); + the_token->add_lexeme(l1); + sentence->append(the_token); + + BOOST_REQUIRE(sentence); + } Wccl::SentenceContext sc(sentence); std::string operator_string, expected_output; @@ -81,15 +91,14 @@ void test_one_item_actual(const compare_test& c) std::getline(ifs_in, line); expected_output = line; Wccl::Parser parser(tagset); - boost::shared_ptr<ANTLRParserResultBase> parsed; + boost::shared_ptr<Wccl::FunctionalOperator> parsed; try { parsed = parser.parseAnyOperator(operator_string); } catch (Wccl::WcclError& e) { std::cerr << e.info() << "\n---\n" << operator_string << "\n---\n"; throw; } - Wccl::FunExecContext fu(sc, parsed->variables); - std::string output = parsed->get_op_base()->apply_internal(fu)->to_string(tagset); + std::string output = parsed->base_apply(sc)->to_string(tagset); if (output != expected_output) { BOOST_ERROR("Mismatch on line " << line_no << ":\n" << "Expected: " << expected_output << "\n" @@ -104,16 +113,18 @@ void test_one_item_actual(const compare_test& c) std::vector<std::string> fields; boost::algorithm::split(fields, line, boost::is_any_of(separators)); if (fields.size() == 2) { - boost::shared_ptr<Wccl::Value> v; - v = fu.variables()->get<Wccl::Value>(fields[0]); - if (!v) { + try { + const Wccl::Value& v = (*parsed)[fields[0]]; + + if (v.to_string(tagset) != fields[1]) { + BOOST_ERROR("Variable " << fields[0] + << " value mismatch on line " + << line_no << "\n: expected " << fields[1] + << " got " << v.to_string(tagset)); + } + } catch (Wccl::InvalidVariableName &e) { BOOST_ERROR("Invalid variable name in test: " - << fields[0] << " on line " << line_no); - } else if (v->to_string(tagset) != fields[1]) { - BOOST_ERROR("Variable " << fields[0] - << " value mismatch on line " - << line_no << "\n: expected " << fields[1] - << " got " << v->to_string(tagset)); + << fields[0] << " on line " << line_no); } } } diff --git a/wcclparser/bool_main.cpp b/wcclparser/bool_main.cpp index 05c842797d63463530c98d4bcd7bbd73253ea714..86bb5b7cbc7e2684df3feec49765aab38b963bc1 100644 --- a/wcclparser/bool_main.cpp +++ b/wcclparser/bool_main.cpp @@ -1,7 +1,7 @@ #include <cstdlib> #include <libwccl/values/bool.h> +#include <libwccl/ops/operator.h> #include <libwccl/parser/Parser.h> -#include <libwccl/parser/ANTLRParserResult.h> #include <antlr/NoViableAltException.hpp> #include <antlr/MismatchedTokenException.hpp> @@ -19,11 +19,17 @@ int main() Corpus2::Tagset tagset; Wccl::Parser parser(tagset); - boost::shared_ptr<const Wccl::Bool> retVal; - boost::shared_ptr<ANTLRParserResult<Wccl::Bool> > retOp; - boost::shared_ptr<Corpus2::Sentence> sentence; + boost::shared_ptr<const Wccl::Value> retVal; + boost::shared_ptr<Wccl::FunctionalOperator> retOp; + boost::shared_ptr<Corpus2::Sentence> sentence = boost::make_shared<Corpus2::Sentence>(); Wccl::SentenceContext sc(sentence); + Corpus2::Token* the_token = new Corpus2::Token("ZZ", PwrNlp::Whitespace::ManySpaces); + Corpus2::Tag t1(Corpus2::mask_t(0)); + Corpus2::Lexeme l1("aaa", t1); + the_token->add_lexeme(l1); + sentence->append(the_token); + if (system("clear")) { // } @@ -46,12 +52,12 @@ int main() } else { try { - retOp = parser.parsePredicate(str_in); + retOp = parser.parseBoolOperator(str_in); - if (retOp.get()) { - Wccl::FunExecContext cx(sc, retOp->variables); + if (retOp) { + retVal = retOp->base_apply(sc); - if ((retVal = retOp->op->apply(cx)).get()) { + if(retVal) { std::cerr << "Parsed expression: " << retVal->to_raw_string() << std::endl; } @@ -77,6 +83,10 @@ int main() std::cerr << "Wccl::InvalidVariableName" << std::endl; } catch (Wccl::VariableTypeMismatch &e) { std::cerr << "Wccl::VariableTypeMismatch" << std::endl; + } catch (Wccl::InvalidArgument &e) { + std::cerr << "Wccl::InvalidArgument " << e.info() << std::endl; + } catch (Wccl::WcclError &e) { + std::cerr << "Generic WcclError: " << e.info() << std::endl; } catch (...) { std::cerr << "[N] Syntax error!" << std::endl; diff --git a/wcclparser/main.cpp b/wcclparser/main.cpp index 250a9c68571de65390ff6eb48ae62e94efc38657..fdd2e40b5162ac2a8edf467e9987189f834f6ecb 100644 --- a/wcclparser/main.cpp +++ b/wcclparser/main.cpp @@ -91,15 +91,6 @@ void libedit_read_loop(boost::function<bool (const std::string&)>& line_cb) } #endif -void dumpvariables(const Wccl::Variables& vars, const Corpus2::Tagset& tagset) -{ - typedef std::pair<std::string, boost::shared_ptr<Wccl::Value> > v_t; - foreach (const v_t& v, vars.get_all<Wccl::Value>()) { - std::cerr << v.second->make_var_repr(v.first) << "=" - << v.second->to_string(tagset) << "\n"; - } -} - bool process_line(const std::string& line, Wccl::Parser& parser, Wccl::SentenceContext& sc, bool all_positions, bool dump_variables) { @@ -111,11 +102,12 @@ bool process_line(const std::string& line, Wccl::Parser& parser, } boost::shared_ptr<const Wccl::Value> retVal; - boost::shared_ptr<ANTLRParserResultBase> retOp; + boost::shared_ptr<Wccl::FunctionalOperator> retOp; try { retOp = parser.parseAnyOperator(line); if (retOp) { int pb, pe; + if (all_positions) { pb = 0; pe = sc.get_sentence().empty() ? 1 : sc.size(); @@ -123,18 +115,18 @@ bool process_line(const std::string& line, Wccl::Parser& parser, pb = sc.get_position(); pe = sc.get_position() + 1; } + for (int i = pb; i < pe; ++i) { sc.set_position(i); - Wccl::FunExecContext cx(sc, - boost::shared_ptr<Wccl::Variables>(retOp->variables->clone())); - retVal = retOp->get_op_base()->apply_internal(cx); + retVal = retOp->base_apply(sc); + if (retVal) { std::cerr << "[" << std::setw(2) << sc.get_position() << "] " << "Parsed expression: " << retVal->to_string(parser.tagset()) << std::endl; if (dump_variables) { - dumpvariables(*cx.variables(), parser.tagset()); + retOp->dumpvariables(parser.tagset()); } } else { std::cerr << "Problem while parsing -- " diff --git a/wcclparser/strop_main.cpp b/wcclparser/strop_main.cpp index aaf4c97f401a4113af38a9892c7285f1dbb54f59..632829b851ccf82641263e96e18a0d7d3ebce116 100644 --- a/wcclparser/strop_main.cpp +++ b/wcclparser/strop_main.cpp @@ -1,7 +1,8 @@ #include <cstdlib> #include <libwccl/values/strset.h> +#include <libwccl/ops/operator.h> #include <libwccl/parser/Parser.h> -#include <libwccl/parser/ANTLRParserResult.h> + #include <antlr/NoViableAltException.hpp> #include <antlr/MismatchedTokenException.hpp> #include <antlr/TokenStreamRecognitionException.hpp> @@ -11,17 +12,24 @@ /** * @desc It's simple command line tester for testing string operators */ + int main() { std::string str_in; Corpus2::Tagset tagset; Wccl::Parser parser(tagset); - boost::shared_ptr<const Wccl::StrSet> retVal; - boost::shared_ptr<ANTLRParserResult<Wccl::StrSet> > retOp; - boost::shared_ptr<Corpus2::Sentence> sentence; + boost::shared_ptr<const Wccl::Value> retVal; + boost::shared_ptr<Wccl::FunctionalOperator> retOp; + boost::shared_ptr<Corpus2::Sentence> sentence = boost::make_shared<Corpus2::Sentence>(); Wccl::SentenceContext sc(sentence); + Corpus2::Token* the_token = new Corpus2::Token("ZZ", PwrNlp::Whitespace::ManySpaces); + Corpus2::Tag t1(Corpus2::mask_t(0)); + Corpus2::Lexeme l1("aaa", t1); + the_token->add_lexeme(l1); + sentence->append(the_token); + if (system("clear")) { // } @@ -47,9 +55,9 @@ int main() retOp = parser.parseStringOperator(str_in); if (retOp.get()) { - Wccl::FunExecContext cx(sc, retOp->variables); + retVal = retOp->base_apply(sc); - if ((retVal = retOp->op->apply(cx)).get()) { + if (retVal) { std::cerr << "Parsed expression: " << retVal->to_raw_string() << std::endl; } diff --git a/wcclparser/tagset_main.cpp b/wcclparser/tagset_main.cpp index 94f84f9bef84294f45e86595f28fb3a80ec1f614..76e18a5bb71f5af646841c9ddeb67f8aeef18487 100644 --- a/wcclparser/tagset_main.cpp +++ b/wcclparser/tagset_main.cpp @@ -1,7 +1,8 @@ #include <cstdlib> #include <libwccl/values/tset.h> +#include <libwccl/ops/operator.h> #include <libwccl/parser/Parser.h> -#include <libwccl/parser/ANTLRParserResult.h> + #include <antlr/NoViableAltException.hpp> #include <antlr/MismatchedTokenException.hpp> #include <antlr/TokenStreamRecognitionException.hpp> @@ -13,17 +14,24 @@ /** * @desc It's simple command line tester for testing tagset operators */ + int main() { std::string str_in; const Corpus2::Tagset& tagset = Corpus2::get_named_tagset("kipi"); Wccl::Parser parser(tagset); - boost::shared_ptr<const Wccl::TSet> retVal; - boost::shared_ptr<ANTLRParserResult<Wccl::TSet> > retOp; - boost::shared_ptr<Corpus2::Sentence> sentence; + boost::shared_ptr<const Wccl::Value> retVal; + boost::shared_ptr<Wccl::FunctionalOperator> retOp; + boost::shared_ptr<Corpus2::Sentence> sentence = boost::make_shared<Corpus2::Sentence>(); Wccl::SentenceContext sc(sentence); + Corpus2::Token* the_token = new Corpus2::Token("ZZ", PwrNlp::Whitespace::ManySpaces); + Corpus2::Tag t1(Corpus2::mask_t(0)); + Corpus2::Lexeme l1("aaa", t1); + the_token->add_lexeme(l1); + sentence->append(the_token); + if (system("clear")) { // } @@ -49,9 +57,9 @@ int main() retOp = parser.parseSymSetOperator(str_in); if (retOp.get()) { - Wccl::FunExecContext cx(sc, retOp->variables); + retVal = retOp->base_apply(sc); - if ((retVal = retOp->op->apply(cx)).get()) { + if (retVal) { std::cerr << "Parsed expression: " << retVal->to_raw_string() << std::endl; }