diff --git a/libwccl/parser/Parser.cpp b/libwccl/parser/Parser.cpp index 067aeb0d07822dea41d93e3e564abcdf0992d0ad..5309a7729857437a4f5994c95cf1ea8eb1415390 100644 --- a/libwccl/parser/Parser.cpp +++ b/libwccl/parser/Parser.cpp @@ -18,67 +18,60 @@ Parser::~Parser() // ---------------------------------------------------------------------------- /** - * @desc Overloaded parsing operator writed in string. Calls base method - * @arg str operator as string - * @return call method @see parserOperator(const std::istream&) + * @desc Parse a string operator written in a std::string. Copies the text + * into a stringstream and calls parseStringOperator(std::istream&) + * @arg str textual form of the operator + * @return boost::shared_ptr<Wccl::Function<Wccl::StrSet> > */ -/* -std::string Parser::parseOperator(const std::string& str) const +boost::shared_ptr<Wccl::Function<Wccl::StrSet> > Parser::parseStringOperator( + const std::string& str) const { std::stringstream ss (std::stringstream::in | std::stringstream::out); ss << str; - return this->parseOperator(ss); + return this->parseStringOperator(ss); } -*/ + /** - * @desc Base method for parsing operator in stream - * @arg istr input stream with operator - * @return Operator + * @desc Parse a string operator. Runs the parse_string_operator rule + * of the parser grammar. + * @arg istr input stream holding the textual operator + * @return boost::shared_ptr<Wccl::Function<Wccl::StrSet> > to the created operator */ -/* -std::string Parser::parseOperator(std::istream& istr) const +boost::shared_ptr<Wccl::Function<Wccl::StrSet> > Parser::parseStringOperator( + std::istream& istr) const { ANTLRLexer lexer(istr); ANTLRParser parser(lexer); - return "Ala ma kota"; + return parser.parse_string_operator(); } -*/ -// ----------------------------------------------------------------------------- +// ---------------------------------------------------------------------------- /** - * @desc Overloaded parsing expression writed in string. 
Calls base method - * @arg str variables in string + * @desc Parse a value written in a std::string. Copies the text + * into a stringstream and calls parseValue(std::istream&) + * @arg str textual form of the value(s) + * @return boost::shared_ptr<Wccl::Value> */ -void Parser::parseExpression(const std::string& str) const +boost::shared_ptr<Wccl::Value> Parser::parseValue(const std::string& str) const { std::stringstream ss (std::stringstream::in | std::stringstream::out); ss << str; - return this->parseExpression(ss); + return this->parseValue(ss); } /** - * @desc Base method for parsing expression in stream - * @arg istr input stream with variable - * @return Operator + * @desc Parse a value by running the parse_values grammar rule; parser exceptions are not caught here. + * @arg istr input stream holding the textual value(s) + * @return boost::shared_ptr<Wccl::Value> to the created value */ -void Parser::parseExpression(std::istream& istr) const +boost::shared_ptr<Wccl::Value> Parser::parseValue(std::istream& istr) const { ANTLRLexer lexer(istr); ANTLRParser parser(lexer); - try { - parser.start_rules(); - - std::cerr << "Syntax ok!" << std::endl; - } - catch (ParserException e) { - std::cerr << e.info() << std::endl; - } - catch (...) { - std::cerr << "Syntax error!" 
<< std::endl; - } + return parser.parse_values(); } diff --git a/libwccl/parser/Parser.h b/libwccl/parser/Parser.h index 9eda1abe392c471ff26a74c3c5183c60a8586c3c..a7ed2e313d7fcaf3d6bc0372dee67f50a8d00628 100644 --- a/libwccl/parser/Parser.h +++ b/libwccl/parser/Parser.h @@ -1,25 +1,38 @@ #ifndef LIBWCCL_PARSER_PARSER_H #define LIBWCCL_PARSER_PARSER_H +#include "ANTLRLexer.hpp" +#include "ANTLRParser.hpp" + +// ---------------------------------------------------------------------------- #include <sstream> + +// ---------------------------------------------------------------------------- #include <libcorpus2/tagset.h> -#include "ANTLRLexer.hpp" -#include "ANTLRParser.hpp" +// ---------------------------------------------------------------------------- +#include <libwccl/variables.h> +#include <libwccl/values/strset.h> +// exceptions #include <libwccl/parser/ParserException.h> -// <libwccl> +// ---------------------------------------------------------------------------- class Parser { public: Parser(const Corpus2::Tagset&); ~Parser(); - // -------------------------------------------------------------------------- - // FIXME - void parseExpression(const std::string&) const; - void parseExpression(std::istream& ) const; + // --------------------------------------------------------------------------- + // Parse a string operator given as a std::string or as an input stream + boost::shared_ptr<Wccl::Function<Wccl::StrSet> > + parseStringOperator(const std::string&) const; + boost::shared_ptr<Wccl::Function<Wccl::StrSet> > + parseStringOperator(std::istream& ) const; + // Parse a value given as a std::string or as an input stream + boost::shared_ptr<Wccl::Value> parseValue(const std::string&) const; + boost::shared_ptr<Wccl::Value> parseValue(std::istream& ) const; private: const Corpus2::Tagset &tagset; diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index cbbe0bd9426ed7023303a22b5738f3f647fb64dc..936afe9a587e44ee5be2dcc53158e39b6b2c9da7 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -1,20 
+1,38 @@ header { #include <libwccl/parser/ParserException.h> - // libwccl - #include <cstdio> #include <antlr/Token.hpp> #include <boost/lexical_cast.hpp> + + // values/variables + #include <libwccl/variables.h> + #include <libwccl/values/bool.h> + #include <libwccl/values/tset.h> + #include <libwccl/values/strset.h> + #include <libwccl/values/position.h> + #include <libwccl/values/positionref.h> + + // sentence context + #include <libwccl/sentencecontext.h> + + // operators + #include <libwccl/ops/and.h> + #include <libwccl/ops/affix.h> + #include <libwccl/ops/toupper.h> + #include <libwccl/ops/tolower.h> + #include <libwccl/ops/constant.h> + #include <libwccl/ops/functions.h> + #include <libwccl/ops/logicalpredicate.h> } options { language = "Cpp"; } -// ---------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------- // ANTLR PARSER -// ---------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------- class ANTLRParser extends Parser; options { k = 6; @@ -28,14 +46,34 @@ private: const std::string token_ref_to_std_string(antlr::RefToken& rstr) { return (((antlr::Token*)rstr)->getText()); } - const int token_ref_to_int(antlr::RefToken& rstr) { + // Converts the token text to int with atoi (no error reporting) + int token_ref_to_int(antlr::RefToken& rstr) { return atoi(this->token_ref_to_std_string(rstr).c_str()); } + + // Helper: wraps the pointed-to StrSet in a Wccl::Constant<Wccl::StrSet> (pointer is dereferenced) + boost::shared_ptr<Wccl::Function<Wccl::StrSet> > get_str_set_expr( + boost::shared_ptr<Wccl::StrSet> ret_str_set) + { + boost::shared_ptr<Wccl::Function<Wccl::StrSet> > strset_expr( + new Wccl::Constant<Wccl::StrSet>(*ret_str_set.get()) + ); + + return strset_expr; + } + + Wccl::SentenceContext get_tmp_context() { + boost::shared_ptr<Corpus2::Sentence> sentence; + Wccl::SentenceContext sc(sentence); + + return sc; + } } 
-///////////////////////////////////////////////////////////////////////////////////// -///////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// // ---------------------------------------------------------------------------------- // Start all rules +/* start_rules { std::string name = ""; @@ -44,57 +82,132 @@ start_rules | position_op [name] { fprintf(stderr, "%s\n", name.c_str()); } | filters_op [name] { fprintf(stderr, "%s\n", name.c_str()); } | setvar_op [name] { fprintf(stderr, "%s\n", name.c_str()); } - | string_operators [name] { fprintf(stderr, "%s\n", name.c_str()); } | boolean_op [name] { fprintf(stderr, "%s\n", name.c_str()); } ; +*/ -// --------------------------------------------------------------------------------- -// values: -// Walues can be use for setvar(...,..) -values [std::string& name]: - position [name] - | str_set [name] - | sym_set [name] - | boolean [name] - ; -// +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// "GLOBAL" RULES +// ---------------------------------------------------------------------------- +// Entry rule for string operators; uses its own local Wccl::Variables scope. +// Returns the boost::shared_ptr<Wccl::Function<Wccl::StrSet> > filled in by string_operators +parse_string_operator + returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > res] +{ + Wccl::Variables vars; + boost::shared_ptr<Wccl::StrSet> mret; +} + : mret = string_operators [vars, res] +; +// ---------------------------------------------------------------------------- +// Entry rule for predicates; uses its own local Wccl::Variables scope. 
+// Returns boost::shared_ptr<Wccl::Function<Wccl::Bool> > +parse_predicates + returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > res] +{ + Wccl::Variables vars; + boost::shared_ptr<Wccl::Bool> mret; +} + : mret = predicates [vars, res] +; +// ---------------------------------------------------------------------------- +// Entry rule for values; uses its own local Wccl::Variables scope. +// Returns boost::shared_ptr<Wccl::Value> +parse_values + returns [boost::shared_ptr<Wccl::Value> ret] +{ + Wccl::Variables vars; +} + : ret = values [vars] +; + +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// All values: +// Values can be used for setvar(...,..) +// ---------------------------------------------------------------------------- +values + [Wccl::Variables& vars] + returns [boost::shared_ptr<Wccl::Value> res] + : res = position [vars] + | res = str_set [vars] + | res = sym_set [vars] + | res = boolean [vars] +; +// ---------------------------------------------------------------------------- // Values reference => values + position_ref // !! Cannot use for setvar(...,...) !! +/* values_ref [std::string& name]: values [name] | position_ref [name] | boolean_ref [name] ; -///////////////////////////////////////////////////////////////////////////////////// +*/ +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// // Position: $0name -position [std::string& name]: - DOLLAR "0" n: SYMBOL { name = token_ref_to_std_string(n); } - ; -// +// ---------------------------------------------------------------------------- +// TODO The implementation does not match this correctly!!! Maybe an extra rule is needed 
+position + [Wccl::Variables& vars] + returns [boost::shared_ptr<Wccl::Position> val] + : DOLLAR "0" n: SYMBOL { + val = vars.get_put<Wccl::Position>(token_ref_to_std_string(n)); + } +; +// ---------------------------------------------------------------------------- // Position reference: $(0-9)+name // !! Cannot use for setvar(...,...) !! -position_ref [std::string& name]: - DOLLAR INT n: SYMBOL { name = token_ref_to_std_string(n); } - ; -///////////////////////////////////////////////////////////////////////////////////// -// String set: $name -str_set [std::string& name]: - DOLLAR n: SYMBOL { name = token_ref_to_std_string(n); } - ; -///////////////////////////////////////////////////////////////////////////////////// -// Sym set: $$name -sym_set [std::string& name]: - DOLLAR DOLLAR n: SYMBOL { name = token_ref_to_std_string(n); } - ; -///////////////////////////////////////////////////////////////////////////////////// +position_ref + [Wccl::Variables& vars] + returns [boost::shared_ptr<Wccl::PositionRef> val] + : DOLLAR p_ref: INT n: SYMBOL { + val.reset( + new Wccl::PositionRef( + vars.get_put<Wccl::Position>(token_ref_to_std_string(n)), + token_ref_to_int(p_ref) + ) + ); + } +; +// ---------------------------------------------------------------------------- +// String set, call examples: $name, $Name, $_name, $_Name etc. 
+// Looks up the StrSet variable with the given name via vars.get_put<Wccl::StrSet> +// Returns the boost::shared_ptr<Wccl::StrSet> obtained from vars +str_set + [Wccl::Variables& vars] + returns [boost::shared_ptr<Wccl::StrSet> val] + : DOLLAR n: SYMBOL { + val = vars.get_put<Wccl::StrSet>(token_ref_to_std_string(n)); + } +; +// ---------------------------------------------------------------------------- +// Symbol set: $$name +sym_set + [Wccl::Variables& vars] + returns [boost::shared_ptr<Wccl::TSet> val] + : DOLLAR DOLLAR n: SYMBOL { + val = vars.get_put<Wccl::TSet>(token_ref_to_std_string(n)); + } +; +// ---------------------------------------------------------------------------- // Bool: $?name -boolean [std::string& name]: - DOLLAR Q_MARK n1: SYMBOL { name = token_ref_to_std_string(n1); } - ; +boolean + [Wccl::Variables& vars] + returns [boost::shared_ptr<Wccl::Bool> val] + : DOLLAR Q_MARK n: SYMBOL { + val = vars.get_put<Wccl::Bool>(token_ref_to_std_string(n)); + } +; // Boolean $!name +/* boolean_ref [std::string& name]: DOLLAR E_MARK n1: SYMBOL { name = token_ref_to_std_string(n1); } ; +*/ ///////////////////////////////////////////////////////////////////////////////////// // OPERATORS @@ -102,6 +215,7 @@ boolean_ref [std::string& name]: // ---------------------------------------------------------------------------------- // Positions operator // TODO range przyjmuje postion_ref. ?? Zmienic na position ?? 
+/* position_op [std::string& name] { std::string r1, r2; @@ -110,8 +224,10 @@ position_op [std::string& name] | "range" LPAREN s: SYMBOL COMMA position_ref [r1] COMMA position_ref [r2] RPAREN { name = ("Range opertator from " + token_ref_to_std_string(s) + " [" + r1 + ":" + r2 + "]!"); } ; +*/ // ---------------------------------------------------------------------------------- // Filtering operator +/* filters_op [std::string& name] { std::string p, p2, e1, e2; @@ -121,124 +237,162 @@ filters_op [std::string& name] | "agrflt" LPAREN position_ref [p] COMMA position_ref [p2] COMMA es_any [e1] COMMA i: INT RPAREN { name = ( "Agrflt operator p1 " + p + " p2 " + p2 + " for set " + e1 + " aggr_attrs " + token_ref_to_std_string(i)); } ; +*/ // ---------------------------------------------------------------------------------- // Setvar operator +/* setvar_op [std::string& value] : setvar_pos [value] | setvar_bool [value] | setvar_sset [value] | setvar_tset [value] ; +*/ // setvar dla position przyjmuje position_ref -> TODO sprawdzic dlaczego // gramatyka nie pokrywa "setvar" LPAREN position COMMA position_v RPAREN +/* setvar_pos [std::string& value] : "setvar" LPAREN position_ref [value] COMMA position_v [value] RPAREN // : "setvar" LPAREN position [value] COMMA position_v [value] RPAREN ; - +*/ +/* setvar_bool [std::string& value] : "setvar" LPAREN boolean [value] COMMA boolean_v [value] RPAREN ; - +*/ +/* setvar_sset [std::string& value] : "setvar" LPAREN str_set [value] COMMA str_set_v [value] RPAREN ; - +*/ +/* setvar_tset [std::string& value] : "setvar" LPAREN sym_set [value] COMMA sym_set_v [value] RPAREN ; - -// ---------------------------------------------------------------------------------- -// Values -///////////////////////////////////////////////////////////////////////////////////// -// boolean: -boolean_v [std::string& value] - : "True" { value = "True"; } - | "False" { value = "False"; } - | boolean[value] - ; 
-///////////////////////////////////////////////////////////////////////////////////// -// position value: -position_v [std::string& value] - : i: INT { value = token_ref_to_std_string(i); } - | "begin" { value = "begin"; } - | "end" { value = "end"; } - | "nowhere" { value = "nowhere"; } - | position [value] - ; -///////////////////////////////////////////////////////////////////////////////////// -// string set in -str_set_in [std::string& value] - : v1: STRING { value += token_ref_to_std_string(v1); } - | v2: STRING COMMA str_set_in [value] { value += (", " + token_ref_to_std_string(v2)); } +*/ + +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// VALUES +/////////////////////////////////////////////////////////////////////////////// +// ---------------------------------------------------------------------------- +// Single or multiple comma-separated STRING values collected into one StrSet +str_set_v_in + [Wccl::Variables& vars] + returns [boost::shared_ptr<Wccl::StrSet> var] +{ + var.reset(new Wccl::StrSet); +} + : v1: STRING { + var->insert(token_ref_to_std_string(v1).c_str()); + } + | v2: STRING COMMA var = str_set_v_in [vars] { + var->insert(token_ref_to_std_string(v2).c_str()); + } ; -// string set [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"] -str_set_v [std::string& value] - : LBRACKET RBRACKET - | LBRACKET str_set_in[value] RBRACKET +// string set called as unnamed (temporary) StrSet: +// calls: [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"] +// Currently vars is only passed through; nothing is stored in it. 
+str_set_v + [Wccl::Variables& vars] + returns [boost::shared_ptr<Wccl::StrSet> val] + : LBRACKET RBRACKET { + val.reset(new Wccl::StrSet); // empty brackets: start from an unnamed, empty StrSet + } + | LBRACKET val = str_set_v_in [vars] RBRACKET ; -///////////////////////////////////////////////////////////////////////////////////// +// ---------------------------------------------------------------------------- // element of sym set +/* sym_set_elem_s [std::string& value] : s1: SYMBOL { value += token_ref_to_std_string(s1); } | s2: SYMBOL COMMA sym_set_elem_s [value] { value += token_ref_to_std_string(s2); } | s3: SYMBOL COMMA sym_set_elem_g [value] { value += token_ref_to_std_string(s3); } ; +*/ // element of sym set +/* sym_set_elem_g [std::string& value] : G_MARK s1: SYMBOL G_MARK { value += token_ref_to_std_string(s1); } | G_MARK s2: SYMBOL G_MARK COMMA sym_set_elem_g [value] { value += token_ref_to_std_string(s2); } | G_MARK s3: SYMBOL G_MARK COMMA sym_set_elem_s [value] { value += token_ref_to_std_string(s3); } ; +*/ // sym set in +/* sym_set_in [std::string& value] : sym_set_elem_s [value] | sym_set_elem_g [value] ; +*/ // sym set {} {a} {a, b} +/* sym_set_v [std::string& value] : LCURLY RCURLY | LCURLY sym_set_in [value] RCURLY ; - -///////////////////////////////////////////////////////////////////////////////////// -///////////////////////////////////////////////////////////////////////////////////// +*/ +// ---------------------------------------------------------------------------- +// Boolean value: literal True / False, or a Bool variable matched by the boolean rule +boolean_v + [Wccl::Variables& vars] + returns [boost::shared_ptr<Wccl::Bool> val] + : "True" { val.reset(new Wccl::Bool(true )); } + | "False" { val.reset(new Wccl::Bool(false)); } + | val = boolean [vars] +; +// ---------------------------------------------------------------------------- +// position value: +/* +position_v [std::string& value] + : i: INT { value = token_ref_to_std_string(i); } + | "begin" { value = "begin"; } + | "end" { value = "end"; } + | "nowhere" { 
value = "nowhere"; } + | position [value] + ; +*/ +// ---------------------------------------------------------------------------- // internal values: +/* v_literal [std::string& value] : s1: STRING { value = token_ref_to_std_string(s1); } | s2: SYMBOL { value = token_ref_to_std_string(s2); } ; - +*/ ///////////////////////////////////////////////////////////////////////////////////// // constants // set of values -s_literal [std::string& v] +/* +st::shared_ptr<Wccl::StrSet> ret]s_literal [std::string& v] : LBRACKET ((v_literal[v]) (COMMA v_literal [v])*)? RBRACKET ; +*/ // comma-separated predicates +/* seq_et [std::string& v]: et_any [v] (COMMA et_any [v])* ; - -es_any [std::string& v]: +*/ +/* +es_any [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret] s_literal [v] | es_op [v] ; - -es_op [std::string& v]: - position_ref [v] - | filters_op [v] - ; - +*/ +/* et_bool [std::string& v]: boolean [v] | boolean_op [v] ; +*/ // set relations +/* et_set [std::string& v] { std::string v1, v2; @@ -247,12 +401,14 @@ et_set [std::string& v] | "inter" LPAREN es_any [v1] COMMA es_any [v2] RPAREN { v = ("inter " + v1 + " " + v2); } | "equal" LPAREN es_any [v1] COMMA es_any [v2] RPAREN { v = ("equal " + v1 + " " + v2); } ; - +*/ +/* et_string [std::string& v] : "isbig" LPAREN es_any [v] RPAREN | "hasnum" LPAREN es_any [v] RPAREN ; - +*/ +/* et_action [std::string& v] { std::string v1, v2; @@ -267,7 +423,8 @@ et_action [std::string& v] | "lextend" LPAREN s4: SYMBOL RPAREN { v = ("lextend " + token_ref_to_std_string(s4)); } | "rextend" LPAREN s5: SYMBOL RPAREN { v = ("rextend " + token_ref_to_std_string(s5)); } ; - +*/ +/* et_iter [std::string& v] { std::string v1, v2, v3, v4; @@ -282,8 +439,10 @@ et_iter [std::string& v] | "rphrase" LPAREN position_ref [v1] COMMA SYMBOL COMMA position_ref [v2] RPAREN | "accept" LPAREN seq_et [v1] RPAREN ; +*/ // predicates checking agreement +/* et_agr [std::string& name] { std::string p1, p2, v; @@ -292,12 +451,14 @@ et_agr 
[std::string& name] | "agrpp" LPAREN position_ref [p1] COMMA position_ref [p2] COMMA es_any [v] COMMA i2: INT RPAREN | "wagr" LPAREN position_ref [p1] COMMA position_ref [p2] COMMA es_any [v] COMMA i3: INT RPAREN ; - +*/ +/* // annotation checking predicates et_annot [std::string& v] : "phrase" LPAREN position_ref [v] COMMA s: SYMBOL RPAREN ; - +*/ +/* // constraints et_any [std::string& v] : et_bool [v] @@ -308,47 +469,175 @@ et_any [std::string& v] | et_agr [v] | et_annot [v] ; - -///////////////////////////////////////////////////////////////////////////////////// -// OERATORS -///////////////////////////////////////////////////////////////////////////////////// -// Operators returns str_set: orth[$-2P] -string_operators [std::string& name] - : op_orth [name] - | op_base [name] - | op_lower [name] - | op_upper [name] - | op_affix [name] +*/ + +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// String operators: hand back boost::shared_ptr<Wccl::Function<Wccl::StrSet> > via 'op' +/////////////////////////////////////////////////////////////////////////////// +// ---------------------------------------------------------------------------- +string_operators + [Wccl::Variables& vars, boost::shared_ptr<Wccl::Function<Wccl::StrSet> >& op] + returns [boost::shared_ptr<Wccl::StrSet> ret] + : ret = op_orth [vars] { + /* + op.reset( + new Wccl::Orth( + boost::shared_ptr<Wccl::Function<Wccl::StrSet> >( + new Wccl::Constant<Wccl::StrSet>(*ret.get()) + ) + ) + ); + */ + } + | ret = op_base [vars] { + /* + op.reset( + new Wccl::Base( + boost::shared_ptr<Wccl::Function<Wccl::StrSet> >( + new Wccl::Constant<Wccl::StrSet>(*ret.get()) + ) + ) + ); + */ + } + | ret = op_lower [vars] { + op.reset( + new Wccl::ToLower( + boost::shared_ptr<Wccl::Function<Wccl::StrSet> >( + new Wccl::Constant<Wccl::StrSet>(*ret.get()) + ) + ) + ); + } + | ret = op_upper [vars] { + op.reset( + new 
Wccl::ToUpper( + boost::shared_ptr<Wccl::Function<Wccl::StrSet> >( + new Wccl::Constant<Wccl::StrSet>(*ret.get()) + ) + ) + ); + } + | ret = op_affix [vars] { + // op_affix has already applied the affix, so expose its result + // as a constant instead of upper-casing it with ToUpper. + op.reset( + new Wccl::Constant<Wccl::StrSet>( + *ret.get() + ) + ); + } ; - -op_orth [std::string& name] - : "orth" LBRACKET position_ref [name] RBRACKET { name = "Orth operator!"; } - ; - -op_base [std::string& name] - : "base" LBRACKET position_ref [name] RBRACKET { name = "Base operator!"; } +// Implementations of string operators: +// ---------------------------------------------------------------------------- +op_orth [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret] +{ + boost::shared_ptr<Wccl::PositionRef> tmpPosRef; +} + : "orth" LBRACKET tmpPosRef = position_ref [vars] RBRACKET { + // TODO not implemented yet: ret is left empty + } ; - -op_lower [std::string& name] - : "lower" LPAREN str_set [name] RPAREN { name = "Lower operator!"; } - | "lower" LPAREN str_set_v [name] RPAREN { name = "Lower operator!"; } +// ---------------------------------------------------------------------------- +op_base [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret] +{ + boost::shared_ptr<Wccl::PositionRef> tmpPosRef; +} + : "base" LBRACKET tmpPosRef = position_ref [vars] RBRACKET { + // TODO not implemented yet: ret is left empty + } ; +// ---------------------------------------------------------------------------- +op_lower [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret] +{ + boost::shared_ptr<Wccl::StrSet> ret_str_set; + boost::shared_ptr<Wccl::Function<Wccl::StrSet> > tmp_op; +} + : "lower" LPAREN ( + ret_str_set = str_set [vars] | + ret_str_set = str_set_v [vars] + ) RPAREN { + Wccl::ToLower to_lower(get_str_set_expr(ret_str_set)); + ret = to_lower.apply(get_tmp_context()); + } + | "lower" LPAREN ret = string_operators[vars, tmp_op] RPAREN +; +// ---------------------------------------------------------------------------- +op_upper [Wccl::Variables& vars] 
returns [boost::shared_ptr<Wccl::StrSet> ret] +{ + boost::shared_ptr<Wccl::StrSet> ret_str_set; + boost::shared_ptr<Wccl::Function<Wccl::StrSet> > tmp_op; +} + : "upper" LPAREN ( + ret_str_set = str_set [vars] | + ret_str_set = str_set_v [vars] + ) RPAREN { + Wccl::ToUpper to_upper(get_str_set_expr(ret_str_set)); + ret = to_upper.apply(get_tmp_context()); + } + | "upper" LPAREN ret = string_operators[vars, tmp_op] RPAREN +; +// ---------------------------------------------------------------------------- +op_affix [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret] +{ + boost::shared_ptr<Wccl::StrSet> ret_str_set; + boost::shared_ptr<Wccl::Function<Wccl::StrSet> > tmp_op; +} + : "affix" LPAREN ( + ret_str_set = str_set [vars] | + ret_str_set = str_set_v [vars] | + ret_str_set = string_operators[vars, tmp_op] + ) COMMA p_af: INT RPAREN { + Wccl::Affix affix(get_str_set_expr(ret_str_set), token_ref_to_int(p_af)); + ret = affix.apply(get_tmp_context()); + } +; -op_upper [std::string& name] - : "upper" LPAREN str_set [name] RPAREN { name = "Upper operator!"; } - | "upper" LPAREN str_set_v [name] RPAREN { name = "Upper operator!"; } - ; +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// Predicates: take a boost::shared_ptr<Wccl::Function<Wccl::Bool> >& 'pr' and return a Wccl::Bool +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +predicates + [Wccl::Variables& vars, boost::shared_ptr<Wccl::Function<Wccl::Bool> >& pr] + returns [boost::shared_ptr<Wccl::Bool> ret] + : ret = logical_predicates [vars, pr] +; +// Implementations of predicates: +// ---------------------------------------------------------------------------- +logical_predicates + [Wccl::Variables& vars, boost::shared_ptr<Wccl::Function<Wccl::Bool> >& pr] + returns [boost::shared_ptr<Wccl::Bool> ret] 
+{ + boost::shared_ptr<Wccl::LogicalPredicate::BoolFunctionPtr> v; +} + : ret = lpred_and [vars] { + // pr.reset(new Wccl::And(v)); + } +; +// ---------------------------------------------------------------------------- +lpred_and + [Wccl::Variables& vars] + returns [boost::shared_ptr<Wccl::Bool> ret] +{ + boost::shared_ptr<Wccl::Function<Wccl::Bool> > tmpPr; +} + : "and" LPAREN ret = logical_predicates [vars, tmpPr] (COMMA ret = logical_predicates [vars, tmpPr])* RPAREN +; -op_affix [std::string& name] - : "affix" LPAREN str_set [name] COMMA n1: INT RPAREN { name = "Affix operator " + token_ref_to_std_string(n1) + "!"; } - | "affix" LPAREN str_set_v [name] COMMA n2: INT RPAREN { name = "Affix operator " + token_ref_to_std_string(n2) + "!"; } - ; +/* +lpred_not +lpred_or +*/ +/* boolean_op [std::string& name] : "and" LPAREN seq_et [name] RPAREN | "not" LPAREN seq_et [name] RPAREN | "or" LPAREN seq_et [name] RPAREN ; +*/ // ---------------------------------------------------------------------------------- // ANTLR LEXER diff --git a/wcclparser/CMakeLists.txt b/wcclparser/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..df58345dc072aa5a1f8b5046fef5058fade7a9fa --- /dev/null +++ b/wcclparser/CMakeLists.txt @@ -0,0 +1,22 @@ +PROJECT( parser ) + +include_directories( ${CMAKE_SOURCE_DIR} ) + +add_definitions(-DLIBWCCL_WCCLPARSER_DATA_DIR="${PROJECT_SOURCE_DIR}/") + +include_directories(${Boost_INCLUDE_DIR}) +link_directories(${Boost_LIBRARY_DIRS}) # must come before the targets it should affect + +add_executable(parser-strop + strop_main.cpp +) +target_link_libraries (parser-strop wccl ${Boost_LIBRARIES} antlr) + +add_executable(parser-val + val_main.cpp +) +target_link_libraries (parser-val wccl ${Boost_LIBRARIES} antlr) + +#add_custom_target(test tests) +#add_custom_target(test-verbose ./tests --log_level=message) + diff --git a/wcclparser/strop_main.cpp b/wcclparser/strop_main.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..c8381c5c7dc9fc6b8bd1b58511dc99ec6789fcf1 --- /dev/null +++ b/wcclparser/strop_main.cpp @@ -0,0 +1,68 @@ +#include <cstdlib> + +#include <libwccl/values/strset.h> +#include <libwccl/parser/Parser.h> + +// ---------------------------------------------------------------------------- + +/** + * @desc Simple interactive command-line tester for string operators + */ + +int main() +{ + std::string str_in; + Corpus2::Tagset tagset; + Parser parser(tagset); + + boost::shared_ptr<Wccl::StrSet> retStr; + boost::shared_ptr<Wccl::Function<Wccl::StrSet> > retOp; + boost::shared_ptr<Corpus2::Sentence> sentence; + Wccl::SentenceContext sc(sentence); + + while (1) { + std::cerr << "Enter a string operator expression: "; + + if (!getline(std::cin, str_in)) { break; } // EOF or read error: stop instead of looping forever + + if (str_in == "clear" || str_in == "cls") { + if (system("clear")) { + // non-zero status: clearing the screen failed, deliberately ignored + } + } + else if (str_in == "exit" || str_in == "quit") { + break; + } + else { + try { + retOp = parser.parseStringOperator(str_in); + + if (retOp.get()) { + if ((retStr = retOp->apply(sc)).get()) { + std::cerr << "Parsed expression: " << retStr->to_raw_string() << std::endl; + } + else { + std::cerr << "Problem while parsing -- haven't StrSet object in boost::shared_ptr!" << std::endl; + } + } + else { + std::cerr << "Problem while parsing -- haven't Function<Wccl::StrSet> object in boost::shared_ptr!" << std::endl; + } + } + catch (antlr::MismatchedTokenException &e) { + std::cerr << "Mismatch token exception!" << std::endl; + } + /* + catch (antlr::TokenStreamRecognitionException &e) { + std::cerr << "[2] Syntax error!" << std::endl; + } + */ + catch (...) { + std::cerr << "[N] Syntax error!" 
<< std::endl; + } + } + } + + return 0; +} + diff --git a/wcclparser/val_main.cpp b/wcclparser/val_main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..21486566bbda2b95d2db843d5b44aa70ff5166c9 --- /dev/null +++ b/wcclparser/val_main.cpp @@ -0,0 +1,61 @@ +#include <cstdlib> + +#include <libwccl/values/strset.h> +#include <libwccl/parser/Parser.h> + +// ---------------------------------------------------------------------------- + +/** + * @desc Simple interactive command-line tester for values + */ + +int main() +{ + std::string str_in; + Corpus2::Tagset tagset; + Parser parser(tagset); + boost::shared_ptr<Wccl::Value> valRet; + + while (1) { + std::cerr << "Enter a value expression: "; + + if (!getline(std::cin, str_in)) { break; } // EOF or read error: stop instead of looping forever + + if (str_in == "clear" || str_in == "cls") { + if (system("clear")) { + // non-zero status: clearing the screen failed, deliberately ignored + } + } + else if (str_in == "exit" || str_in == "quit") { + break; + } + else { + try { + valRet = parser.parseValue(str_in); + + /* + if (retOp.get()) { + if ((retStr = retOp->apply(sc)).get()) { + std::cerr << "Parsed expression: " << retStr->to_raw_string() << std::endl; + } + else { + std::cerr << "Problem while parsing -- haven't StrSet object in boost::shared_ptr!" << std::endl; + } + } + else { + std::cerr << "Problem while parsing -- haven't Function<Wccl::StrSet> object in boost::shared_ptr!" << std::endl; + } + */ + } + catch (antlr::MismatchedTokenException &e) { + std::cerr << "Mismatch token exception!" << std::endl; + } + catch (...) { + std::cerr << "[N] Syntax error!" << std::endl; + } + } + } + + return 0; +} +