diff --git a/libwccl/parser/Parser.cpp b/libwccl/parser/Parser.cpp index 5309a7729857437a4f5994c95cf1ea8eb1415390..5ec704b478fefe52b77cdef573075c349de42d23 100644 --- a/libwccl/parser/Parser.cpp +++ b/libwccl/parser/Parser.cpp @@ -55,6 +55,7 @@ boost::shared_ptr<Wccl::Function<Wccl::StrSet> > Parser::parseStringOperator( * @arg str writed value(s) * @retrun boost::shared_ptr<Wccl::Value> */ +/* boost::shared_ptr<Wccl::Value> Parser::parseValue(const std::string& str) const { std::stringstream ss (std::stringstream::in | std::stringstream::out); @@ -62,12 +63,13 @@ boost::shared_ptr<Wccl::Value> Parser::parseValue(const std::string& str) const return this->parseValue(ss); } - +*/ /** * @desc Parse values. Runs parse_values rule in the parser grammar. * @arg istr input stream with writed values * @return boost::shared_ptr<Wccl::Value> to created value */ +/* boost::shared_ptr<Wccl::Value> Parser::parseValue(std::istream& istr) const { ANTLRLexer lexer(istr); @@ -75,3 +77,4 @@ boost::shared_ptr<Wccl::Value> Parser::parseValue(std::istream& istr) const return parser.parse_values(); } +*/ diff --git a/libwccl/parser/Parser.h b/libwccl/parser/Parser.h index a7ed2e313d7fcaf3d6bc0372dee67f50a8d00628..a76b3a9c24b94ecd2c6ac04d4bc1c6e530f46eea 100644 --- a/libwccl/parser/Parser.h +++ b/libwccl/parser/Parser.h @@ -30,9 +30,6 @@ public: parseStringOperator(const std::string&) const; boost::shared_ptr<Wccl::Function<Wccl::StrSet> > parseStringOperator(std::istream& ) const; - // methods for parsing values - boost::shared_ptr<Wccl::Value> parseValue(const std::string&) const; - boost::shared_ptr<Wccl::Value> parseValue(std::istream& ) const; private: const Corpus2::Tagset &tagset; diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index 936afe9a587e44ee5be2dcc53158e39b6b2c9da7..03bb9bbc9591df67e1ecc1ccfc78670adc5fda65 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -24,6 +24,10 @@ header { #include <libwccl/ops/constant.h> #include <libwccl/ops/functions.h> #include <libwccl/ops/logicalpredicate.h> + + // Unicode String + #include <unicode/uniset.h> + #include <unicode/unistr.h> } options { @@ -43,12 +47,16 @@ options { { private: // - const std::string token_ref_to_std_string(antlr::RefToken& rstr) { - return (((antlr::Token*)rstr)->getText()); + const UnicodeString token_ref_to_ustring(antlr::RefToken& rstr) const { + return UnicodeString::fromUTF8(((antlr::Token*)rstr)->getText()).unescape(); + } + // + const std::string token_ref_to_std_string(antlr::RefToken& rstr) const { + return (((antlr::Token*)rstr)->getText()); } // int token_ref_to_int(antlr::RefToken& rstr) { - return atoi(this->token_ref_to_std_string(rstr).c_str()); + return atoi(((antlr::Token*)rstr)->getText().c_str()); } // hepls function for processing @@ -96,9 +104,8 @@ parse_string_operator returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > res] { Wccl::Variables vars; - boost::shared_ptr<Wccl::StrSet> mret; } - : mret = string_operators [vars, res] + : res = string_operators [vars] ; // ---------------------------------------------------------------------------- // Rules for parsing predicates in scope (variables). @@ -114,6 +121,7 @@ parse_predicates // ---------------------------------------------------------------------------- // Rules for parsing values in scope (variables). // Returns boost::shared_ptr<Wccl::Value> +/* parse_values returns [boost::shared_ptr<Wccl::Value> ret] { @@ -121,20 +129,23 @@ parse_values } : ret = values [vars] ; +*/ /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // All values: // Values can be use for setvar(...,..) // ---------------------------------------------------------------------------- +/* values [Wccl::Variables& vars] - returns [boost::shared_ptr<Wccl::Value> res] + returns [boost::shared_ptr<Wccl::Constant<Wccl::Value> > res] : res = position [vars] | res = str_set [vars] | res = sym_set [vars] | res = boolean [vars] ; +*/ // ---------------------------------------------------------------------------- // Values reference => values + position_ref // !! Cannot use for setvar(...,...) !! @@ -153,14 +164,19 @@ values_ref [std::string& name]: // TODO regula do lexera? position [Wccl::Variables& vars] - returns [boost::shared_ptr<Wccl::Position> val] - : DOLLAR "0" n: SYMBOL { - val = vars.get_put<Wccl::Position>(token_ref_to_std_string(n)); + returns [boost::shared_ptr<Wccl::Constant<Wccl::Position> > op] +{ + boost::shared_ptr<Wccl::Position> val; +} + : DOLLAR "0" n: SYMBOL { + val = vars.get_put<Wccl::Position>(token_ref_to_std_string(n)); + op.reset(new Wccl::Constant<Wccl::Position>(*val.get())); } ; // ---------------------------------------------------------------------------- // Position reference: $(0-9)+name // !! Cannot use for setvar(...,...) !! +/* position_ref [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::PositionRef> val] @@ -173,35 +189,50 @@ position_ref ); } ; +*/ // ---------------------------------------------------------------------------- // String set, call examples: $name, $Name, $_name, $_Name etc. // This expression gets variable of tyme StrSet from string-named variable -// Returns variable<StrSet> from Set-variables +// Returns Wccl::Function<Wccl::StrSet> from Set-variables str_set [Wccl::Variables& vars] - returns [boost::shared_ptr<Wccl::StrSet> val] + returns [boost::shared_ptr<Wccl::Constant<Wccl::StrSet> > op] +{ + boost::shared_ptr<Wccl::StrSet> val; +} : DOLLAR n: SYMBOL { - val = vars.get_put<Wccl::StrSet>(token_ref_to_std_string(n)); + val = vars.get_put<Wccl::StrSet>(token_ref_to_std_string(n)); + op.reset(new Wccl::Constant<Wccl::StrSet>(*val.get())); } ; // ---------------------------------------------------------------------------- // Symbol set: $$name sym_set [Wccl::Variables& vars] - returns [boost::shared_ptr<Wccl::TSet> val] + returns [boost::shared_ptr<Wccl::Constant<Wccl::TSet> > op] +{ + boost::shared_ptr<Wccl::TSet> val; +} : DOLLAR DOLLAR n: SYMBOL { val = vars.get_put<Wccl::TSet>(token_ref_to_std_string(n)); + op.reset(new Wccl::Constant<Wccl::TSet>(*val.get())); } ; // ---------------------------------------------------------------------------- // Bool: $?name +/* boolean [Wccl::Variables& vars] - returns [boost::shared_ptr<Wccl::Bool> val] + returns [boost::shared_ptr<Wccl::Constant<Wccl::Bool> > op] +{ + boost::shared_ptr<Wccl::Bool> val; +} : DOLLAR Q_MARK n: SYMBOL { val = vars.get_put<Wccl::Bool>(token_ref_to_std_string(n)); + op.reset(new Wccl::Constant<Wccl::Bool>(*val.get())); } ; +*/ // Boolean $!name /* boolean_ref [std::string& name]: @@ -281,28 +312,27 @@ setvar_tset [std::string& value] // ---------------------------------------------------------------------------- // Single or muliple values in string set str_set_v_in - [Wccl::Variables& vars] - returns [boost::shared_ptr<Wccl::StrSet> var] + [boost::shared_ptr<Wccl::StrSet>& s_set] + : v1: STRING { s_set->insert(token_ref_to_ustring(v1)); } + | v2: STRING COMMA str_set_v_in [s_set] { + s_set->insert(token_ref_to_ustring(v2)); + } +; +// string set, called as unnamed (temporary) StrSet: +// calls: [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"] or variable $A +str_set_v + [Wccl::Variables& vars] + returns [boost::shared_ptr<Wccl::Constant<Wccl::StrSet> > val] { - var.reset(new Wccl::StrSet); + boost::shared_ptr<Wccl::StrSet> set(new Wccl::StrSet); } - : v1: STRING { - var->insert(token_ref_to_std_string(v1).c_str()); - } - | v2: STRING COMMA var = str_set_v_in [vars] { - var->insert(token_ref_to_std_string(v2).c_str()); - } - ; -// string set called as unnamed (temporary) StrSet: -// calls: [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"] -// Actually, doing nothing with vars. -str_set_v - [Wccl::Variables& vars] - returns [boost::shared_ptr<Wccl::StrSet> val] : LBRACKET RBRACKET { - val.reset(new Wccl::StrSet); // initialize as unnamed empty variable + val.reset(new Wccl::Constant<Wccl::StrSet>(*set.get())); + } + | LBRACKET str_set_v_in [set] RBRACKET { + val.reset(new Wccl::Constant<Wccl::StrSet>(*set.get())); } - | LBRACKET val = str_set_v_in [vars] RBRACKET + | val = str_set [vars] ; // ---------------------------------------------------------------------------- // element of sym set @@ -337,6 +367,7 @@ sym_set_v [std::string& value] */ // ---------------------------------------------------------------------------- // boolean: +/* boolean_v [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::Bool> val] @@ -344,6 +375,7 @@ boolean_v | "False" { val.reset(new Wccl::Bool(false)); } | val = boolean [vars] ; +*/ // ---------------------------------------------------------------------------- // position value: /* @@ -477,60 +509,20 @@ et_any [std::string& v] /////////////////////////////////////////////////////////////////////////////// // ---------------------------------------------------------------------------- string_operators - [Wccl::Variables& vars, boost::shared_ptr<Wccl::Function<Wccl::StrSet> >& op] - returns [boost::shared_ptr<Wccl::StrSet> ret] - : ret = op_orth [vars] { - /* - op.reset( - new Wccl::Orth( - boost::shared_ptr<Wccl::Function<Wccl::StrSet> >( - new Wccl::Constant<Wccl::StrSet>(*ret.get()) - ) - ) - ); - */ - } - | ret = op_base [vars] { - /* - op.reset( - new Wccl::Base( - boost::shared_ptr<Wccl::Function<Wccl::StrSet> >( - new Wccl::Constant<Wccl::StrSet>(*ret.get()) - ) - ) - ); - */ - } - | ret = op_lower [vars] { - op.reset( - new Wccl::ToLower( - boost::shared_ptr<Wccl::Function<Wccl::StrSet> >( - new Wccl::Constant<Wccl::StrSet>(*ret.get()) - ) - ) - ); - } - | ret = op_upper [vars] { - op.reset( - new Wccl::ToUpper( - boost::shared_ptr<Wccl::Function<Wccl::StrSet> >( - new Wccl::Constant<Wccl::StrSet>(*ret.get()) - ) - ) - ); - } - | ret = op_affix [vars] { - op.reset( - new Wccl::ToUpper( - boost::shared_ptr<Wccl::Function<Wccl::StrSet> >( - new Wccl::Constant<Wccl::StrSet>(*ret.get()) - ) - ) - ); - } + [Wccl::Variables& vars] + returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret] + /* + : ret = op_orth [vars] + | ret = op_base [vars] + */ + : ret = op_lower [vars] + | ret = op_upper [vars] + | ret = op_affix [vars] + | ret = str_set_v [vars] ; // Implementations of string operators: // ---------------------------------------------------------------------------- +/* op_orth [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret] { boost::shared_ptr<Wccl::PositionRef> tmpPosRef; @@ -538,8 +530,10 @@ op_orth [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret] : "orth" LBRACKET tmpPosRef = position_ref [vars] RBRACKET { // TODO } - ; +; +*/ // ---------------------------------------------------------------------------- +/* op_base [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret] { boost::shared_ptr<Wccl::PositionRef> tmpPosRef; @@ -547,53 +541,42 @@ op_base [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret] : "base" LBRACKET tmpPosRef = position_ref [vars] RBRACKET { // TODO } - ; +; +*/ // ---------------------------------------------------------------------------- -op_lower [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret] +// returns boost::shared_ptr<Wccl::Function<Wccl::StrSet> > +op_lower + [Wccl::Variables& vars] returns + [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret] { - boost::shared_ptr<Wccl::StrSet> ret_str_set; - boost::shared_ptr<Wccl::Function<Wccl::StrSet> > tmp_op; -} - : "lower" LPAREN ( - ret_str_set = str_set [vars] | - ret_str_set = str_set_v [vars] - ) RPAREN { - Wccl::ToLower to_lower(get_str_set_expr(ret_str_set)); - ret = to_lower.apply(get_tmp_context()); + boost::shared_ptr<Wccl::Function<Wccl::StrSet> > o_ret; +} + : "lower" LPAREN o_ret = string_operators[vars] RPAREN { + ret.reset(new Wccl::ToLower(o_ret)); } - | "lower" LPAREN ret = string_operators[vars, tmp_op] RPAREN ; // ---------------------------------------------------------------------------- -op_upper [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret] +op_upper + [Wccl::Variables& vars] + returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret] { - boost::shared_ptr<Wccl::StrSet> ret_str_set; - boost::shared_ptr<Wccl::Function<Wccl::StrSet> > tmp_op; -} - : "upper" LPAREN ( - ret_str_set = str_set [vars] | - ret_str_set = str_set_v [vars] - ) RPAREN { - Wccl::ToUpper to_upper(get_str_set_expr(ret_str_set)); - ret = to_upper.apply(get_tmp_context()); + boost::shared_ptr<Wccl::Function<Wccl::StrSet> > o_ret; +} + : "upper" LPAREN o_ret = string_operators[vars] RPAREN { + ret.reset(new Wccl::ToUpper(o_ret)); } - | "upper" LPAREN ret = string_operators[vars, tmp_op] RPAREN ; // ---------------------------------------------------------------------------- -op_affix [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret] +op_affix + [Wccl::Variables& vars] + returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret] { - boost::shared_ptr<Wccl::StrSet> ret_str_set; - boost::shared_ptr<Wccl::Function<Wccl::StrSet> > tmp_op; -} - : "affix" LPAREN ( - ret_str_set = str_set [vars] | - ret_str_set = str_set_v [vars] | - ret_str_set = string_operators[vars, tmp_op] - ) COMMA p_af: INT RPAREN { - Wccl::Affix affix(get_str_set_expr(ret_str_set), token_ref_to_int(p_af)); - ret = affix.apply(get_tmp_context()); + boost::shared_ptr<Wccl::Function<Wccl::StrSet> > o_ret; +} + : "affix" LPAREN o_ret = string_operators[vars] COMMA offset: INT RPAREN { + ret.reset(new Wccl::Affix(o_ret, token_ref_to_int(offset))); } ; - /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // Predicates returns boost::shared_ptr<Wccl::Function<Wccl::Bool> > @@ -658,6 +641,21 @@ options { | '\'' (~'\'')* '\'' ; +// STRING_APOS +// options { +// paraphrase = "a string without apostrophe"; +// } +// : (~'"')* +// ; + +// STRING_QUOT +// options { +// paraphrase = "a string without quotation"; +// } +// : (~'\'')* +// ; + + INT options { paraphrase = "Integer"; @@ -665,6 +663,21 @@ options { : ('-'|'+')?('0'..'9')+ ; +QUOT_MARK +options { + paraphrase = "Quota mark"; +} + : '\'' + ; + +APOS_MARK +options { + paraphrase = "Aposptrophe mark"; +} + : '"' + ; + + Q_MARK options { paraphrase = "Query mark"; diff --git a/wcclparser/strop_main.cpp b/wcclparser/strop_main.cpp index c8381c5c7dc9fc6b8bd1b58511dc99ec6789fcf1..4f4d48564eb98a588acc7c53eaaf1b8f2c19fba9 100644 --- a/wcclparser/strop_main.cpp +++ b/wcclparser/strop_main.cpp @@ -42,11 +42,11 @@ int main() std::cerr << "Parsed expression: " << retStr->to_raw_string() << std::endl; } else { - std::cerr << "Problem while parsing -- haven't StrSet object in boost::shared_ptr!" << std::endl; + std::cerr << "Problem while parsing -- haven't got StrSet object in boost::shared_ptr!" << std::endl; } } else { - std::cerr << "Problem while parsing -- haven't Function<Wccl::StrSet> object in boost::shared_ptr!" << std::endl; + std::cerr << "Problem while parsing -- haven't got Function<Wccl::StrSet> object in boost::shared_ptr!" << std::endl; } } catch (antlr::MismatchedTokenException &e) { diff --git a/wcclparser/val_main.cpp b/wcclparser/val_main.cpp index 21486566bbda2b95d2db843d5b44aa70ff5166c9..8a379c554a316eb5b531f8791e76272561aae1a8 100644 --- a/wcclparser/val_main.cpp +++ b/wcclparser/val_main.cpp @@ -31,7 +31,7 @@ int main() } else { try { - valRet = parser.parseValue(str_in); + // valRet = parser.parseValue(str_in); /* if (retOp.get()) {