header {
#include <libwccl/parser/ParserException.h>

#include <climits>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>

#include <antlr/Token.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/shared_ptr.hpp>

// values/variables
#include <libwccl/variables.h>
#include <libwccl/values/bool.h>
#include <libwccl/values/tset.h>
#include <libwccl/values/strset.h>
#include <libwccl/values/position.h>
#include <libwccl/values/positionref.h>

// sentence context
#include <libwccl/sentencecontext.h>

// operators
#include <libwccl/ops/or.h>
#include <libwccl/ops/nor.h>
#include <libwccl/ops/and.h>
#include <libwccl/ops/affix.h>
#include <libwccl/ops/regex.h>
#include <libwccl/ops/equals.h>
#include <libwccl/ops/toupper.h>
#include <libwccl/ops/tolower.h>
#include <libwccl/ops/constant.h>
#include <libwccl/ops/functions.h>
#include <libwccl/ops/vargetter.h>
#include <libwccl/ops/varsetter.h>
#include <libwccl/ops/logicalpredicate.h>
#include <libwccl/ops/intersects.h>
#include <libwccl/ops/issubsetof.h>
#include <libwccl/ops/setpredicate.h>
#include <libwccl/ops/isinside.h>
#include <libwccl/ops/isoutside.h>

// Unicode String
#include <unicode/uniset.h>
#include <unicode/unistr.h>

// result wrapper returned by the parse_* entry rules
#include <libwccl/parser/ANTLRParserResult.h>
}

options {
    language = "Cpp";
}

// ----------------------------------------------------------------------------
// ANTLR PARSER
// ----------------------------------------------------------------------------
class ANTLRParser extends Parser;
options {
    k = 4;
    exportVocab = ANTLRExpr;
    buildAST = false;
    defaultErrorHandler = false;
}
{
private:
    // Token text decoded from UTF-8, then passed through
    // UnicodeString::unescape().
    const UnicodeString token_ref_to_ustring(antlr::RefToken& rstr) const
    {
        return UnicodeString::fromUTF8(rstr->getText()).unescape();
    }

    // Token text exactly as produced by the lexer.
    const std::string token_ref_to_std_string(antlr::RefToken& rstr) const
    {
        return rstr->getText();
    }

    // Token text converted to a base-10 int. strtol is used instead of atoi
    // because atoi has undefined behaviour when the value does not fit;
    // values outside the int range are clamped to INT_MIN / INT_MAX.
    int token_ref_to_int(antlr::RefToken& rstr)
    {
        const long parsed = std::strtol(rstr->getText().c_str(), NULL, 10);
        if (parsed > INT_MAX) {
            return INT_MAX;
        }
        if (parsed < INT_MIN) {
            return INT_MIN;
        }
        return static_cast<int>(parsed);
    }
}

// TODO
// - how to create a TSet (add an element to a TSet)
// - is it not possible to create a Const::Value and do a specialised reset
//   on it?
// - base, orth

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// "GLOBAL" RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// ----------------------------------------------------------------------------
// Entry rule for string operators.
// A fresh ANTLRParserResult is created first; its variables object is the
// scope handed to string_operators, and the parsed operator is stored in
// the op member of the result.
// Returns boost::shared_ptr<ANTLRParserResult<Wccl::StrSet> >
parse_string_operator
    returns [boost::shared_ptr<ANTLRParserResult<Wccl::StrSet> > res]
{
    res.reset(new ANTLRParserResult<Wccl::StrSet>());
    boost::shared_ptr<Wccl::Function<Wccl::StrSet> > parsed;
}
    : parsed = string_operators [*res->variables.get()]
    {
        res->op = parsed;
    }
;

// ----------------------------------------------------------------------------
// Entry rule for predicates.
// A fresh ANTLRParserResult is created first; its variables object is the
// scope handed to logical_predicates, and the parsed predicate is stored in
// the op member of the result.
// Returns boost::shared_ptr<ANTLRParserResult<Wccl::Bool> >
parse_predicates
    returns [boost::shared_ptr<ANTLRParserResult<Wccl::Bool> > res]
{
    res.reset(new ANTLRParserResult<Wccl::Bool>());
    boost::shared_ptr<Wccl::Function<Wccl::Bool> > parsed;
}
    : parsed = logical_predicates [*res->variables.get()]
    {
        res->op = parsed;
    }
;

// ----------------------------------------------------------------------------
// Entry rule for tagset (symbol set) operators.
// A fresh ANTLRParserResult is created first; its variables object is the
// scope handed to sym_set_operators, and the parsed operator is stored in
// the op member of the result.
// Returns boost::shared_ptr<ANTLRParserResult<Wccl::TSet> >
parse_sym_set_operator
    returns [boost::shared_ptr<ANTLRParserResult<Wccl::TSet> > res]
{
    res.reset(new ANTLRParserResult<Wccl::TSet>());
    boost::shared_ptr<Wccl::Function<Wccl::TSet> > parsed;
}
    : parsed = sym_set_operators [*res->variables.get()]
    {
        res->op = parsed;
    }
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VALUES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// ----------------------------------------------------------------------------
// One or more comma-separated STRING tokens; the text of every token is
// inserted into s_set.
str_set_value_in
    [boost::shared_ptr<Wccl::StrSet>& s_set]
    : last: STRING
    {
        s_set->insert(token_ref_to_ustring(last));
    }
    | head: STRING COMMA str_set_value_in [s_set]
    {
        // Runs after the recursive call, so the tail is inserted first.
        s_set->insert(token_ref_to_ustring(head));
    }
;

// String set literal, used as an unnamed (temporary) StrSet:
// [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"]
// (string set variables are handled by str_set_variable, not by this rule)
str_set_value
    returns [boost::shared_ptr<Wccl::Constant<Wccl::StrSet> > val]
{
    boost::shared_ptr<Wccl::StrSet> strings(new Wccl::StrSet);
}
    : LBRACKET RBRACKET
    {
        val.reset(new Wccl::Constant<Wccl::StrSet>(*strings));
    }
    | LBRACKET str_set_value_in [strings] RBRACKET
    {
        val.reset(new Wccl::Constant<Wccl::StrSet>(*strings));
    }
;

// ----------------------------------------------------------------------------
// Elements of a symbol set: SYMBOL or `SYMBOL`, optionally followed by a
// comma and further elements.
// NOTE: every insert below is disabled, so t_set is never modified by this
// rule and the matched symbols are discarded.
sym_set_elem
    [boost::shared_ptr<Wccl::TSet> &t_set]
    : plain: SYMBOL
    {
        // disabled: t_set->insert(token_ref_to_ustring(plain));
    }
    | G_MARK quoted: SYMBOL G_MARK
    {
        // disabled: t_set->insert(token_ref_to_ustring(quoted));
    }
    | plain_head: SYMBOL COMMA sym_set_elem [t_set]
    {
        // disabled: t_set->insert(token_ref_to_ustring(plain_head));
    }
    | G_MARK quoted_head: SYMBOL G_MARK COMMA sym_set_elem [t_set]
    {
        // disabled: t_set->insert(token_ref_to_ustring(quoted_head));
    }
;

// Contents of a symbol set literal; forwards to sym_set_elem.
sym_set_value_in
    [boost::shared_ptr<Wccl::TSet> &set]
    : sym_set_elem [set]
;

// Symbol set literal: {} {a} {a, b}
sym_set_value
    returns [boost::shared_ptr<Wccl::Constant<Wccl::TSet> > val]
{
    boost::shared_ptr<Wccl::TSet> symbols(new Wccl::TSet);
}
    : LCURLY RCURLY
    {
        val.reset(new Wccl::Constant<Wccl::TSet>(*symbols));
    }
    | LCURLY sym_set_value_in [symbols] RCURLY
    {
        val.reset(new Wccl::Constant<Wccl::TSet>(*symbols));
    }
;

// ----------------------------------------------------------------------------
// Boolean literal: True or False.
boolean_value
    returns [boost::shared_ptr<Wccl::Constant<Wccl::Bool> > val]
    : "True"
    {
        val.reset(new Wccl::Constant<Wccl::Bool>(Wccl::Bool(true)));
    }
    | "False"
    {
        val.reset(new Wccl::Constant<Wccl::Bool>(Wccl::Bool(false)));
    }
;

// ----------------------------------------------------------------------------
// Position literal: an integer, or one of the keywords begin / end / nowhere.
position_value
    returns [boost::shared_ptr<Wccl::Constant<Wccl::Position> > val]
    : number: INT
    {
        val.reset(new Wccl::Constant<Wccl::Position>(
            Wccl::Position(token_ref_to_int(number))));
    }
    | "begin"
    {
        val.reset(new Wccl::Constant<Wccl::Position>(
            Wccl::Position(Wccl::Position::Begin)));
    }
    | "end"
    {
        val.reset(new Wccl::Constant<Wccl::Position>(
            Wccl::Position(Wccl::Position::End)));
    }
    | "nowhere"
    {
        val.reset(new Wccl::Constant<Wccl::Position>(
            Wccl::Position(Wccl::Position::Nowhere)));
    }
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VARIABLES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// Position: $name
// ----------------------------------------------------------------------------
// Calls get_put for the named Position variable in vars and returns an
// accessor for it.
position_variable_acc
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > pos_acc]
    : DOLLAR name_tok: SYMBOL
    {
        const std::string var_name = token_ref_to_std_string(name_tok);
        vars.get_put<Wccl::Position>(var_name);
        pos_acc.reset(new Wccl::VariableAccessor<Wccl::Position>(
            vars.create_accessor<Wccl::Position>(var_name)));
    }
;

// Returns a VarGetter built on the accessor from position_variable_acc.
position_variable
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::VarGetter<Wccl::Position> > op]
{
    boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > accessor;
}
    : accessor = position_variable_acc [vars]
    {
        op.reset(new Wccl::VarGetter<Wccl::Position>(*accessor));
    }
;

// ----------------------------------------------------------------------------
// Position reference: $(0-9)+name
// !!
// Cannot use for setvar(...,...) !!
// Matches DOLLAR INT SYMBOL; the action is still a TODO, so nothing is
// built from the matched tokens.
position_ref_variable
    [Wccl::Variables& vars]
    : DOLLAR p_ref: INT n: SYMBOL
    {
        // TODO
    }
;

// ----------------------------------------------------------------------------
// String set variable: DOLLAR STR_PREFIX SYMBOL.
// Calls get_put for the named StrSet variable in vars and returns an
// accessor for it.
str_set_variable_acc
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > strset_acc]
    : DOLLAR STR_PREFIX name_tok: SYMBOL
    {
        const std::string var_name = token_ref_to_std_string(name_tok);
        // get/put the variable in the variables object
        vars.get_put<Wccl::StrSet>(var_name);
        // wrap a copy of the accessor for the variable
        strset_acc.reset(new Wccl::VariableAccessor<Wccl::StrSet>(
            vars.create_accessor<Wccl::StrSet>(var_name)));
    }
;

// Returns a VarGetter built on the accessor from str_set_variable_acc.
str_set_variable
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::VarGetter<Wccl::StrSet> > op]
{
    boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > accessor;
}
    : accessor = str_set_variable_acc [vars]
    {
        op.reset(new Wccl::VarGetter<Wccl::StrSet>(*accessor));
    }
;

// ----------------------------------------------------------------------------
// Symbol set variable: DOLLAR TST_PREFIX SYMBOL.
// Calls get_put for the named TSet variable in vars and returns an
// accessor for it.
sym_set_variable_acc
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > symset_acc]
    : DOLLAR TST_PREFIX name_tok: SYMBOL
    {
        const std::string var_name = token_ref_to_std_string(name_tok);
        vars.get_put<Wccl::TSet>(var_name);
        symset_acc.reset(new Wccl::VariableAccessor<Wccl::TSet>(
            vars.create_accessor<Wccl::TSet>(var_name)));
    }
;

// Returns a VarGetter built on the accessor from sym_set_variable_acc.
sym_set_variable
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::VarGetter<Wccl::TSet> > op]
{
    boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > accessor;
}
    : accessor = sym_set_variable_acc [vars]
    {
        op.reset(new Wccl::VarGetter<Wccl::TSet>(*accessor));
    }
;
// ----------------------------------------------------------------------------
// Bool variable: DOLLAR BOOL_PREFIX SYMBOL.
// Calls get_put for the named Bool variable in vars and returns an
// accessor for it.
boolean_variable_acc
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > bool_acc]
    : DOLLAR BOOL_PREFIX name_tok: SYMBOL
    {
        const std::string var_name = token_ref_to_std_string(name_tok);
        vars.get_put<Wccl::Bool>(var_name);
        bool_acc.reset(new Wccl::VariableAccessor<Wccl::Bool>(
            vars.create_accessor<Wccl::Bool>(var_name)));
    }
;

// Returns a VarGetter built on the accessor from boolean_variable_acc.
boolean_variable
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::VarGetter<Wccl::Bool> > op]
{
    boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > accessor;
}
    : accessor = boolean_variable_acc [vars]
    {
        op.reset(new Wccl::VarGetter<Wccl::Bool>(*accessor));
    }
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// OPERATORS
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Setvar operator: dispatches to the variant matching the variable type.
// ----------------------------------------------------------------------------
setvar_op
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > ret]
    : ret = setvar_pos  [vars]
    | ret = setvar_bool [vars]
    | ret = setvar_sset [vars]
    | ret = setvar_tset [vars]
;

// Implementations of setvar:
// ----------------------------------------------------------------------------
// setvar for position accepts position_ref_variable -> TODO check why the
// grammar does not cover "setvar" LPAREN position COMMA position_value RPAREN
setvar_pos
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
    boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > target;
    boost::shared_ptr<Wccl::Function<Wccl::Position> > value;
}
    : "setvar" LPAREN
        target = position_variable_acc [vars]
        COMMA
        value = op_position [vars]
      RPAREN
    {
        op.reset(new Wccl::VarSetter<Wccl::Position>(*target, value));
    }
;

// ----------------------------------------------------------------------------
// setvar for a Bool variable; the new value is any logical predicate.
setvar_bool
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
    boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > target;
    boost::shared_ptr<Wccl::Function<Wccl::Bool> > value;
}
    : "setvar" LPAREN
        target = boolean_variable_acc [vars]
        COMMA
        value = logical_predicates [vars]
      RPAREN
    {
        op.reset(new Wccl::VarSetter<Wccl::Bool>(*target, value));
    }
;

// ----------------------------------------------------------------------------
// setvar for a StrSet variable; the new value is any string operator.
setvar_sset
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
    boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > target;
    boost::shared_ptr<Wccl::Function<Wccl::StrSet> > value;
}
    : "setvar" LPAREN
        target = str_set_variable_acc [vars]
        COMMA
        value = string_operators [vars]
      RPAREN
    {
        op.reset(new Wccl::VarSetter<Wccl::StrSet>(*target, value));
    }
;

// ----------------------------------------------------------------------------
// setvar for a TSet variable; the new value is any symbol set operator.
setvar_tset
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
    boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > target;
    boost::shared_ptr<Wccl::Function<Wccl::TSet> > value;
}
    : "setvar" LPAREN
        target = sym_set_variable_acc [vars]
        COMMA
        value = sym_set_operators [vars]
      RPAREN
    {
        op.reset(new Wccl::VarSetter<Wccl::TSet>(*target, value));
    }
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Symbol set (tagset) operators
// Returns boost::shared_ptr<Wccl::Function<Wccl::TSet> >
// ----------------------------------------------------------------------------
// Any operator yielding a symbol set (tagset).
sym_set_operators
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::TSet> > ret]
    : ret = op_sym_set [vars]
;

// Implementations of symbol set operators:
// ----------------------------------------------------------------------------
// A symbol set variable or a symbol set literal.
op_sym_set
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::TSet> > op]
    : op = sym_set_variable [vars]
    | op = sym_set_value
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Position operators
// Returns boost::shared_ptr<Wccl::Function<Wccl::Position> >
// ----------------------------------------------------------------------------
position_operators
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::Position> > ret]
    : ret = op_position [vars]
;

// Implementations of position operators:
// ----------------------------------------------------------------------------
// A position variable or a position literal.
op_position
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::Position> > op]
    : op = position_variable [vars]
    | op = position_value
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// String operators
// Returns boost::shared_ptr<Wccl::Function<Wccl::StrSet> >
// ----------------------------------------------------------------------------
string_operators
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
    : ret = op_orth    [vars]
    | ret = op_base    [vars]
    | ret = op_lower   [vars]
    | ret = op_upper   [vars]
    | ret = op_affix   [vars]
    | ret = op_str_set [vars]
;

// Implementations of string operators:
// ----------------------------------------------------------------------------
// orth[<position reference>]
// NOTE: not implemented yet - ret is never assigned in this rule.
op_orth
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
{
    // TODO
}
    : "orth" LBRACKET position_ref_variable [vars] RBRACKET
    {
        // ret = TODO
    }
;

// ----------------------------------------------------------------------------
// base[<position reference>]
// NOTE: not implemented yet - ret is never assigned in this rule.
op_base
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
{
    // TODO
}
    : "base" LBRACKET position_ref_variable [vars] RBRACKET
    {
        // ret = TODO
    }
;

// ----------------------------------------------------------------------------
// lower(<string operator>): wraps the operand in Wccl::ToLower.
op_lower
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
{
    boost::shared_ptr<Wccl::Function<Wccl::StrSet> > operand;
}
    : "lower" LPAREN operand = string_operators [vars] RPAREN
    {
        ret.reset(new Wccl::ToLower(operand));
    }
;

// ----------------------------------------------------------------------------
// upper(<string operator>): wraps the operand in Wccl::ToUpper.
op_upper
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
{
    boost::shared_ptr<Wccl::Function<Wccl::StrSet> > operand;
}
    : "upper" LPAREN operand = string_operators [vars] RPAREN
    {
        ret.reset(new Wccl::ToUpper(operand));
    }
;

// ----------------------------------------------------------------------------
// affix(<string operator>, INT): wraps the operand and the integer in
// Wccl::Affix.
op_affix
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
{
    boost::shared_ptr<Wccl::Function<Wccl::StrSet> > operand;
}
    : "affix" LPAREN
        operand = string_operators [vars]
        COMMA
        length: INT
      RPAREN
    {
        ret.reset(new Wccl::Affix(operand, token_ref_to_int(length)));
    }
;

// ----------------------------------------------------------------------------
// A string set variable or a string set literal.
op_str_set
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > op]
    : op = str_set_variable [vars]
    | op = str_set_value
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Logical predicates
// Returns boost::shared_ptr<Wccl::Function<Wccl::Bool> >
// ----------------------------------------------------------------------------
// Any construct yielding a Bool function.
logical_predicates
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > ret]
    : ret = lpred_and   [vars]
    | ret = lpred_or    [vars]
    | ret = lpred_nor   [vars]
    | ret = lpred_bool  [vars]
    | ret = lpred_in    [vars]
    | ret = lpred_inter [vars]
    | ret = lpred_eq    [vars]
    | ret = lpred_regex [vars]
    | ret = setvar_op   [vars]
    | ret = lpred_inout [vars]
;

// ----------------------------------------------------------------------------
// One or more comma-separated predicates, collected in source order into a
// newly allocated vector.
logical_predicates_comma_sep
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<
        std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > >
    > ret_v]
{
    boost::shared_ptr<Wccl::Function<Wccl::Bool> > item;
    ret_v.reset(
        new std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > >
    );
}
    : item = logical_predicates [vars]
    {
        ret_v->push_back(item);
    }
    (
        COMMA item = logical_predicates [vars]
        {
            ret_v->push_back(item);
        }
    )*
;

// ----------------------------------------------------------------------------
// and(p1, p2, ...): wraps the predicate list in Wccl::And.
lpred_and
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
    boost::shared_ptr<
        std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > >
    > operands;
}
    : "and" LPAREN operands = logical_predicates_comma_sep [vars] RPAREN
    {
        op.reset(new Wccl::And(operands));
    }
;

// ----------------------------------------------------------------------------
// or(p1, p2, ...): wraps the predicate list in Wccl::Or.
lpred_or
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
    boost::shared_ptr<
        std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > >
    > operands;
}
    : "or" LPAREN operands = logical_predicates_comma_sep [vars] RPAREN
    {
        op.reset(new Wccl::Or(operands));
    }
;

// ----------------------------------------------------------------------------
// nor(p1, p2, ...): wraps the predicate list in Wccl::Nor.
lpred_nor
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
    boost::shared_ptr<
        std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > >
    > operands;
}
    : "nor" LPAREN operands = logical_predicates_comma_sep [vars] RPAREN
    {
        op.reset(new Wccl::Nor(operands));
    }
;

// ----------------------------------------------------------------------------
// A Bool variable or a Bool literal.
lpred_bool
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
    : op = boolean_variable [vars]
    | op = boolean_value
;

// ----------------------------------------------------------------------------
// in(a, b): Wccl::IsSubsetOf over two string sets or two symbol sets.
lpred_in
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
    boost::shared_ptr<Wccl::Function<Wccl::StrSet> > str_lhs, str_rhs;
    boost::shared_ptr<Wccl::Function<Wccl::TSet> > sym_lhs, sym_rhs;
    // only referenced by the disabled position alternative below
    boost::shared_ptr<Wccl::Function<Wccl::Position> > p1, p2;
}
    : "in" LPAREN
        str_lhs = string_operators [vars] COMMA
        str_rhs = string_operators [vars]
      RPAREN
    {
        op.reset(new Wccl::IsSubsetOf<Wccl::StrSet>(str_lhs, str_rhs));
    }
    | "in" LPAREN
        sym_lhs = sym_set_operators [vars] COMMA
        sym_rhs = sym_set_operators [vars]
      RPAREN
    {
        op.reset(new Wccl::IsSubsetOf<Wccl::TSet>(sym_lhs, sym_rhs));
    }
    /*
    | "in" LPAREN
        p1 = position_operators [vars] COMMA
        p2 = position_operators [vars]
      RPAREN
    {
        // op.reset(new Wccl::IsSubsetOf(*p1.get(), *p2.get()));
    }
    */
;

// ----------------------------------------------------------------------------
// inter(a, b): Wccl::Intersects over two string sets or two symbol sets.
lpred_inter
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
    boost::shared_ptr<Wccl::Function<Wccl::StrSet> > str_lhs, str_rhs;
    boost::shared_ptr<Wccl::Function<Wccl::TSet> > sym_lhs, sym_rhs;
    // only referenced by the disabled position alternative below
    boost::shared_ptr<Wccl::Function<Wccl::Position> > p1, p2;
}
    : "inter" LPAREN
        str_lhs = string_operators [vars] COMMA
        str_rhs = string_operators [vars]
      RPAREN
    {
        op.reset(new Wccl::Intersects<Wccl::StrSet>(str_lhs, str_rhs));
    }
    | "inter" LPAREN
        sym_lhs = sym_set_operators [vars] COMMA
        sym_rhs = sym_set_operators [vars]
      RPAREN
    {
        op.reset(new Wccl::Intersects<Wccl::TSet>(sym_lhs, sym_rhs));
    }
    /*
    | "inter" LPAREN
        p1 = position_operators [vars] COMMA
        p2 = position_operators [vars]
      RPAREN
    {
        // op.reset(new Wccl::Intersects(*p1.get(), *p2.get()));
    }
    */
;
// ----------------------------------------------------------------------------
// equal(a, b): Wccl::Equals over two string sets or two symbol sets.
lpred_eq
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
    boost::shared_ptr<Wccl::Function<Wccl::StrSet> > str_lhs, str_rhs;
    boost::shared_ptr<Wccl::Function<Wccl::TSet> > sym_lhs, sym_rhs;
    // only referenced by the disabled position alternative below
    boost::shared_ptr<Wccl::Function<Wccl::Position> > p1, p2;
}
    : "equal" LPAREN
        str_lhs = string_operators [vars] COMMA
        str_rhs = string_operators [vars]
      RPAREN
    {
        op.reset(new Wccl::Equals<Wccl::StrSet>(str_lhs, str_rhs));
    }
    | "equal" LPAREN
        sym_lhs = sym_set_operators [vars] COMMA
        sym_rhs = sym_set_operators [vars]
      RPAREN
    {
        op.reset(new Wccl::Equals<Wccl::TSet>(sym_lhs, sym_rhs));
    }
    /*
    | "equal" LPAREN
        p1 = position_operators [vars] COMMA
        p2 = position_operators [vars]
      RPAREN
    {
        op.reset(new Wccl::Equals<Wccl::Position>(p1, p2));
    }
    */
;

// ----------------------------------------------------------------------------
// regex(<string operator>, STRING): wraps the operand and the text of the
// string literal in Wccl::Regex.
lpred_regex
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
    boost::shared_ptr<Wccl::Function<Wccl::StrSet> > subject;
}
    : "regex" LPAREN
        subject = string_operators [vars]
        COMMA
        pattern: STRING
      RPAREN
    {
        op.reset(new Wccl::Regex(subject, token_ref_to_ustring(pattern)));
    }
;

// ----------------------------------------------------------------------------
// inside(<position>) / outside(<position>): wraps the position operator in
// Wccl::IsInside or Wccl::IsOutside respectively.
lpred_inout
    [Wccl::Variables& vars]
    returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
    boost::shared_ptr<Wccl::Function<Wccl::Position> > where;
}
    : "inside" LPAREN where = position_operators [vars] RPAREN
    {
        op.reset(new Wccl::IsInside(where));
    }
    | "outside" LPAREN where = position_operators [vars] RPAREN
    {
        op.reset(new Wccl::IsOutside(where));
    }
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ANTLR LEXER
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
class ANTLRLexer extends Lexer;
options {
    exportVocab = ANTLRExpr;
    charVocabulary = '\3'..'\377';
    testLiterals = false;
    k = 2;
}

// String literal in double or single quotes.
// The '!' suffix keeps the delimiting quote characters out of the token
// text, so the parser actions (string set members, regex patterns) receive
// only the characters between the quotes.
STRING
options {
    paraphrase = "a string";
}
    : '"'!  (~'"')*  '"'!
    | '\''! (~'\'')* '\''!
;

// Optionally signed sequence of decimal digits.
INT
options {
    paraphrase = "Integer";
}
    : ('-'|'+')? ('0'..'9')+
;

// NOTE(review): QUOT_MARK matches the single quote and APOS_MARK the double
// quote, which looks swapped relative to their names; both start the same
// way as STRING and neither is referenced by the parser rules in this
// file - confirm before relying on them.
QUOT_MARK
options {
    paraphrase = "Quote mark";
}
    : '\''
;

APOS_MARK
options {
    paraphrase = "Apostrophe mark";
}
    : '"'
;

Q_MARK
options {
    paraphrase = "Query mark";
}
    : '?'
;

E_MARK
options {
    paraphrase = "Exclamation mark";
}
    : '!'
;

G_MARK
options {
    paraphrase = "Gravis mark";
}
    : '`'
;

// Type prefixes used after DOLLAR in variable references.
STR_PREFIX
options {
    paraphrase = "String prefix";
}
    : "s:"
;

TST_PREFIX
options {
    paraphrase = "Tag set (symbol) prefix";
}
    : "t:"
;

BOOL_PREFIX
options {
    paraphrase = "Bool prefix";
}
    : "b:"
;

LBRACKET
options {
    paraphrase = "'['";
}
    : '['
;

RBRACKET
options {
    paraphrase = "']'";
}
    : ']'
;

LPAREN
options {
    paraphrase = "'('";
}
    : '('
;

RPAREN
options {
    paraphrase = "')'";
}
    : ')'
;

LCURLY
options {
    paraphrase = "'{'";
}
    : '{'
;

RCURLY
options {
    paraphrase = "'}'";
}
    : '}'
;

DOLLAR
options {
    paraphrase = "'$'";
}
    : '$'
;

AT_MARK
options {
    paraphrase = "'@'";
}
    : '@'
;

COMMA
options {
    paraphrase = "','";
}
    : ','
;

// Identifier; testLiterals is enabled here so that keywords used as string
// literals in the parser are recognised as their own token types.
SYMBOL
options {
    paraphrase = "Symbol";
    testLiterals = true;
}
    : ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
//  : ('a'..'z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
;

/*
VAR_NAME
options {
    paraphrase = "Variable name";
}
    : ('A'..'Z') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
;
*/

/*
TRUE_VALUE
options {
    paraphrase = "True value";
}
    : "True"
;

FALSE_VALUE
options {
    paraphrase = "False value";
}
    : "False"
;
*/

// Whitespace is skipped; line breaks advance the line counter.
WS
    : ( ' '
      | '\t'
      | '\r' '\n' { newline(); }
      | '\n'      { newline(); }
      )
    { $setType(antlr::Token::SKIP); }
;

// Line comment, skipped. The terminating newline is optional so that a
// comment on the last line of the input, with no trailing newline, is
// still accepted.
COMMENT
options {
    paraphrase = "Comment";
}
    : "//" (~'\n')* ( '\n' { newline(); } )?
    { $setType(antlr::Token::SKIP); }
;

HASH
options {
    paraphrase = "'#'";
}
    : '#'
;

DSEPARATOR
options {
    paraphrase = "':-'";
}
    : ":-"
;