header {
    // Do not pull the headers below into the ANTLR-generated namespace:
    // close it here and reopen it at the end of this section.
    ANTLR_END_NAMESPACE

    #include <libwccl/parser/ParserException.h>

    #include <cstdio>
    #include <antlr/Token.hpp>
    #include <boost/lexical_cast.hpp>

    // values/variables
    #include <libwccl/variables.h>
    #include <libwccl/values/bool.h>
    #include <libwccl/values/tset.h>
    #include <libwccl/values/strset.h>
    #include <libwccl/values/position.h>

    // sentence context
    #include <libwccl/sentencecontext.h>

    // operators
    #include <libwccl/ops/functions/bool/predicates/or.h>
    #include <libwccl/ops/functions/bool/predicates/nor.h>
    #include <libwccl/ops/functions/bool/predicates/and.h>
    #include <libwccl/ops/functions/bool/predicates/regex.h>
    #include <libwccl/ops/functions/bool/predicates/equals.h>
    #include <libwccl/ops/functions/bool/varsetter.h>
    #include <libwccl/ops/functions/strset/affix.h>
    #include <libwccl/ops/functions/strset/getorth.h>
    #include <libwccl/ops/functions/strset/toupper.h>
    #include <libwccl/ops/functions/strset/tolower.h>
    #include <libwccl/ops/functions/constant.h>
    #include <libwccl/ops/functions/vargetter.h>
    #include <libwccl/ops/functions/bool/predicates/intersects.h>
    #include <libwccl/ops/functions/bool/predicates/issubsetof.h>
    #include <libwccl/ops/functions/bool/predicates/isinside.h>
    #include <libwccl/ops/functions/bool/predicates/isoutside.h>
    #include <libwccl/ops/functions/position/relativeposition.h>
    #include <libwccl/ops/functions/conditional.h>

    // Unicode String
    #include <unicode/uniset.h>
    #include <unicode/unistr.h>

    // parser result wrapper (operator + its variables)
    #include <libwccl/parser/ANTLRParserResult.h>

    // start our namespace again
    ANTLR_BEGIN_NAMESPACE(Wccl)
}

options {
    language = "Cpp";
    genHashLines = false;
    namespace = "Wccl";
//  genHashLines = true;
}

// ----------------------------------------------------------------------------
// ANTLR PARSER
// ----------------------------------------------------------------------------
class ANTLRParser extends Parser;
options {
    k = 1;
    buildAST = false;
    exportVocab = ANTLRExpr;
defaultErrorHandler = false;
}
{
private:
    /// Returns the raw text of the token, unchanged.
    const std::string token_ref_to_std_string(antlr::RefToken& rstr) const {
        return rstr->getText();
    }

    /// Interprets the token text as UTF-8 and returns it as a UnicodeString
    /// with escape sequences resolved by UnicodeString::unescape().
    const UnicodeString token_ref_to_ustring(antlr::RefToken& rstr) const {
        return UnicodeString::fromUTF8(rstr->getText()).unescape();
    }

    /*
    const UnicodeString str_token_ref_to_ustring(antlr::RefToken& rstr) const {
        UnicodeString ret_ustr, ustr = token_ref_to_ustring(rstr);

        if (ustr.length() < 3) {
            return "";
        }

        ustr.extract(1, ustr.length() - 2, ret_ustr);

        return ret_ustr;
    }
    */

    /// Returns the token text with one pair of enclosing grave accents
    /// removed. Text shorter than two characters, or not both starting and
    /// ending with a grave accent, is returned unchanged.
    const std::string str_token_rem_grav(antlr::RefToken& rstr) const {
        const std::string text = token_ref_to_std_string(rstr);
        const size_t len = text.length();

        if (len >= 2 && text[0] == '`' && text[len - 1] == '`') {
            return text.substr(1, len - 2);
        }
        return text;
    }

    /// Converts the token text to int.
    /// Throws ParserException when the text is not a valid int, e.g. when
    /// the literal does not fit into int. The previous atoi() call had
    /// undefined behaviour for out-of-range values and could not report it.
    int token_ref_to_int(antlr::RefToken& rstr) {
        const std::string text = rstr->getText();
        try {
            return boost::lexical_cast<int>(text);
        }
        catch (const boost::bad_lexical_cast&) {
            throw ParserException(
                "Invalid or out-of-range integer literal: " + text);
        }
    }
}

// TODO
//  - base, orth

// TEMPORARY CHANGES ->
//  -> temporarily commented out the invocation of the condit_* rules
//  -> temporarily commented out 2 rules with equal

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// "GLOBAL" RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// ----------------------------------------------------------------------------
// Rules for parsing string operators in scope (variables).
// Returns boost::shared_ptr<ANTLRParserResult<StrSet> >; the parsed operator
// is stored in its `op` member and the sub-rules register variables in its
// `variables` member.
parse_string_operator
    [const Corpus2::Tagset &tagset]
    returns [boost::shared_ptr<ANTLRParserResult<StrSet> > res]
{
    boost::shared_ptr<Function<StrSet> > parsed;
    res.reset(new ANTLRParserResult<StrSet>());
}
    : parsed = string_operators [tagset, *res->variables.get()]
      { res->op = parsed; }
      EOF
;

// ----------------------------------------------------------------------------
// Rules for parsing predicates in scope (variables).
// Returns boost::shared_ptr<ANTLRParserResult<Bool> >
parse_predicates
    [const Corpus2::Tagset &tagset]
    returns [boost::shared_ptr<ANTLRParserResult<Bool> > res]
{
    boost::shared_ptr<Function<Bool> > parsed;
    res.reset(new ANTLRParserResult<Bool>());
}
    : parsed = logical_predicates [tagset, *res->variables.get()]
      { res->op = parsed; }
      EOF
;

// ----------------------------------------------------------------------------
// Rules for parsing tagset (symbol set) operators
// Returns boost::shared_ptr<ANTLRParserResult<TSet> >
parse_sym_set_operator
    [const Corpus2::Tagset &tagset]
    returns [boost::shared_ptr<ANTLRParserResult<TSet> > res]
{
    boost::shared_ptr<Function<TSet> > parsed;
    res.reset(new ANTLRParserResult<TSet>());
}
    : parsed = sym_set_operators [tagset, *res->variables.get()]
      { res->op = parsed; }
      EOF
;

// ----------------------------------------------------------------------------
// Rules for parsing position operators
// Returns boost::shared_ptr<ANTLRParserResult<Position> >
parse_position_operator
    [const Corpus2::Tagset &tagset]
    returns [boost::shared_ptr<ANTLRParserResult<Position> > res]
{
    boost::shared_ptr<Function<Position> > parsed;
    res.reset(new ANTLRParserResult<Position>());
}
    : parsed = position_operators [tagset, *res->variables.get()]
      { res->op = parsed; }
      EOF
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VALUES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// ----------------------------------------------------------------------------
// Single or multiple values in a string set:
//   [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"]
// A lone STRING without brackets is accepted as a one-element set.
// Returns boost::shared_ptr<StrSet>
str_set_literal
    returns [boost::shared_ptr<StrSet> s_set]
{
    s_set.reset(new StrSet());
}
    : lone: STRING {
        s_set->insert(token_ref_to_ustring(lone));
    }
    | LBRACKET
        (
            first: STRING {
                s_set->insert(token_ref_to_ustring(first));
            }
            (
                COMMA next: STRING {
                    s_set->insert(token_ref_to_ustring(next));
                }
            )*
        )?
      RBRACKET
;

// Constant string set
// Returns boost::shared_ptr<Constant<StrSet> >
str_set_value
    returns [boost::shared_ptr<Constant<StrSet> > val]
{
    boost::shared_ptr<StrSet> literal;
}
    : literal = str_set_literal {
        val.reset(new Constant<StrSet>(*literal));
    }
;

// ----------------------------------------------------------------------------
// Element of a symbol set. This rule inserts the element into the given set.
// The element may be written as: a or `a`
// A Corpus2::TagParseError raised by the insertion is rethrown as a
// ParserException carrying the same info text.
sym_set_elem
    [const Corpus2::Tagset& tagset, boost::shared_ptr<TSet>& t_set]
    : sym: SYMBOL {
        try {
            t_set->insert_symbol(tagset, str_token_rem_grav(sym));
        }
        catch (Corpus2::TagParseError &e) {
            throw ParserException(e.info());
        }
    }
;

// Symbol set literal
//   {} {sym_set_elem} {sym_set_elem, ..., sym_set_elem}
// A lone element without curly braces is accepted as well.
// Returns boost::shared_ptr<TSet>
sym_set_literal
    [const Corpus2::Tagset& tagset]
    returns [boost::shared_ptr<TSet> t_set]
{
    t_set.reset(new TSet());
}
    : sym_set_elem[tagset, t_set]
    | LCURLY
        (
            sym_set_elem[tagset, t_set]
            (COMMA sym_set_elem[tagset, t_set])*
        )?
RCURLY
;

// Constant symbol set
// Returns boost::shared_ptr<Constant<TSet> >
sym_set_value
    [const Corpus2::Tagset& tagset]
    returns [boost::shared_ptr<Constant<TSet> > val]
{
    boost::shared_ptr<TSet> literal;
}
    : literal = sym_set_literal [tagset] {
        val.reset(new Constant<TSet>(*literal));
    }
;

// ----------------------------------------------------------------------------
// Boolean value:
// The literal is one of the keywords True or False.
// Returns boost::shared_ptr<Bool>
bool_literal
    returns [boost::shared_ptr<Bool> val]
    : "True"  { val.reset(new Bool(true));  }
    | "False" { val.reset(new Bool(false)); }
;

// Constant bool value
// Returns boost::shared_ptr<Constant<Bool> >
boolean_value
    returns [boost::shared_ptr<Constant<Bool> > val]
{
    boost::shared_ptr<Bool> literal;
}
    : literal = bool_literal {
        val.reset(new Constant<Bool>(*literal));
    }
;

// ----------------------------------------------------------------------------
// Position value:
// The literal is a number (see the number rule below) or one of the
// keywords begin, end, nowhere.
// Returns boost::shared_ptr<Position>
position_literal
    returns [boost::shared_ptr<Position> val]
{
    int offset = 0;
}
    : offset = number {
        val.reset(new Position(offset));
    }
    | "begin" {
        val.reset(new Position(Position::Begin));
    }
    | "end" {
        val.reset(new Position(Position::End));
    }
    | "nowhere" {
        val.reset(new Position(Position::Nowhere));
    }
;

// Constant position value
// Returns boost::shared_ptr<Constant<Position> >
position_value
    returns [boost::shared_ptr<Constant<Position> > val]
{
    boost::shared_ptr<Position> literal;
}
    : literal = position_literal {
        val.reset(new Constant<Position>(*literal));
    }
;

// ----------------------------------------------------------------------------
// A number is either a SIGNED_INT or an UNSIGNED_INT token.
// Returns int
number
    returns [int ret]
{
    ret = 0;
}
    : with_sign: SIGNED_INT  { ret = token_ref_to_int(with_sign); }
    | no_sign: UNSIGNED_INT  { ret = token_ref_to_int(no_sign); }
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VARIABLES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// ----------------------------------------------------------------------------
// Position: $name
// Registers the variable in vars (get_put) and builds an accessor for it.
// Returns boost::shared_ptr<VariableAccessor<Position> >
position_variable_acc
    [Variables& vars]
    returns [boost::shared_ptr<VariableAccessor<Position> > pos_acc]
    : POS_PREFIX n: SYMBOL {
        const std::string name = str_token_rem_grav(n);
        vars.get_put<Position>(name);
        pos_acc.reset(new VariableAccessor<Position>(
            vars.create_accessor<Position>(name)));
    }
;

// Position vargetter
// Returns boost::shared_ptr<VarGetter<Position> >
position_variable
    [Variables& vars]
    returns [boost::shared_ptr<VarGetter<Position> > op]
{
    boost::shared_ptr<VariableAccessor<Position> > accessor;
}
    : accessor = position_variable_acc [vars] {
        op.reset(new VarGetter<Position>(*accessor));
    }
;

// ----------------------------------------------------------------------------
// Relative position: relpos(position operator, number)
// Returns boost::shared_ptr<Function<Position> >
relpos
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Position> > ret]
{
    boost::shared_ptr<Function<Position> > base_pos;
    int shift = 0;
}
    : "relpos" LPAREN
        base_pos = op_position [tagset, vars] COMMA shift = number
      RPAREN {
        ret.reset(new RelativePosition(base_pos, shift));
    }
;

// ----------------------------------------------------------------------------
// String set, $s:name
// This expression gets a variable of the type StrSet from a string-named
// variable.
// Returns boost::shared_ptr<VariableAccessor<StrSet> >
str_set_variable_acc
    [Variables& vars]
    returns [boost::shared_ptr<VariableAccessor<StrSet> > strset_acc]
    : STR_PREFIX n: SYMBOL {
        const std::string name = str_token_rem_grav(n);

        // get/put variable to variables
        vars.get_put<StrSet>(name);

        // makes accessor for value
        VariableAccessor<StrSet> acc = vars.create_accessor<StrSet>(name);
strset_acc.reset(new VariableAccessor<StrSet>(acc));
    }
;

// Vargetter for StrSet variable
// Returns boost::shared_ptr<VarGetter<StrSet> >
str_set_variable
    [Variables& vars]
    returns [boost::shared_ptr<VarGetter<StrSet> > op]
{
    boost::shared_ptr<VariableAccessor<StrSet> > accessor;
}
    : accessor = str_set_variable_acc [vars] {
        op.reset(new VarGetter<StrSet>(*accessor));
    }
;

// ----------------------------------------------------------------------------
// Symbol set: $t:name
// Registers the variable in vars (get_put) and builds an accessor for it.
// Returns boost::shared_ptr<VariableAccessor<TSet> >
sym_set_variable_acc
    [Variables& vars]
    returns [boost::shared_ptr<VariableAccessor<TSet> > symset_acc]
    : TST_PREFIX n: SYMBOL {
        const std::string name = str_token_rem_grav(n);
        vars.get_put<TSet>(name);
        symset_acc.reset(new VariableAccessor<TSet>(
            vars.create_accessor<TSet>(name)));
    }
;

// Vargetter for symbol set variable
// Returns boost::shared_ptr<VarGetter<TSet> >
sym_set_variable
    [Variables& vars]
    returns [boost::shared_ptr<VarGetter<TSet> > op]
{
    boost::shared_ptr<VariableAccessor<TSet> > accessor;
}
    : accessor = sym_set_variable_acc [vars] {
        op.reset(new VarGetter<TSet>(*accessor));
    }
;

// ----------------------------------------------------------------------------
// Bool: $b:name
// Registers the variable in vars (get_put) and builds an accessor for it.
// Returns boost::shared_ptr<VariableAccessor<Bool> >
boolean_variable_acc
    [Variables& vars]
    returns [boost::shared_ptr<VariableAccessor<Bool> > bool_acc]
    : BOOL_PREFIX n: SYMBOL {
        const std::string name = str_token_rem_grav(n);
        vars.get_put<Bool>(name);
        bool_acc.reset(new VariableAccessor<Bool>(
            vars.create_accessor<Bool>(name)));
    }
;

// Vargetter for bool variable
// Returns boost::shared_ptr<VarGetter<Bool> >
boolean_variable
    [Variables& vars]
    returns [boost::shared_ptr<VarGetter<Bool> > op]
{
    boost::shared_ptr<VariableAccessor<Bool> > accessor;
}
    : accessor = boolean_variable_acc [vars] {
        op.reset(new VarGetter<Bool>(*accessor));
    }
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// OPERATORS
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Setvar operator: setvar(variable, value-operator)
// The body alternative is chosen by the variable kind (position, bool,
// string set, symbol set).
// Returns boost::shared_ptr<Function<Bool> >
// ----------------------------------------------------------------------------
setvar_op
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Bool> > ret]
    : "setvar" LPAREN
        (
            ret = setvar_body_pos  [tagset, vars]
          | ret = setvar_body_bool [tagset, vars]
          | ret = setvar_body_sset [tagset, vars]
          | ret = setvar_body_tset [tagset, vars]
        )
      RPAREN
;

// Implementations of setvar:
// ----------------------------------------------------------------------------
// Position variable, position operator
setvar_body_pos
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<VariableAccessor<Position> > target;
    boost::shared_ptr<Function<Position> > value;
}
    : target = position_variable_acc [vars]
      COMMA
      value = op_position [tagset, vars] {
        op.reset(new VarSetter<Position>(*target, value));
    }
;

// ----------------------------------------------------------------------------
// Bool variable, logical predicate
setvar_body_bool
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<VariableAccessor<Bool> > target;
    boost::shared_ptr<Function<Bool> > value;
}
    : target = boolean_variable_acc [vars]
      COMMA
      value = logical_predicates [tagset, vars] {
        op.reset(new VarSetter<Bool>(*target, value));
    }
;

// ----------------------------------------------------------------------------
// String set variable, string operator
setvar_body_sset
    [const Corpus2::Tagset& tagset, Variables&
vars]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<VariableAccessor<StrSet> > target;
    boost::shared_ptr<Function<StrSet> > value;
}
    : target = str_set_variable_acc [vars]
      COMMA
      value = string_operators [tagset, vars] {
        op.reset(new VarSetter<StrSet>(*target, value));
    }
;

// ----------------------------------------------------------------------------
// Symbol set variable, symbol set operator
setvar_body_tset
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<VariableAccessor<TSet> > target;
    boost::shared_ptr<Function<TSet> > value;
}
    : target = sym_set_variable_acc [vars]
      COMMA
      value = sym_set_operators [tagset, vars] {
        op.reset(new VarSetter<TSet>(*target, value));
    }
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Symbol set (tagset) operators
// Returns boost::shared_ptr<Function<TSet> >
// ----------------------------------------------------------------------------
sym_set_operators
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<TSet> > ret]
    : ret = op_sym_set [tagset, vars]
    | ret = condit_sym [tagset, vars]
;

// Implementations of symbol set operators:
// ----------------------------------------------------------------------------
// A symbol set variable or a constant symbol set.
op_sym_set
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<TSet> > op]
    : op = sym_set_variable [vars]
    | op = sym_set_value [tagset]
;

// ----------------------------------------------------------------------------
// if (Bool, TSet, TSet)
// ? TSet ?
//   Bool : {}
// Conditional symbol set operator; the false branch of the "if" form is
// optional, the question-mark form never has one.
condit_sym
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<TSet> > op]
{
    boost::shared_ptr<Function<Bool> > condition;
    boost::shared_ptr<Function<TSet> > when_true, when_false;
}
    : "if" LPAREN
        condition = logical_predicates [tagset, vars]
        COMMA when_true = sym_set_operators [tagset, vars]
        (COMMA when_false = sym_set_operators [tagset, vars])?
      RPAREN {
        if (!when_false) {
            op.reset(new Conditional<TSet>(condition, when_true));
        }
        else {
            op.reset(new Conditional<TSet>(condition, when_true, when_false));
        }
    }
    | Q_MARK
        (when_true = sym_set_operators [tagset, vars])
      Q_MARK
        (condition = logical_predicates [tagset, vars]) {
        op.reset(new Conditional<TSet>(condition, when_true));
    }
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Position operators
// Returns boost::shared_ptr<Function<Position> >
// ----------------------------------------------------------------------------
position_operators
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Position> > ret]
    : ret = op_position [tagset, vars]
;

// Implementations of position operators:
// ----------------------------------------------------------------------------
// A position variable, a constant position, relpos(...) or a conditional.
op_position
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Position> > op]
    : op = position_variable [vars]
    | op = position_value
    | op = relpos [tagset, vars]
    | op = condit_position [tagset, vars]
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// String operators
// Returns boost::shared_ptr<Function<StrSet> >
// ----------------------------------------------------------------------------
string_operators
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<StrSet> > ret]
    : ret = op_orth [tagset, vars]
    | ret = op_base [tagset, vars]
    | ret =
op_lower [tagset, vars]
    | ret = op_upper [tagset, vars]
    | ret = op_affix [tagset, vars]
    | ret = op_str_set [tagset, vars]
    | ret = condit_str [tagset, vars]
;

// Implementations of string operators:
// ----------------------------------------------------------------------------
// orth[position operator]
op_orth
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<StrSet> > ret]
{
    boost::shared_ptr<Function<Position> > pos;
}
    : "orth" LBRACKET pos = op_position [tagset, vars] RBRACKET {
        ret.reset(new GetOrth(pos));
    }
;

// ----------------------------------------------------------------------------
// base[position variable]
// The operator behind this syntax is not implemented yet (see the TODO at
// the top of the grammar). The rule used to leave its result unset, so a
// successful parse produced a null operator. It now reports the problem by
// throwing ParserException once the expression has been recognised.
op_base
    [const Corpus2::Tagset& /*tagset*/, Variables& vars]
    returns [boost::shared_ptr<Function<StrSet> > ret]
{
    boost::shared_ptr<VarGetter<Position> > pos;
}
    : "base" LBRACKET pos = position_variable [vars] RBRACKET {
        // TODO: build the real operator here and remove the throw.
        throw ParserException("Operator base[] is not implemented yet");
    }
;

// ----------------------------------------------------------------------------
// lower(string operator)
op_lower
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<StrSet> > ret]
{
    boost::shared_ptr<Function<StrSet> > o_ret;
}
    : "lower" LPAREN o_ret = string_operators[tagset, vars] RPAREN {
        ret.reset(new ToLower(o_ret));
    }
;

// ----------------------------------------------------------------------------
// upper(string operator)
op_upper
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<StrSet> > ret]
{
    boost::shared_ptr<Function<StrSet> > o_ret;
}
    : "upper" LPAREN o_ret = string_operators[tagset, vars] RPAREN {
        ret.reset(new ToUpper(o_ret));
    }
;

// ----------------------------------------------------------------------------
// affix(string operator, number)
op_affix
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<StrSet> > ret]
{
    int offset = 0;
    boost::shared_ptr<Function<StrSet> > o_ret;
}
    : "affix" LPAREN
        o_ret = string_operators[tagset, vars] COMMA offset = number
      RPAREN {
        ret.reset(new Affix(o_ret, offset));
    }
;

// ----------------------------------------------------------------------------
op_str_set
[const Corpus2::Tagset& /*tagset*/, Variables& vars]
    returns [boost::shared_ptr<Function<StrSet> > op]
    : op = str_set_variable [vars]
    | op = str_set_value
;

// ----------------------------------------------------------------------------
// if (Bool, StrSet, StrSet)
// ? StrSet ? Bool : []
// Conditional string operator; the false branch of the "if" form is
// optional, the question-mark form never has one.
condit_str
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<StrSet> > op]
{
    boost::shared_ptr<Function<Bool> > condition;
    boost::shared_ptr<Function<StrSet> > when_true, when_false;
}
    : "if" LPAREN
        condition = logical_predicates [tagset, vars]
        COMMA when_true = string_operators [tagset, vars]
        (COMMA when_false = string_operators [tagset, vars])?
      RPAREN {
        if (!when_false) {
            op.reset(new Conditional<StrSet>(condition, when_true));
        }
        else {
            op.reset(new Conditional<StrSet>(condition, when_true, when_false));
        }
    }
    | Q_MARK
        when_true = string_operators [tagset, vars]
      Q_MARK
        condition = logical_predicates [tagset, vars] {
        op.reset(new Conditional<StrSet>(condition, when_true));
    }
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Logical predicates
// Returns boost::shared_ptr<Function<Bool> >
// ----------------------------------------------------------------------------
logical_predicates
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Bool> > ret]
    : ret = lpred_and   [tagset, vars]
    | ret = lpred_or    [tagset, vars]
    | ret = lpred_nor   [tagset, vars]
    | ret = lpred_bool  [tagset, vars]
    | ret = lpred_in    [tagset, vars]
    | ret = lpred_inter [tagset, vars]
    | ret = lpred_eq    [tagset, vars]
    | ret = lpred_regex [tagset, vars]
    | ret = setvar_op   [tagset, vars]
    | ret = lpred_inout [tagset, vars]
    | ret = condit_bool [tagset, vars]
;

// ----------------------------------------------------------------------------
// Comma-separated predicates (at least one)
logical_predicates_comma_sep
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<
std::vector<boost::shared_ptr<Function<Bool> > >
    > ret_v]
{
    ret_v.reset(new std::vector<boost::shared_ptr<Function<Bool> > >);
    boost::shared_ptr<Function<Bool> > pred;
}
    : pred = logical_predicates [tagset, vars] {
        ret_v->push_back(pred);
    }
    (
        COMMA pred = logical_predicates [tagset, vars] {
            ret_v->push_back(pred);
        }
    )*
;

// ----------------------------------------------------------------------------
// and(predicate, ..., predicate)
lpred_and
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<
        std::vector<boost::shared_ptr<Function<Bool> > >
    > operands;
}
    : "and" LPAREN operands = logical_predicates_comma_sep [tagset, vars] RPAREN {
        op.reset(new And(operands));
    }
;

// ----------------------------------------------------------------------------
// or(predicate, ..., predicate)
lpred_or
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<
        std::vector<boost::shared_ptr<Function<Bool> > >
    > operands;
}
    : "or" LPAREN operands = logical_predicates_comma_sep [tagset, vars] RPAREN {
        op.reset(new Or(operands));
    }
;

// ----------------------------------------------------------------------------
// not(predicate, ..., predicate) -- the keyword is "not", the operator built
// for it is Nor.
lpred_nor
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<
        std::vector<boost::shared_ptr<Function<Bool> > >
    > operands;
}
    : "not" LPAREN operands = logical_predicates_comma_sep [tagset, vars] RPAREN {
        op.reset(new Nor(operands));
    }
;

// ----------------------------------------------------------------------------
// A bool variable or a constant bool value.
lpred_bool
    [const Corpus2::Tagset& /*tagset*/, Variables& vars]
    returns [boost::shared_ptr<Function<Bool> > op]
    : op = boolean_variable [vars]
    | op = boolean_value
;

// ----------------------------------------------------------------------------
// in(set, set) for two symbol sets or two string sets
lpred_in
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<TSet> > t1, t2;
    boost::shared_ptr<Function<StrSet> > s1, s2;
}
    : "in" LPAREN
    (
(sym_set_operators [tagset, vars]) =>
        (
            // the syntactic predicate above selects the symbol set variant
            t1 = sym_set_operators [tagset, vars]
            COMMA
            t2 = sym_set_operators [tagset, vars] {
                op.reset(new IsSubsetOf<TSet>(t1, t2));
            }
        )
        |
        (
            s1 = string_operators [tagset, vars]
            COMMA
            s2 = string_operators [tagset, vars] {
                op.reset(new IsSubsetOf<StrSet>(s1, s2));
            }
        )
    )
    RPAREN
;

// ----------------------------------------------------------------------------
// inter(set, set) for two symbol sets or two string sets
lpred_inter
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<TSet> > tset_lhs, tset_rhs;
    boost::shared_ptr<Function<StrSet> > sset_lhs, sset_rhs;
}
    : "inter" LPAREN
    (
        (sym_set_operators [tagset, vars]) =>
        (
            tset_lhs = sym_set_operators [tagset, vars]
            COMMA
            tset_rhs = sym_set_operators [tagset, vars] {
                op.reset(new Intersects<TSet>(tset_lhs, tset_rhs));
            }
        )
        |
        (
            sset_lhs = string_operators [tagset, vars]
            COMMA
            sset_rhs = string_operators [tagset, vars] {
                op.reset(new Intersects<StrSet>(sset_lhs, sset_rhs));
            }
        )
    )
    RPAREN
;

// ----------------------------------------------------------------------------
// equal(a, b): syntactic predicates try position, symbol set and string
// operands in that order; bool operands are the fallback.
lpred_eq
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<Position> > pos_lhs, pos_rhs;
    boost::shared_ptr<Function<TSet> > tset_lhs, tset_rhs;
    boost::shared_ptr<Function<StrSet> > sset_lhs, sset_rhs;
    boost::shared_ptr<Function<Bool> > bool_lhs, bool_rhs;
}
    : "equal" LPAREN
    (
        (position_operators [tagset, vars]) =>
        (
            pos_lhs = position_operators [tagset, vars]
            COMMA
            pos_rhs = position_operators [tagset, vars] {
                op.reset(new Equals<Position>(pos_lhs, pos_rhs));
            }
        )
        |
        (sym_set_operators [tagset, vars]) =>
        (
            tset_lhs = sym_set_operators [tagset, vars]
            COMMA
            tset_rhs = sym_set_operators [tagset, vars] {
                op.reset(new Equals<TSet>(tset_lhs, tset_rhs));
            }
        )
        |
        (string_operators [tagset, vars]) =>
        (
            sset_lhs = string_operators [tagset, vars]
            COMMA
            sset_rhs = string_operators [tagset, vars] {
                op.reset(new Equals<StrSet>(sset_lhs, sset_rhs));
            }
        )
        |
        (
            bool_lhs = logical_predicates [tagset, vars]
            COMMA
            bool_rhs = logical_predicates [tagset, vars] {
                op.reset(new Equals<Bool>(bool_lhs, bool_rhs));
            }
        )
    )
    RPAREN
;

//
// ----------------------------------------------------------------------------
// regex(string operator, "pattern")
lpred_regex
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<StrSet> > subject;
}
    : "regex" LPAREN
        subject = string_operators [tagset, vars] COMMA pattern: STRING
      RPAREN {
        op.reset(new Regex(subject, token_ref_to_ustring(pattern)));
    }
;

// ----------------------------------------------------------------------------
// inside(position operator) / outside(position operator)
lpred_inout
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<Position> > where;
}
    : "inside" LPAREN where = position_operators [tagset, vars] RPAREN {
        op.reset(new IsInside(where));
    }
    | "outside" LPAREN where = position_operators [tagset, vars] RPAREN {
        op.reset(new IsOutside(where));
    }
;

// ----------------------------------------------------------------------------
// if (Bool, Bool, Bool)
// ? Bool ? Bool : False
// Conditional predicate; the false branch of the "if" form is optional, the
// question-mark form never has one.
condit_bool
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<Bool> > condition, when_true, when_false;
}
    : "if" LPAREN
        condition = logical_predicates [tagset, vars]
        COMMA when_true = logical_predicates [tagset, vars]
        (COMMA when_false = logical_predicates [tagset, vars])?
      RPAREN {
        if (!when_false) {
            op.reset(new Conditional<Bool>(condition, when_true));
        }
        else {
            op.reset(new Conditional<Bool>(condition, when_true, when_false));
        }
    }
    | Q_MARK
        when_true = logical_predicates [tagset, vars]
      Q_MARK
        condition = logical_predicates [tagset, vars] {
        op.reset(new Conditional<Bool>(condition, when_true));
    }
;

// ----------------------------------------------------------------------------
// if (Bool, Position, Position)
// ? Position ?
//   Bool : 0
// Conditional position operator; the false branch of the "if" form is
// optional, the question-mark form never has one.
condit_position
    [const Corpus2::Tagset& tagset, Variables& vars]
    returns [boost::shared_ptr<Function<Position> > op]
{
    boost::shared_ptr<Function<Bool> > condition;
    boost::shared_ptr<Function<Position> > when_true, when_false;
}
    : "if" LPAREN
        condition = logical_predicates [tagset, vars]
        COMMA when_true = position_operators [tagset, vars]
        (COMMA when_false = position_operators [tagset, vars])?
      RPAREN {
        if (!when_false) {
            op.reset(new Conditional<Position>(condition, when_true));
        }
        else {
            op.reset(new Conditional<Position>(condition, when_true, when_false));
        }
    }
    | Q_MARK
        when_true = position_operators [tagset, vars]
      Q_MARK
        condition = logical_predicates [tagset, vars] {
        op.reset(new Conditional<Position>(condition, when_true));
    }
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ANTLR LEXER
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
class ANTLRLexer extends Lexer;
options {
    exportVocab    = ANTLRExpr;
    charVocabulary = '\3'..'\377';
    testLiterals   = false;
    k              = 2;
}

// A string in double or single quotes; the enclosing quotes are dropped from
// the token text (the '!' suffix). The delimiter cannot occur inside.
STRING
options {
    paraphrase = "a string";
}
    : '"'!  (~'"')*  '"'!
    | '\''! (~'\'')* '\''!
;

SIGNED_INT
options {
    paraphrase = "Signed integer";
}
    : ('-'|'+') ('0'..'9')+
;

UNSIGNED_INT
options {
    paraphrase = "Unsigned integer";
}
    : ('0'..'9')+
;

// NOTE(review): the names/paraphrases of the next two rules look swapped
// relative to the characters they match (QUOT_MARK matches an apostrophe,
// APOS_MARK a double quote), and both start like STRING -- confirm intent.
QUOT_MARK
options {
    paraphrase = "Quote";
}
    : '\''
;

APOS_MARK
options {
    paraphrase = "Apostrophe";
}
    : '"'
;

Q_MARK
options {
    paraphrase = "Question mark";
}
    : '?'
;

E_MARK
options {
    paraphrase = "Exclamation mark";
}
    : '!'
;

// Variable prefixes: "$s:" string set, "$t:" symbol set, "$b:" bool; a bare
// '$' introduces a position variable.
STR_PREFIX
options {
    paraphrase = "String prefix";
}
    : "$s:"
;

TST_PREFIX
options {
    paraphrase = "Symset prefix";
}
    : "$t:"
;

BOOL_PREFIX
options {
    paraphrase = "Bool prefix";
}
    : "$b:"
;

POS_PREFIX
options {
    paraphrase = "Position prefix";
}
    : '$'
;

LBRACKET
options {
    paraphrase = "'['";
}
    : '['
;

RBRACKET
options {
    paraphrase = "']'";
}
    : ']'
;

LPAREN
options {
    paraphrase = "'('";
}
    : '('
;

RPAREN
options {
    paraphrase = "')'";
}
    : ')'
;

LCURLY
options {
    paraphrase = "'{'";
}
    : '{'
;

RCURLY
options {
    paraphrase = "'}'";
}
    : '}'
;

AT_MARK
options {
    paraphrase = "'@'";
}
    : '@'
;

COMMA
options {
    paraphrase = "','";
}
    : ','
;

// An identifier, optionally enclosed in grave accents. testLiterals is
// enabled here so that keywords used in the parser are recognised.
SYMBOL
options {
    paraphrase = "Symbol";
    testLiterals = true;
}
    : ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
    | '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
;

// Whitespace is skipped; line breaks also advance the lexer's line counter.
WS
    : (
          ' '
        | '\t'
        | '\f'
        | ( "\r\n" | '\r' | '\n' ) { newline(); }
      )
      { $setType(antlr::Token::SKIP); }
;

COMMENT
options {
    paraphrase = "Single line comment";
}
    : "//" (~('\n'|'\r'))*
      { $setType(antlr::Token::SKIP); }
;

ML_COMMENT
options {
    paraphrase = "Multi line comment";
}
    : "/*"
      (
        // TODO: test it and add reference to the site it's taken from!
        /* This actually works OK despite the ambiguity that
           '\r' '\n' can be matched in one alternative or by matching
           '\r' in one iteration and '\n' in another.. But
           this is really matched just by one rule per (...)*
           loop iteration, so it's OK.
           This is exactly how they do it all over the web - just
           turn off the warning for this particular token.*/
        options {
            generateAmbigWarnings=false;
        }
        : { LA(2)!='/' }? '*'
        | '\r' '\n' { newline(); }
        | '\r'      { newline(); }
        | '\n'      { newline(); }
        | ~('*'|'\n'|'\r')
      )*
      "*/"
      {$setType(antlr::Token::SKIP);}
;

HASH
options {
    paraphrase = "'#'";
}
    : '#'
;

//DSEPARATOR
//options {
//  paraphrase = "':-'";
//}
//  : ":-"
//;