header { //don't try to add all the headers inside our namespace ANTLR_END_NAMESPACE #include <libwccl/parser/ParserException.h> #include <cstdio> #include <antlr/Token.hpp> #include <boost/lexical_cast.hpp> // values/variables #include <libwccl/variables.h> #include <libwccl/values/bool.h> #include <libwccl/values/tset.h> #include <libwccl/values/strset.h> #include <libwccl/values/position.h> // sentence context #include <libwccl/sentencecontext.h> // operators #include <libwccl/ops/functions/constant.h> #include <libwccl/ops/functions/vargetter.h> #include <libwccl/ops/functions/conditional.h> #include <libwccl/ops/functions/bool/predicates/or.h> #include <libwccl/ops/functions/bool/predicates/nor.h> #include <libwccl/ops/functions/bool/predicates/and.h> #include <libwccl/ops/functions/bool/predicates/regex.h> #include <libwccl/ops/functions/bool/varsetter.h> #include <libwccl/ops/functions/bool/predicates/intersects.h> #include <libwccl/ops/functions/bool/predicates/issubsetof.h> #include <libwccl/ops/functions/bool/predicates/isinside.h> #include <libwccl/ops/functions/bool/predicates/isoutside.h> #include <libwccl/ops/functions/bool/predicates/equals.h> #include <libwccl/ops/functions/strset/affix.h> #include <libwccl/ops/functions/strset/getorth.h> #include <libwccl/ops/functions/strset/toupper.h> #include <libwccl/ops/functions/strset/tolower.h> #include <libwccl/ops/functions/strset/getlemmas.h> #include <libwccl/ops/functions/tset/getsymbols.h> #include <libwccl/ops/functions/tset/getwordclass.h> #include <libwccl/ops/functions/position/relativeposition.h> // Unicode String #include <unicode/uniset.h> #include <unicode/unistr.h> // #include <libwccl/parser/ANTLRParserResult.h> // start our namespace again ANTLR_BEGIN_NAMESPACE(Wccl) } options { language = "Cpp"; genHashLines = false; namespace = "Wccl"; // genHashLines = true; } // ---------------------------------------------------------------------------- // ANTLR PARSER // 
// ----------------------------------------------------------------------------
class ANTLRParser extends Parser;
options {
	k = 1;
	buildAST = false;
	exportVocab = ANTLRExpr;
	defaultErrorHandler = false;
}
{
private:
	// Converts the text of a token to a UnicodeString. The text is read as
	// UTF-8 and escape sequences are resolved with UnicodeString::unescape().
	const UnicodeString token_ref_to_ustring(antlr::RefToken& rstr) const
	{
		return UnicodeString::fromUTF8(rstr->getText()).unescape();
	}
	/*
	const UnicodeString str_token_ref_to_ustring(antlr::RefToken& rstr) const
	{
		UnicodeString ret_ustr, ustr = token_ref_to_ustring(rstr);

		if (ustr.length() < 3) {
			return "";
		}

		ustr.extract(1, ustr.length() - 2, ret_ustr);

		return ret_ustr;
	}
	*/
	// Returns the token text with one pair of enclosing grave accents removed.
	// Text that is shorter than two characters, or that is not enclosed in
	// grave accents on both sides, is returned unchanged.
	const std::string str_token_rem_grav(antlr::RefToken& rstr) const
	{
		const std::string text = token_ref_to_std_string(rstr);
		const size_t len = text.length();

		if (len >= 2 && text[0] == '`' && text[len - 1] == '`') {
			return text.substr(1, len - 2);
		}

		return text;
	}
	// Returns the raw text of a token.
	const std::string token_ref_to_std_string(antlr::RefToken& rstr) const
	{
		return rstr->getText();
	}
	// Converts the text of an integer token (optionally signed) to int.
	// Throws ParserException when the text is not a valid integer or does not
	// fit in an int; atoi() was used here before and has undefined behaviour
	// for out-of-range values.
	int token_ref_to_int(antlr::RefToken& rstr)
	{
		const std::string text = rstr->getText();

		try {
			return boost::lexical_cast<int>(text);
		}
		catch (const boost::bad_lexical_cast&) {
			throw ParserException(
				"Invalid or out-of-range integer literal: " + text
			);
		}
	}
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// "GLOBAL" RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Rule for parsing string set operator with scope.
// Returns boost::shared_ptr<ANTLRParserResult<StrSet> >; the result carries
// the parsed operator (res->op) and the variables it was parsed against.
parse_strset_operator
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<ANTLRParserResult<StrSet> > res]
{
	boost::shared_ptr<Function<StrSet> > parsed;
	res.reset(new ANTLRParserResult<StrSet>());
}
	: parsed = string_operator [tagset, *res->variables]
	  { res->op = parsed; }
	  EOF
;

// ----------------------------------------------------------------------------
// Entry rule: a complete bool operator followed by end of input.
// Returns boost::shared_ptr<ANTLRParserResult<Bool> >
parse_bool_operator
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<ANTLRParserResult<Bool> > res]
{
	boost::shared_ptr<Function<Bool> > parsed;
	res.reset(new ANTLRParserResult<Bool>());
}
	: parsed = bool_operator [tagset, *res->variables]
	  { res->op = parsed; }
	  EOF
;

// ----------------------------------------------------------------------------
// Entry rule: a complete symbol set operator followed by end of input.
// Returns boost::shared_ptr<ANTLRParserResult<TSet> >
parse_symset_operator
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<ANTLRParserResult<TSet> > res]
{
	boost::shared_ptr<Function<TSet> > parsed;
	res.reset(new ANTLRParserResult<TSet>());
}
	: parsed = symset_operator [tagset, *res->variables]
	  { res->op = parsed; }
	  EOF
;

// ----------------------------------------------------------------------------
// Rule for parsing position operator with scope.
// Returns boost::shared_ptr<ANTLRParserResult<Position> >
parse_position_operator
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<ANTLRParserResult<Position> > res]
{
	boost::shared_ptr<Function<Position> > parsed;
	res.reset(new ANTLRParserResult<Position>());
}
	: parsed = position_operator [tagset, *res->variables]
	  { res->op = parsed; }
	  EOF
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VALUES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// String set literal: a single string, or a bracketed, comma separated
// (possibly empty) list of strings:
//   'a' "a" [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"]
// Every string is inserted into a freshly created StrSet.
// Returns boost::shared_ptr<StrSet>
strset_literal
	returns [boost::shared_ptr<StrSet> s_set]
{
	s_set.reset(new StrSet());
}
	: single: STRING {
		const UnicodeString only = token_ref_to_ustring(single);
		s_set->insert(only);
	}
	| LBRACKET
	  (
		first: STRING {
			const UnicodeString head = token_ref_to_ustring(first);
			s_set->insert(head);
		}
		(
			COMMA next: STRING {
				const UnicodeString tail = token_ref_to_ustring(next);
				s_set->insert(tail);
			}
		)*
	  )?
	  RBRACKET
;

// String set value: a strset literal wrapped in a constant function.
// Returns boost::shared_ptr<Constant<StrSet> >
strset_value
	returns [boost::shared_ptr<Constant<StrSet> > val]
{
	boost::shared_ptr<StrSet> literal;
}
	: literal = strset_literal {
		val.reset(new Constant<StrSet>(*literal));
	}
;

// ----------------------------------------------------------------------------
// Element of sym set. This rule, inserts element into symbol set
// with corresponding tagset.
// WARNING! This rule can throw ParserException! Be careful!
symset_elem
	[const Corpus2::Tagset& tagset, boost::shared_ptr<TSet>& t_set]
	: sym: SYMBOL {
		// Grave accents around the symbol are optional and not part of its name.
		const std::string symbol_name = str_token_rem_grav(sym);

		try {
			t_set->insert_symbol(tagset, symbol_name);
		}
		catch (Corpus2::TagParseError &tag_error) {
			// Report tagset errors to the caller as parser errors.
			throw ParserException(tag_error.info());
		}
	}
;

// Symbol set literal: a single symbol, or a (possibly empty) comma separated
// list of symbols in curly braces. Symbols may be enclosed in grave accents
// (see the SYMBOL lexer rule):
//   a  `a`  {}  {a}  {`a`}  {a, b}  {`a`, `b`}  {a, `b`}  {`a`, b}
// Returns boost::shared_ptr<TSet>
symset_literal
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<TSet> t_set]
{
	t_set.reset(new TSet());
}
	: symset_elem [tagset, t_set]
	| LCURLY
	  (
		symset_elem [tagset, t_set]
		(
			COMMA symset_elem [tagset, t_set]
		)*
	  )?
	  RCURLY
;

// Symbol set value: a symset literal wrapped in a constant function.
// Returns boost::shared_ptr<Constant<TSet> >
symset_value
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<Constant<TSet> > val]
{
	boost::shared_ptr<TSet> literal;
}
	: literal = symset_literal [tagset] {
		val.reset(new Constant<TSet>(*literal));
	}
;

// ----------------------------------------------------------------------------
// Bool literal: the keyword True or False, returned as a plain Bool value.
// Returns boost::shared_ptr<Bool>
bool_literal
	returns [boost::shared_ptr<Bool> val]
	: "True" {
		val.reset(new Bool(true));
	}
	| "False" {
		val.reset(new Bool(false));
	}
;

// Bool value: a bool literal wrapped in a constant function.
// Returns boost::shared_ptr<Constant<Bool> >
bool_value
	returns [boost::shared_ptr<Constant<Bool> > val]
{
	boost::shared_ptr<Bool> literal;
}
	: literal = bool_literal {
		val.reset(new Constant<Bool>(*literal));
	}
;

// ----------------------------------------------------------------------------
// Position literal may be:
// (+|-)?(0-9)+ or begin or end or nowhere
// Parsing position literal and returning plain position value.
// Returns boost::shared_ptr<Position>
position_literal
	returns [boost::shared_ptr<Position> val]
{
	int offset = 0;
}
	: offset = number {
		val.reset(new Position(offset));
	}
	| "begin" {
		val.reset(new Position(Position::Begin));
	}
	| "end" {
		val.reset(new Position(Position::End));
	}
	| "nowhere" {
		val.reset(new Position(Position::Nowhere));
	}
;

// Position value: a position literal wrapped in a constant function.
// Returns boost::shared_ptr<Constant<Position> >
position_value
	returns [boost::shared_ptr<Constant<Position> > val]
{
	boost::shared_ptr<Position> literal;
}
	: literal = position_literal {
		val.reset(new Constant<Position>(*literal));
	}
;

// ----------------------------------------------------------------------------
// Integer, with or without a sign: 1, +1, -1
number
	returns [int ret]
{
	ret = 0;
}
	: with_sign: SIGNED_INT {
		ret = token_ref_to_int(with_sign);
	}
	| no_sign: UNSIGNED_INT {
		ret = token_ref_to_int(no_sign);
	}
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VARIABLES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Position: $Name
// Calls vars.get_put for the named Position variable and then creates an
// accessor for it.
// Returns boost::shared_ptr<VariableAccessor<Position> >
position_variable_acc
	[Variables& vars]
	returns [boost::shared_ptr<VariableAccessor<Position> > pos_acc]
	: POS_PREFIX name_tok: SYMBOL {
		const std::string var_name = str_token_rem_grav(name_tok);

		vars.get_put<Position>(var_name);

		pos_acc.reset(
			new VariableAccessor<Position>(
				vars.create_accessor<Position>(var_name)
			)
		);
	}
;

// VarGetter for Position variable. This rule wrapped position_variable_acc.
// Returns boost::shared_ptr<VarGetter<Position> >
position_variable
	[Variables& vars]
	returns [boost::shared_ptr<VarGetter<Position> > op]
{
	boost::shared_ptr<VariableAccessor<Position> > accessor;
}
	: accessor = position_variable_acc [vars] {
		op.reset(new VarGetter<Position>(*accessor));
	}
;

// ----------------------------------------------------------------------------
// String set: $s:name
// Calls vars.get_put for the named StrSet variable and then creates an
// accessor for it.
// Returns boost::shared_ptr<VariableAccessor<StrSet> >
strset_variable_acc
	[Variables& vars]
	returns [boost::shared_ptr<VariableAccessor<StrSet> > strset_acc]
	: STR_PREFIX name_tok: SYMBOL {
		const std::string var_name = str_token_rem_grav(name_tok);

		vars.get_put<StrSet>(var_name);

		strset_acc.reset(
			new VariableAccessor<StrSet>(
				vars.create_accessor<StrSet>(var_name)
			)
		);
	}
;

// VarGetter for a StrSet variable; wraps strset_variable_acc.
// Returns boost::shared_ptr<VarGetter<StrSet> >
strset_variable
	[Variables& vars]
	returns [boost::shared_ptr<VarGetter<StrSet> > op]
{
	boost::shared_ptr<VariableAccessor<StrSet> > accessor;
}
	: accessor = strset_variable_acc [vars] {
		op.reset(new VarGetter<StrSet>(*accessor));
	}
;

// ----------------------------------------------------------------------------
// Symbol set: $t:name
// Calls vars.get_put for the named TSet variable and then creates an
// accessor for it.
// Returns boost::shared_ptr<VariableAccessor<TSet> >
symset_variable_acc
	[Variables& vars]
	returns [boost::shared_ptr<VariableAccessor<TSet> > symset_acc]
	: TST_PREFIX name_tok: SYMBOL {
		const std::string var_name = str_token_rem_grav(name_tok);

		vars.get_put<TSet>(var_name);

		symset_acc.reset(
			new VariableAccessor<TSet>(
				vars.create_accessor<TSet>(var_name)
			)
		);
	}
;

// Vargetter for symbol set variable.
// This rule wraps symset_variable_acc.
// Returns boost::shared_ptr<VarGetter<TSet> >
symset_variable
	[Variables& vars]
	returns [boost::shared_ptr<VarGetter<TSet> > op]
{
	boost::shared_ptr<VariableAccessor<TSet> > symset_acc;
}
	: symset_acc = symset_variable_acc [vars] {
		op.reset(new VarGetter<TSet>(*symset_acc.get()));
	}
;

// ----------------------------------------------------------------------------
// Bool: $b:name
// Calls vars.get_put for the named Bool variable and then creates an
// accessor for it.
// Returns boost::shared_ptr<VariableAccessor<Bool> >
bool_variable_acc
	[Variables& vars]
	returns [boost::shared_ptr<VariableAccessor<Bool> > bool_acc]
	: BOOL_PREFIX n: SYMBOL {
		// The variable name is the symbol text without enclosing grave accents.
		vars.get_put<Bool>(str_token_rem_grav(n));

		VariableAccessor<Bool> acc = vars.create_accessor<Bool>(str_token_rem_grav(n));

		bool_acc.reset(new VariableAccessor<Bool>(acc));
	}
;

// VarGetter for a Bool variable. It is only a wrapper for bool_variable_acc.
// Returns boost::shared_ptr<VarGetter<Bool> >
bool_variable
	[Variables& vars]
	returns [boost::shared_ptr<VarGetter<Bool> > op]
{
	boost::shared_ptr<VariableAccessor<Bool> > bool_acc;
}
	: bool_acc = bool_variable_acc [vars] {
		op.reset(new VarGetter<Bool>(*bool_acc.get()));
	}
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// OPERATORS
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Symbol set (tagset) operators
// Returns boost::shared_ptr<Function<TSet> >
//
// Each of the first four alternatives is guarded by a syntactic predicate
// ("(rule) => ..."), so the alternatives are tried in the order written.
// Do not reorder them without checking which inputs each predicate accepts
// (e.g. both symset_getsymbol and symset_var_val can start with SYMBOL).
///////////////////////////////////////////////////////////////////////////////
symset_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > ret]
	: (symset_condition[tagset, vars]) => (ret = symset_condition [tagset, vars])
	| (symset_getsymbol[tagset, vars]) => (ret = symset_getsymbol [tagset, vars])
	| (symset_var_val [tagset, vars]) => (ret = symset_var_val [tagset, vars])
	| (symset_class [tagset, vars]) => (ret = symset_class [tagset, vars])
	// Parenthesised symset operator.
	// NOTE(review): the line structure of this file was lost; confirm that
	// this alternative is meant to be active and not commented out.
	| LPAREN ret = symset_operator [tagset, vars] RPAREN
;

// ----------------------------------------------------------------------------
// Wrapper for symset variable and symset value.
symset_var_val
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > op]
	: op = symset_variable [vars]
	| op = symset_value [tagset]
;

// ----------------------------------------------------------------------------
// Condition of the symset value, in one of two forms:
//   if (Bool, TSet, TSet)   - the third argument (else-branch) is optional
//   ? TSet ? Bool           - the else-branch is never given
// When no else-branch is given, the two-argument Conditional constructor is
// used; per the original note ("? TSet ? Bool : {}") the default is presumably
// an empty set - confirm against Conditional<TSet>.
symset_condition
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > op]
{
	boost::shared_ptr<Function<Bool> > test;
	boost::shared_ptr<Function<TSet> > p_true, p_false;
}
	: "if" LPAREN test = bool_operator [tagset, vars] COMMA
	  p_true = symset_operator [tagset, vars]
	  (COMMA p_false = symset_operator [tagset, vars])?
	  RPAREN {
		// p_false stays null when the optional else-branch was not matched.
		if (p_false) {
			op.reset(new Conditional<TSet>(test, p_true, p_false));
		}
		else {
			op.reset(new Conditional<TSet>(test, p_true));
		}
	}
	| Q_MARK
	  (p_true = symset_operator [tagset, vars])
	  Q_MARK
	  (test = bool_operator [tagset, vars]) {
		op.reset(new Conditional<TSet>(test, p_true));
	}
;

// ----------------------------------------------------------------------------
// GetSymbol operator may be cas, m1, f, sg...
// WARNING! This rule can throw ParserException! Be careful!
symset_getsymbol
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > op]
{
	boost::shared_ptr<Wccl::Function<Position> > pos;
	Corpus2::Tag symbol_tag;
}
	: sym: SYMBOL LBRACKET pos = position_operator [tagset, vars] RBRACKET {
		try {
			symbol_tag = tagset.parse_symbol(str_token_rem_grav(sym));
		}
		catch (Corpus2::TagParseError &tag_error) {
			// Report tagset errors to the caller as parser errors.
			throw ParserException(tag_error.info());
		}

		op.reset(new Wccl::GetSymbols(symbol_tag, pos));
	}
;

// ----------------------------------------------------------------------------
// Word class operator: class[Position]
symset_class
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > ret]
{
	boost::shared_ptr<Function<Position> > where;
}
	: "class" LBRACKET where = position_operator [tagset, vars] RBRACKET {
		ret.reset(new GetWordClass(where));
	}
;

///////////////////////////////////////////////////////////////////////////////
// Position operator
// Returns boost::shared_ptr<Function<Position> >
///////////////////////////////////////////////////////////////////////////////
position_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Position> > ret]
	:
	(
		  ret = position_var_val [vars]
		| ret = position_condition [tagset, vars]
		| LPAREN ret = position_operator [tagset, vars] RPAREN
	)
	(
		// A SIGNED_INT directly after a position turns it into a position
		// relative to the one just parsed.
		shift_tok: SIGNED_INT {
			const int shift = token_ref_to_int(shift_tok);
			ret.reset(new RelativePosition(ret, shift));
		}
	)?
;

// ----------------------------------------------------------------------------
// Position value or position variable.
position_var_val
	[Variables& vars]
	returns [boost::shared_ptr<Function<Position> > ret]
	: ret = position_value
	| ret = position_variable [vars]
;

// ----------------------------------------------------------------------------
// Condition of the position value
// if (Bool, Position, Position)
// ? Position ?
// Bool : 0
position_condition
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Position> > op]
{
	boost::shared_ptr<Function<Bool> > cond;
	boost::shared_ptr<Function<Position> > when_true;
	boost::shared_ptr<Function<Position> > when_false;
}
	: "if" LPAREN cond = bool_operator [tagset, vars] COMMA
	  when_true = position_operator [tagset, vars]
	  (COMMA when_false = position_operator [tagset, vars])?
	  RPAREN {
		// when_false is null when the optional else-branch was not given.
		if (!when_false) {
			op.reset(new Conditional<Position>(cond, when_true));
		}
		else {
			op.reset(new Conditional<Position>(cond, when_true, when_false));
		}
	}
	| Q_MARK when_true = position_operator [tagset, vars]
	  Q_MARK cond = bool_operator [tagset, vars] {
		op.reset(new Conditional<Position>(cond, when_true));
	}
;

///////////////////////////////////////////////////////////////////////////////
// String operator
// Returns boost::shared_ptr<Function<StrSet> >
///////////////////////////////////////////////////////////////////////////////
string_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
	: ret = strset_orth [tagset, vars]
	| ret = strset_base [tagset, vars]
	| ret = strset_lower [tagset, vars]
	| ret = strset_upper [tagset, vars]
	| ret = strset_affix [tagset, vars]
	| ret = strset_var_val [tagset, vars]
	| ret = strset_condition [tagset, vars]
	// Parenthesised string operator.
	// NOTE(review): the line structure of this file was lost; confirm that
	// this alternative is meant to be active and not commented out.
	| LPAREN ret = string_operator [tagset, vars] RPAREN
;

// ----------------------------------------------------------------------------
// Orth operator: orth[Position]
strset_orth
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	boost::shared_ptr<Function<Position> > where;
}
	: "orth" LBRACKET where = position_operator [tagset, vars] RBRACKET {
		ret.reset(new GetOrth(where));
	}
;

// ----------------------------------------------------------------------------
// Base operator.
strset_base
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	boost::shared_ptr<Function<Position> > where;
}
	: "base" LBRACKET where = position_operator [tagset, vars] RBRACKET {
		ret.reset(new GetLemmas(where));
	}
;

// ----------------------------------------------------------------------------
// Lower operator: lower(StrSet)
strset_lower
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	boost::shared_ptr<Function<StrSet> > inner;
}
	: "lower" LPAREN inner = string_operator [tagset, vars] RPAREN {
		ret.reset(new ToLower(inner));
	}
;

// ----------------------------------------------------------------------------
// Upper operator: upper(StrSet)
strset_upper
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	boost::shared_ptr<Function<StrSet> > inner;
}
	: "upper" LPAREN inner = string_operator [tagset, vars] RPAREN {
		ret.reset(new ToUpper(inner));
	}
;

// ----------------------------------------------------------------------------
// Affix operator: affix(StrSet, number)
strset_affix
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	boost::shared_ptr<Function<StrSet> > inner;
	int affix_len = 0;
}
	: "affix" LPAREN
	  inner = string_operator [tagset, vars]
	  COMMA
	  affix_len = number
	  RPAREN {
		ret.reset(new Affix(inner, affix_len));
	}
;

// ----------------------------------------------------------------------------
// String set value or string set variable.
strset_var_val
	[const Corpus2::Tagset& /*tagset*/, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > op]
	: op = strset_value
	| op = strset_variable [vars]
;

// ----------------------------------------------------------------------------
// Condition of the strset value
// if (Bool, StrSet, StrSet)
// ? StrSet ?
// Bool : []
// When no else-branch is given, the two-argument Conditional constructor is
// used.
strset_condition
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > op]
{
	boost::shared_ptr<Function<Bool> > test;
	boost::shared_ptr<Function<StrSet> > p_true, p_false;
}
	: "if" LPAREN test = bool_operator [tagset, vars] COMMA
	  p_true = string_operator [tagset, vars]
	  (COMMA p_false = string_operator [tagset, vars])?
	  RPAREN {
		// p_false stays null when the optional else-branch was not matched.
		if (p_false) {
			op.reset(new Conditional<StrSet>(test, p_true, p_false));
		}
		else {
			op.reset(new Conditional<StrSet>(test, p_true));
		}
	}
	| Q_MARK
	  p_true = string_operator [tagset, vars]
	  Q_MARK
	  test = bool_operator [tagset, vars] {
		op.reset(new Conditional<StrSet>(test, p_true));
	}
;

///////////////////////////////////////////////////////////////////////////////
// Bool operator
// Returns boost::shared_ptr<Function<Bool> >
///////////////////////////////////////////////////////////////////////////////
bool_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
	: ret = bool_and [tagset, vars]
	| ret = bool_or [tagset, vars]
	| ret = bool_nor [tagset, vars]
	| ret = bool_var_val [tagset, vars]
	| ret = bool_regex [tagset, vars]
	| ret = bool_inout [tagset, vars]
	| ret = bool_condition [tagset, vars]
	// setvar:
	| ret = setvar_operator [tagset, vars]
	// equal/in/inter:
	| ret = equal_operator [tagset, vars]
	| ret = in_operator [tagset, vars]
	| ret = inter_operator [tagset, vars]
	// Parenthesised bool operator.
	// NOTE(review): the line structure of this file was lost; confirm that
	// this alternative is meant to be active and not commented out.
	| LPAREN ret = bool_operator [tagset, vars] RPAREN
;

// ----------------------------------------------------------------------------
// One or more comma-separated predicates (bool operators), collected in the
// order in which they appear.
bool_operator_comma_sep
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns
		[boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v]
{
	boost::shared_ptr<Function<Bool> > pred;

	ret_v.reset(
		new std::vector<boost::shared_ptr<Function<Bool> > >
	);
}
	: pred = bool_operator [tagset, vars] {
		ret_v->push_back(pred);
	}
	(
		COMMA pred = bool_operator [tagset, vars] {
			ret_v->push_back(pred);
		}
	)*
;

// ----------------------------------------------------------------------------
// And operator: and(Bool, ...)
bool_and
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v;
}
	: "and" LPAREN ret_v = bool_operator_comma_sep [tagset, vars] RPAREN {
		op.reset(new And(ret_v));
	}
;

// ----------------------------------------------------------------------------
// Or operator: or(Bool, ...)
bool_or
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v;
}
	: "or" LPAREN ret_v = bool_operator_comma_sep [tagset, vars] RPAREN {
		op.reset(new Or(ret_v));
	}
;

// ----------------------------------------------------------------------------
// Nor/Not operator: the keyword is "not", the constructed operator is Nor.
bool_nor
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v;
}
	: "not" LPAREN ret_v = bool_operator_comma_sep [tagset, vars] RPAREN {
		op.reset(new Nor(ret_v));
	}
;

// ----------------------------------------------------------------------------
// Wrapper for bool value and bool variable
bool_var_val
	[const Corpus2::Tagset& /*tagset*/, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	: op = bool_value
	| op = bool_variable [vars]
;

// ----------------------------------------------------------------------------
// Regex operator: regex(StrSet, "pattern")
// The pattern is taken from a STRING token and converted with
// token_ref_to_ustring.
bool_regex
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<StrSet> > expr;
}
	: "regex" LPAREN expr = string_operator [tagset, vars] COMMA reg: STRING RPAREN {
		op.reset(new Regex(expr, token_ref_to_ustring(reg)));
	}
;

// ----------------------------------------------------------------------------
// Inside/outside predicates: inside(Position), outside(Position)
bool_inout
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<Position> > ret_pos;
}
	: "inside" LPAREN ret_pos = position_operator [tagset, vars] RPAREN {
		op.reset(new IsInside(ret_pos));
	}
	| "outside" LPAREN ret_pos = position_operator [tagset, vars] RPAREN {
		op.reset(new IsOutside(ret_pos));
	}
;

// ----------------------------------------------------------------------------
// Condition of the bool value
// if (Bool, Bool, Bool)
// ? Bool ? Bool : False
// When no else-branch is given, the two-argument Conditional constructor is
// used.
bool_condition
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<Bool> > test, p_true, p_false;
}
	: "if" LPAREN test = bool_operator [tagset, vars] COMMA
	  p_true = bool_operator [tagset, vars]
	  (COMMA p_false = bool_operator [tagset, vars])?
	  RPAREN {
		// p_false stays null when the optional else-branch was not matched.
		if (p_false) {
			op.reset(new Conditional<Bool>(test, p_true, p_false));
		}
		else {
			op.reset(new Conditional<Bool>(test, p_true));
		}
	}
	| Q_MARK
	  p_true = bool_operator [tagset, vars]
	  Q_MARK
	  test = bool_operator [tagset, vars] {
		op.reset(new Conditional<Bool>(test, p_true));
	}
;

// ----------------------------------------------------------------------------
// Equal operator: equal(X, X) for X in Position, TSet, StrSet, Bool.
// The argument type is chosen by syntactic predicates tried in this order:
// position, symbol set, string set; bool is the unguarded fallback.
equal_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<TSet> > t1, t2;
	boost::shared_ptr<Function<Bool> > b1, b2;
	boost::shared_ptr<Function<StrSet> > s1, s2;
	boost::shared_ptr<Function<Position> > p1, p2;
}
	: "equal" LPAREN
	(
		(position_operator [tagset, vars]) =>
		(
			p1 = position_operator [tagset, vars]
			COMMA
			p2 = position_operator [tagset, vars] {
				op.reset(new Equals<Position>(p1, p2));
			}
		)
	|
		(symset_operator [tagset, vars]) =>
		(
			t1 = symset_operator [tagset, vars]
			COMMA
			t2 = symset_operator [tagset, vars] {
				op.reset(new Equals<TSet>(t1, t2));
			}
		)
	|
		(string_operator [tagset, vars]) =>
		(
			s1 = string_operator [tagset, vars]
			COMMA
			s2 = string_operator [tagset, vars] {
				op.reset(new Equals<StrSet>(s1, s2));
			}
		)
	|
		(
			b1 = bool_operator [tagset, vars]
			COMMA
			b2 = bool_operator [tagset, vars] {
				op.reset(new Equals<Bool>(b1, b2));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// In operator: in(X, X) for X in TSet, StrSet; builds IsSubsetOf.
// A syntactic predicate selects the symbol set form; string set is the
// unguarded fallback.
in_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<TSet> > t1, t2;
	boost::shared_ptr<Function<StrSet> > s1, s2;
}
	: "in" LPAREN
	(
		(symset_operator [tagset, vars]) =>
		(
			t1 = symset_operator [tagset, vars]
			COMMA
			t2 = symset_operator [tagset, vars] {
				op.reset(new IsSubsetOf<TSet>(t1, t2));
			}
		)
	|
		(
			s1 = string_operator [tagset, vars]
			COMMA
			s2 = string_operator [tagset, vars] {
				op.reset(new IsSubsetOf<StrSet>(s1, s2));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// Inter operator: inter(X, X) for X in TSet, StrSet; builds Intersects.
// A syntactic predicate selects the symbol set form; string set is the
// unguarded fallback.
inter_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<TSet> > t1, t2;
	boost::shared_ptr<Function<StrSet> > s1, s2;
}
	: "inter" LPAREN
	(
		(symset_operator [tagset, vars]) =>
		(
			t1 = symset_operator [tagset, vars]
			COMMA
			t2 = symset_operator [tagset, vars] {
				op.reset(new Intersects<TSet>(t1, t2));
			}
		)
	|
		(
			s1 = string_operator [tagset, vars]
			COMMA
			s2 = string_operator [tagset, vars] {
				op.reset(new Intersects<StrSet>(s1, s2));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// Setvar operator: setvar(variable, value)
// The variable prefix token decides which typed setvar rule is used.
// Returns boost::shared_ptr<Function<Bool> >
// ----------------------------------------------------------------------------
setvar_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
	: "setvar" LPAREN
	  (
		  ret = position_setvar [tagset, vars]
		| ret = bool_setvar [tagset, vars]
		| ret = strset_setvar [tagset, vars]
		| ret = symset_setvar [tagset, vars]
	  )
	  RPAREN
;

// ----------------------------------------------------------------------------
// Setvar for position: Position variable, COMMA, position operator
position_setvar
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<Position> > ret_op;
	boost::shared_ptr<VariableAccessor<Position> > ret_acc;
}
	: ret_acc = position_variable_acc [vars]
	  COMMA
	  ret_op = position_operator [tagset, vars] {
		op.reset(new VarSetter<Position>(*ret_acc.get(), ret_op));
	}
;

// ----------------------------------------------------------------------------
// Setvar for bool: Bool variable, COMMA, bool operator
bool_setvar
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<Bool> > ret_op;
	boost::shared_ptr<VariableAccessor<Bool> > ret_acc;
}
	: ret_acc = bool_variable_acc [vars]
	  COMMA
	  ret_op = bool_operator [tagset, vars] {
		op.reset(new VarSetter<Bool>(*ret_acc.get(), ret_op));
	}
;

// ----------------------------------------------------------------------------
// Setvar for strset: StrSet variable, COMMA, string operator
strset_setvar
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<StrSet> > ret_op;
	boost::shared_ptr<VariableAccessor<StrSet> > ret_acc;
}
	: ret_acc = strset_variable_acc [vars]
	  COMMA
	  ret_op = string_operator [tagset, vars] {
		op.reset(new VarSetter<StrSet>(*ret_acc.get(), ret_op));
	}
;

// ----------------------------------------------------------------------------
// Setvar for symset: TSet variable, COMMA, symset operator
symset_setvar
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<TSet> > ret_op;
	boost::shared_ptr<VariableAccessor<TSet> > ret_acc;
}
	: ret_acc = symset_variable_acc [vars]
	  COMMA
	  ret_op = symset_operator [tagset, vars] {
		op.reset(new VarSetter<TSet>(*ret_acc.get(), ret_op));
	}
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ANTLR LEXER
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Lexer for the parser above. Literal testing is off by default and is
// switched on for SYMBOL only, so keywords quoted in parser rules are
// recognised only where a SYMBOL would otherwise be produced.
class ANTLRLexer extends Lexer;
options {
	exportVocab = ANTLRExpr;
	charVocabulary = '\3'..'\377';
	testLiterals = false;
	k = 2;
}

// String in double or single quotes. The quotes themselves are matched with
// the "!" suffix, i.e. they are not included in the token text. There is no
// escape for the closing quote character inside the string.
STRING
options {
	paraphrase = "a string";
}
	: '"'! (~'"')* '"'!
	| '\''! (~'\'')* '\''!
;

// Sign, optional spaces/tabs (dropped from the token text), then digits.
SIGNED_INT
options {
	paraphrase = "Signed integer";
}
	: ('-'|'+') (' '!|'\t'!)* ('0'..'9')+
;

UNSIGNED_INT
options {
	paraphrase = "Unsigned integer";
}
	: ('0'..'9')+
;

// NOTE(review): this rule matches the single quote character while APOS_MARK
// below matches the double quote character; the names and paraphrases look
// swapped. Both rules also start with the same characters as STRING, and
// neither token is referenced by the parser rules in this file - confirm
// whether they are still needed.
QUOT_MARK
options {
	paraphrase = "Quote";
}
	: '\''
;

APOS_MARK
options {
	paraphrase = "Apostrophe";
}
	: '"'
;

Q_MARK
options {
	paraphrase = "Question mark";
}
	: '?'
;

E_MARK
options {
	paraphrase = "Exclamation mark";
}
	: '!'
;

// Variable prefixes. The typed prefixes are "$s:", "$t:" and "$b:"; a bare
// "$" introduces a position variable.
// NOTE(review): with k = 2 a position variable whose name starts with s, t or
// b (e.g. "$start") presumably cannot be told apart from a typed prefix by
// lookahead alone - verify how such names are lexed.
STR_PREFIX
options {
	paraphrase = "String prefix";
}
	: "$s:"
;

TST_PREFIX
options {
	paraphrase = "Symset prefix";
}
	: "$t:"
;

BOOL_PREFIX
options {
	paraphrase = "Bool prefix";
}
	: "$b:"
;

POS_PREFIX
options {
	paraphrase = "Position prefix";
}
	: '$'
;

LBRACKET
options {
	paraphrase = "'['";
}
	: '['
;

RBRACKET
options {
	paraphrase = "']'";
}
	: ']'
;

LPAREN
options {
	paraphrase = "'('";
}
	: '('
;

RPAREN
options {
	paraphrase = "')'";
}
	: ')'
;

LCURLY
options {
	paraphrase = "'{'";
}
	: '{'
;

RCURLY
options {
	paraphrase = "'}'";
}
	: '}'
;

AT_MARK
options {
	paraphrase = "'@'";
}
	: '@'
;

COMMA
options {
	paraphrase = "','";
}
	: ','
;

// Identifier made of ASCII letters, digits and underscores (not starting with
// a digit), either bare or enclosed in grave accents. The grave accents stay
// in the token text and are removed in the parser (str_token_rem_grav).
// testLiterals = true makes the lexer check the matched text against the
// literals table, which is how parser keywords are told apart from symbols.
SYMBOL
options {
	paraphrase = "Symbol";
	testLiterals = true;
}
	: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
	| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
;

// Whitespace is skipped; line breaks additionally call newline().
WS
	:
	(
		  ' '
		| '\t'
		| '\f'
		|
		(
			  "\r\n"
			| '\r'
			| '\n'
		) { newline(); }
	) { $setType(antlr::Token::SKIP); }
;

// Single line comment: skipped up to (not including) the end of the line.
COMMENT
options {
	paraphrase = "Single line comment";
}
	: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP); }
;

// Multi line comment: skipped; line breaks inside it call newline(). The
// semantic predicate stops a '*' from being consumed by the loop when it is
// directly followed by '/'.
ML_COMMENT
options {
	paraphrase = "Multi line comment";
}
	: "/*"
	(			// TODO: test it and add reference to the site it's taken from!
				/* This actually works OK despite the ambiguity that
				'\r' '\n' can be matched in one alternative or by matching
				'\r' in one iteration and '\n' in another.. But
				this is really matched just by one rule per (...)*
				loop iteration, so it's OK.
				This is exactly how they do it all over the web - just
				turn off the warning for this particular token.*/
		options {
			generateAmbigWarnings = false;
		}
		: { LA(2)!='/' }? '*'
		| '\r' '\n' { newline(); }
		| '\r' { newline(); }
		| '\n' { newline(); }
		| ~('*'|'\n'|'\r')
	)*
	"*/"
	{ $setType(antlr::Token::SKIP); }
;

HASH
options {
	paraphrase = "'#'";
}
	: '#'
;

//DSEPARATOR
//options {
//	paraphrase = "':-'";
//}
//	: ":-"
//;