header {
	//don't try to add all the headers inside our namespace
	// ANTLR_END_NAMESPACE
	#include <libwccl/parser/ParserException.h>

	#include <cerrno>
	#include <climits>
	#include <cstdio>
	#include <cstdlib>
	#include <string>
	#include <antlr/Token.hpp>
	#include <boost/lexical_cast.hpp>

	// values/variables
	#include <libwccl/variables.h>
	#include <libwccl/values/bool.h>
	#include <libwccl/values/tset.h>
	#include <libwccl/values/strset.h>
	#include <libwccl/values/position.h>

	// sentence context
	#include <libwccl/sentencecontext.h>

	// operators
	#include <libwccl/ops/functions/bool/predicates/or.h>
	#include <libwccl/ops/functions/bool/predicates/nor.h>
	#include <libwccl/ops/functions/bool/predicates/and.h>
	#include <libwccl/ops/functions/bool/predicates/regex.h>
	#include <libwccl/ops/functions/bool/predicates/equals.h>
	#include <libwccl/ops/functions/bool/varsetter.h>
	#include <libwccl/ops/functions/strset/affix.h>
	#include <libwccl/ops/functions/strset/getorth.h>
	#include <libwccl/ops/functions/strset/toupper.h>
	#include <libwccl/ops/functions/strset/tolower.h>
	#include <libwccl/ops/functions/constant.h>
	#include <libwccl/ops/functions/vargetter.h>
	#include <libwccl/ops/functions/bool/predicates/intersects.h>
	#include <libwccl/ops/functions/bool/predicates/issubsetof.h>
	#include <libwccl/ops/functions/bool/predicates/isinside.h>
	#include <libwccl/ops/functions/bool/predicates/isoutside.h>
	#include <libwccl/ops/functions/position/relativeposition.h>
	#include <libwccl/ops/functions/conditional.h>

	// Unicode String
	#include <unicode/uniset.h>
	#include <unicode/unistr.h>

	// parser result
	#include <libwccl/parser/ANTLRParserResult.h>

	// start our namespace again
	// ANTLR_BEGIN_NAMESPACE(Wccl)
}

options {
	language = "Cpp";
	genHashLines = false;
//	namespace = "Wccl";
//	genHashLines = true;
}

// ----------------------------------------------------------------------------
// ANTLR PARSER
// ----------------------------------------------------------------------------
class ANTLRParser extends Parser;
options {
	k = 1;
	buildAST = false;
	exportVocab = ANTLRExpr;
	defaultErrorHandler = false;
}
{
private:
	// Text of the token, read as UTF-8 and passed through
	// UnicodeString::unescape().
	const UnicodeString token_ref_to_ustring(antlr::RefToken& rstr) const {
		return UnicodeString::fromUTF8(rstr->getText()).unescape();
	}
	/*
	const UnicodeString str_token_ref_to_ustring(antlr::RefToken& rstr) const {
		UnicodeString ret_ustr, ustr = token_ref_to_ustring(rstr);

		if (ustr.length() < 3) {
			return "";
		}

		ustr.extract(1, ustr.length() - 2, ret_ustr);
		return ret_ustr;
	}
	*/
	// Text of the token with one pair of enclosing grave accents (`name`)
	// removed. Text shorter than two characters, or not enclosed in grave
	// accents on both sides, is returned unchanged.
	const std::string str_token_rem_grav(antlr::RefToken& rstr) const {
		const std::string text = token_ref_to_std_string(rstr);
		const size_t len = text.length();

		if (len >= 2 && text[0] == '`' && text[len - 1] == '`') {
			return text.substr(1, len - 2);
		}
		return text;
	}
	// Raw text of the token.
	const std::string token_ref_to_std_string(antlr::RefToken& rstr) const {
		return rstr->getText();
	}
	// Text of the token converted to int (an optional sign followed by
	// decimal digits). Throws ParserException when the text does not start
	// with a number or the value does not fit in an int; atoi, used
	// previously, has undefined behaviour for out-of-range input.
	int token_ref_to_int(antlr::RefToken& rstr) const {
		const std::string text = rstr->getText();
		char* end = 0;

		errno = 0;
		const long value = std::strtol(text.c_str(), &end, 10);
		if (end == text.c_str() || errno == ERANGE
				|| value < INT_MIN || value > INT_MAX) {
			throw ParserException(
				std::string("Invalid or out-of-range integer: ") + text);
		}
		return static_cast<int>(value);
	}
}

// TODO
//  - base, orth

// TEMPORARY CHANGES ->
// -> temporarily commented out the invocation of the condit_* rules
// -> temporarily commented out 2 rules in equal

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// "GLOBAL" RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// ----------------------------------------------------------------------------
// Rules for parsing string operators in scope (variables).
// Returns boost::shared_ptr<ANTLRParserResult<Wccl::StrSet> >
// (the parsed operator is stored in res->op, the variables it was parsed
// against are res->variables). The whole input must be consumed (EOF).
parse_string_operator
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<ANTLRParserResult<Wccl::StrSet> > res]
{
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > root;
	res.reset(new ANTLRParserResult<Wccl::StrSet>());
}
	: root = string_operators [tagset, *res->variables.get()]
	  { res->op = root; }
	  EOF
;

// ----------------------------------------------------------------------------
// Rules for parsing predicates in scope (variables).
// Returns boost::shared_ptr<ANTLRParserResult<Wccl::Bool> >
parse_predicates
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<ANTLRParserResult<Wccl::Bool> > res]
{
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > root;
	res.reset(new ANTLRParserResult<Wccl::Bool>());
}
	: root = logical_predicates [tagset, *res->variables.get()]
	  { res->op = root; }
	  EOF
;

// ----------------------------------------------------------------------------
// Rules for parsing tagset (symbol set) operators
// Returns boost::shared_ptr<ANTLRParserResult<Wccl::TSet> >
parse_sym_set_operator
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<ANTLRParserResult<Wccl::TSet> > res]
{
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > root;
	res.reset(new ANTLRParserResult<Wccl::TSet>());
}
	: root = sym_set_operators [tagset, *res->variables.get()]
	  { res->op = root; }
	  EOF
;

// ----------------------------------------------------------------------------
// Rules for parsing position operators
// Returns boost::shared_ptr<ANTLRParserResult<Wccl::Position> >
parse_position_operator
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<ANTLRParserResult<Wccl::Position> > res]
{
	boost::shared_ptr<Wccl::Function<Wccl::Position> > root;
	res.reset(new ANTLRParserResult<Wccl::Position>());
}
	: root = position_operators [tagset, *res->variables.get()]
	  { res->op = root; }
	  EOF
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VALUES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// ----------------------------------------------------------------------------
// Single or multiple values in a string set:
//   [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"]
// A lone string without brackets is accepted as a one-element set.
// Returns boost::shared_ptr<Wccl::StrSet>
str_set_literal
	returns [boost::shared_ptr<Wccl::StrSet> s_set]
{
	s_set.reset(new Wccl::StrSet());
}
	: single: STRING { s_set->insert(token_ref_to_ustring(single)); }
	| LBRACKET
	  (
		first: STRING { s_set->insert(token_ref_to_ustring(first)); }
		(
			COMMA next: STRING { s_set->insert(token_ref_to_ustring(next)); }
		)*
	  )?
	  RBRACKET
;

// Constant string set
// Returns boost::shared_ptr<Wccl::Constant<Wccl::StrSet> >
str_set_value
	returns [boost::shared_ptr<Wccl::Constant<Wccl::StrSet> > val]
{
	boost::shared_ptr<Wccl::StrSet> literal;
}
	: literal = str_set_literal {
		val.reset(new Wccl::Constant<Wccl::StrSet>(*literal));
	}
;

// ----------------------------------------------------------------------------
// Element of a symbol set. This rule inserts the element into t_set.
// Element may be: a or `a`
// A Corpus2::TagParseError raised by the insertion is rethrown as
// ParserException carrying the same info text.
sym_set_elem
	[const Corpus2::Tagset& tagset, boost::shared_ptr<Wccl::TSet>& t_set]
	: sym: SYMBOL {
		try {
			t_set->insert_symbol(tagset, str_token_rem_grav(sym));
		}
		catch (Corpus2::TagParseError &e) {
			throw ParserException(e.info());
		}
	}
;

// Symbol set literal
//   {} {sym_set_elem} {sym_set_elem, ..., sym_set_elem}
// A lone element without curly braces is accepted as a one-element set.
// Returns boost::shared_ptr<Wccl::TSet>
sym_set_literal
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<Wccl::TSet> t_set]
{
	t_set.reset(new Wccl::TSet());
}
	: sym_set_elem[tagset, t_set]
	| LCURLY
	  (
		sym_set_elem[tagset, t_set] (COMMA sym_set_elem[tagset, t_set])*
	  )?
	  RCURLY
;

// Constant symbol set
// Returns boost::shared_ptr<Wccl::Constant<Wccl::TSet> >
sym_set_value
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<Wccl::Constant<Wccl::TSet> > val]
{
	boost::shared_ptr<Wccl::TSet> literal;
}
	: literal = sym_set_literal [tagset] {
		val.reset(new Wccl::Constant<Wccl::TSet>(*literal));
	}
;

// ----------------------------------------------------------------------------
// Boolean value:
// Literal bool value may be True or False
// Returns boost::shared_ptr<Wccl::Bool>
bool_literal
	returns [boost::shared_ptr<Wccl::Bool> val]
	: "True"  { val.reset(new Wccl::Bool(true)); }
	| "False" { val.reset(new Wccl::Bool(false)); }
;

// Constant bool value
// Returns boost::shared_ptr<Wccl::Constant<Wccl::Bool> >
boolean_value
	returns [boost::shared_ptr<Wccl::Constant<Wccl::Bool> > val]
{
	boost::shared_ptr<Wccl::Bool> literal;
}
	: literal = bool_literal {
		val.reset(new Wccl::Constant<Wccl::Bool>(*literal));
	}
;

// ----------------------------------------------------------------------------
// Position value:
// Position literal may be (+|-)?(0-9)+ or begin or end or nowhere
// Returns boost::shared_ptr<Wccl::Position>
position_literal
	returns [boost::shared_ptr<Wccl::Position> val]
{
	int offset = 0;
}
	: offset = number { val.reset(new Wccl::Position(offset)); }
	| "begin"   { val.reset(new Wccl::Position(Wccl::Position::Begin)); }
	| "end"     { val.reset(new Wccl::Position(Wccl::Position::End)); }
	| "nowhere" { val.reset(new Wccl::Position(Wccl::Position::Nowhere)); }
;

// Constant position value
// Returns boost::shared_ptr<Wccl::Constant<Wccl::Position> >
position_value
	returns [boost::shared_ptr<Wccl::Constant<Wccl::Position> > val]
{
	boost::shared_ptr<Wccl::Position> literal;
}
	: literal = position_literal {
		val.reset(new Wccl::Constant<Wccl::Position>(*literal));
	}
;

// ----------------------------------------------------------------------------
// Number may be
// unsigned or signed
// Returns the integer value of the matched token.
number
	returns [int ret]
{
	ret = 0;
}
	: signed_tok: SIGNED_INT     { ret = token_ref_to_int(signed_tok); }
	| unsigned_tok: UNSIGNED_INT { ret = token_ref_to_int(unsigned_tok); }
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VARIABLES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// ----------------------------------------------------------------------------
// Position: $name
// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> >
position_variable_acc
	[Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > pos_acc]
	: POS_PREFIX name_tok: SYMBOL {
		const std::string var_name = str_token_rem_grav(name_tok);
		// get/put the variable in vars, then build an accessor for it
		vars.get_put<Wccl::Position>(var_name);
		pos_acc.reset(new Wccl::VariableAccessor<Wccl::Position>(
			vars.create_accessor<Wccl::Position>(var_name)));
	}
;

// Position vargetter
// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::Position> >
position_variable
	[Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::VarGetter<Wccl::Position> > op]
{
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > accessor;
}
	: accessor = position_variable_acc [vars] {
		op.reset(new Wccl::VarGetter<Wccl::Position>(*accessor));
	}
;

// ----------------------------------------------------------------------------
// Relative position: relpos(position, number)
relpos
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Position> > ret]
{
	boost::shared_ptr<Wccl::Function<Wccl::Position> > base_pos;
	int offset = 0;
}
	: "relpos" LPAREN
	  base_pos = op_position [tagset, vars] COMMA offset = number
	  RPAREN {
		ret.reset(new Wccl::RelativePosition(base_pos, offset));
	}
;

// ----------------------------------------------------------------------------
// String set, $s:name
// This expression gets variable of the type StrSet from
// string-named variable
// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> >
str_set_variable_acc
	[Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > strset_acc]
	: STR_PREFIX name_tok: SYMBOL {
		const std::string var_name = str_token_rem_grav(name_tok);
		// get/put the variable in vars, then build an accessor for it
		vars.get_put<Wccl::StrSet>(var_name);
		strset_acc.reset(new Wccl::VariableAccessor<Wccl::StrSet>(
			vars.create_accessor<Wccl::StrSet>(var_name)));
	}
;

// Vargetter for StrSet variable
// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::StrSet> >
str_set_variable
	[Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::VarGetter<Wccl::StrSet> > op]
{
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > accessor;
}
	: accessor = str_set_variable_acc [vars] {
		op.reset(new Wccl::VarGetter<Wccl::StrSet>(*accessor));
	}
;

// ----------------------------------------------------------------------------
// Symbol set: $t:name
// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> >
sym_set_variable_acc
	[Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > symset_acc]
	: TST_PREFIX name_tok: SYMBOL {
		const std::string var_name = str_token_rem_grav(name_tok);
		vars.get_put<Wccl::TSet>(var_name);
		symset_acc.reset(new Wccl::VariableAccessor<Wccl::TSet>(
			vars.create_accessor<Wccl::TSet>(var_name)));
	}
;

// Vargetter for symbol set variable
// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::TSet> >
sym_set_variable
	[Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::VarGetter<Wccl::TSet> > op]
{
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > accessor;
}
	: accessor = sym_set_variable_acc [vars] {
		op.reset(new Wccl::VarGetter<Wccl::TSet>(*accessor));
	}
;

// ----------------------------------------------------------------------------
// Bool: $b:name
// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> >
boolean_variable_acc
	[Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > bool_acc]
	: BOOL_PREFIX name_tok: SYMBOL {
		const std::string var_name = str_token_rem_grav(name_tok);
		vars.get_put<Wccl::Bool>(var_name);
		bool_acc.reset(new Wccl::VariableAccessor<Wccl::Bool>(
			vars.create_accessor<Wccl::Bool>(var_name)));
	}
;

// Vargetter for bool variable
// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::Bool> >
boolean_variable
	[Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::VarGetter<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > accessor;
}
	: accessor = boolean_variable_acc [vars] {
		op.reset(new Wccl::VarGetter<Wccl::Bool>(*accessor));
	}
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// OPERATORS
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Setvar operator: setvar(variable, value-expression)
// Returns boost::shared_ptr<Wccl::Function<Wccl::Bool> >
// ----------------------------------------------------------------------------
setvar_op
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > ret]
	: "setvar" LPAREN
	  (
		  ret = setvar_body_pos  [tagset, vars]
		| ret = setvar_body_bool [tagset, vars]
		| ret = setvar_body_sset [tagset, vars]
		| ret = setvar_body_tset [tagset, vars]
	  )
	  RPAREN
;

// Implementations of setvar, one per variable type:
// ----------------------------------------------------------------------------
setvar_body_pos
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > target;
	boost::shared_ptr<Wccl::Function<Wccl::Position> > value_op;
}
	: target = position_variable_acc [vars]
	  COMMA
	  value_op = op_position [tagset, vars] {
		op.reset(new Wccl::VarSetter<Wccl::Position>(*target, value_op));
	}
;

// ----------------------------------------------------------------------------
setvar_body_bool
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > target;
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > value_op;
}
	: target = boolean_variable_acc [vars]
	  COMMA
	  value_op = logical_predicates [tagset, vars] {
		op.reset(new Wccl::VarSetter<Wccl::Bool>(*target, value_op));
	}
;

// ----------------------------------------------------------------------------
setvar_body_sset
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > target;
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > value_op;
}
	: target = str_set_variable_acc [vars]
	  COMMA
	  value_op = string_operators [tagset, vars] {
		op.reset(new Wccl::VarSetter<Wccl::StrSet>(*target, value_op));
	}
;

// ----------------------------------------------------------------------------
setvar_body_tset
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > target;
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > value_op;
}
	: target = sym_set_variable_acc [vars]
	  COMMA
	  value_op = sym_set_operators [tagset, vars] {
		op.reset(new Wccl::VarSetter<Wccl::TSet>(*target, value_op));
	}
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Symbol set (tagset) operators
// Returns boost::shared_ptr<Wccl::Function<Wccl::TSet> >
// ----------------------------------------------------------------------------
sym_set_operators
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::TSet> > ret]
	: ret = op_sym_set [tagset, vars]
	| ret = condit_sym [tagset, vars]
;

// Implementations of symbol set operators:
// ----------------------------------------------------------------------------
// Symbol set variable or constant symbol set
op_sym_set
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::TSet> > op]
	: op = sym_set_variable [vars]
	| op = sym_set_value [tagset]
;

// ----------------------------------------------------------------------------
// if (Bool, TSet, TSet)
// ? TSet ? Bool : {}
// The third argument of "if" is optional; without it the two-argument
// Conditional constructor is used.
condit_sym
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::TSet> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > on_true, on_false;
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > cond;
}
	: "if" LPAREN
	  cond = logical_predicates [tagset, vars] COMMA
	  on_true = sym_set_operators [tagset, vars]
	  (COMMA on_false = sym_set_operators [tagset, vars])?
	  RPAREN {
		if (!on_false) {
			op.reset(new Wccl::Conditional<Wccl::TSet>(cond, on_true));
		}
		else {
			op.reset(new Wccl::Conditional<Wccl::TSet>(cond, on_true, on_false));
		}
	}
	| Q_MARK
	  (on_true = sym_set_operators [tagset, vars])
	  Q_MARK
	  (cond = logical_predicates [tagset, vars]) {
		op.reset(new Wccl::Conditional<Wccl::TSet>(cond, on_true));
	}
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Position operators
// Returns boost::shared_ptr<Wccl::Function<Wccl::Position> >
// ----------------------------------------------------------------------------
position_operators
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Position> > ret]
	: ret = op_position [tagset, vars]
;

// Implementations of position operators:
// ----------------------------------------------------------------------------
op_position
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Position> > op]
	: op = position_variable [vars]
	| op = position_value
	| op = relpos [tagset, vars]
	| op = condit_position [tagset, vars]
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// String operators
// Returns boost::shared_ptr<Wccl::Function<Wccl::StrSet> >
// ----------------------------------------------------------------------------
string_operators
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
	: ret = op_orth    [tagset, vars]
	| ret = op_base    [tagset, vars]
	| ret = op_lower   [tagset, vars]
	| ret = op_upper   [tagset, vars]
	| ret = op_affix   [tagset, vars]
	| ret = op_str_set [tagset, vars]
	| ret = condit_str [tagset, vars]
;

// Implementations of string operators:
// ----------------------------------------------------------------------------
// orth[position]
op_orth
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
{
	boost::shared_ptr<Wccl::Function<Wccl::Position> > pos;
}
	: "orth" LBRACKET pos = op_position [tagset, vars] RBRACKET {
		ret.reset(new Wccl::GetOrth(pos));
	}
;

// ----------------------------------------------------------------------------
// base[$position_variable]
// The syntax is recognised but no operator is built for it yet. The rule
// used to return an empty pointer, which enclosing operators then stored as
// their argument; it now reports the problem with a ParserException.
op_base
	[const Corpus2::Tagset& /*tagset*/, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
{
	boost::shared_ptr<Wccl::VarGetter<Wccl::Position> > pos;
}
	: "base" LBRACKET pos = position_variable [vars] RBRACKET {
		// TODO: assign ret once an implementation of the operator exists
		throw ParserException(
			std::string("The base operator is not implemented yet"));
	}
;

// ----------------------------------------------------------------------------
// lower(string-expression)
op_lower
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
{
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > o_ret;
}
	: "lower" LPAREN o_ret = string_operators[tagset, vars] RPAREN {
		ret.reset(new Wccl::ToLower(o_ret));
	}
;

// ----------------------------------------------------------------------------
// upper(string-expression)
op_upper
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
{
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > o_ret;
}
	: "upper" LPAREN o_ret = string_operators[tagset, vars] RPAREN {
		ret.reset(new Wccl::ToUpper(o_ret));
	}
;

// ----------------------------------------------------------------------------
// affix(string-expression, number)
op_affix
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
{
	int offset = 0;
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > o_ret;
}
	: "affix" LPAREN
	  o_ret = string_operators[tagset, vars] COMMA offset = number
	  RPAREN {
		ret.reset(new Wccl::Affix(o_ret, offset));
	}
;

// ----------------------------------------------------------------------------
// String set variable or constant string set
op_str_set
	[const Corpus2::Tagset& /*tagset*/, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > op]
	: op = str_set_variable [vars]
	| op = str_set_value
;

// ----------------------------------------------------------------------------
// if (Bool, StrSet, StrSet)
// ? StrSet ? Bool : []
// The third argument of "if" is optional; without it the two-argument
// Conditional constructor is used.
condit_str
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > test;
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > p_true, p_false;
}
	: "if" LPAREN
	  test = logical_predicates [tagset, vars] COMMA
	  p_true = string_operators [tagset, vars]
	  (COMMA p_false = string_operators [tagset, vars])?
	  RPAREN {
		if (p_false) {
			op.reset(new Wccl::Conditional<Wccl::StrSet>(test, p_true, p_false));
		}
		else {
			op.reset(new Wccl::Conditional<Wccl::StrSet>(test, p_true));
		}
	}
	| Q_MARK
	  p_true = string_operators [tagset, vars]
	  Q_MARK
	  test = logical_predicates [tagset, vars] {
		op.reset(new Wccl::Conditional<Wccl::StrSet>(test, p_true));
	}
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Logical predicates
// Returns boost::shared_ptr<Wccl::Function<Wccl::Bool> >
// ----------------------------------------------------------------------------
logical_predicates
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > ret]
	: ret = lpred_and   [tagset, vars]
	| ret = lpred_or    [tagset, vars]
	| ret = lpred_nor   [tagset, vars]
	| ret = lpred_bool  [tagset, vars]
	| ret = lpred_in    [tagset, vars]
	| ret = lpred_inter [tagset, vars]
	| ret = lpred_eq    [tagset, vars]
	| ret = lpred_regex [tagset, vars]
	| ret = setvar_op   [tagset, vars]
	| ret = lpred_inout [tagset, vars]
	| ret = condit_bool [tagset, vars]
;

// ----------------------------------------------------------------------------
// Comma-separated predicates (at least one), collected in order of appearance
logical_predicates_comma_sep
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<
		std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > >
	> ret_v]
{
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > pred;

	ret_v.reset(
		new std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > >
	);
}
	: pred = logical_predicates [tagset, vars] {
		ret_v->push_back(pred);
	}
	(
		COMMA pred = logical_predicates [tagset, vars] {
			ret_v->push_back(pred);
		}
	)*
;

// ----------------------------------------------------------------------------
// and(pred, ..., pred)
lpred_and
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<
		std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > >
	> ret_v;
}
	: "and" LPAREN ret_v = logical_predicates_comma_sep [tagset, vars] RPAREN {
		op.reset(new Wccl::And(ret_v));
	}
;

// ----------------------------------------------------------------------------
// or(pred, ..., pred)
lpred_or
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<
		std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > >
	> ret_v;
}
	: "or" LPAREN ret_v = logical_predicates_comma_sep [tagset, vars] RPAREN {
		op.reset(new Wccl::Or(ret_v));
	}
;

// ----------------------------------------------------------------------------
// not(pred, ..., pred) -- built as Wccl::Nor over all arguments
lpred_nor
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<
		std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > >
	> ret_v;
}
	: "not" LPAREN ret_v = logical_predicates_comma_sep [tagset, vars] RPAREN {
		op.reset(new Wccl::Nor(ret_v));
	}
;

// ----------------------------------------------------------------------------
// Bool variable or constant bool value
lpred_bool
	[const Corpus2::Tagset& /*tagset*/, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
	: op = boolean_variable [vars]
	| op = boolean_value
;

// ----------------------------------------------------------------------------
// in(TSet, TSet) or in(StrSet, StrSet)
// The syntactic predicate tries the symbol set form first; the string form
// is the fallback alternative.
lpred_in
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > t1, t2;
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > s1, s2;
}
	: "in" LPAREN
	(
		(sym_set_operators [tagset, vars]) =>
		(
			t1 = sym_set_operators [tagset, vars] COMMA
			t2 = sym_set_operators [tagset, vars] {
				op.reset(new Wccl::IsSubsetOf<Wccl::TSet>(t1, t2));
			}
		)
	|
		(
			s1 = string_operators [tagset, vars] COMMA
			s2 = string_operators [tagset, vars] {
				op.reset(new Wccl::IsSubsetOf<Wccl::StrSet>(s1, s2));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// inter(TSet, TSet) or inter(StrSet, StrSet)
// Same alternative order as lpred_in: symbol sets first, strings as fallback.
lpred_inter
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > t1, t2;
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > s1, s2;
}
	: "inter" LPAREN
	(
		(sym_set_operators [tagset, vars]) =>
		(
			t1 = sym_set_operators [tagset, vars] COMMA
			t2 = sym_set_operators [tagset, vars] {
				op.reset(new Wccl::Intersects<Wccl::TSet>(t1, t2));
			}
		)
	|
		(
			s1 = string_operators [tagset, vars] COMMA
			s2 = string_operators [tagset, vars] {
				op.reset(new Wccl::Intersects<Wccl::StrSet>(s1, s2));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// equal(a, b) for two arguments of the same type.
// Alternatives are tried in this order: Position, TSet, StrSet, and finally
// Bool as the fallback without a syntactic predicate.
// NOTE(review): every operand family has an "if"/"?" conditional form, so the
// first matching predicate decides the type of a conditional first argument
// -- confirm this is intended.
lpred_eq
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > t1, t2;
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > b1, b2;
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > s1, s2;
	boost::shared_ptr<Wccl::Function<Wccl::Position> > p1, p2;
}
	: "equal" LPAREN
	(
		(position_operators [tagset, vars]) =>
		(
			p1 = position_operators [tagset, vars] COMMA
			p2 = position_operators [tagset, vars] {
				op.reset(new Wccl::Equals<Wccl::Position>(p1, p2));
			}
		)
	|
		(sym_set_operators [tagset, vars]) =>
		(
			t1 = sym_set_operators [tagset, vars] COMMA
			t2 = sym_set_operators [tagset, vars] {
				op.reset(new Wccl::Equals<Wccl::TSet>(t1, t2));
			}
		)
	|
		(string_operators [tagset, vars]) =>
		(
			s1 = string_operators [tagset, vars] COMMA
			s2 = string_operators [tagset, vars] {
				op.reset(new Wccl::Equals<Wccl::StrSet>(s1, s2));
			}
		)
	|
		(
			b1 = logical_predicates [tagset, vars] COMMA
			b2 = logical_predicates [tagset, vars] {
				op.reset(new Wccl::Equals<Wccl::Bool>(b1, b2));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// regex(string-expression, "pattern")
// The pattern is the text of a STRING token converted by
// token_ref_to_ustring.
lpred_regex
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > expr;
}
	: "regex" LPAREN expr = string_operators [tagset, vars] COMMA reg: STRING RPAREN {
		op.reset(new Wccl::Regex(expr, token_ref_to_ustring(reg)));
	}
;

// ----------------------------------------------------------------------------
// inside(position) / outside(position)
lpred_inout
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Position> > ret_pos;
}
	: "inside" LPAREN ret_pos = position_operators [tagset, vars] RPAREN {
		op.reset(new Wccl::IsInside(ret_pos));
	}
	| "outside" LPAREN ret_pos = position_operators [tagset, vars] RPAREN {
		op.reset(new Wccl::IsOutside(ret_pos));
	}
;

// ----------------------------------------------------------------------------
// if (Bool, Bool, Bool)
// ? Bool ? Bool : False
// The third argument of "if" is optional; without it the two-argument
// Conditional constructor is used.
condit_bool
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > test, p_true, p_false;
}
	: "if" LPAREN test = logical_predicates [tagset, vars] COMMA
	  p_true = logical_predicates [tagset, vars]
	  (COMMA p_false = logical_predicates [tagset, vars])?
	  RPAREN {
		if (p_false) {
			op.reset(new Wccl::Conditional<Wccl::Bool>(test, p_true, p_false));
		}
		else {
			op.reset(new Wccl::Conditional<Wccl::Bool>(test, p_true));
		}
	}
	| Q_MARK
	  p_true = logical_predicates [tagset, vars]
	  Q_MARK
	  test = logical_predicates [tagset, vars] {
		op.reset(new Wccl::Conditional<Wccl::Bool>(test, p_true));
	}
;

// ----------------------------------------------------------------------------
// if (Bool, Position, Position)
// ? Position ? Bool : 0
// The third argument of "if" is optional; without it the two-argument
// Conditional constructor is used.
condit_position
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Position> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > test;
	boost::shared_ptr<Wccl::Function<Wccl::Position> > p_true, p_false;
}
	: "if" LPAREN test = logical_predicates [tagset, vars] COMMA
	  p_true = position_operators [tagset, vars]
	  (COMMA p_false = position_operators [tagset, vars])?
	  RPAREN {
		if (p_false) {
			op.reset(new Wccl::Conditional<Wccl::Position>(test, p_true, p_false));
		}
		else {
			op.reset(new Wccl::Conditional<Wccl::Position>(test, p_true));
		}
	}
	| Q_MARK
	  p_true = position_operators [tagset, vars]
	  Q_MARK
	  test = logical_predicates [tagset, vars] {
		op.reset(new Wccl::Conditional<Wccl::Position>(test, p_true));
	}
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ANTLR LEXER
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Shares the ANTLRExpr vocabulary with the parser. Literal testing is off by
// default and switched on only for SYMBOL.
class ANTLRLexer extends Lexer;
options {
	exportVocab = ANTLRExpr;
	charVocabulary = '\3'..'\377';
	testLiterals = false;
	k = 2;
}

// Text between a pair of double quotes or a pair of single quotes; the
// delimiters are dropped from the token text (the ! suffix).
// NOTE(review): the body is any run of characters other than the delimiter,
// so a backslash-escaped delimiter still ends the string -- confirm.
STRING
options {
	paraphrase = "a string";
}
	: '"'! (~'"')* '"'!
	| '\''! (~'\'')* '\''!
;

// Decimal digits with a mandatory leading sign
SIGNED_INT
options {
	paraphrase = "Signed integer";
}
	: ('-'|'+') ('0'..'9')+
;

// Decimal digits without a sign
UNSIGNED_INT
options {
	paraphrase = "Unsigned integer";
}
	: ('0'..'9')+
;

// NOTE(review): QUOT_MARK matches the single quote and APOS_MARK the double
// quote, so the names and paraphrases look swapped; both characters also
// open a STRING -- confirm before relying on either token.
QUOT_MARK
options {
	paraphrase = "Quote";
}
	: '\''
;

APOS_MARK
options {
	paraphrase = "Apostrophe";
}
	: '"'
;

Q_MARK
options {
	paraphrase = "Question mark";
}
	: '?'
;

E_MARK
options {
	paraphrase = "Exclamation mark";
}
	: '!'
;

// Variable prefixes: $s: (string set), $t: (symbol set), $b: (bool) and a
// bare $ (position).
STR_PREFIX
options {
	paraphrase = "String prefix";
}
	: "$s:"
;

TST_PREFIX
options {
	paraphrase = "Symset prefix";
}
	: "$t:"
;

BOOL_PREFIX
options {
	paraphrase = "Bool prefix";
}
	: "$b:"
;

POS_PREFIX
options {
	paraphrase = "Position prefix";
}
	: '$'
;

LBRACKET
options {
	paraphrase = "'['";
}
	: '['
;

RBRACKET
options {
	paraphrase = "']'";
}
	: ']'
;

LPAREN
options {
	paraphrase = "'('";
}
	: '('
;

RPAREN
options {
	paraphrase = "')'";
}
	: ')'
;

LCURLY
options {
	paraphrase = "'{'";
}
	: '{'
;

RCURLY
options {
	paraphrase = "'}'";
}
	: '}'
;

AT_MARK
options {
	paraphrase = "'@'";
}
	: '@'
;

COMMA
options {
	paraphrase = "','";
}
	: ','
;

// Identifier: a letter or underscore followed by letters, digits or
// underscores, optionally enclosed in grave accents. Literals are tested
// here, so keywords used in the parser are recognised from this rule.
SYMBOL
options {
	paraphrase = "Symbol";
	testLiterals = true;
}
	: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
	| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
;

// Whitespace is skipped; line ends also advance the line counter.
WS
	: (
		  ' '
		| '\t'
		| '\f'
		| (
			  "\r\n"
			| '\r'
			| '\n'
		  ) { newline(); }
	  ) { $setType(antlr::Token::SKIP); }
;

// Skipped: from // up to, but not including, the end of the line
COMMENT
options {
	paraphrase = "Single line comment";
}
	: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP); }
;

// Skipped: /* ... */, keeping the line counter up to date. A star is taken
// as comment content only when the next character is not a slash.
ML_COMMENT
options {
	paraphrase = "Multi line comment";
}
	: "/*"
	(	// TODO: test it and add reference to the site it's taken from!
		/* This actually works OK despite the ambiguity that
		'\r' '\n' can be matched in one alternative or by matching
		'\r' in one iteration and '\n' in another.. But
		this is really matched just by one rule per (...)*
		loop iteration, so it's OK.
		This is exactly how they do it all over the web - just
		turn off the warning for this particular token.*/
		options {
			generateAmbigWarnings=false;
		}
		: { LA(2)!='/' }? '*'
		| '\r' '\n' { newline(); }
		| '\r' { newline(); }
		| '\n' { newline(); }
		| ~('*'|'\n'|'\r')
	)*
	"*/"
	{$setType(antlr::Token::SKIP);}
;

HASH
options {
	paraphrase = "'#'";
}
	: '#'
;

//DSEPARATOR
//options {
//	paraphrase = "':-'";
//}
//	: ":-"
//;