From def7cb3972d728114b729575167c36e868f33c90 Mon Sep 17 00:00:00 2001 From: Adam Wardynski <award@.(win7-laptop)> Date: Sun, 28 Nov 2010 06:41:03 +0100 Subject: [PATCH] K=1 grammar with syntactic predicates. --- libwccl/parser/grammar.g | 243 ++++++++++++++++++++++----------------- 1 file changed, 138 insertions(+), 105 deletions(-) diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index 2ba60ad..9a0128e 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -57,7 +57,7 @@ options { // ---------------------------------------------------------------------------- class ANTLRParser extends Parser; options { - k = 4; + k = 1; buildAST = false; exportVocab = ANTLRExpr; defaultErrorHandler = false; @@ -115,9 +115,6 @@ parse_string_operator : op = string_operators [tagset, *res->variables.get()] { res->op = op; } - | op = condit_str [tagset, *res->variables.get()] { - res->op = op; - } ; // ---------------------------------------------------------------------------- @@ -133,9 +130,6 @@ parse_predicates : op = logical_predicates [tagset, *res->variables.get()] { res->op = op; } - | op = condit_bool [tagset, *res->variables.get()] { - res->op = op; - } ; // ---------------------------------------------------------------------------- @@ -151,9 +145,6 @@ parse_sym_set_operator : op = sym_set_operators [tagset, *res->variables.get()] { res->op = op; } - | op = condit_sym [tagset, *res->variables.get()] { - res->op = op; - } ; /////////////////////////////////////////////////////////////////////////////// @@ -170,15 +161,16 @@ str_set_literal { s_set.reset(new Wccl::StrSet()); } - : LBRACKET RBRACKET - | LBRACKET s1: STRING { - s_set->insert(str_token_ref_to_ustring(s1)); - } - ( - COMMA s2: STRING { + : LBRACKET + ( s1: STRING { + s_set->insert(str_token_ref_to_ustring(s1)); + } + ( COMMA s2: STRING { s_set->insert(str_token_ref_to_ustring(s2)); } - )* RBRACKET + )* + )? + RBRACKET ; // Constrant string set // Returns boost::shared_ptr<Wccl::Constant<Wccl::StrSet> > @@ -216,10 +208,9 @@ sym_set_literal { t_set.reset(new Wccl::TSet()); } - : LCURLY RCURLY - | LCURLY - sym_set_elem[tagset, t_set] (COMMA sym_set_elem[tagset, t_set]) * - RCURLY + : LCURLY + ( sym_set_elem[tagset, t_set] (COMMA sym_set_elem[tagset, t_set])* )? + RCURLY ; // Constant symbol set // Returns boost::shared_ptr<Wccl::Constant<Wccl::TSet> > @@ -419,64 +410,71 @@ boolean_variable setvar_op [const Corpus2::Tagset& tagset, Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > ret] - : ret = setvar_pos [tagset, vars] - | ret = setvar_bool [tagset, vars] - | ret = setvar_sset [tagset, vars] - | ret = setvar_tset [tagset, vars] + : "setvar" LPAREN + ( + ret = setvar_body_pos [tagset, vars] + | ret = setvar_body_bool [tagset, vars] + | ret = setvar_body_sset [tagset, vars] + | ret = setvar_body_tset [tagset, vars] + ) + RPAREN ; // Implementations of setvar: // ---------------------------------------------------------------------------- -setvar_pos +setvar_body_pos [const Corpus2::Tagset& tagset, Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op] { boost::shared_ptr<Wccl::Function<Wccl::Position> > ret_op; boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > ret_acc; } - : "setvar" LPAREN ret_acc = position_variable_acc [vars] COMMA - ret_op = op_position [vars] RPAREN { - op.reset(new Wccl::VarSetter<Wccl::Position>(*ret_acc.get(), ret_op)); - } + : ret_acc = position_variable_acc [vars] + COMMA + ret_op = op_position [vars] { + op.reset(new Wccl::VarSetter<Wccl::Position>(*ret_acc.get(), ret_op)); + } ; // ---------------------------------------------------------------------------- -setvar_bool +setvar_body_bool [const Corpus2::Tagset& tagset, Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op] { boost::shared_ptr<Wccl::Function<Wccl::Bool> > ret_op; boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > ret_acc; } - : "setvar" LPAREN ret_acc = boolean_variable_acc [vars] COMMA - ret_op = logical_predicates [tagset, vars] RPAREN { - op.reset(new Wccl::VarSetter<Wccl::Bool>(*ret_acc.get(), ret_op)); - } + : ret_acc = boolean_variable_acc [vars] + COMMA + ret_op = logical_predicates [tagset, vars] { + op.reset(new Wccl::VarSetter<Wccl::Bool>(*ret_acc.get(), ret_op)); + } ; // ---------------------------------------------------------------------------- -setvar_sset +setvar_body_sset [const Corpus2::Tagset& tagset, Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op] { boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret_op; boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > ret_acc; } - : "setvar" LPAREN ret_acc = str_set_variable_acc [vars] COMMA - ret_op = string_operators [tagset, vars] RPAREN { + : ret_acc = str_set_variable_acc [vars] + COMMA ret_op = string_operators [tagset, vars] { op.reset(new Wccl::VarSetter<Wccl::StrSet>(*ret_acc.get(), ret_op)); } ; // ---------------------------------------------------------------------------- -setvar_tset +setvar_body_tset [const Corpus2::Tagset& tagset, Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op] { boost::shared_ptr<Wccl::Function<Wccl::TSet> > ret_op; boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > ret_acc; } - : "setvar" LPAREN ret_acc = sym_set_variable_acc [vars] COMMA - ret_op = sym_set_operators [tagset, vars] RPAREN { + : ret_acc = sym_set_variable_acc [vars] + COMMA + ret_op = sym_set_operators [tagset, vars] { op.reset(new Wccl::VarSetter<Wccl::TSet>(*ret_acc.get(), ret_op)); } ; @@ -490,7 +488,7 @@ sym_set_operators [const Corpus2::Tagset& tagset, Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::Function<Wccl::TSet> > ret] : ret = op_sym_set [tagset, vars] -// | ret = condit_sym [vars] + | ret = condit_sym [tagset, vars] ; // Implementations of symbol set operators: // ---------------------------------------------------------------------------- @@ -565,7 +563,7 @@ string_operators | ret = op_upper [tagset, vars] | ret = op_affix [tagset, vars] | ret = op_str_set [tagset, vars] -// | ret = condit_str [vars] + | ret = condit_str [tagset, vars] ; // Implementations of string operators: // ---------------------------------------------------------------------------- @@ -677,12 +675,12 @@ logical_predicates | ret = lpred_nor [tagset, vars] | ret = lpred_bool [tagset, vars] | ret = lpred_in [tagset, vars] - | ret = lpred_inter [tagset, vars] + | ret = lpred_inter [tagset, vars] | ret = lpred_eq [tagset, vars] | ret = lpred_regex [tagset, vars] | ret = setvar_op [tagset, vars] | ret = lpred_inout [tagset, vars] -// | ret = condit_bool [vars] + | ret = condit_bool [tagset, vars] ; // ---------------------------------------------------------------------------- // comma-separated predicates @@ -758,14 +756,26 @@ lpred_in boost::shared_ptr<Wccl::Function<Wccl::TSet> > t1, t2; boost::shared_ptr<Wccl::Function<Wccl::StrSet> > s1, s2; } - : "in" LPAREN t1 = sym_set_operators [tagset, vars] COMMA - t2 = sym_set_operators [tagset, vars] RPAREN { - op.reset(new Wccl::IsSubsetOf<Wccl::TSet>(t1, t2)); - } - | "in" LPAREN s1 = string_operators [tagset, vars] COMMA - s2 = string_operators [tagset, vars] RPAREN { - op.reset(new Wccl::IsSubsetOf<Wccl::StrSet>(s1, s2)); - } +: + "in" LPAREN + ( + (sym_set_operators [tagset, vars]) => + ( + t1 = sym_set_operators [tagset, vars] COMMA + t2 = sym_set_operators [tagset, vars] { + op.reset(new Wccl::IsSubsetOf<Wccl::TSet>(t1, t2)); + } + ) + | + ( + s1 = string_operators [tagset, vars] COMMA + s2 = string_operators [tagset, vars] { + op.reset(new Wccl::IsSubsetOf<Wccl::StrSet>(s1, s2)); + } + ) + ) + RPAREN + ; // ---------------------------------------------------------------------------- @@ -776,14 +786,25 @@ lpred_inter boost::shared_ptr<Wccl::Function<Wccl::TSet> > t1, t2; boost::shared_ptr<Wccl::Function<Wccl::StrSet> > s1, s2; } - : "inter" LPAREN s1 = string_operators [tagset, vars] COMMA - s2 = string_operators [tagset, vars] RPAREN { - op.reset(new Wccl::Intersects<Wccl::StrSet>(s1, s2)); - } - | "inter" LPAREN t1 = sym_set_operators [tagset, vars] COMMA - t2 = sym_set_operators [tagset, vars] RPAREN { - op.reset(new Wccl::Intersects<Wccl::TSet>(t1, t2)); - } + : + "inter" LPAREN + ( + (sym_set_operators [tagset, vars]) => + ( + t1 = sym_set_operators [tagset, vars] COMMA + t2 = sym_set_operators [tagset, vars] { + op.reset(new Wccl::Intersects<Wccl::TSet>(t1, t2)); + } + ) + | + ( + s1 = string_operators [tagset, vars] COMMA + s2 = string_operators [tagset, vars] { + op.reset(new Wccl::Intersects<Wccl::StrSet>(s1, s2)); + } + ) + ) + RPAREN ; // ---------------------------------------------------------------------------- @@ -796,22 +817,39 @@ lpred_eq boost::shared_ptr<Wccl::Function<Wccl::StrSet> > s1, s2; boost::shared_ptr<Wccl::Function<Wccl::Position> > p1, p2; } - : "equal" LPAREN s1 = string_operators [tagset, vars] COMMA - s2 = string_operators [tagset, vars] RPAREN { - op.reset(new Wccl::Equals<Wccl::StrSet>(s1, s2)); - } - | "equal" LPAREN t1 = sym_set_operators [tagset, vars] COMMA - t2 = sym_set_operators [tagset, vars] RPAREN { - op.reset(new Wccl::Equals<Wccl::TSet>(t1, t2)); - } - | "equal" LPAREN p1 = position_operators [tagset, vars] COMMA - p2 = position_operators [tagset, vars] RPAREN { - op.reset(new Wccl::Equals<Wccl::Position>(p1, p2)); - } - | "equal" LPAREN b1 = logical_predicates [tagset, vars] COMMA - b2 = logical_predicates [tagset, vars] RPAREN { - op.reset(new Wccl::Equals<Wccl::Bool>(b1, b2)); - } + : "equal" LPAREN + ( + ( + p1 = position_operators [tagset, vars] COMMA + p2 = position_operators [tagset, vars] { + op.reset(new Wccl::Equals<Wccl::Position>(p1, p2)); + } + ) + | + (sym_set_operators [tagset, vars]) => + ( + t1 = sym_set_operators [tagset, vars] COMMA + t2 = sym_set_operators [tagset, vars] { + op.reset(new Wccl::Equals<Wccl::TSet>(t1, t2)); + } + ) + | + (string_operators [tagset, vars]) => + ( + s1 = string_operators [tagset, vars] COMMA + s2 = string_operators [tagset, vars] { + op.reset(new Wccl::Equals<Wccl::StrSet>(s1, s2)); + } + ) + | + ( + b1 = logical_predicates [tagset, vars] COMMA + b2 = logical_predicates [tagset, vars] { + op.reset(new Wccl::Equals<Wccl::Bool>(b1, b2)); + } + ) + ) + RPAREN ; // ---------------------------------------------------------------------------- @@ -881,13 +919,13 @@ class ANTLRLexer extends Lexer; options { exportVocab = ANTLRExpr; charVocabulary = '\3'..'\377'; + testLiterals = false; k = 3; } STRING options { paraphrase = "a string"; - testLiterals = true; } : '"' (~'"')* '"' | '\'' (~'\'')* '\'' @@ -896,7 +934,6 @@ options { INT options { paraphrase = "Integer"; - testLiterals = true; } : ('-'|'+')? ('0'..'9')+ ; @@ -904,7 +941,6 @@ options { QUOT_MARK options { paraphrase = "Quota mark"; - testLiterals = true; } : '\'' ; @@ -912,7 +948,6 @@ options { APOS_MARK options { paraphrase = "Aposptrophe mark"; - testLiterals = true; } : '"' ; @@ -920,7 +955,6 @@ options { Q_MARK options { paraphrase = "Query mark"; - testLiterals = true; } : '?' ; @@ -928,7 +962,6 @@ options { E_MARK options { paraphrase = "Exclamanation mark"; - testLiterals = true; } : '!' ; @@ -936,7 +969,6 @@ options { STR_PREFIX options { paraphrase = "String prefix"; - testLiterals = true; } : "$s:" ; @@ -944,7 +976,6 @@ options { TST_PREFIX options { paraphrase = "Tag set (symbol) prefix"; - testLiterals = true; } : "$t:" ; @@ -952,7 +983,6 @@ options { BOOL_PREFIX options { paraphrase = "Bool prefix"; - testLiterals = true; } : "$b:" ; @@ -967,7 +997,6 @@ options { LBRACKET options { paraphrase = "'['"; - testLiterals = true; } : '[' ; @@ -975,7 +1004,6 @@ options { RBRACKET options { paraphrase = "']'"; - testLiterals = true; } : ']' ; @@ -983,7 +1011,6 @@ options { LPAREN options { paraphrase = "'('"; - testLiterals = true; } : '(' ; @@ -991,7 +1018,6 @@ options { RPAREN options { paraphrase = "')'"; - testLiterals = true; } : ')' ; @@ -999,7 +1025,6 @@ options { LCURLY options { paraphrase = "'{'"; - testLiterals = true; } : '{' ; @@ -1007,7 +1032,6 @@ options { RCURLY options { paraphrase = "'}'"; - testLiterals = true; } : '}' ; @@ -1015,15 +1039,13 @@ options { AT_MARK options { paraphrase = "'@'"; - testLiterals = true; } : '@' ; COMMA options { - paraphrase = "','"; - testLiterals = true; + paraphrase = "','"; } : ',' ; @@ -1039,25 +1061,37 @@ options { WS : ( ' ' - | '\t' - | '\r' '\n' { newline(); } - | '\n' { newline(); } - ) { $setType(antlr::Token::SKIP); } + | '\t' + | '\f' + | ( "\r\n" + | '\r' + | '\n' + ) + { newline(); } + ) + { $setType(antlr::Token::SKIP); } ; COMMENT options { - paraphrase = "Comment"; - testLiterals = true; + paraphrase = "Single line comment"; +} + : "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP); } +; + +ML_COMMENT +options { + paraphrase = "Multi line comment"; } - : "//" (~'\n')* '\n'{ $setType(antlr::Token::SKIP); newline(); } - | "/*" (.)* "*/" { $setType(antlr::Token::SKIP); } + : "/*" + (.)* + "*/" + { $setType(antlr::Token::SKIP); } ; HASH options { paraphrase = "'#'"; - testLiterals = true; } : '#' ; @@ -1065,7 +1099,6 @@ options { DSEPARATOR options { paraphrase = "':-'"; - testLiterals = true; } : ":-" ; -- GitLab