diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index 0fb8f619a7b5439728c0c44b263618313b5270e4..e0af47afc57497ed8812b0f811080208bd7b211d 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -57,7 +57,7 @@ options { // ---------------------------------------------------------------------------- class ANTLRParser extends Parser; options { - k = 4; + k = 1; buildAST = false; exportVocab = ANTLRExpr; defaultErrorHandler = false; @@ -115,9 +115,6 @@ parse_string_operator : op = string_operators [tagset, *res->variables.get()] { res->op = op; } - | op = condit_str [tagset, *res->variables.get()] { - res->op = op; - } ; // ---------------------------------------------------------------------------- @@ -133,9 +130,6 @@ parse_predicates : op = logical_predicates [tagset, *res->variables.get()] { res->op = op; } - | op = condit_bool [tagset, *res->variables.get()] { - res->op = op; - } ; // ---------------------------------------------------------------------------- @@ -151,9 +145,6 @@ parse_sym_set_operator : op = sym_set_operators [tagset, *res->variables.get()] { res->op = op; } - | op = condit_sym [tagset, *res->variables.get()] { - res->op = op; - } ; /////////////////////////////////////////////////////////////////////////////// @@ -170,15 +161,16 @@ str_set_literal { s_set.reset(new Wccl::StrSet()); } - : LBRACKET RBRACKET - | LBRACKET s1: STRING { - s_set->insert(str_token_ref_to_ustring(s1)); - } - ( - COMMA s2: STRING { + : LBRACKET + ( s1: STRING { + s_set->insert(str_token_ref_to_ustring(s1)); + } + ( COMMA s2: STRING { s_set->insert(str_token_ref_to_ustring(s2)); } - )* RBRACKET + )* + )? + RBRACKET ; // Constrant string set // Returns boost::shared_ptr<Wccl::Constant<Wccl::StrSet> > @@ -216,10 +208,9 @@ sym_set_literal { t_set.reset(new Wccl::TSet()); } - : LCURLY RCURLY - | LCURLY - sym_set_elem[tagset, t_set] (COMMA sym_set_elem[tagset, t_set]) * - RCURLY + : LCURLY + ( sym_set_elem[tagset, t_set] (COMMA sym_set_elem[tagset, t_set])* )? + RCURLY ; // Constant symbol set // Returns boost::shared_ptr<Wccl::Constant<Wccl::TSet> > @@ -419,64 +410,71 @@ boolean_variable setvar_op [const Corpus2::Tagset& tagset, Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > ret] - : ret = setvar_pos [tagset, vars] - | ret = setvar_bool [tagset, vars] - | ret = setvar_sset [tagset, vars] - | ret = setvar_tset [tagset, vars] + : "setvar" LPAREN + ( + ret = setvar_body_pos [tagset, vars] + | ret = setvar_body_bool [tagset, vars] + | ret = setvar_body_sset [tagset, vars] + | ret = setvar_body_tset [tagset, vars] + ) + RPAREN ; // Implementations of setvar: // ---------------------------------------------------------------------------- -setvar_pos +setvar_body_pos [const Corpus2::Tagset& tagset, Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op] { boost::shared_ptr<Wccl::Function<Wccl::Position> > ret_op; boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > ret_acc; } - : "setvar" LPAREN ret_acc = position_variable_acc [vars] COMMA - ret_op = op_position [vars] RPAREN { - op.reset(new Wccl::VarSetter<Wccl::Position>(*ret_acc.get(), ret_op)); - } + : ret_acc = position_variable_acc [vars] + COMMA + ret_op = op_position [vars] { + op.reset(new Wccl::VarSetter<Wccl::Position>(*ret_acc.get(), ret_op)); + } ; // ---------------------------------------------------------------------------- -setvar_bool +setvar_body_bool [const Corpus2::Tagset& tagset, Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op] { boost::shared_ptr<Wccl::Function<Wccl::Bool> > ret_op; boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > ret_acc; } - : "setvar" LPAREN ret_acc = boolean_variable_acc [vars] COMMA - ret_op = logical_predicates [tagset, vars] RPAREN { - op.reset(new Wccl::VarSetter<Wccl::Bool>(*ret_acc.get(), ret_op)); - } + : ret_acc = boolean_variable_acc [vars] + COMMA + ret_op = logical_predicates [tagset, vars] { + op.reset(new Wccl::VarSetter<Wccl::Bool>(*ret_acc.get(), ret_op)); + } ; // ---------------------------------------------------------------------------- -setvar_sset +setvar_body_sset [const Corpus2::Tagset& tagset, Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op] { boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret_op; boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > ret_acc; } - : "setvar" LPAREN ret_acc = str_set_variable_acc [vars] COMMA - ret_op = string_operators [tagset, vars] RPAREN { + : ret_acc = str_set_variable_acc [vars] + COMMA ret_op = string_operators [tagset, vars] { op.reset(new Wccl::VarSetter<Wccl::StrSet>(*ret_acc.get(), ret_op)); } ; // ---------------------------------------------------------------------------- -setvar_tset +setvar_body_tset [const Corpus2::Tagset& tagset, Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op] { boost::shared_ptr<Wccl::Function<Wccl::TSet> > ret_op; boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > ret_acc; } - : "setvar" LPAREN ret_acc = sym_set_variable_acc [vars] COMMA - ret_op = sym_set_operators [tagset, vars] RPAREN { + : ret_acc = sym_set_variable_acc [vars] + COMMA + ret_op = sym_set_operators [tagset, vars] { op.reset(new Wccl::VarSetter<Wccl::TSet>(*ret_acc.get(), ret_op)); } ; @@ -490,7 +488,7 @@ sym_set_operators [const Corpus2::Tagset& tagset, Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::Function<Wccl::TSet> > ret] : ret = op_sym_set [tagset, vars] -// | ret = condit_sym [vars] + | ret = condit_sym [tagset, vars] ; // Implementations of symbol set operators: // ---------------------------------------------------------------------------- @@ -523,12 +521,9 @@ condit_sym } } | Q_MARK - (p_true = sym_set_operators [tagset, vars] | - p_true = condit_sym [tagset, vars]) + (p_true = sym_set_operators [tagset, vars]) Q_MARK - (test = logical_predicates [tagset, vars] | - test = condit_bool [tagset, vars]) - { + (test = logical_predicates [tagset, vars]) { op.reset(new Wccl::Conditional<Wccl::TSet>(test, p_true)); } ; @@ -565,7 +560,7 @@ string_operators | ret = op_upper [tagset, vars] | ret = op_affix [tagset, vars] | ret = op_str_set [tagset, vars] -// | ret = condit_str [vars] + | ret = condit_str [tagset, vars] ; // Implementations of string operators: // ---------------------------------------------------------------------------- @@ -654,12 +649,9 @@ condit_str } } | Q_MARK - (p_true = string_operators [tagset, vars] | - p_true = condit_str [tagset, vars]) + p_true = string_operators [tagset, vars] Q_MARK - (test = logical_predicates [tagset, vars] | - test = condit_bool [tagset, vars]) - { + test = logical_predicates [tagset, vars] { op.reset(new Wccl::Conditional<Wccl::StrSet>(test, p_true)); } ; @@ -677,12 +669,12 @@ logical_predicates | ret = lpred_nor [tagset, vars] | ret = lpred_bool [tagset, vars] | ret = lpred_in [tagset, vars] - | ret = lpred_inter [tagset, vars] + | ret = lpred_inter [tagset, vars] | ret = lpred_eq [tagset, vars] | ret = lpred_regex [tagset, vars] | ret = setvar_op [tagset, vars] | ret = lpred_inout [tagset, vars] -// | ret = condit_bool [vars] + | ret = condit_bool [tagset, vars] ; // ---------------------------------------------------------------------------- // comma-separated predicates @@ -758,14 +750,26 @@ lpred_in boost::shared_ptr<Wccl::Function<Wccl::TSet> > t1, t2; boost::shared_ptr<Wccl::Function<Wccl::StrSet> > s1, s2; } - : "in" LPAREN t1 = sym_set_operators [tagset, vars] COMMA - t2 = sym_set_operators [tagset, vars] RPAREN { - op.reset(new Wccl::IsSubsetOf<Wccl::TSet>(t1, t2)); - } - | "in" LPAREN s1 = string_operators [tagset, vars] COMMA - s2 = string_operators [tagset, vars] RPAREN { - op.reset(new Wccl::IsSubsetOf<Wccl::StrSet>(s1, s2)); - } +: + "in" LPAREN + ( + (sym_set_operators [tagset, vars]) => + ( + t1 = sym_set_operators [tagset, vars] COMMA + t2 = sym_set_operators [tagset, vars] { + op.reset(new Wccl::IsSubsetOf<Wccl::TSet>(t1, t2)); + } + ) + | + ( + s1 = string_operators [tagset, vars] COMMA + s2 = string_operators [tagset, vars] { + op.reset(new Wccl::IsSubsetOf<Wccl::StrSet>(s1, s2)); + } + ) + ) + RPAREN + ; // ---------------------------------------------------------------------------- @@ -776,14 +780,25 @@ lpred_inter boost::shared_ptr<Wccl::Function<Wccl::TSet> > t1, t2; boost::shared_ptr<Wccl::Function<Wccl::StrSet> > s1, s2; } - : "inter" LPAREN s1 = string_operators [tagset, vars] COMMA - s2 = string_operators [tagset, vars] RPAREN { - op.reset(new Wccl::Intersects<Wccl::StrSet>(s1, s2)); - } - | "inter" LPAREN t1 = sym_set_operators [tagset, vars] COMMA - t2 = sym_set_operators [tagset, vars] RPAREN { - op.reset(new Wccl::Intersects<Wccl::TSet>(t1, t2)); - } + : + "inter" LPAREN + ( + (sym_set_operators [tagset, vars]) => + ( + t1 = sym_set_operators [tagset, vars] COMMA + t2 = sym_set_operators [tagset, vars] { + op.reset(new Wccl::Intersects<Wccl::TSet>(t1, t2)); + } + ) + | + ( + s1 = string_operators [tagset, vars] COMMA + s2 = string_operators [tagset, vars] { + op.reset(new Wccl::Intersects<Wccl::StrSet>(s1, s2)); + } + ) + ) + RPAREN ; // ---------------------------------------------------------------------------- @@ -796,22 +811,39 @@ lpred_eq boost::shared_ptr<Wccl::Function<Wccl::StrSet> > s1, s2; boost::shared_ptr<Wccl::Function<Wccl::Position> > p1, p2; } - : "equal" LPAREN s1 = string_operators [tagset, vars] COMMA - s2 = string_operators [tagset, vars] RPAREN { - op.reset(new Wccl::Equals<Wccl::StrSet>(s1, s2)); - } - | "equal" LPAREN t1 = sym_set_operators [tagset, vars] COMMA - t2 = sym_set_operators [tagset, vars] RPAREN { - op.reset(new Wccl::Equals<Wccl::TSet>(t1, t2)); - } - | "equal" LPAREN p1 = position_operators [tagset, vars] COMMA - p2 = position_operators [tagset, vars] RPAREN { - op.reset(new Wccl::Equals<Wccl::Position>(p1, p2)); - } - | "equal" LPAREN b1 = logical_predicates [tagset, vars] COMMA - b2 = logical_predicates [tagset, vars] RPAREN { - op.reset(new Wccl::Equals<Wccl::Bool>(b1, b2)); - } + : "equal" LPAREN + ( + ( + p1 = position_operators [tagset, vars] COMMA + p2 = position_operators [tagset, vars] { + op.reset(new Wccl::Equals<Wccl::Position>(p1, p2)); + } + ) + | + (sym_set_operators [tagset, vars]) => + ( + t1 = sym_set_operators [tagset, vars] COMMA + t2 = sym_set_operators [tagset, vars] { + op.reset(new Wccl::Equals<Wccl::TSet>(t1, t2)); + } + ) + | + (string_operators [tagset, vars]) => + ( + s1 = string_operators [tagset, vars] COMMA + s2 = string_operators [tagset, vars] { + op.reset(new Wccl::Equals<Wccl::StrSet>(s1, s2)); + } + ) + | + ( + b1 = logical_predicates [tagset, vars] COMMA + b2 = logical_predicates [tagset, vars] { + op.reset(new Wccl::Equals<Wccl::Bool>(b1, b2)); + } + ) + ) + RPAREN ; // ---------------------------------------------------------------------------- @@ -862,12 +894,9 @@ condit_bool } } | Q_MARK - (p_true = logical_predicates [tagset, vars] | - p_true = condit_bool [tagset, vars]) + p_true = logical_predicates [tagset, vars] Q_MARK - (test = logical_predicates [tagset, vars] | - test = condit_bool [tagset, vars]) - { + test = logical_predicates [tagset, vars] { op.reset(new Wccl::Conditional<Wccl::Bool>(test, p_true)); } ; @@ -881,13 +910,13 @@ class ANTLRLexer extends Lexer; options { exportVocab = ANTLRExpr; charVocabulary = '\3'..'\377'; - k = 3; + testLiterals = false; + k = 2; } STRING options { paraphrase = "a string"; - testLiterals = true; } : '"' (~'"')* '"' | '\'' (~'\'')* '\'' @@ -896,7 +925,6 @@ options { INT options { paraphrase = "Integer"; - testLiterals = true; } : ('-'|'+')? ('0'..'9')+ ; @@ -904,7 +932,6 @@ options { QUOT_MARK options { paraphrase = "Quota mark"; - testLiterals = true; } : '\'' ; @@ -912,7 +939,6 @@ options { APOS_MARK options { paraphrase = "Aposptrophe mark"; - testLiterals = true; } : '"' ; @@ -920,7 +946,6 @@ options { Q_MARK options { paraphrase = "Query mark"; - testLiterals = true; } : '?' ; @@ -928,7 +953,6 @@ options { E_MARK options { paraphrase = "Exclamanation mark"; - testLiterals = true; } : '!' ; @@ -936,7 +960,6 @@ options { STR_PREFIX options { paraphrase = "String prefix"; - testLiterals = true; } : "$s:" ; @@ -944,7 +967,6 @@ options { TST_PREFIX options { paraphrase = "Tag set (symbol) prefix"; - testLiterals = true; } : "$t:" ; @@ -952,7 +974,6 @@ options { BOOL_PREFIX options { paraphrase = "Bool prefix"; - testLiterals = true; } : "$b:" ; @@ -967,7 +988,6 @@ options { LBRACKET options { paraphrase = "'['"; - testLiterals = true; } : '[' ; @@ -975,7 +995,6 @@ options { RBRACKET options { paraphrase = "']'"; - testLiterals = true; } : ']' ; @@ -983,7 +1002,6 @@ options { LPAREN options { paraphrase = "'('"; - testLiterals = true; } : '(' ; @@ -991,7 +1009,6 @@ options { RPAREN options { paraphrase = "')'"; - testLiterals = true; } : ')' ; @@ -999,7 +1016,6 @@ options { LCURLY options { paraphrase = "'{'"; - testLiterals = true; } : '{' ; @@ -1007,7 +1023,6 @@ options { RCURLY options { paraphrase = "'}'"; - testLiterals = true; } : '}' ; @@ -1015,15 +1030,13 @@ options { AT_MARK options { paraphrase = "'@'"; - testLiterals = true; } : '@' ; COMMA options { - paraphrase = "','"; - testLiterals = true; + paraphrase = "','"; } : ',' ; @@ -1039,25 +1052,50 @@ options { WS : ( ' ' - | '\t' - | '\r' '\n' { newline(); } - | '\n' { newline(); } - ) { $setType(antlr::Token::SKIP); } + | '\t' + | '\f' + | ( "\r\n" + | '\r' + | '\n' + ) + { newline(); } + ) + { $setType(antlr::Token::SKIP); } ; COMMENT options { - paraphrase = "Comment"; - testLiterals = true; + paraphrase = "Single line comment"; +} + : "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP); } +; + +ML_COMMENT +options { + paraphrase = "Multi line comment"; } - : "//" (~'\n')* '\n'{ $setType(antlr::Token::SKIP); newline(); } - | "/*" (.)* "*/" { $setType(antlr::Token::SKIP); } + : "/*" + ( /* This actually works OK despite the ambiguity that + '\r' '\n' can be matched in one alternative or by matching + '\r' in one iteration and '\n' in another.. But + this is really matched just by one rule per (...)* + loop iteration, so it's OK. + This is exactly how they do it all over the web - just + turn off the warning for this particular token.*/ + options { generateAmbigWarnings=false; } + : { LA(2)!='/' }? '*' + | '\r' '\n' { newline(); } + | '\r' { newline(); } + | '\n' { newline(); } + | ~('*'|'\n'|'\r') + )* + "*/" + {$setType(antlr::Token::SKIP);} ; HASH options { paraphrase = "'#'"; - testLiterals = true; } : '#' ; @@ -1065,7 +1103,6 @@ options { DSEPARATOR options { paraphrase = "':-'"; - testLiterals = true; } : ":-" ; diff --git a/wcclparser/main.cpp b/wcclparser/main.cpp index 86e850b554c590845e54c561568a75316322ed85..c048ec83d51f0d002a710b34f804af51966c185d 100644 --- a/wcclparser/main.cpp +++ b/wcclparser/main.cpp @@ -26,6 +26,7 @@ namespace { void std_read_loop(boost::function<bool (const std::string&)>& line_cb) { while (std::cin.good()) { + std::cout << _prompt << std::flush; std::string s; getline(std::cin, s); if (line_cb(s)) {