From c4d45c5b0976ae17bc510183b48757d5deec59e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20K=C4=99dzia?= <Pawel.Kedzia@pwr.wroc.pl> Date: Wed, 24 Nov 2010 18:04:11 +0100 Subject: [PATCH] Fixed grammar and added literal rules to return shared_ptr to <StrSet, TSet, Bool, Position>. --- libwccl/parser/Parser.cpp | 6 +- libwccl/parser/grammar.g | 298 +++++++++++++++++++++++--------------- 2 files changed, 183 insertions(+), 121 deletions(-) diff --git a/libwccl/parser/Parser.cpp b/libwccl/parser/Parser.cpp index 5bcdb23..ee4734b 100644 --- a/libwccl/parser/Parser.cpp +++ b/libwccl/parser/Parser.cpp @@ -43,7 +43,7 @@ boost::shared_ptr<ANTLRParserResult<Wccl::StrSet> > Parser::parseStringOperator( ANTLRLexer lexer(istr); ANTLRParser parser(lexer); - return parser.parse_string_operator(); + return parser.parse_string_operator(this->tagset); } // ---------------------------------------------------------------------------- @@ -73,7 +73,7 @@ boost::shared_ptr<ANTLRParserResult<Wccl::Bool> > Parser::parsePredicate( ANTLRLexer lexer(istr); ANTLRParser parser(lexer); - return parser.parse_predicates(); + return parser.parse_predicates(this->tagset); } // ---------------------------------------------------------------------------- @@ -104,5 +104,5 @@ boost::shared_ptr<ANTLRParserResult<Wccl::TSet> > Parser::parseSymSetOperator( ANTLRLexer lexer(istr); ANTLRParser parser(lexer); - return parser.parse_sym_set_operator(); + return parser.parse_sym_set_operator(this->tagset); } diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index c4770a0..5317b67 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -49,6 +49,7 @@ header { options { language = "Cpp"; + genHashLines = false; } // ---------------------------------------------------------------------------- @@ -57,8 +58,8 @@ options { class ANTLRParser extends Parser; options { k = 4; - exportVocab = ANTLRExpr; buildAST = false; + exportVocab = ANTLRExpr; defaultErrorHandler = false; } { @@ 
-91,7 +92,10 @@ private: // TODO // - base, orth -// - equal do bool + +// TEMPORARY CHANGES -> +// -> temporarily commented out the invocations of the condit_* rules +// -> temporarily commented out 2 rules that use equal /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// @@ -102,6 +106,7 @@ private: // Rules for parsing string operators in scope (variables). // Returns boost::shared_ptr<Wccl::Function<Wccl::StrSet> > parse_string_operator + [const Corpus2::Tagset &tagset] returns [boost::shared_ptr<ANTLRParserResult<Wccl::StrSet> > res] { res.reset(new ANTLRParserResult<Wccl::StrSet>()); @@ -110,12 +115,16 @@ parse_string_operator : op = string_operators [*res->variables.get()] { res->op = op; } + | op = condit_str [*res->variables.get()] { + res->op = op; + } ; // ---------------------------------------------------------------------------- // Rules for parsing predicates in scope (variables). // Returns boost::shared_ptr<Wccl::Function<Wccl::Bool> > parse_predicates + [const Corpus2::Tagset &tagset] returns [boost::shared_ptr<ANTLRParserResult<Wccl::Bool> > res] { res.reset(new ANTLRParserResult<Wccl::Bool>()); @@ -124,12 +133,16 @@ parse_predicates : op = logical_predicates [*res->variables.get()] { res->op = op; } + | op = condit_bool [*res->variables.get()] { + res->op = op; + } ; // ---------------------------------------------------------------------------- // Rules for parsing tagset (symbol set) operators // Returns boost::shared_ptr<Wccl::Function<Wccl::TSet> > parse_sym_set_operator + [const Corpus2::Tagset &tagset] returns [boost::shared_ptr<ANTLRParserResult<Wccl::TSet> > res] { res.reset(new ANTLRParserResult<Wccl::TSet>()); @@ -138,6 +151,9 @@ parse_sym_set_operator : op = sym_set_operators [*res->variables.get()] { res->op = op; } + | op = condit_sym [*res->variables.get()] { + res->op = op; + } ; 
/////////////////////////////////////////////////////////////////////////////// @@ -146,103 +162,120 @@ parse_sym_set_operator /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // ---------------------------------------------------------------------------- -// Single or muliple values in string set -str_set_value_in - [boost::shared_ptr<Wccl::StrSet>& s_set] - : v1: STRING { - s_set->insert(str_token_ref_to_ustring(v1)); +// Single or multiple values in string set: +// [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"] +// Returns boost::shared_ptr<Wccl::StrSet> +str_set_literal + returns [boost::shared_ptr<Wccl::StrSet> s_set] +{ + s_set.reset(new Wccl::StrSet()); +} + : LBRACKET RBRACKET + | LBRACKET s1: STRING { + s_set->insert(str_token_ref_to_ustring(s1)); } - | v2: STRING COMMA str_set_value_in [s_set] { - s_set->insert(str_token_ref_to_ustring(v2)); - } + ( + COMMA s2: STRING { + s_set->insert(str_token_ref_to_ustring(s2)); + } + )* RBRACKET ; -// string set, called as unnamed (temporary) StrSet: -// calls: [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"] or variable $A +// Constant string set +// Returns boost::shared_ptr<Wccl::Constant<Wccl::StrSet> > str_set_value returns [boost::shared_ptr<Wccl::Constant<Wccl::StrSet> > val] { - boost::shared_ptr<Wccl::StrSet> set(new Wccl::StrSet); + boost::shared_ptr<Wccl::StrSet> set; } - : LBRACKET RBRACKET { - val.reset(new Wccl::Constant<Wccl::StrSet>(*set.get())); - } - | LBRACKET str_set_value_in [set] RBRACKET { + : set = str_set_literal { val.reset(new Wccl::Constant<Wccl::StrSet>(*set.get())); } ; + // ---------------------------------------------------------------------------- -// element of sym set -sym_set_elem +// Element of sym set. This rule is meant to insert the element into the set. 
+// Element may be: a or `a ` +sym_set_elem [boost::shared_ptr<Wccl::TSet> &t_set] : s1: SYMBOL { -// t_set->insert(token_ref_to_ustring(s1)); - } - | G_MARK s2: SYMBOL G_MARK { -// t_set->insert(token_ref_to_ustring(s2)); +// t_set->insert(token_ref_to_ustring(s1)); } - - | s3: SYMBOL COMMA sym_set_elem [t_set] { -// t_set->insert(token_ref_to_ustring(s3)); - } - | G_MARK s4: SYMBOL G_MARK COMMA sym_set_elem [t_set] { -// t_set->insert(token_ref_to_ustring(s3)); + | G_MARK s2: SYMBOL G_MARK { +// t_set->insert(token_ref_to_ustring(s2)); } ; - -// sym set in -sym_set_value_in - [boost::shared_ptr<Wccl::TSet> &set] - : sym_set_elem [set] +// Sym set literal; both alternatives consume the opening and the closing curly brace +// {} {sym_set_elem} {sym_set_elem, ..., sym_set_elem} +// Returns boost::shared_ptr<Wccl::TSet> +sym_set_literal + returns [boost::shared_ptr<Wccl::TSet> t_set] +{ + t_set.reset(new Wccl::TSet()); +} + : LCURLY RCURLY + | LCURLY sym_set_elem[t_set] (COMMA sym_set_elem[t_set]) * RCURLY ; -// sym set {} {a} {a, b} +// Constant symbol set +// Returns boost::shared_ptr<Wccl::Constant<Wccl::TSet> > sym_set_value returns [boost::shared_ptr<Wccl::Constant<Wccl::TSet> > val] { - boost::shared_ptr<Wccl::TSet> set(new Wccl::TSet); + boost::shared_ptr<Wccl::TSet> set; } - : LCURLY RCURLY { - val.reset(new Wccl::Constant<Wccl::TSet>(*set.get())); - } - | LCURLY sym_set_value_in [set] RCURLY { + : set = sym_set_literal { val.reset(new Wccl::Constant<Wccl::TSet>(*set.get())); } ; + // ---------------------------------------------------------------------------- // boolean value: +// Literal bool value may be True or False +// Returns boost::shared_ptr<Wccl::Bool> +bool_literal + returns [boost::shared_ptr<Wccl::Bool> val] + : "True" { val.reset(new Wccl::Bool(Wccl::Bool(true ))); } + | "False" { val.reset(new Wccl::Bool(Wccl::Bool(false))); } +; +// Constant bool value +// Returns boost::shared_ptr<Wccl::Constant<Wccl::Bool> > boolean_value returns [boost::shared_ptr<Wccl::Constant<Wccl::Bool> > val] - : "True" { val.reset(new
Wccl::Constant<Wccl::Bool>(Wccl::Bool(true ))); } - | "False" { val.reset(new Wccl::Constant<Wccl::Bool>(Wccl::Bool(false))); } +{ + boost::shared_ptr<Wccl::Bool> bool_lit; +} + : bool_lit = bool_literal { + val.reset(new Wccl::Constant<Wccl::Bool>(*bool_lit)); + } ; + // ---------------------------------------------------------------------------- // position value: -position_value - returns [boost::shared_ptr<Wccl::Constant<Wccl::Position> > val] +// Position literal may be (+|-)?(0-9)+ or begin or end or nowhere +// returns boost::shared_ptr<Wccl::Position> +position_literal + returns [boost::shared_ptr<Wccl::Position> val] : i: INT { - val.reset( - new Wccl::Constant<Wccl::Position>(Wccl::Position(token_ref_to_int(i))) - ); + val.reset(new Wccl::Position(Wccl::Position(token_ref_to_int(i)))); } | "begin" { - val.reset( - new Wccl::Constant<Wccl::Position>( - Wccl::Position(Wccl::Position::Begin) - ) - ); - } + val.reset(new Wccl::Position(Wccl::Position(Wccl::Position::Begin))); + } | "end" { - val.reset( - new Wccl::Constant<Wccl::Position>( - Wccl::Position(Wccl::Position::End) - ) - ); + val.reset(new Wccl::Position(Wccl::Position(Wccl::Position::End))); } | "nowhere" { - val.reset( - new Wccl::Constant<Wccl::Position>( - Wccl::Position(Wccl::Position::Nowhere) - ) - ); + val.reset(new Wccl::Position(Wccl::Position(Wccl::Position::Nowhere))); + } +; +// Constant position value: wraps the parsed literal and stores it in val +// Returns boost::shared_ptr<Wccl::Constant<Wccl::Position> > +position_value + returns [boost::shared_ptr<Wccl::Constant<Wccl::Position> > val] +{ + boost::shared_ptr<Wccl::Position> pos_lit; +} + : pos_lit = position_literal { + val.reset(new Wccl::Constant<Wccl::Position>(*pos_lit)); } ; @@ -251,14 +284,13 @@ position_value // VARIABLES /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// -// Position: $name // ---------------------------------------------------------------------------- 
-// returns accessor +// Position: $name +// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > position_variable_acc [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > pos_acc] -// : DOLLAR POS_PREFIX n: SYMBOL { - : DOLLAR n: SYMBOL { + : POS_PREFIX n: SYMBOL { vars.get_put<Wccl::Position>(token_ref_to_std_string(n)); Wccl::VariableAccessor<Wccl::Position> acc = @@ -267,7 +299,8 @@ position_variable_acc pos_acc.reset(new Wccl::VariableAccessor<Wccl::Position>(acc)); } ; -// returs vargetter +// Position vargetter +// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::Position> > position_variable [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::VarGetter<Wccl::Position> > op] @@ -278,24 +311,15 @@ position_variable op.reset(new Wccl::VarGetter<Wccl::Position>(*pos_acc.get())); } ; + // ---------------------------------------------------------------------------- -// Position reference: $(0-9)+name -// !! Cannot use for setvar(...,...) !! -position_ref_variable - [Wccl::Variables& vars] - : DOLLAR p_ref: INT n: SYMBOL { - // TODO - vars.get_put<Wccl::Position>(token_ref_to_std_string(n)); - } -; -// ---------------------------------------------------------------------------- -// String set, call examples: $name, $sName, $s_name, $s_Name etc. 
-// This expression gets variable of tyme StrSet from string-named variable -// returns accessor +// String set, $s:name +// This expression gets variable of the type StrSet from string-named variable +// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > str_set_variable_acc [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > strset_acc] - : DOLLAR STR_PREFIX n: SYMBOL { + : STR_PREFIX n: SYMBOL { // get/put variable to variables vars.get_put<Wccl::StrSet>(token_ref_to_std_string(n)); @@ -306,7 +330,8 @@ str_set_variable_acc strset_acc.reset(new Wccl::VariableAccessor<Wccl::StrSet>(acc)); } ; -// vargetter +// Vargetter for StrSet variable +// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::StrSet> > str_set_variable [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::VarGetter<Wccl::StrSet> > op] @@ -317,13 +342,14 @@ str_set_variable op.reset(new Wccl::VarGetter<Wccl::StrSet>(*strset_acc.get())); } ; + // ---------------------------------------------------------------------------- -// Symbol set: $tName -// returns accessor +// Symbol set: $t:name +// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > sym_set_variable_acc [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > symset_acc] - : DOLLAR TST_PREFIX n: SYMBOL { + : TST_PREFIX n: SYMBOL { vars.get_put<Wccl::TSet>(token_ref_to_std_string(n)); Wccl::VariableAccessor<Wccl::TSet> acc = @@ -332,7 +358,8 @@ sym_set_variable_acc symset_acc.reset(new Wccl::VariableAccessor<Wccl::TSet>(acc)); } ; -// returns vargetter +// Vargetter for symbol set variable +// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::TSet> > sym_set_variable [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::VarGetter<Wccl::TSet> > op] @@ -343,13 +370,14 @@ sym_set_variable op.reset(new Wccl::VarGetter<Wccl::TSet>(*symset_acc.get())); } ; + // ---------------------------------------------------------------------------- -// Bool: 
$bName -// returns accessor +// Bool: $b:name +// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > boolean_variable_acc [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > bool_acc] - : DOLLAR BOOL_PREFIX n: SYMBOL { + : BOOL_PREFIX n: SYMBOL { vars.get_put<Wccl::Bool>(token_ref_to_std_string(n)); Wccl::VariableAccessor<Wccl::Bool> acc = @@ -358,7 +386,8 @@ boolean_variable_acc bool_acc.reset(new Wccl::VariableAccessor<Wccl::Bool>(acc)); } ; -// returns vargetter +// Vargetter for bool variable +// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::Bool> > boolean_variable [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::VarGetter<Wccl::Bool> > op] @@ -378,6 +407,7 @@ boolean_variable // ---------------------------------------------------------------------------- // ---------------------------------------------------------------------------- // Setvar operator +// Returns boost::shared_ptr<Wccl::Function<Wccl::Bool> > // ---------------------------------------------------------------------------- setvar_op [Wccl::Variables& vars] @@ -453,7 +483,7 @@ sym_set_operators [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::Function<Wccl::TSet> > ret] : ret = op_sym_set [vars] - | ret = condit_sym [vars] +// | ret = condit_sym [vars] ; // Implementations of symbol set operators: // ---------------------------------------------------------------------------- @@ -485,10 +515,13 @@ condit_sym op.reset(new Wccl::Conditional<Wccl::TSet>(test, p_true)); } } - | Q_MARK p_true = sym_set_operators [vars] - Q_MARK test = logical_predicates [vars] { - op.reset(new Wccl::Conditional<Wccl::TSet>(test, p_true)); - } + | Q_MARK + (p_true = sym_set_operators [vars] | p_true = condit_sym [vars]) + Q_MARK + (test = logical_predicates [vars] | test = condit_bool [vars]) + { + op.reset(new Wccl::Conditional<Wccl::TSet>(test, p_true)); + } ; // ---------------------------------------------------------------------------- // 
---------------------------------------------------------------------------- @@ -523,7 +556,7 @@ string_operators | ret = op_upper [vars] | ret = op_affix [vars] | ret = op_str_set [vars] - | ret = condit_str [vars] +// | ret = condit_str [vars] ; // Implementations of string operators: // ---------------------------------------------------------------------------- @@ -531,9 +564,9 @@ op_orth [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret] { - // TODO + boost::shared_ptr<Wccl::VarGetter<Wccl::Position> > pos; } - : "orth" LBRACKET position_ref_variable [vars] RBRACKET { + : "orth" LBRACKET pos = position_variable [vars] RBRACKET { // ret = TODO } ; @@ -542,9 +575,9 @@ op_base [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret] { - // TODO + boost::shared_ptr<Wccl::VarGetter<Wccl::Position> > pos; } - : "base" LBRACKET position_ref_variable [vars] RBRACKET { + : "base" LBRACKET pos = position_variable [vars] RBRACKET { // ret = TODO } ; @@ -609,10 +642,13 @@ condit_str op.reset(new Wccl::Conditional<Wccl::StrSet>(test, p_true)); } } - | Q_MARK p_true = string_operators [vars] - Q_MARK test = logical_predicates [vars] { - op.reset(new Wccl::Conditional<Wccl::StrSet>(test, p_true)); - } + | Q_MARK + (p_true = string_operators [vars] | p_true = condit_str [vars]) + Q_MARK + (test = logical_predicates [vars] | test = condit_bool [vars]) + { + op.reset(new Wccl::Conditional<Wccl::StrSet>(test, p_true)); + } ; // ---------------------------------------------------------------------------- @@ -633,7 +669,7 @@ logical_predicates | ret = lpred_regex [vars] | ret = setvar_op [vars] | ret = lpred_inout [vars] - | ret = condit_bool [vars] +// | ret = condit_bool [vars] ; // ---------------------------------------------------------------------------- // comma-separated predicates @@ -812,10 +848,13 @@ condit_bool op.reset(new Wccl::Conditional<Wccl::Bool>(test, p_true)); } } - | Q_MARK p_true = 
logical_predicates [vars] - Q_MARK test = logical_predicates [vars] { - op.reset(new Wccl::Conditional<Wccl::Bool>(test, p_true)); - } + | Q_MARK + (p_true = logical_predicates [vars] | p_true = condit_bool [vars]) + Q_MARK + (test = logical_predicates [vars] | test = condit_bool [vars]) + { + op.reset(new Wccl::Conditional<Wccl::Bool>(test, p_true)); + } ; /////////////////////////////////////////////////////////////////////////////// @@ -827,13 +866,14 @@ class ANTLRLexer extends Lexer; options { exportVocab = ANTLRExpr; charVocabulary = '\3'..'\377'; - testLiterals = false; - k = 2; +// testLiterals = false; + k = 4; } STRING options { paraphrase = "a string"; + testLiterals = true; } : '"' (~'"')* '"' | '\'' (~'\'')* '\'' @@ -842,6 +882,7 @@ options { INT options { paraphrase = "Integer"; + testLiterals = true; } : ('-'|'+')? ('0'..'9')+ ; @@ -849,6 +890,7 @@ options { QUOT_MARK options { paraphrase = "Quota mark"; + testLiterals = true; } : '\'' ; @@ -856,6 +898,7 @@ options { APOS_MARK options { paraphrase = "Aposptrophe mark"; + testLiterals = true; } : '"' ; @@ -863,6 +906,7 @@ options { Q_MARK options { paraphrase = "Query mark"; + testLiterals = true; } : '?' ; @@ -870,6 +914,7 @@ options { E_MARK options { paraphrase = "Exclamanation mark"; + testLiterals = true; } : '!' 
; @@ -877,6 +922,7 @@ options { G_MARK options { paraphrase = "Gravis mark"; + testLiterals = true; } : '`' ; @@ -884,36 +930,38 @@ options { STR_PREFIX options { paraphrase = "String prefix"; + testLiterals = true; } - : "s:" + : "$s:" ; TST_PREFIX options { paraphrase = "Tag set (symbol) prefix"; + testLiterals = true; } - : "t:" + : "$t:" ; BOOL_PREFIX options { paraphrase = "Bool prefix"; + testLiterals = true; } - : "b:" + : "$b:" ; -/* POS_PREFIX options { paraphrase = "Position prefix"; } - : "p:" + : '$' ; -*/ LBRACKET options { paraphrase = "'['"; + testLiterals = true; } : '[' ; @@ -921,6 +969,7 @@ options { RBRACKET options { paraphrase = "']'"; + testLiterals = true; } : ']' ; @@ -928,6 +977,7 @@ options { LPAREN options { paraphrase = "'('"; + testLiterals = true; } : '(' ; @@ -935,6 +985,7 @@ options { RPAREN options { paraphrase = "')'"; + testLiterals = true; } : ')' ; @@ -942,6 +993,7 @@ options { LCURLY options { paraphrase = "'{'"; + testLiterals = true; } : '{' ; @@ -949,20 +1001,25 @@ options { RCURLY options { paraphrase = "'}'"; + testLiterals = true; } : '}' ; +/* DOLLAR options { paraphrase = "'$'"; + testLiterals = true; } : '$' ; +*/ AT_MARK options { paraphrase = "'@'"; + testLiterals = true; } : '@' ; @@ -970,6 +1027,7 @@ options { COMMA options { paraphrase = "','"; + testLiterals = true; } : ',' ; @@ -993,13 +1051,16 @@ WS COMMENT options { paraphrase = "Comment"; + testLiterals = true; } : "//" (~'\n')* '\n'{ $setType(antlr::Token::SKIP); newline(); } + | "/*" ( options { greedy = false; } : . )* "*/" { /* non-greedy loop: stop at the first comment terminator */ $setType(antlr::Token::SKIP); } ; HASH options { paraphrase = "'#'"; + testLiterals = true; } : '#' ; @@ -1007,6 +1068,7 @@ options { DSEPARATOR options { paraphrase = "':-'"; + testLiterals = true; } : ":-" ; -- GitLab