diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index d06ac7f16a48bfd6c97185ba0da039670d43bd66..ef49f4ba784381f0bbd1f44db753dbdc4b4256d5 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -898,7 +898,7 @@ position_first_token [const Corpus2::Tagset& tagset, Variables& vars] { boost::shared_ptr<Function<Match> > m; } - : "first" LPAREN m = match_fit [tagset, vars] RPAREN { + : "first" LPAREN m = match_operator [tagset, vars] RPAREN { ret.reset(new FirstToken(m)); } ; @@ -911,7 +911,7 @@ position_last_token [const Corpus2::Tagset& tagset, Variables& vars] { boost::shared_ptr<Function<Match> > m; } - : "last" LPAREN m = match_fit [tagset, vars] RPAREN { + : "last" LPAREN m = match_operator [tagset, vars] RPAREN { ret.reset(new LastToken(m)); } ; @@ -1324,8 +1324,8 @@ bool_ann std::string chan_name; } : "ann" LPAREN - match_from = match_fit [tagset, vars] COMMA - (match_to = match_fit [tagset, vars] COMMA)? + match_from = match_operator [tagset, vars] COMMA + (match_to = match_operator [tagset, vars] COMMA)? name : STRING RPAREN { if (match_to) { @@ -1350,8 +1350,8 @@ bool_annsub std::string chan_name; } : "annsub" LPAREN - match_from = match_fit [tagset, vars] COMMA - (match_to = match_fit [tagset, vars] COMMA)? + match_from = match_operator [tagset, vars] COMMA + (match_to = match_operator [tagset, vars] COMMA)? 
name : STRING RPAREN { @@ -1402,7 +1402,7 @@ debug_print_operator ) | ( - v = match_fit [tagset, vars] { + v = match_operator [tagset, vars] { ret.reset(new DebugPrint(v)); } ) @@ -1687,7 +1687,7 @@ match_empty { boost::shared_ptr<Function<Match> > arg; } - : arg = match_fit [tagset, vars] { + : arg = match_operator [tagset, vars] { op.reset(new IsEmpty<Match>(arg)); } ; @@ -1720,6 +1720,46 @@ strset_empty } ; +/////////////////////////////////////////////////////////////////////////////// +// Match functional operators, +// which return boost::shared_ptr<Function<Match> > +/////////////////////////////////////////////////////////////////////////////// + +// ---------------------------------------------------------------------------- +// A wrapper for match variable and match value. +match_var_val [const Corpus2::Tagset& tagset, Variables& vars] + returns [boost::shared_ptr<Function<Match> > ret] + : ret = match_vector_variable [vars] + | ret = match_value_const +; + +/////////////////////////////////////////////////////////////////////////////// +// Match operators. +// Returns boost::shared_ptr<Function<Match> > +/////////////////////////////////////////////////////////////////////////////// +match_operator + [const Corpus2::Tagset& tagset, Variables& vars] + returns [boost::shared_ptr<Function<Match> > ret] +{ + // +} + : + ( ret = match_var_val [tagset, vars] + | {LA(1)==LITERAL_M || LA(1)==COLON}? ("M")? 
{ + ret.reset(new VarGetter<Match>(vars.create_accessor<Match>("_M"))); + ret.reset(new Submatch(ret, 1)); + } + | "MA" { + ret.reset(new VarGetter<Match>(vars.create_accessor<Match>("_M"))); + ret.reset(new Submatch(ret, 2)); + } + | LPAREN ret = match_operator [tagset, vars] RPAREN + ) + ( // if a COLON token follows the match, we have a submatch reference + COLON i: UNSIGNED_INT { ret.reset(new Submatch(ret, token_ref_to_int(i))); } + )* +; + // ---------------------------------------------------------------------------- // ---------------------------------------------------------------------------- // Tagging actions and rules: @@ -2203,9 +2243,9 @@ match_mark_action boost::shared_ptr<Function<Match> > head_match; } : "mark" LPAREN - match_from = match_fit[tagset, vars] COMMA - ( match_to = match_fit[tagset, vars] COMMA - ( head_match = match_fit[tagset, vars] COMMA )? + match_from = match_operator[tagset, vars] COMMA + ( match_to = match_operator[tagset, vars] COMMA + ( head_match = match_operator[tagset, vars] COMMA )? )? annotation_name : STRING RPAREN { @@ -2242,7 +2282,7 @@ match_unmark_action boost::shared_ptr<Function<Match> > match_at; } : "unmark" LPAREN - match_at = match_fit[tagset, vars] COMMA + match_at = match_operator[tagset, vars] COMMA annotation_name : STRING RPAREN { m_act.reset( @@ -2274,39 +2314,6 @@ match_action_comma_sep )* ; - -// Function<Match> is wrapper for Constant<Match> and Function<Match> -// Returns boost::shared_ptr<Function<Match> > -match_fit - [const Corpus2::Tagset& tagset, Variables& vars] - returns [boost::shared_ptr<Function<Match> > ret] -{ - // -} - : - ( ret = match_var_val [tagset, vars] - | {LA(1)==LITERAL_M || LA(1)==COLON}? ("M")? 
{ - ret.reset(new VarGetter<Match>(vars.create_accessor<Match>("_M"))); - ret.reset(new Submatch(ret, 1)); - } - | "MA" { - ret.reset(new VarGetter<Match>(vars.create_accessor<Match>("_M"))); - ret.reset(new Submatch(ret, 2)); - } - | LPAREN ret = match_fit [tagset, vars] RPAREN - ) - ( // if there's an arrow after the match, we have a submatch reference - COLON i: UNSIGNED_INT { ret.reset(new Submatch(ret, token_ref_to_int(i))); } - )* -; - -match_var_val - [const Corpus2::Tagset& tagset, Variables& vars] - returns [boost::shared_ptr<Function<Match> > ret] - : ret = match_vector_variable [vars] - | ret = match_value_const -; - /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // ANTLR LEXER