Newer
Older
;
// Match unmark action
// Returns boost::shared_ptr<UnmarkMatch>
match_unmark_action
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<UnmarkMatch> m_act]
{
boost::shared_ptr<Function<Match> > match_at;
}
: "unmark" LPAREN
match_at = match_fit[tagset, vars] COMMA
annotation_name : STRING
RPAREN {
m_act.reset(
new UnmarkMatch(
match_at,
((antlr::Token*)annotation_name)->getText()));
}
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
;
// Match action separated by comma
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > >
match_action_comma_sep
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > r_vec]
{
boost::shared_ptr<MatchAction> act;
r_vec.reset(
new std::vector<boost::shared_ptr<MatchAction> >
);
}
: act = match_action [tagset, vars] {
r_vec->push_back(act);
}
(
COMMA act = match_action [tagset, vars] {
r_vec->push_back(act);
}
)*
;
Adam Wardynski
committed
// Function<Match> is wrapper for Constant<Match> and Function<Match>
// Returns boost::shared_ptr<Function<Match> >
match_fit
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Match> > ret]
{
//
}
: ret = match_vector_variable [vars]
| ret = match_value_const
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
class ANTLRLexer extends Lexer;
options {
exportVocab = ANTLRExpr;
charVocabulary = '\3'..'\377';
: '"'! (~('"' | '\n' | '\r'))* '"'!
| '\''! (~('\'' | '\n' | '\r'))* '\''!
: ('-'|'+') (' '!|'\t'!)* ('0'..'9')+
UNSIGNED_INT
options {
paraphrase = "Unsigned integer";
}
: ('0'..'9')+
;
STR_PREFIX
options {
paraphrase = "String prefix";
}
Paweł Kędzia
committed
: "$s:"
;
TST_PREFIX
options {
Paweł Kędzia
committed
: "$t:"
;
BOOL_PREFIX
options {
paraphrase = "Bool prefix";
}
Paweł Kędzia
committed
: "$b:"
Paweł Kędzia
committed
paraphrase = "Position prefix";
Paweł Kędzia
committed
: '$'
MATCH_VECTOR_PREFIX
options {
paraphrase = "Match vector prefix";
}
: "$m:"
;
RBRACKET
options {
paraphrase = "']'";
}
: ']'
LPAREN
options {
paraphrase = "'('";
}
: '('
RPAREN
options {
paraphrase = "')'";
}
: ')'
LCURLY
options {
paraphrase = "'{'";
}
: '{'
RCURLY
options {
paraphrase = "'}'";
}
: '}'
AT_MARK
options {
paraphrase = "'@'";
}
: '@'
: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
| '\t'
| '\f'
|
( "\r\n"
| '\r'
| '\n'
) { newline(); }
) { $setType(antlr::Token::SKIP); }
paraphrase = "Single line comment";
}
: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP); }
;
ML_COMMENT
options {
paraphrase = "Multi line comment";
( // TODO: test it and add reference to the site it's taken from!
/* This actually works OK despite the ambiguity that
'\r' '\n' can be matched in one alternative or by matching
'\r' in one iteration and '\n' in another.. But
this is really matched just by one rule per (...)*
loop iteration, so it's OK.
This is exactly how they do it all over the web - just
turn off the warning for this particular token.*/
options {
generateAmbigWarnings = false;
}
: { LA(2)!='/' }? '*'
| '\r' '\n' { newline(); }
| '\r' { newline(); }
| '\n' { newline(); }
)*
{ $setType(antlr::Token::SKIP); }
//DSEPARATOR
//options {
// paraphrase = "':-'";
//}
// : ":-"
//;