Newer
Older
// Match condition - token (wraps a L0 predicate)
// Returns boost::shared_ptr<const MatchCondition>
match_cond_token
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<const TokenCondition> ret]
{
boost::shared_ptr<Function<Bool> > bool_op;
}
: bool_op = bool_operator [tagset, vars] {
ret = boost::make_shared<TokenCondition>(bool_op);
}
;
// Match condition - optional
// Returns boost::shared_ptr<OptionalMatch>
match_cond_optional
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<OptionalMatch> mtch]
{
boost::shared_ptr<ConjConditions> m_cond;
}
: "optional" LPAREN m_cond = match_condition [tagset, vars] RPAREN {
mtch.reset(new OptionalMatch(m_cond));
}
;
// Match condition - repeat
// Returns boost::shared_ptr<RepeatedMatch>
match_cond_repeate
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<RepeatedMatch> mtch]
{
boost::shared_ptr<ConjConditions> m_cond;
}
: "repeat" LPAREN m_cond = match_condition [tagset, vars] RPAREN {
mtch.reset(new RepeatedMatch(m_cond));
}
;
// ----------------------------------------------------------------------------
// Match actions. Match action can be mark or unmark
// Returns boost::shared_ptr<MatchAction>
match_action
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<MatchAction> m_act]
: m_act = match_mark_action [tagset, vars]
| m_act = match_unmark_action [tagset, vars]
;
// Match mark action
match_mark_action
[const Corpus2::Tagset& tagset, Variables& vars]
{
boost::shared_ptr<Function<Match> > match_to;
boost::shared_ptr<Function<Match> > match_from;
boost::shared_ptr<Function<Match> > head_match;
}
: "mark" LPAREN
match_from = match_fit[tagset, vars] COMMA
( match_to = match_fit[tagset, vars] COMMA
( head_match = match_fit[tagset, vars] COMMA )?
)?
if (!match_to) {
m_act.reset(
new MarkMatch(
match_from,
((antlr::Token*)annotation_name)->getText()));
} else {
if (!head_match) {
m_act.reset(
new MarkMatch(
match_from,
match_to,
((antlr::Token*)annotation_name)->getText()));
} else {
m_act.reset(
new MarkMatch(
match_from,
match_to,
head_match,
((antlr::Token*)annotation_name)->getText()));
}
;
// Match unmark action
// Returns boost::shared_ptr<UnmarkMatch>
match_unmark_action
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<UnmarkMatch> m_act]
{
boost::shared_ptr<Function<Match> > match_at;
}
: "unmark" LPAREN
match_at = match_fit[tagset, vars] COMMA
annotation_name : STRING
RPAREN {
m_act.reset(
new UnmarkMatch(
match_at,
((antlr::Token*)annotation_name)->getText()));
}
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
;
// Match action separated by comma
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > >
match_action_comma_sep
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > r_vec]
{
boost::shared_ptr<MatchAction> act;
r_vec.reset(
new std::vector<boost::shared_ptr<MatchAction> >
);
}
: act = match_action [tagset, vars] {
r_vec->push_back(act);
}
(
COMMA act = match_action [tagset, vars] {
r_vec->push_back(act);
}
)*
;
Adam Wardynski
committed
// Function<Match> is wrapper for Constant<Match> and Function<Match>
// Returns boost::shared_ptr<Function<Match> >
match_fit
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Match> > ret]
{
//
}
:
( ret = match_var_val [tagset, vars]
| "M" { ret.reset(new VarGetter<Match>(vars.create_accessor<Match>("_M"))); }
| LPAREN ret = match_fit [tagset, vars] RPAREN
)
( // if there's an arrow after the match, we have a submatch reference
ARROW i: UNSIGNED_INT { ret.reset(new Submatch(ret, token_ref_to_int(i))); }
;
match_var_val
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Match> > ret]
: ret = match_vector_variable [vars]
| ret = match_value_const
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
class ANTLRLexer extends Lexer;
options {
exportVocab = ANTLRExpr;
charVocabulary = '\3'..'\377';
: '"'! (~('"' | '\n' | '\r'))* '"'!
| '\''! (~('\'' | '\n' | '\r'))* '\''!
: ('-'|'+') (' '!|'\t'!)* ('0'..'9')+
UNSIGNED_INT
options {
paraphrase = "Unsigned integer";
}
: ('0'..'9')+
;
STR_PREFIX
options {
paraphrase = "String prefix";
}
Paweł Kędzia
committed
: "$s:"
;
TST_PREFIX
options {
Paweł Kędzia
committed
: "$t:"
;
BOOL_PREFIX
options {
paraphrase = "Bool prefix";
}
Paweł Kędzia
committed
: "$b:"
Paweł Kędzia
committed
paraphrase = "Position prefix";
Paweł Kędzia
committed
: '$'
MATCH_VECTOR_PREFIX
options {
paraphrase = "Match vector prefix";
}
: "$m:"
;
RBRACKET
options {
paraphrase = "']'";
}
: ']'
LPAREN
options {
paraphrase = "'('";
}
: '('
RPAREN
options {
paraphrase = "')'";
}
: ')'
LCURLY
options {
paraphrase = "'{'";
}
: '{'
RCURLY
options {
paraphrase = "'}'";
}
: '}'
AT_MARK
options {
paraphrase = "'@'";
}
: '@'
ARROW
options {
paraphrase = "->";
}
: "->"
;
: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
| '\t'
| '\f'
|
( "\r\n"
| '\r'
| '\n'
) { newline(); }
) { $setType(antlr::Token::SKIP); }
paraphrase = "Single line comment";
}
: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP); }
;
ML_COMMENT
options {
paraphrase = "Multi line comment";
( // TODO: test it and add reference to the site it's taken from!
/* This actually works OK despite the ambiguity that
'\r' '\n' can be matched in one alternative or by matching
'\r' in one iteration and '\n' in another.. But
this is really matched just by one rule per (...)*
loop iteration, so it's OK.
This is exactly how they do it all over the web - just
turn off the warning for this particular token.*/
options {
generateAmbigWarnings = false;
}
: { LA(2)!='/' }? '*'
| '\r' '\n' { newline(); }
| '\r' { newline(); }
| '\n' { newline(); }
)*
{ $setType(antlr::Token::SKIP); }
//DSEPARATOR
//options {
// paraphrase = "':-'";
//}
// : ":-"
//;