Newer
Older
| ret = match_cond_token [tagset, vars]
// Match condition - token (wraps a L0 predicate).
// Parses one bool operator and wraps the resulting predicate in a
// TokenCondition.
// Returns boost::shared_ptr<const TokenCondition>
match_cond_token
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<const TokenCondition> ret]
{
	boost::shared_ptr<Function<Bool> > predicate;
}
	: predicate = bool_operator [tagset, vars]
		{
			ret = boost::make_shared<TokenCondition>(predicate);
		}
;
// Match condition - optional.
// Parses: "optional" ( match_condition ) and wraps the parsed conditions
// in an OptionalMatch.
// Returns boost::shared_ptr<OptionalMatch>
match_cond_optional
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<OptionalMatch> mtch]
{
	boost::shared_ptr<ConjConditions> inner;
}
	: "optional" LPAREN inner = match_condition [tagset, vars] RPAREN
		{
			mtch = boost::make_shared<OptionalMatch>(inner);
		}
;
// Match condition - repeat.
// Parses: "repeat" ( match_condition ) and wraps the parsed conditions
// in a RepeatedMatch.
// Returns boost::shared_ptr<RepeatedMatch>
match_cond_repeate
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<RepeatedMatch> mtch]
{
	boost::shared_ptr<ConjConditions> m_cond;
}
	: "repeat" LPAREN m_cond = match_condition [tagset, vars] RPAREN {
		mtch.reset(new RepeatedMatch(m_cond));
	}
;
// Match condition - is(ann_name).
// Parses: "is" ( STRING ) and builds an IsAnnotatedAs from the text of the
// STRING token.
// Returns boost::shared_ptr<IsAnnotatedAs>
match_cond_is
	returns [boost::shared_ptr<IsAnnotatedAs> mtch]
	: "is" LPAREN annotation_name: STRING RPAREN
		{
			mtch = boost::make_shared<IsAnnotatedAs>(
				token_ref_to_std_string(annotation_name));
		}
;
// ----------------------------------------------------------------------------
// Match actions. A match action is either a mark or an unmark action.
// This rule only dispatches: it tries match_mark_action, then
// match_unmark_action, forwarding tagset and vars unchanged, and returns
// whatever the chosen sub-rule produced.
// Returns boost::shared_ptr<MatchAction>
match_action
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<MatchAction> m_act]
: m_act = match_mark_action [tagset, vars]
| m_act = match_unmark_action [tagset, vars]
;
// Match mark action.
// Accepted forms (the last argument is always the annotation name):
//   mark(from, "name")
//   mark(from, to, "name")
//   mark(from, to, head, "name")
// The MarkMatch constructor used depends on which optional match
// expressions were present.
// Returns boost::shared_ptr<MarkMatch>
// NOTE(review): the returns clause, the `annotation_name : STRING RPAREN {`
// line and the closing braces were absent from this copy of the rule; they
// are restored here by analogy with match_unmark_action. Confirm the return
// type against the upstream grammar.
match_mark_action
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<MarkMatch> m_act]
{
	boost::shared_ptr<Function<Match> > match_to;
	boost::shared_ptr<Function<Match> > match_from;
	boost::shared_ptr<Function<Match> > head_match;
}
	: "mark" LPAREN
			match_from = match_fit[tagset, vars] COMMA
			( match_to = match_fit[tagset, vars] COMMA
				( head_match = match_fit[tagset, vars] COMMA )?
			)?
			annotation_name : STRING
		RPAREN {
			// Same token-to-string helper as match_cond_is, instead of
			// repeating a C-style cast in every branch.
			const std::string ann_name = token_ref_to_std_string(annotation_name);
			if (!match_to) {
				// mark(from, "name")
				m_act.reset(new MarkMatch(match_from, ann_name));
			} else if (!head_match) {
				// mark(from, to, "name")
				m_act.reset(new MarkMatch(match_from, match_to, ann_name));
			} else {
				// mark(from, to, head, "name")
				m_act.reset(
					new MarkMatch(match_from, match_to, head_match, ann_name));
			}
		}
;
// Match unmark action.
// Parses: "unmark" ( match_fit , STRING ) and builds an UnmarkMatch from the
// parsed match expression and the text of the STRING token.
// Returns boost::shared_ptr<UnmarkMatch>
match_unmark_action
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<UnmarkMatch> m_act]
{
	boost::shared_ptr<Function<Match> > match_at;
}
	: "unmark" LPAREN
			match_at = match_fit[tagset, vars] COMMA
			annotation_name : STRING
		RPAREN {
			// Same token-to-string helper as match_cond_is, instead of a
			// C-style cast to antlr::Token*.
			m_act.reset(
				new UnmarkMatch(
					match_at,
					token_ref_to_std_string(annotation_name)));
		}
;
// Comma-separated list of match actions; at least one action is required.
// Every parsed action is appended, in source order, to a freshly created
// vector.
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > >
match_action_comma_sep
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > r_vec]
{
	typedef std::vector<boost::shared_ptr<MatchAction> > ActionVector;
	boost::shared_ptr<MatchAction> action;
	r_vec = boost::make_shared<ActionVector>();
}
	: action = match_action [tagset, vars] { r_vec->push_back(action); }
	  ( COMMA action = match_action [tagset, vars] { r_vec->push_back(action); } )*
;
// (blame-view residue: Adam Wardynski committed)
// Function<Match> is wrapper for Constant<Match> and Function<Match>
// A match expression is one of:
//   - a match variable or match constant (match_var_val),
//   - the keyword "M", read through an accessor for the variable "_M",
//   - a parenthesised match expression,
// optionally followed by submatch references of the form -> UNSIGNED_INT,
// each of which wraps the expression parsed so far in a Submatch.
// Returns boost::shared_ptr<Function<Match> >
// NOTE(review): the closing of the arrow subrule was missing from this copy.
// It is restored as `)*` because the action wraps the current ret and so
// composes under repetition; if upstream used `)?`, this form accepts a
// superset of it. Confirm against the upstream grammar.
match_fit
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Match> > ret]
	:
	(	ret = match_var_val [tagset, vars]
	|	"M" { ret.reset(new VarGetter<Match>(vars.create_accessor<Match>("_M"))); }
	|	LPAREN ret = match_fit [tagset, vars] RPAREN
	)
	( // if there's an arrow after the match, we have a submatch reference
		ARROW i: UNSIGNED_INT { ret.reset(new Submatch(ret, token_ref_to_int(i))); }
	)*
;
// Match value: either the result of match_vector_variable or the result of
// match_value_const.
// Note: tagset is accepted but is not forwarded to either sub-rule; only
// vars is passed on (to match_vector_variable).
// Returns boost::shared_ptr<Function<Match> >
match_var_val
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Match> > ret]
: ret = match_vector_variable [vars]
| ret = match_value_const
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
class ANTLRLexer extends Lexer;
options {
exportVocab = ANTLRExpr;
charVocabulary = '\3'..'\377';
: '"'! (~('"' | '\n' | '\r'))* '"'!
| '\''! (~('\'' | '\n' | '\r'))* '\''!
: ('-'|'+') (' '!|'\t'!)* ('0'..'9')+
// Lexer token: one or more decimal digits with no sign.
// The paraphrase is the label given to this token in the rule options.
UNSIGNED_INT
options {
paraphrase = "Unsigned integer";
}
: ('0'..'9')+
;
// Lexer token: the literal three-character prefix "$s:".
STR_PREFIX
options {
	paraphrase = "String prefix";
}
	: "$s:"
;
// Lexer token: the literal three-character prefix "$t:".
// NOTE(review): the paraphrase line and the closing brace of the options
// block were missing from this copy; the paraphrase text below is a
// placeholder in the style of the sibling prefix tokens. Confirm the exact
// wording against the upstream grammar.
TST_PREFIX
options {
	paraphrase = "Tag set (symbol) prefix";
}
	: "$t:"
;
// Lexer token: the literal three-character prefix "$b:".
BOOL_PREFIX
options {
	paraphrase = "Bool prefix";
}
	: "$b:"
;
paraphrase = "Position prefix";
Paweł Kędzia
committed
: '$'
// Lexer token: the literal three-character prefix "$m:".
MATCH_VECTOR_PREFIX
options {
paraphrase = "Match vector prefix";
}
: "$m:"
;
// Lexer token: closing square bracket.
RBRACKET
options {
	paraphrase = "']'";
}
	: ']'
;
// Lexer token: opening parenthesis.
LPAREN
options {
	paraphrase = "'('";
}
	: '('
;
// Lexer token: closing parenthesis.
RPAREN
options {
	paraphrase = "')'";
}
	: ')'
;
// Lexer token: opening curly brace.
LCURLY
options {
	paraphrase = "'{'";
}
	: '{'
;
// Lexer token: closing curly brace.
RCURLY
options {
	paraphrase = "'}'";
}
	: '}'
;
// Lexer token: the at sign.
AT_MARK
options {
	paraphrase = "'@'";
}
	: '@'
;
// Lexer token: the two-character arrow "->".
// In the parser, match_fit uses it to introduce a submatch index
// (ARROW followed by UNSIGNED_INT).
ARROW
options {
paraphrase = "->";
}
: "->"
;
: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
| '\t'
| '\f'
|
( "\r\n"
| '\r'
| '\n'
) { newline(); }
) { $setType(antlr::Token::SKIP); }
paraphrase = "Single line comment";
}
: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP); }
;
// Multi-line comment token. The whole comment is consumed, newline() is
// called for every line break inside it, and the token type is set to SKIP.
// NOTE(review): the end of the options block, the opening "/*", the
// catch-all alternative ~('*'|'\n'|'\r'), the closing "*/" and the rule
// terminator were missing from this copy. They are restored from the
// standard ANTLR 2 multi-line comment idiom that the comment inside the loop
// describes. Confirm against the upstream grammar.
ML_COMMENT
options {
	paraphrase = "Multi line comment";
}
	: "/*"
	( // TODO: test it and add reference to the site it's taken from!
		/* This actually works OK despite the ambiguity that
		'\r' '\n' can be matched in one alternative or by matching
		'\r' in one iteration and '\n' in another.. But
		this is really matched just by one rule per (...)*
		loop iteration, so it's OK.
		This is exactly how they do it all over the web - just
		turn off the warning for this particular token.*/
		options {
			generateAmbigWarnings = false;
		}
		: { LA(2)!='/' }? '*'      // a '*' that does not start the closing delimiter
		| '\r' '\n' { newline(); }
		| '\r' { newline(); }
		| '\n' { newline(); }
		| ~('*'|'\n'|'\r')         // any other character inside the comment
	)*
	"*/"
	{ $setType(antlr::Token::SKIP); }
;
//DSEPARATOR
//options {
// paraphrase = "':-'";
//}
// : ":-"
//;