Newer
Older
// apply(match(), cond(conditions), actions(actions))
// apply(match(), actions(actions))
// Returns boost::shared_ptr<ApplyOperator>
match_apply_operator
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<ApplyOperator> ret_op]
{
VariableAccessor<Match> matches = vars.create_accessor<Match>("_M");;
boost::shared_ptr<const MatchOperator> match_op;
boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > actions;
boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > conditions;
: "apply" LPAREN
match_op = match_operator[tagset, vars] COMMA
("cond" LPAREN conditions = bool_operator_comma_sep [tagset, vars] RPAREN COMMA)?
"actions" LPAREN actions = match_action_comma_sep [tagset, vars] RPAREN
RPAREN {
if (conditions) {
ret_op.reset(
new ApplyOperator(matches, match_op, actions, conditions)
);
}
else {
ret_op.reset(
new ApplyOperator(matches, match_op, actions)
);
}
}
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
;
// Match operator: match(match_conditions)
// Returns boost::shared_ptr<MatchOperator>
match_operator
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<MatchOperator> op]
{
boost::shared_ptr<ConjConditions> match_cond;
}
: "match" LPAREN match_cond = match_condition [tagset,vars] RPAREN {
op.reset(new MatchOperator(match_cond));
}
;
// Match conditions. Wrapper for vector of the match conditions
match_condition
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<ConjConditions> condition]
{
std::vector<boost::shared_ptr<const MatchCondition> > m_cond;
}
: m_cond = match_condition_in [tagset, vars] {
condition.reset(new ConjConditions(m_cond));
}
;
// Match conditions.
// Retutns std::vector< boost::shared_ptr<const MatchCondition> >
match_condition_in
[const Corpus2::Tagset& tagset, Variables& vars]
returns [std::vector< boost::shared_ptr<const MatchCondition> > ret]
{
boost::shared_ptr<const MatchCondition> r_cond;
}
: r_cond = match_cond_all[tagset, vars] {
ret.push_back(r_cond);
}
(
COMMA
r_cond = match_cond_all[tagset, vars] {
ret.push_back(r_cond);
}
)*
;
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
// Match variants variant(v1), variant(v2), ...
// Retutns boost::shared_ptr<std::vector<ConjConditions> >
match_variants
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<std::vector<ConjConditions> > variants]
{
variants.reset(new std::vector<ConjConditions>());
boost::shared_ptr<ConjConditions> variant;
}
: "variant" LPAREN variant = match_condition [tagset, vars] RPAREN {
// TODO
// variants->push_back(variant);
}
(
COMMA "variant" LPAREN variant = match_condition [tagset, vars] RPAREN {
// TODO
// variants->push_back(variant);
}
)*
;
// One of the match condition
// Returns boost::shared_ptr<const MatchCondition>
match_cond_all
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<const MatchCondition> ret]
: ret = match_cond_optional [tagset, vars]
| ret = match_cond_repeate [tagset, vars]
| ret = match_cond_token [tagset, vars]
| ret = match_cond_oneof [tagset, vars]
| ret = match_cond_longest [tagset, vars]
// Match condition - token (wraps a L0 predicate)
// Returns boost::shared_ptr<const MatchCondition>
match_cond_token
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<const TokenCondition> ret]
{
boost::shared_ptr<Function<Bool> > bool_op;
}
: bool_op = bool_operator [tagset, vars] {
ret = boost::make_shared<TokenCondition>(bool_op);
}
;
// Match condition - optional
// Returns boost::shared_ptr<OptionalMatch>
match_cond_optional
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<OptionalMatch> mtch]
{
boost::shared_ptr<ConjConditions> m_cond;
}
: "optional" LPAREN m_cond = match_condition [tagset, vars] RPAREN {
mtch.reset(new OptionalMatch(m_cond));
}
;
// Match condition - repeat
// Returns boost::shared_ptr<RepeatedMatch>
match_cond_repeate
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<RepeatedMatch> mtch]
{
boost::shared_ptr<ConjConditions> m_cond;
}
: "repeat" LPAREN m_cond = match_condition [tagset, vars] RPAREN {
mtch.reset(new RepeatedMatch(m_cond));
// Match condition - is(ann_name)
// Returns boost::shared_ptr<IsAnnotatedAs>
match_cond_is
returns [boost::shared_ptr<IsAnnotatedAs> mtch]
: "is" LPAREN annotation_name: STRING RPAREN {
mtch.reset(new IsAnnotatedAs(token_ref_to_std_string(annotation_name)));
}
;
// Match condition - text(text)
// Returns boost::shared_ptr<MatchText>
match_cond_text
returns [boost::shared_ptr<MatchText> mtch]
: "text" LPAREN txt: STRING RPAREN {
mtch.reset(new MatchText(token_ref_to_ustring(txt)));
}
;
// Match condition - oneof(variant1(v1), variant(v2), ...)
// Returns boost::shared_ptr<OneOf>
match_cond_oneof
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<OneOf> onf]
{
boost::shared_ptr<std::vector<ConjConditions> > variants;
}
: "oneof" LPAREN variants = match_variants [tagset, vars] RPAREN {
onf.reset(new OneOf(variants));
}
;
// Match condition - longest(variant1(v1), variant(v2), ...)
// Returns boost::shared_ptr<Longest>
match_cond_longest
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Longest> lng]
{
boost::shared_ptr<std::vector<ConjConditions> > variants;
}
: "longest" LPAREN variants = match_variants [tagset, vars] RPAREN {
lng.reset(new Longest(variants));
}
;
// ----------------------------------------------------------------------------
// Match actions. Match action can be mark or unmark
// Returns boost::shared_ptr<MatchAction>
match_action
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<MatchAction> m_act]
: m_act = match_mark_action [tagset, vars]
| m_act = match_unmark_action [tagset, vars]
;
// Match mark action
match_mark_action
[const Corpus2::Tagset& tagset, Variables& vars]
{
boost::shared_ptr<Function<Match> > match_to;
boost::shared_ptr<Function<Match> > match_from;
boost::shared_ptr<Function<Match> > head_match;
}
: "mark" LPAREN
match_from = match_fit[tagset, vars] COMMA
( match_to = match_fit[tagset, vars] COMMA
( head_match = match_fit[tagset, vars] COMMA )?
)?
if (!match_to) {
m_act.reset(
new MarkMatch(
match_from,
((antlr::Token*)annotation_name)->getText()));
} else {
if (!head_match) {
m_act.reset(
new MarkMatch(
match_from,
match_to,
((antlr::Token*)annotation_name)->getText()));
} else {
m_act.reset(
new MarkMatch(
match_from,
match_to,
head_match,
((antlr::Token*)annotation_name)->getText()));
}
;
// Match unmark action
// Returns boost::shared_ptr<UnmarkMatch>
match_unmark_action
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<UnmarkMatch> m_act]
{
boost::shared_ptr<Function<Match> > match_at;
}
: "unmark" LPAREN
match_at = match_fit[tagset, vars] COMMA
annotation_name : STRING
RPAREN {
m_act.reset(
new UnmarkMatch(
match_at,
((antlr::Token*)annotation_name)->getText()));
}
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
;
// Match action separated by comma
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > >
match_action_comma_sep
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > r_vec]
{
boost::shared_ptr<MatchAction> act;
r_vec.reset(
new std::vector<boost::shared_ptr<MatchAction> >
);
}
: act = match_action [tagset, vars] {
r_vec->push_back(act);
}
(
COMMA act = match_action [tagset, vars] {
r_vec->push_back(act);
}
)*
;
Adam Wardynski
committed
// Function<Match> is wrapper for Constant<Match> and Function<Match>
// Returns boost::shared_ptr<Function<Match> >
match_fit
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Match> > ret]
{
//
}
:
( ret = match_var_val [tagset, vars]
| "M" { ret.reset(new VarGetter<Match>(vars.create_accessor<Match>("_M"))); }
| LPAREN ret = match_fit [tagset, vars] RPAREN
)
( // if there's an arrow after the match, we have a submatch reference
ARROW i: UNSIGNED_INT { ret.reset(new Submatch(ret, token_ref_to_int(i))); }
;
match_var_val
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Match> > ret]
: ret = match_vector_variable [vars]
| ret = match_value_const
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
class ANTLRLexer extends Lexer;
options {
exportVocab = ANTLRExpr;
charVocabulary = '\3'..'\377';
: '"'! (~('"' | '\n' | '\r'))* '"'!
| '\''! (~('\'' | '\n' | '\r'))* '\''!
: ('-'|'+') (' '!|'\t'!)* ('0'..'9')+
UNSIGNED_INT
options {
paraphrase = "Unsigned integer";
}
: ('0'..'9')+
;
STR_PREFIX
options {
paraphrase = "String prefix";
}
Paweł Kędzia
committed
: "$s:"
;
TST_PREFIX
options {
Paweł Kędzia
committed
: "$t:"
;
BOOL_PREFIX
options {
paraphrase = "Bool prefix";
}
Paweł Kędzia
committed
: "$b:"
Paweł Kędzia
committed
paraphrase = "Position prefix";
Paweł Kędzia
committed
: '$'
MATCH_VECTOR_PREFIX
options {
paraphrase = "Match vector prefix";
}
: "$m:"
;
RBRACKET
options {
paraphrase = "']'";
}
: ']'
LPAREN
options {
paraphrase = "'('";
}
: '('
RPAREN
options {
paraphrase = "')'";
}
: ')'
LCURLY
options {
paraphrase = "'{'";
}
: '{'
RCURLY
options {
paraphrase = "'}'";
}
: '}'
AT_MARK
options {
paraphrase = "'@'";
}
: '@'
ARROW
options {
paraphrase = "->";
}
: "->"
;
: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
| '\t'
| '\f'
|
( "\r\n"
| '\r'
| '\n'
) { newline(); }
) { $setType(antlr::Token::SKIP); }
paraphrase = "Single line comment";
}
: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP); }
;
ML_COMMENT
options {
paraphrase = "Multi line comment";
( // TODO: test it and add reference to the site it's taken from!
/* This actually works OK despite the ambiguity that
'\r' '\n' can be matched in one alternative or by matching
'\r' in one iteration and '\n' in another.. But
this is really matched just by one rule per (...)*
loop iteration, so it's OK.
This is exactly how they do it all over the web - just
turn off the warning for this particular token.*/
options {
generateAmbigWarnings = false;
}
: { LA(2)!='/' }? '*'
| '\r' '\n' { newline(); }
| '\r' { newline(); }
| '\n' { newline(); }
)*
{ $setType(antlr::Token::SKIP); }
//DSEPARATOR
//options {
// paraphrase = "':-'";
//}
// : ":-"
//;