Newer
Older
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Mark> action]
{
boost::shared_ptr<Function<Position> > pos_begin, pos_end, pos_head;
}
: "mark" LPAREN
pos_begin = position_operator [tagset, vars] COMMA
pos_end = position_operator [tagset, vars] COMMA
(pos_head = position_operator [tagset, vars] COMMA)?
chan_name: STRING
RPAREN {
action.reset(new Mark(pos_begin, pos_end, pos_head, ((antlr::Token*)chan_name)->getText()));
}
;
// ----------------------------------------------------------------------------
// Unmark action
action_unmark
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Unmark> action]
{
boost::shared_ptr<Function<Position> > pos;
}
: "unmark" LPAREN
pos = position_operator [tagset, vars] COMMA
chan_name: STRING
RPAREN {
action.reset(new Unmark(pos, ((antlr::Token*)chan_name)->getText()));
}
;
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Match rules
ilor
committed
// Returns boost::shared_ptr<MatchRule>
match_rule_operator
[const Corpus2::Tagset& tagset, Variables& vars]
ilor
committed
returns [boost::shared_ptr<MatchRule> ret_op]
ilor
committed
boost::shared_ptr<ApplyOperator> apply;
ilor
committed
: apply = match_apply_operator [tagset, vars] {
ret_op = boost::make_shared<MatchRule>(vars, apply);
}
;
// Match apply operator:
// apply(match(), cond(conditions), actions(actions))
// apply(match(), actions(actions))
// Returns boost::shared_ptr<ApplyOperator>
match_apply_operator
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<ApplyOperator> ret_op]
{
VariableAccessor<Match> matches = vars.create_accessor<Match>("_M");;
boost::shared_ptr<ConjConditions> match_cond;
boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > actions;
boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > conditions;
: "apply" LPAREN
"match" LPAREN match_cond = match_condition [tagset,vars] RPAREN COMMA
("cond" LPAREN conditions = bool_operator_comma_sep [tagset, vars] RPAREN COMMA)?
"actions" LPAREN actions = match_action_comma_sep [tagset, vars] RPAREN
RPAREN {
if (conditions) {
ret_op.reset(
new ApplyOperator(matches, match_cond, actions, conditions)
);
}
else {
ret_op.reset(
new ApplyOperator(matches, match_cond, actions)
);
}
}
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
;
// Match conditions. Wrapper for vector of the match conditions
match_condition
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<ConjConditions> condition]
{
std::vector<boost::shared_ptr<const MatchCondition> > m_cond;
}
: m_cond = match_condition_in [tagset, vars] {
condition.reset(new ConjConditions(m_cond));
}
;
// Match conditions.
// Retutns std::vector< boost::shared_ptr<const MatchCondition> >
match_condition_in
[const Corpus2::Tagset& tagset, Variables& vars]
returns [std::vector< boost::shared_ptr<const MatchCondition> > ret]
{
boost::shared_ptr<const MatchCondition> r_cond;
}
: r_cond = match_cond_all[tagset, vars] {
ret.push_back(r_cond);
}
(
COMMA
r_cond = match_cond_all[tagset, vars] {
ret.push_back(r_cond);
}
)*
;
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
// Match variants variant(v1), variant(v2), ...
// Retutns boost::shared_ptr<std::vector<ConjConditions> >
match_variants
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<std::vector<ConjConditions> > variants]
{
variants.reset(new std::vector<ConjConditions>());
boost::shared_ptr<ConjConditions> variant;
}
: "variant" LPAREN variant = match_condition [tagset, vars] RPAREN {
// TODO
// variants->push_back(variant);
}
(
COMMA "variant" LPAREN variant = match_condition [tagset, vars] RPAREN {
// TODO
// variants->push_back(variant);
}
)*
;
// One of the match condition
// Returns boost::shared_ptr<const MatchCondition>
match_cond_all
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<const MatchCondition> ret]
: ret = match_cond_optional [tagset, vars]
| ret = match_cond_repeate [tagset, vars]
| ret = match_cond_token [tagset, vars]
| ret = match_cond_oneof [tagset, vars]
| ret = match_cond_longest [tagset, vars]
// Match condition - token (wraps a L0 predicate)
// Returns boost::shared_ptr<const MatchCondition>
match_cond_token
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<const TokenCondition> ret]
{
boost::shared_ptr<Function<Bool> > bool_op;
}
: bool_op = bool_operator [tagset, vars] {
ret = boost::make_shared<TokenCondition>(bool_op);
}
;
// Match condition - optional
// Returns boost::shared_ptr<OptionalMatch>
match_cond_optional
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<OptionalMatch> mtch]
{
boost::shared_ptr<ConjConditions> m_cond;
}
: "optional" LPAREN m_cond = match_condition [tagset, vars] RPAREN {
mtch.reset(new OptionalMatch(m_cond));
}
;
// Match condition - repeat
// Returns boost::shared_ptr<RepeatedMatch>
match_cond_repeate
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<RepeatedMatch> mtch]
{
boost::shared_ptr<ConjConditions> m_cond;
}
: "repeat" LPAREN m_cond = match_condition [tagset, vars] RPAREN {
mtch.reset(new RepeatedMatch(m_cond));
// Match condition - is(ann_name)
// Returns boost::shared_ptr<IsAnnotatedAs>
match_cond_is
returns [boost::shared_ptr<IsAnnotatedAs> mtch]
: "is" LPAREN annotation_name: STRING RPAREN {
mtch.reset(new IsAnnotatedAs(token_ref_to_std_string(annotation_name)));
}
;
// Match condition - text(text)
// Returns boost::shared_ptr<MatchText>
match_cond_text
returns [boost::shared_ptr<MatchText> mtch]
: "text" LPAREN txt: STRING RPAREN {
mtch.reset(new MatchText(token_ref_to_ustring(txt)));
}
;
// Match condition - oneof(variant1(v1), variant(v2), ...)
// Returns boost::shared_ptr<OneOf>
match_cond_oneof
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<OneOf> onf]
{
boost::shared_ptr<std::vector<ConjConditions> > variants;
}
: "oneof" LPAREN variants = match_variants [tagset, vars] RPAREN {
onf.reset(new OneOf(variants));
}
;
// Match condition - longest(variant1(v1), variant(v2), ...)
// Returns boost::shared_ptr<Longest>
match_cond_longest
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Longest> lng]
{
boost::shared_ptr<std::vector<ConjConditions> > variants;
}
: "longest" LPAREN variants = match_variants [tagset, vars] RPAREN {
lng.reset(new Longest(variants));
}
;
// ----------------------------------------------------------------------------
// Match actions. Match action can be mark or unmark
// Returns boost::shared_ptr<MatchAction>
match_action
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<MatchAction> m_act]
: m_act = match_mark_action [tagset, vars]
| m_act = match_unmark_action [tagset, vars]
;
// Match mark action
match_mark_action
[const Corpus2::Tagset& tagset, Variables& vars]
{
boost::shared_ptr<Function<Match> > match_to;
boost::shared_ptr<Function<Match> > match_from;
boost::shared_ptr<Function<Match> > head_match;
Adam Wardynski
committed
match_from = match_operator[tagset, vars] COMMA
( match_to = match_operator[tagset, vars] COMMA
( head_match = match_operator[tagset, vars] COMMA )?
if (!match_to) {
m_act.reset(
new MarkMatch(
match_from,
((antlr::Token*)annotation_name)->getText()));
} else {
if (!head_match) {
m_act.reset(
new MarkMatch(
match_from,
match_to,
((antlr::Token*)annotation_name)->getText()));
} else {
m_act.reset(
new MarkMatch(
match_from,
match_to,
head_match,
((antlr::Token*)annotation_name)->getText()));
}
;
// Match unmark action
// Returns boost::shared_ptr<UnmarkMatch>
match_unmark_action
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<UnmarkMatch> m_act]
{
boost::shared_ptr<Function<Match> > match_at;
}
: "unmark" LPAREN
Adam Wardynski
committed
match_at = match_operator[tagset, vars] COMMA
annotation_name : STRING
RPAREN {
m_act.reset(
new UnmarkMatch(
match_at,
((antlr::Token*)annotation_name)->getText()));
}
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
;
// Match action separated by comma
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > >
match_action_comma_sep
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > r_vec]
{
boost::shared_ptr<MatchAction> act;
r_vec.reset(
new std::vector<boost::shared_ptr<MatchAction> >
);
}
: act = match_action [tagset, vars] {
r_vec->push_back(act);
}
(
COMMA act = match_action [tagset, vars] {
r_vec->push_back(act);
}
)*
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
class ANTLRLexer extends Lexer;
options {
exportVocab = ANTLRExpr;
charVocabulary = '\3'..'\377';
: '"'! (~('"' | '\n' | '\r'))* '"'!
| '\''! (~('\'' | '\n' | '\r'))* '\''!
: ('-'|'+') (' '!|'\t'!)* ('0'..'9')+
UNSIGNED_INT
options {
paraphrase = "Unsigned integer";
}
: ('0'..'9')+
;
STR_PREFIX
options {
paraphrase = "String prefix";
}
Paweł Kędzia
committed
: "$s:"
;
TST_PREFIX
options {
Paweł Kędzia
committed
: "$t:"
;
BOOL_PREFIX
options {
paraphrase = "Bool prefix";
}
Paweł Kędzia
committed
: "$b:"
Paweł Kędzia
committed
paraphrase = "Position prefix";
Paweł Kędzia
committed
: '$'
MATCH_VECTOR_PREFIX
options {
paraphrase = "Match vector prefix";
}
: "$m:"
;
RBRACKET
options {
paraphrase = "']'";
}
: ']'
LPAREN
options {
paraphrase = "'('";
}
: '('
RPAREN
options {
paraphrase = "')'";
}
: ')'
LCURLY
options {
paraphrase = "'{'";
}
: '{'
RCURLY
options {
paraphrase = "'}'";
}
: '}'
AT_MARK
options {
paraphrase = "'@'";
}
: '@'
ARROW
options {
paraphrase = "->";
}
: "->"
;
Adam Wardynski
committed
COLON
options {
paraphrase = ":";
}
: ':'
;
: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
| '\t'
| '\f'
|
( "\r\n"
| '\r'
| '\n'
) { newline(); }
) { $setType(antlr::Token::SKIP); }
paraphrase = "Single line comment";
}
: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP); }
;
ML_COMMENT
options {
paraphrase = "Multi line comment";
( // TODO: test it and add reference to the site it's taken from!
/* This actually works OK despite the ambiguity that
'\r' '\n' can be matched in one alternative or by matching
'\r' in one iteration and '\n' in another.. But
this is really matched just by one rule per (...)*
loop iteration, so it's OK.
This is exactly how they do it all over the web - just
turn off the warning for this particular token.*/
options {
generateAmbigWarnings = false;
}
: { LA(2)!='/' }? '*'
| '\r' '\n' { newline(); }
| '\r' { newline(); }
| '\n' { newline(); }
)*
{ $setType(antlr::Token::SKIP); }
//DSEPARATOR
//options {
// paraphrase = "':-'";
//}
// : ":-"
//;