Newer
Older
(
b1 = bool_operator [tagset, vars] COMMA
b2 = bool_operator [tagset, vars] {
op.reset(new Equals<Bool>(b1, b2));
// ----------------------------------------------------------------------------
// In operator
in_operator
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
boost::shared_ptr<Function<TSet> > t1, t2;
boost::shared_ptr<Function<StrSet> > s1, s2;
Paweł Kędzia
committed
}
:
"in" LPAREN
(symset_operator [tagset, vars]) =>
t1 = symset_operator [tagset, vars] COMMA
t2 = symset_operator [tagset, vars] {
op.reset(new IsSubsetOf<TSet>(t1, t2));
s1 = strset_operator [tagset, vars] COMMA
s2 = strset_operator [tagset, vars] {
op.reset(new IsSubsetOf<StrSet>(s1, s2));
;
// ----------------------------------------------------------------------------
// Inter operator
inter_operator
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
boost::shared_ptr<Function<TSet> > t1, t2;
boost::shared_ptr<Function<StrSet> > s1, s2;
:
"inter" LPAREN
(symset_operator [tagset, vars]) =>
t1 = symset_operator [tagset, vars] COMMA
t2 = symset_operator [tagset, vars] {
op.reset(new Intersects<TSet>(t1, t2));
s1 = strset_operator [tagset, vars] COMMA
s2 = strset_operator [tagset, vars] {
op.reset(new Intersects<StrSet>(s1, s2));
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
// ----------------------------------------------------------------------------
// Iterations:
bool_iteration
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > ret]
{
int min_match = 0;
boost::shared_ptr<Function<Bool> > expr;
boost::shared_ptr<Function<Position> > lpos, rpos;
boost::shared_ptr<VariableAccessor<Position> > pacc;
}
: "only" LPAREN
lpos = position_operator [tagset, vars] COMMA
rpos = position_operator [tagset, vars] COMMA
pacc = position_variable_acc [vars] COMMA
expr = bool_operator [tagset, vars]
RPAREN {
ret.reset(new Only(lpos, rpos, *pacc, expr));
}
| "atleast" LPAREN
lpos = position_operator [tagset, vars] COMMA
rpos = position_operator [tagset, vars] COMMA
pacc = position_variable_acc [vars] COMMA
expr = bool_operator [tagset, vars] COMMA
min_match = number
RPAREN {
ret.reset(new AtLeast(lpos, rpos, *pacc, expr, min_match));
}
| "llook" LPAREN
lpos = position_operator [tagset, vars] COMMA
rpos = position_operator [tagset, vars] COMMA
pacc = position_variable_acc [vars] COMMA
expr = bool_operator [tagset, vars]
RPAREN {
ret.reset(new LeftLook(lpos, rpos, *pacc, expr));
}
| "rlook" LPAREN
lpos = position_operator [tagset, vars] COMMA
rpos = position_operator [tagset, vars] COMMA
pacc = position_variable_acc [vars] COMMA
expr = bool_operator [tagset, vars]
RPAREN {
ret.reset(new RightLook(lpos, rpos, *pacc, expr));
}
;
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
// ----------------------------------------------------------------------------
// Agreement operator: agr, agrpp, wagr
bool_agreement
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > ret]
{
boost::shared_ptr<Function<TSet> > expr;
boost::shared_ptr<Function<Position> > lpos, rpos;
}
: "agr" LPAREN
lpos = position_operator [tagset, vars] COMMA
rpos = position_operator [tagset, vars] COMMA
expr = symset_operator [tagset, vars]
RPAREN {
ret.reset(new StrongAgreement(lpos, rpos, expr, tagset));
}
| "agrpp" LPAREN
lpos = position_operator [tagset, vars] COMMA
rpos = position_operator [tagset, vars] COMMA
expr = symset_operator [tagset, vars]
RPAREN {
ret.reset(new PointAgreement(lpos, rpos, expr, tagset));
}
| "wagr" LPAREN
lpos = position_operator [tagset, vars] COMMA
rpos = position_operator [tagset, vars] COMMA
expr = symset_operator [tagset, vars]
RPAREN {
ret.reset(new WeakAgreement(lpos, rpos, expr, tagset));
}
;
// ----------------------------------------------------------------------------
// Setvar operator
// Returns boost::shared_ptr<Function<Bool> >
// ----------------------------------------------------------------------------
setvar_operator
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > ret]
: "setvar" LPAREN
(
ret = position_setvar [tagset, vars]
| ret = bool_setvar [tagset, vars]
| ret = strset_setvar [tagset, vars]
| ret = symset_setvar [tagset, vars]
)
RPAREN
;
// ----------------------------------------------------------------------------
// Setvar for position
position_setvar
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
boost::shared_ptr<Function<Position> > ret_op;
boost::shared_ptr<VariableAccessor<Position> > ret_acc;
: ret_acc = position_variable_acc [vars]
COMMA
ret_op = position_operator [tagset, vars] {
op.reset(new VarSetter<Position>(*ret_acc, ret_op));
// ----------------------------------------------------------------------------
// Setvar for bool
bool_setvar
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
boost::shared_ptr<Function<Bool> > ret_op;
boost::shared_ptr<VariableAccessor<Bool> > ret_acc;
: ret_acc = bool_variable_acc [vars]
COMMA
ret_op = bool_operator [tagset, vars] {
op.reset(new VarSetter<Bool>(*ret_acc, ret_op));
// ----------------------------------------------------------------------------
// Setvar for strset
strset_setvar
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
boost::shared_ptr<Function<StrSet> > ret_op;
boost::shared_ptr<VariableAccessor<StrSet> > ret_acc;
: ret_acc = strset_variable_acc [vars]
COMMA
ret_op = strset_operator [tagset, vars] {
op.reset(new VarSetter<StrSet>(*ret_acc, ret_op));
Paweł Kędzia
committed
}
// ----------------------------------------------------------------------------
// Setvar for symset
symset_setvar
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
boost::shared_ptr<Function<TSet> > ret_op;
boost::shared_ptr<VariableAccessor<TSet> > ret_acc;
: ret_acc = symset_variable_acc [vars]
COMMA
ret_op = symset_operator [tagset, vars] {
op.reset(new VarSetter<TSet>(*ret_acc, ret_op));
}
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
class ANTLRLexer extends Lexer;
options {
exportVocab = ANTLRExpr;
charVocabulary = '\3'..'\377';
: '"'! (~('"' | '\n' | '\r'))* '"'!
| '\''! (~('\'' | '\n' | '\r'))* '\''!
: ('-'|'+') (' '!|'\t'!)* ('0'..'9')+
UNSIGNED_INT
options {
paraphrase = "Unsigned integer";
}
: ('0'..'9')+
;
STR_PREFIX
options {
paraphrase = "String prefix";
}
Paweł Kędzia
committed
: "$s:"
;
TST_PREFIX
options {
Paweł Kędzia
committed
: "$t:"
;
BOOL_PREFIX
options {
paraphrase = "Bool prefix";
}
Paweł Kędzia
committed
: "$b:"
Paweł Kędzia
committed
paraphrase = "Position prefix";
Paweł Kędzia
committed
: '$'
RBRACKET
options {
paraphrase = "']'";
}
: ']'
LPAREN
options {
paraphrase = "'('";
}
: '('
RPAREN
options {
paraphrase = "')'";
}
: ')'
LCURLY
options {
paraphrase = "'{'";
}
: '{'
RCURLY
options {
paraphrase = "'}'";
}
: '}'
AT_MARK
options {
paraphrase = "'@'";
}
: '@'
: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
| '\t'
| '\f'
|
( "\r\n"
| '\r'
| '\n'
) { newline(); }
) { $setType(antlr::Token::SKIP); }
paraphrase = "Single line comment";
}
: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP); }
;
ML_COMMENT
options {
paraphrase = "Multi line comment";
( // TODO: test it and add reference to the site it's taken from!
/* This actually works OK despite the ambiguity that
'\r' '\n' can be matched in one alternative or by matching
'\r' in one iteration and '\n' in another.. But
this is really matched just by one rule per (...)*
loop iteration, so it's OK.
This is exactly how they do it all over the web - just
turn off the warning for this particular token.*/
options {
generateAmbigWarnings = false;
}
: { LA(2)!='/' }? '*'
| '\r' '\n' { newline(); }
| '\r' { newline(); }
| '\n' { newline(); }
)*
{ $setType(antlr::Token::SKIP); }
//DSEPARATOR
//options {
// paraphrase = "':-'";
//}
// : ":-"
//;