Newer
Older
Adam Wardynski
committed
///////////////////////////////////////////////////////////////////////////////
// Match operators.
// Returns boost::shared_ptr<Function<Match> >
//
// scope - parsing scope; supplies the variables used to access "_M" and is
//         forwarded to nested rules.
//
// Alternatives:
//   * a match variable/value (match_var_val),
//   * an optional "M" (the alternative is also taken when the next token is
//     a COLON): Submatch with index 1 of the "_M" variable,
//   * "MA": Submatch with index 2 of the "_M" variable,
//   * a parenthesised match operator.
// Each trailing ":<unsigned int>" wraps the result in one more Submatch.
///////////////////////////////////////////////////////////////////////////////
match_operator
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Match> > ret]
    :
    (   ret = match_var_val [scope]
      | {LA(1)==LITERAL_M || LA(1)==COLON}? ("M")? {
            ret.reset(new VarGetter<Match>(scope.variables().create_accessor<Match>("_M")));
            ret.reset(new Submatch(ret, 1));
        }
      | "MA" {
            ret.reset(new VarGetter<Match>(scope.variables().create_accessor<Match>("_M")));
            ret.reset(new Submatch(ret, 2));
        }
      | LPAREN ret = match_operator [scope] RPAREN
    )
    ( // if there's a colon after the match, we have a submatch reference
        COLON i: UNSIGNED_INT { ret.reset(new Submatch(ret, token_ref_to_int(i))); }
    )*
;
// ----------------------------------------------------------------------------
// Adapter rule: parses a match operator and wraps the resulting
// Function<Match> in an Operator<Match> built with the scope's variables.
functional_operator_match
    [ParsingScope& scope]
    returns [boost::shared_ptr<Operator<Match> > op]
{
    boost::shared_ptr<Function<Match> > match_fn;
}
    : match_fn = match_operator [scope]
      {
          op = boost::shared_ptr<Operator<Match> >(
              new Operator<Match>(match_fn, scope.variables()));
      }
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// WCCL FILE PARSING RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// One or more consecutive import statements; each one is handled by the
// import rule, which receives the same WcclFile.
imports_section
    [WcclFile& wccl_file]
    : ( import [wccl_file] )+
;
// import(<file path string>, <lexicon name string>)
// Parses the lexicon with LexiconParser and registers it in the WCCL file.
import
    [WcclFile& wccl_file]
    : "import" LPAREN file_path : STRING COMMA lexicon_name : STRING RPAREN
      {
          // parse_lexicon takes the lexicon name before the file path.
          const std::string lex_name = token_ref_to_std_string(lexicon_name);
          const std::string lex_file = token_ref_to_std_string(file_path);
          wccl_file.import_lexicon(
              LexiconParser::parse_lexicon(wccl_file.path(), lex_name, lex_file));
      }
;
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
// A single section of a WCCL file: an operator section, the tag rules
// section or the match rules section.
wccl_file_section
    [WcclFile& wccl_file]
    :   any_operator_section [wccl_file]
      | tag_rules_section    [wccl_file]
      | match_rules_section  [wccl_file]
;
// Tag rules section of a WCCL file.
// The parsed sequence is stored in the file; a ParserException is thrown
// if the file already holds tag rules.
tag_rules_section
    [WcclFile& wccl_file]
{
    boost::shared_ptr<TagRuleSequence> rule_seq;
}
    : rule_seq = parse_tag_rule_sequence [wccl_file.tagset()]
      {
          if (!wccl_file.has_tag_rules()) {
              wccl_file.set_tag_rules(rule_seq);
          } else {
              throw ParserException("Only one tag_rules section allowed in a WCCL file.");
          }
      }
;
// Match rules section of a WCCL file:
//   match_rules( rule ; rule ; ... )
// A ParserException is thrown right after the keyword if the file already
// has match rules. Every parsed rule is appended to the sequence and the
// scope's variables are reset before the next rule is parsed. The finished
// sequence is stored in the file.
match_rules_section
    [WcclFile& wccl_file]
{
    ParsingScope scope(wccl_file);
    boost::shared_ptr<MatchRule> match_rule;
    boost::shared_ptr<MatchRuleSequence> rule_seq = boost::make_shared<MatchRuleSequence>();
}
    : "match_rules" {
        if (wccl_file.has_match_rules()) {
            throw ParserException("Only one match_rules section allowed in a WCCL file.");
        }
    }
    LPAREN
        match_rule = match_rule_operator [scope] {
            rule_seq->push_back(*match_rule);
            scope.reset_variables();
        }
        (
            SEMI match_rule = match_rule_operator [scope] {
                rule_seq->push_back(*match_rule);
                // Same per-rule reset as for the first rule.
                scope.reset_variables();
            }
        )*
    RPAREN { wccl_file.set_match_rules(rule_seq); }
;
// Any named operator section (untyped, bool, symset, strset, position or
// match). Whichever sequence is parsed is added to the file with
// add_section.
any_operator_section
    [WcclFile& wccl_file]
{
    boost::shared_ptr<UntypedOpSequence> untyped_seq;
    boost::shared_ptr<OpSequence<Bool> > bool_seq;
    boost::shared_ptr<OpSequence<TSet> > symset_seq;
    boost::shared_ptr<OpSequence<StrSet> > strset_seq;
    boost::shared_ptr<OpSequence<Position> > pos_seq;
    boost::shared_ptr<OpSequence<Match> > m_seq;
}
    : untyped_seq = untyped_operator_sequence [wccl_file] {
        wccl_file.add_section(untyped_seq);
    }
    | bool_seq = bool_operator_sequence [wccl_file] {
        wccl_file.add_section(bool_seq);
    }
    | symset_seq = symset_operator_sequence [wccl_file] {
        wccl_file.add_section(symset_seq);
    }
    | strset_seq = strset_operator_sequence [wccl_file] {
        wccl_file.add_section(strset_seq);
    }
    | pos_seq = position_operator_sequence [wccl_file] {
        wccl_file.add_section(pos_seq);
    }
    | m_seq = match_operator_sequence [wccl_file] {
        wccl_file.add_section(m_seq);
    }
;
// Named sequence of bool operators:
//   BOOL_SECTION_PREFIX <name string> ( op ; op ; ... )
// The sequence is named after the string; the scope's variables are reset
// after each operator has been appended.
bool_operator_sequence
    [const WcclFile& wccl_file]
    returns [boost::shared_ptr<OpSequence<Bool> > seq]
{
    ParsingScope scope(wccl_file);
    boost::shared_ptr<Operator<Bool> > op;
}
    : BOOL_SECTION_PREFIX name: STRING {
        seq.reset(new OpSequence<Bool>(token_ref_to_std_string(name)));
    }
    LPAREN
        op = functional_operator_bool [scope] { seq->append(op); scope.reset_variables(); }
        (SEMI op = functional_operator_bool [scope] { seq->append(op); scope.reset_variables(); })*
    RPAREN
;
// Named sequence of symset operators:
//   TST_SECTION_PREFIX <name string> ( op ; op ; ... )
// The sequence is named after the string; the scope's variables are reset
// after each operator has been appended.
symset_operator_sequence
    [const WcclFile& wccl_file]
    returns [boost::shared_ptr<OpSequence<TSet> > seq]
{
    ParsingScope scope(wccl_file);
    boost::shared_ptr<Operator<TSet> > op;
}
    : TST_SECTION_PREFIX name: STRING {
        seq.reset(new OpSequence<TSet>(token_ref_to_std_string(name)));
    }
    LPAREN
        op = functional_operator_symset [scope] { seq->append(op); scope.reset_variables(); }
        (SEMI op = functional_operator_symset [scope] { seq->append(op); scope.reset_variables(); })*
    RPAREN
;
// Named sequence of string-set operators:
//   STR_SECTION_PREFIX <name string> ( op ; op ; ... )
// The sequence is named after the string; the scope's variables are reset
// after each operator has been appended.
strset_operator_sequence
    [const WcclFile& wccl_file]
    returns [boost::shared_ptr<OpSequence<StrSet> > seq]
{
    ParsingScope scope(wccl_file);
    boost::shared_ptr<Operator<StrSet> > op;
}
    : STR_SECTION_PREFIX name: STRING {
        seq.reset(new OpSequence<StrSet>(token_ref_to_std_string(name)));
    }
    LPAREN
        op = functional_operator_strset [scope] { seq->append(op); scope.reset_variables(); }
        (SEMI op = functional_operator_strset [scope] { seq->append(op); scope.reset_variables(); })*
    RPAREN
;
// Named sequence of position operators:
//   POS_SECTION_PREFIX <name string> ( op ; op ; ... )
// The sequence is named after the string; the scope's variables are reset
// after each operator has been appended.
position_operator_sequence
    [const WcclFile& wccl_file]
    returns [boost::shared_ptr<OpSequence<Position> > seq]
{
    ParsingScope scope(wccl_file);
    boost::shared_ptr<Operator<Position> > op;
}
    : POS_SECTION_PREFIX name: STRING {
        seq.reset(new OpSequence<Position>(token_ref_to_std_string(name)));
    }
    LPAREN
        op = functional_operator_position [scope] { seq->append(op); scope.reset_variables(); }
        (SEMI op = functional_operator_position [scope] { seq->append(op); scope.reset_variables(); })*
    RPAREN
;
// Named sequence of operators of any type:
//   AT_MARK <name string> ( op ; op ; ... )
// The sequence is named after the string; the scope's variables are reset
// after each operator has been appended.
untyped_operator_sequence
    [const WcclFile& wccl_file]
    returns [boost::shared_ptr<UntypedOpSequence> seq]
{
    ParsingScope scope(wccl_file);
    boost::shared_ptr<FunctionalOperator> op;
}
    : AT_MARK name: STRING {
        seq.reset(new UntypedOpSequence(token_ref_to_std_string(name)));
    }
    LPAREN
        op = functional_operator_any [scope] { seq->append(op); scope.reset_variables(); }
        (SEMI op = functional_operator_any [scope] { seq->append(op); scope.reset_variables(); })*
    RPAREN
;
// Named sequence of match operators:
//   MATCH_SECTION_PREFIX <name string> ( op ; op ; ... )
// The sequence is named after the string; the scope's variables are reset
// after each operator has been appended.
match_operator_sequence
    [const WcclFile& wccl_file]
    returns [boost::shared_ptr<OpSequence<Match> > seq]
{
    ParsingScope scope(wccl_file);
    boost::shared_ptr<Operator<Match> > op;
}
    : MATCH_SECTION_PREFIX name: STRING {
        seq.reset(new OpSequence<Match>(token_ref_to_std_string(name)));
    }
    LPAREN
        op = functional_operator_match [scope] { seq->append(op); scope.reset_variables(); }
        (SEMI op = functional_operator_match [scope] { seq->append(op); scope.reset_variables(); })*
    RPAREN
;
// Functional operator of any type.
// Syntactic predicates try position, symset, strset and match operators in
// that order; when none of them matches, the input is parsed as a bool
// operator.
functional_operator_any
    [ParsingScope& scope]
    returns [boost::shared_ptr<FunctionalOperator> op]
    : (position_operator [scope]) => op = functional_operator_position [scope]
    | (symset_operator [scope])   => op = functional_operator_symset [scope]
    | (strset_operator [scope])   => op = functional_operator_strset [scope]
    | (match_operator [scope])    => op = functional_operator_match [scope]
    | op = functional_operator_bool [scope]
;
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Tagging actions and rules:
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Single action such as select, delete, relabel, unify, mark or unmark.
// scope is forwarded unchanged to the chosen action rule.
action
    [ParsingScope& scope]
    returns [boost::shared_ptr<TagAction> act]
    : act = action_select  [scope]
    | act = action_delete  [scope]
    | act = action_relabel [scope]
    | act = action_unify   [scope]
    | act = action_mark    [scope]
    | act = action_unmark  [scope]
;
// Action sequence - the actions are separated with commas:
// select(...), select(...), delete(...)
// Returns a vector holding the parsed actions in input order.
// NOTE(review): the rule-call lines were missing from this rule; they were
// restored from the comment above and the sibling comma-separated rules --
// confirm.
action_sequence
    [ParsingScope& scope]
    returns [boost::shared_ptr<std::vector<boost::shared_ptr<TagAction> > > v_act]
{
    boost::shared_ptr<TagAction> act;
    v_act.reset(new std::vector<boost::shared_ptr<TagAction> >);
}
    : act = action [scope] {
        v_act->push_back(act);
    }
    (
        COMMA act = action [scope] {
            v_act->push_back(act);
        }
    )*
;
// ----------------------------------------------------------------------------
// Single rule:
// rule(NAME, ACTIONS) or rule(NAME, COND, ACTIONS)
// The TagRule is built from the name, the scope's variables and the actions;
// the condition is passed only when one was given.
// NOTE(review): the returns clause was missing; its type is inferred from
// the "new TagRule" calls below -- confirm.
tag_rule
    [ParsingScope& scope]
    returns [boost::shared_ptr<TagRule> rle]
{
    boost::shared_ptr<Function<Bool> > condition;
    boost::shared_ptr<std::vector<boost::shared_ptr<TagAction> > > actions;
}
    : "rule" LPAREN name: STRING COMMA
        (condition = bool_operator [scope] COMMA)?
        actions = action_sequence [scope]
    RPAREN {
        if (condition) {
            rle.reset(
                new TagRule(token_ref_to_std_string(name), scope.variables(), actions, condition));
        }
        else {
            rle.reset(
                new TagRule(token_ref_to_std_string(name), scope.variables(), actions));
        }
    }
;
// Rule sequence: tag rules separated with semicolons.
// The scope's variables are reset after every rule.
// NOTE(review): the declaration of rle and the push_back calls were missing
// here; they mirror the match_rules_section rule -- confirm against the
// TagRuleSequence API.
tag_rule_sequence
    [ParsingScope& scope]
    returns [boost::shared_ptr<TagRuleSequence> rule_seq]
{
    boost::shared_ptr<TagRule> rle;
    rule_seq.reset(new TagRuleSequence());
}
    : rle = tag_rule [scope] {
        rule_seq->push_back(*rle);
        scope.reset_variables();
    }
    (
        SEMI rle = tag_rule [scope] {
            rule_seq->push_back(*rle);
            scope.reset_variables();
        }
    )*
;
// ----------------------------------------------------------------------------
// Wrapper for tag_rule_sequence in the rules section of a WCCL file:
//   tag_rules( <tag rule sequence> )
// Returns the sequence produced by tag_rule_sequence.
tag_rules
    [ParsingScope& scope]
    returns [boost::shared_ptr<TagRuleSequence> rule_seq]
    : "tag_rules" LPAREN rule_seq = tag_rule_sequence [scope] RPAREN
;
// ----------------------------------------------------------------------------
// Select action:
// select(position, predicate) or select(predicate);
// A syntactic predicate chooses the two-argument form when the input starts
// with a position operator.
action_select
    [ParsingScope& scope]
    returns [boost::shared_ptr<Select> action]
{
    boost::shared_ptr<Function<Position> > pos;
    boost::shared_ptr<Function<Bool> > condition;
}
    : "select" LPAREN
    (
        (position_operator [scope]) =>
        (
            pos = position_operator [scope] COMMA
            condition = bool_operator [scope] {
                // select(position, condition);
                action.reset(new Select(condition, pos));
            }
        )
    |
        (
            condition = bool_operator [scope] {
                // select(condition);
                action.reset(new Select(condition));
            }
        )
    )
    RPAREN
;
// ----------------------------------------------------------------------------
// Delete action
// delete(position, predicate) or delete(predicate);
// A syntactic predicate chooses the two-argument form when the input starts
// with a position operator.
// NOTE(review): the Delete constructor calls mirror the Select ones in
// action_select (condition first, then position) -- confirm against the
// Delete class.
action_delete
    [ParsingScope& scope]
    returns [boost::shared_ptr<Delete> action]
{
    boost::shared_ptr<Function<Position> > pos;
    boost::shared_ptr<Function<Bool> > condition;
}
    : "delete" LPAREN
    (
        (position_operator [scope]) =>
        (
            pos = position_operator [scope] COMMA
            condition = bool_operator [scope] {
                // delete(position, condition);
                action.reset(new Delete(condition, pos));
            }
        )
    |
        (
            condition = bool_operator [scope] {
                // delete(condition);
                action.reset(new Delete(condition));
            }
        )
    )
    RPAREN
;
// ----------------------------------------------------------------------------
// Relabel action
// relabel(pos, symset, predicate) or relabel(symset, predicate)
// A syntactic predicate chooses the three-argument form when the input
// starts with a position operator.
action_relabel
    [ParsingScope& scope]
    returns [boost::shared_ptr<Relabel> action]
{
    boost::shared_ptr<Function<Position> > pos;
    boost::shared_ptr<Function<Bool> > condition;
    boost::shared_ptr<Function<TSet> > replace_with;
}
    : "relabel" LPAREN
    (
        (position_operator [scope]) =>
        (
            pos = position_operator [scope] COMMA
            replace_with = symset_operator [scope] COMMA
            condition = bool_operator [scope] {
                // relabel(pos, symset, predicate)
                action.reset(new Relabel(replace_with, condition, pos));
            }
        )
    |
        (
            replace_with = symset_operator [scope] COMMA
            condition = bool_operator [scope] {
                // relabel(symset, predicate)
                action.reset(new Relabel(replace_with, condition));
            }
        )
    )
    RPAREN
;
// ----------------------------------------------------------------------------
// Unify action
// unify(begin position, end position, symset)
action_unify
    [ParsingScope& scope]
    returns [boost::shared_ptr<Unify> action]
{
    boost::shared_ptr<Function<TSet> > attribs_expr;
    boost::shared_ptr<Function<Position> > pos_begin, pos_end;
}
    : "unify" LPAREN
        pos_begin = position_operator [scope] COMMA
        pos_end = position_operator [scope] COMMA
        attribs_expr = symset_operator [scope]
    RPAREN {
        action.reset(new Unify(pos_begin, pos_end, attribs_expr));
    }
;
// ----------------------------------------------------------------------------
// Mark action
// mark(begin position, end position, [head position,] channel name)
// pos_head stays a null pointer when the optional head position is omitted.
action_mark
    [ParsingScope& scope]
    returns [boost::shared_ptr<Mark> action]
{
    boost::shared_ptr<Function<Position> > pos_begin, pos_end, pos_head;
}
    : "mark" LPAREN
        pos_begin = position_operator [scope] COMMA
        pos_end = position_operator [scope] COMMA
        (pos_head = position_operator [scope] COMMA)?
        chan_name: STRING
    RPAREN {
        action.reset(new Mark(pos_begin, pos_end, pos_head, ((antlr::Token*)chan_name)->getText()));
    }
;
// ----------------------------------------------------------------------------
// Unmark action
// unmark(position, channel name)
action_unmark
    [ParsingScope& scope]
    returns [boost::shared_ptr<Unmark> action]
{
    boost::shared_ptr<Function<Position> > pos;
}
    : "unmark" LPAREN
        pos = position_operator [scope] COMMA
        chan_name: STRING
    RPAREN {
        action.reset(new Unmark(pos, ((antlr::Token*)chan_name)->getText()));
    }
;
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Match rules
// Returns boost::shared_ptr<MatchRule>
// The rule is built from the scope's variables and the parsed apply operator.
// NOTE(review): get_put<Match>("_M") runs before parsing, presumably so that
// the "_M" variable exists in the scope -- confirm.
match_rule_operator
    [ParsingScope& scope]
    returns [boost::shared_ptr<MatchRule> ret_op]
{
    boost::shared_ptr<ApplyOperator> apply;
    scope.variables().get_put<Match>("_M");
}
    : apply = match_apply_operator [scope] {
        ret_op = boost::make_shared<MatchRule>(scope.variables(), apply);
    }
;
// Match apply operator:
//   apply(match(), cond(conditions), actions(actions))
//   apply(match(), actions(actions))
// Returns boost::shared_ptr<ApplyOperator>
// The "cond" part is optional; when it is absent, conditions stays a null
// pointer and the three-argument ApplyOperator constructor is used.
match_apply_operator
    [ParsingScope& scope]
    returns [boost::shared_ptr<ApplyOperator> ret_op]
{
    VariableAccessor<Match> matches = scope.variables().create_accessor<Match>("_M");
    boost::shared_ptr<ConjConditions> match_cond;
    boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > actions;
    boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > conditions;
}
    : "apply" LPAREN
        "match" LPAREN match_cond = match_condition [scope] RPAREN COMMA
        ("cond" LPAREN conditions = bool_operator_comma_sep [scope] RPAREN COMMA)?
        "actions" LPAREN actions = match_action_comma_sep [scope] RPAREN
    RPAREN {
        if (conditions) {
            ret_op.reset(
                new ApplyOperator(matches, match_cond, actions, conditions)
            );
        }
        else {
            ret_op.reset(
                new ApplyOperator(matches, match_cond, actions)
            );
        }
    }
;
// Match conditions. Wraps the vector of match conditions parsed by
// match_condition_in in a ConjConditions object.
// Returns boost::shared_ptr<ConjConditions>
match_condition
    [ParsingScope& scope]
    returns [boost::shared_ptr<ConjConditions> condition]
{
    std::vector<boost::shared_ptr<const MatchCondition> > m_cond;
}
    : m_cond = match_condition_in [scope] {
        condition.reset(new ConjConditions(m_cond));
    }
;
// Match conditions: one or more conditions separated with commas.
// Returns std::vector< boost::shared_ptr<const MatchCondition> > holding the
// conditions in input order.
match_condition_in
    [ParsingScope& scope]
    returns [std::vector< boost::shared_ptr<const MatchCondition> > ret]
{
    boost::shared_ptr<const MatchCondition> r_cond;
}
    : r_cond = match_cond_all[scope] {
        ret.push_back(r_cond);
    }
    (
        COMMA
        r_cond = match_cond_all[scope] {
            ret.push_back(r_cond);
        }
    )*
;
// Match variants variant(v1), variant(v2), ...
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<ConjConditions> > >
// holding one ConjConditions per variant, in input order.
// NOTE(review): both actions were empty, so the returned vector was never
// filled; the push_back calls were added to match the declared return
// value -- confirm.
match_variants
    [ParsingScope& scope]
    returns [boost::shared_ptr<std::vector<boost::shared_ptr<ConjConditions> > > variants]
{
    variants.reset(new std::vector<boost::shared_ptr<ConjConditions> >());
    boost::shared_ptr<ConjConditions> variant;
}
    : "variant" LPAREN variant = match_condition [scope] RPAREN {
        variants->push_back(variant);
    }
    (
        COMMA "variant" LPAREN variant = match_condition [scope] RPAREN {
            variants->push_back(variant);
        }
    )*
;
// One of the match conditions
// Returns boost::shared_ptr<const MatchCondition>
// NOTE(review): match_cond_is and match_cond_text are defined in this
// grammar but are not listed among the alternatives here -- confirm whether
// that is intended.
match_cond_all
    [ParsingScope& scope]
    returns [boost::shared_ptr<const MatchCondition> ret]
    : ret = match_cond_optional [scope]
    | ret = match_cond_repeate  [scope]
    | ret = match_cond_token    [scope]
    | ret = match_cond_oneof    [scope]
    | ret = match_cond_longest  [scope]
;
// Match condition - token (wraps a bool operator in a TokenCondition)
// Returns boost::shared_ptr<const TokenCondition>
match_cond_token
    [ParsingScope& scope]
    returns [boost::shared_ptr<const TokenCondition> ret]
{
    boost::shared_ptr<Function<Bool> > bool_op;
}
    : bool_op = bool_operator [scope] {
        ret = boost::make_shared<TokenCondition>(bool_op);
    }
;
// Match condition - optional
//   optional(<match conditions>)
// Returns boost::shared_ptr<OptionalMatch>
match_cond_optional
    [ParsingScope& scope]
    returns [boost::shared_ptr<OptionalMatch> mtch]
{
    boost::shared_ptr<ConjConditions> m_cond;
}
    : "optional" LPAREN m_cond = match_condition [scope] RPAREN {
        mtch.reset(new OptionalMatch(m_cond));
    }
;
// Match condition - repeat
//   repeat(<match conditions>)
// Returns boost::shared_ptr<RepeatedMatch>
// The rule name is the one used by match_cond_all.
match_cond_repeate
    [ParsingScope& scope]
    returns [boost::shared_ptr<RepeatedMatch> mtch]
{
    boost::shared_ptr<ConjConditions> m_cond;
}
    : "repeat" LPAREN m_cond = match_condition [scope] RPAREN {
        mtch.reset(new RepeatedMatch(m_cond));
    }
;
// Match condition - is(<annotation name string>)
// Builds an IsAnnotatedAs condition from the string's text.
// Returns boost::shared_ptr<IsAnnotatedAs>
match_cond_is
    returns [boost::shared_ptr<IsAnnotatedAs> mtch]
    : "is" LPAREN ann: STRING RPAREN
      {
          mtch = boost::shared_ptr<IsAnnotatedAs>(
              new IsAnnotatedAs(token_ref_to_std_string(ann)));
      }
;
// Match condition - text(<text string>)
// Builds a MatchText condition from the string converted by
// token_ref_to_ustring.
// Returns boost::shared_ptr<MatchText>
match_cond_text
    returns [boost::shared_ptr<MatchText> mtch]
    : "text" LPAREN text_tok: STRING RPAREN
      {
          mtch = boost::shared_ptr<MatchText>(
              new MatchText(token_ref_to_ustring(text_tok)));
      }
;
// Match condition - oneof(variant(v1), variant(v2), ...)
// Returns boost::shared_ptr<OneOf>
match_cond_oneof
    [ParsingScope& scope]
    returns [boost::shared_ptr<OneOf> onf]
{
    boost::shared_ptr<std::vector<boost::shared_ptr<ConjConditions> > > variants;
}
    : "oneof" LPAREN variants = match_variants [scope] RPAREN {
        onf.reset(new OneOf(variants));
    }
;
// Match condition - longest(variant(v1), variant(v2), ...)
// Returns boost::shared_ptr<Longest>
match_cond_longest
    [ParsingScope& scope]
    returns [boost::shared_ptr<Longest> lng]
{
    boost::shared_ptr<std::vector<boost::shared_ptr<ConjConditions> > > variants;
}
    : "longest" LPAREN variants = match_variants [scope] RPAREN {
        lng.reset(new Longest(variants));
    }
;
// ----------------------------------------------------------------------------
// Match actions. Match action can be mark or unmark
// Returns boost::shared_ptr<MatchAction>
match_action
    [ParsingScope& scope]
    returns [boost::shared_ptr<MatchAction> m_act]
    : m_act = match_mark_action   [scope]
    | m_act = match_unmark_action [scope]
;
// Match mark action
//   mark(from, annotation name)
//   mark(from, to, annotation name)
//   mark(from, to, head, annotation name)
// The MarkMatch constructor is chosen by which optional matches were given.
// NOTE(review): the rule header, the "mark" LPAREN prefix, the closing of
// the optional groups and the annotation_name token were missing; they were
// restored by analogy with match_unmark_action and the constructor calls
// below -- confirm, including the MarkMatch return type.
match_mark_action
    [ParsingScope& scope]
    returns [boost::shared_ptr<MarkMatch> m_act]
{
    boost::shared_ptr<Function<Match> > match_to;
    boost::shared_ptr<Function<Match> > match_from;
    boost::shared_ptr<Function<Match> > head_match;
}
    : "mark" LPAREN
        match_from = match_operator[scope] COMMA
        ( match_to = match_operator[scope] COMMA
            ( head_match = match_operator[scope] COMMA )?
        )?
        annotation_name : STRING
    RPAREN {
        if (!match_to) {
            m_act.reset(
                new MarkMatch(
                    match_from,
                    ((antlr::Token*)annotation_name)->getText()));
        } else {
            if (!head_match) {
                m_act.reset(
                    new MarkMatch(
                        match_from,
                        match_to,
                        ((antlr::Token*)annotation_name)->getText()));
            } else {
                m_act.reset(
                    new MarkMatch(
                        match_from,
                        match_to,
                        head_match,
                        ((antlr::Token*)annotation_name)->getText()));
            }
        }
    }
;
// Match unmark action
//   unmark(match, annotation name)
// Returns boost::shared_ptr<UnmarkMatch>
match_unmark_action
    [ParsingScope& scope]
    returns [boost::shared_ptr<UnmarkMatch> m_act]
{
    boost::shared_ptr<Function<Match> > match_at;
}
    : "unmark" LPAREN
        match_at = match_operator[scope] COMMA
        annotation_name : STRING
    RPAREN {
        m_act.reset(
            new UnmarkMatch(
                match_at,
                ((antlr::Token*)annotation_name)->getText()));
    }
;
// Match actions separated by commas
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > >
// holding the parsed actions in input order.
match_action_comma_sep
    [ParsingScope& scope]
    returns [boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > r_vec]
{
    boost::shared_ptr<MatchAction> act;
    r_vec.reset(
        new std::vector<boost::shared_ptr<MatchAction> >
    );
}
    : act = match_action [scope] {
        r_vec->push_back(act);
    }
    (
        COMMA act = match_action [scope] {
            r_vec->push_back(act);
        }
    )*
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
class ANTLRLexer extends Lexer;
options {
exportVocab = ANTLRExpr;
charVocabulary = '\3'..'\377';
: '"'! (~('"' | '\n' | '\r'))* '"'!
| '\''! (~('\'' | '\n' | '\r'))* '\''!
: ('-'|'+') (' '!|'\t'!)* ('0'..'9')+
// Lexer token: a run of one or more decimal digits.
UNSIGNED_INT
    options { paraphrase = "Unsigned integer"; }
    : ( '0'..'9' )+
;
// Lexer token: the literal "$s:".
STR_PREFIX
options {
    paraphrase = "String prefix";
}
    : "$s:"
;
// Lexer token: the literal "$t:".
// NOTE(review): the options block of this rule was truncated and its
// paraphrase text is lost; the rule is emitted without options so that the
// grammar is well-formed -- restore the original paraphrase if known.
TST_PREFIX
    : "$t:"
;
// Lexer token: the literal "$b:".
BOOL_PREFIX
options {
    paraphrase = "Bool prefix";
}
    : "$b:"
;
// Lexer token: the single character '$'.
// NOTE(review): the name line of this rule was missing; POS_PREFIX is
// assumed from the paraphrase text and the naming of the sibling *_PREFIX
// tokens -- confirm against the parser rules that reference it.
POS_PREFIX
options {
    paraphrase = "Position prefix";
}
    : '$'
;
// Lexer token: the literal "$m:".
MATCH_VECTOR_PREFIX
    options { paraphrase = "Match vector prefix"; }
    : "$m:"
;
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
// Lexer token: the literal "@s:".
STR_SECTION_PREFIX
    options { paraphrase = "Wccl file string operators section prefix"; }
    : "@s:"
;
// Lexer token: the literal "@p:".
POS_SECTION_PREFIX
    options { paraphrase = "Wccl file position operators section prefix"; }
    : "@p:"
;
// Lexer token: the literal "@b:".
BOOL_SECTION_PREFIX
    options { paraphrase = "Wccl file bool operators section prefix"; }
    : "@b:"
;
// Lexer token: the literal "@t:".
TST_SECTION_PREFIX
    options { paraphrase = "Wccl file symset operators section prefix"; }
    : "@t:"
;
// Lexer token: the literal "@m:", used by the parser to open a match
// operators section. The paraphrase is the text ANTLR shows for this token
// in error messages; it previously repeated the symset wording.
MATCH_SECTION_PREFIX
options {
    paraphrase = "Wccl file match operators section prefix";
}
    : "@m:"
;
// Lexer token: the character ']'.
RBRACKET
options {
    paraphrase = "']'";
}
    : ']'
;
// Lexer token: the character '('.
LPAREN
options {
    paraphrase = "'('";
}
    : '('
;
// Lexer token: the character ')'.
RPAREN
options {
    paraphrase = "')'";
}
    : ')'
;
// Lexer token: the character '{'.
LCURLY
options {
    paraphrase = "'{'";
}
    : '{'
;
// Lexer token: the character '}'.
RCURLY
options {
    paraphrase = "'}'";
}
    : '}'
;
// Lexer token: the character '@'.
AT_MARK
options {
    paraphrase = "'@'";
}
    : '@'
;
// Lexer token: the literal "->".
ARROW
    options { paraphrase = "->"; }
    : "->"
;
Adam Wardynski
committed
// Lexer token: the character ':'.
COLON
    options { paraphrase = ":"; }
    : ':'
;
// Lexer token: the character ';'.
SEMI
    options { paraphrase = ";"; }
    : ';'
;
: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'