Skip to content
Snippets Groups Projects
grammar.g 74 KiB
Newer Older
Adam Wardynski's avatar
Adam Wardynski committed
	boost::shared_ptr<FunctionalOperator> op;
}
	: AT_MARK name: STRING { 
			seq.reset(new UntypedOpSequence(token_ref_to_std_string(name)));
		}
		LPAREN
			op = functional_operator_any [scope] { seq->append(op); }
			(SEMI op = functional_operator_any [scope.new_scope()] { seq->append(op); })*
Adam Wardynski's avatar
Adam Wardynski committed
		RPAREN
;

// Named sequence of match operators:
// 	MATCH_SECTION_PREFIX "name" ( op ; op ; ... )
// Returns boost::shared_ptr<OpSequence<Match> >
match_operator_sequence
	[const WcclFile& wccl_file]
	returns [boost::shared_ptr<OpSequence<Match> > seq]
{
	ParsingScope scope(wccl_file);
	boost::shared_ptr<Operator<Match> > op;
}
	: MATCH_SECTION_PREFIX name: STRING {
			seq.reset(new OpSequence<Match>(token_ref_to_std_string(name)));
		}
		LPAREN
			// The first operator is parsed with the section scope itself,
			// each following one with whatever scope.new_scope() yields.
			op = functional_operator_match [scope] { seq->append(op); }
			(SEMI op = functional_operator_match [scope.new_scope()] { seq->append(op); })*
		RPAREN
;
// Functional operator of any type. Syntactic predicates try the position,
// symset, strset and match operators in that order; if none of them matches,
// the input is parsed as a bool operator.
// Returns boost::shared_ptr<FunctionalOperator>
functional_operator_any 
	[ParsingScope& scope]
	returns [boost::shared_ptr<FunctionalOperator> op]
{
}
	: (position_operator [scope]) => op = functional_operator_position [scope]
	| (symset_operator [scope]) => op = functional_operator_symset [scope]
	| (strset_operator [scope]) => op = functional_operator_strset [scope]
	| (match_operator [scope]) => op = functional_operator_match [scope]
	| op = functional_operator_bool [scope]
;
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Tagging actions and rules:
// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------
// Single action such as select, delete, relabel or unify
// Dispatches to the rule of one concrete tagging action.
// Returns boost::shared_ptr<TagAction>
action
	[ParsingScope& scope]
	returns [boost::shared_ptr<TagAction> act]
	: act = action_select  [scope]
	| act = action_delete  [scope]
	| act = action_relabel [scope]
	| act = action_unify [scope]
	| act = action_mark [scope]
	| act = action_unmark [scope]
;

// Action sequence - the actions are separated with commas:
// 	select(...), select(...), delete(...)
// Collects one or more comma-separated actions, in input order.
// Returns a (never null) shared pointer to the vector of parsed actions.
action_sequence
	[ParsingScope& scope]
	returns [boost::shared_ptr<std::vector<boost::shared_ptr<TagAction> > > v_act]
{
	boost::shared_ptr<TagAction> act;
	// Allocate the result up front so the actions below can append to it.
	v_act.reset(new std::vector<boost::shared_ptr<TagAction> >);
}
	: act = action[scope] {
		v_act->push_back(act);
	}
	(
		COMMA act = action[scope] {
			v_act->push_back(act);
		}
	)*
;

// ----------------------------------------------------------------------------
// Single rule:
// 	rule(NAME, ACTIONS) or rule(NAME, COND, ACTIONS)
// Returns boost::shared_ptr<TagRule>
tag_rule
	[ParsingScope& scope]
	returns [boost::shared_ptr<TagRule> rle]
{
	boost::shared_ptr<Function<Bool> > condition;
	boost::shared_ptr<std::vector<boost::shared_ptr<TagAction> > > actions;
}
	: "rule" LPAREN name: STRING COMMA 
				(condition = bool_operator [scope] COMMA)?
				actions = action_sequence [scope] 
		RPAREN {
			// The condition is optional; it stays null when it was not given.
			if (condition) {
				rle.reset(
					new TagRule(token_ref_to_std_string(name), scope.variables(), actions, condition));
			}
			else {
				rle.reset(
					new TagRule(token_ref_to_std_string(name), scope.variables(), actions));
			}
		}
;

// Rule sequence - one or more rules separated with semicolons.
// Every rule is parsed with the result of scope.new_scope().
// Returns boost::shared_ptr<TagRuleSequence>
tag_rule_sequence
	[const ParsingScope& scope]
	returns [boost::shared_ptr<TagRuleSequence> rule_seq]
{
	boost::shared_ptr<TagRule> rle;
	rule_seq.reset(new TagRuleSequence());
}
	: rle = tag_rule [scope.new_scope()] {
		// The sequence stores the rule by value (note the dereference).
		rule_seq->push_back(*rle);
	}
	(
		SEMI rle = tag_rule [scope.new_scope()] {
			rule_seq->push_back(*rle);
		}
	)*
;
// This is a wrapper for tag_rule_sequence, used in the rules section of the wccl file
// 	tag_rules( rule(...); rule(...); ... )
// Forwards the sequence produced by tag_rule_sequence unchanged.
// NOTE(review): the original action block after RPAREN was cut off in this
// copy of the file; confirm nothing but forwarding was intended.
tag_rules
	[const ParsingScope& scope]
	returns [boost::shared_ptr<TagRuleSequence> rule_seq]
	: "tag_rules" LPAREN rule_seq = tag_rule_sequence [scope] RPAREN
;

// ----------------------------------------------------------------------------
// Select action: 
//	select(position, predicate) or select(predicate);
// Returns boost::shared_ptr<Select>
// A syntactic predicate decides whether the argument list starts with a
// position operator; otherwise only a predicate is expected.
action_select 
	[ParsingScope& scope]
	returns [boost::shared_ptr<Select> action]
{
	boost::shared_ptr<Function<Position> > pos;
	boost::shared_ptr<Function<Bool> > condition;
}
	: "select" LPAREN
	(
		(position_operator [scope]) =>
		(
			pos = position_operator [scope] COMMA
			condition = bool_operator [scope] {
				// select(position, condition); 
				action.reset(new Select(condition, pos));
			}
		)
	|
		(
			condition = bool_operator [scope] {
				// select(condition);
				action.reset(new Select(condition));
			}
		)
	) 
	RPAREN
;

// ----------------------------------------------------------------------------
// Delete action
//	delete(position, predicate) or delete(predicate);
// Returns boost::shared_ptr<Delete>
// A syntactic predicate decides whether the argument list starts with a
// position operator; otherwise only a predicate is expected.
action_delete
	[ParsingScope& scope]
	returns [boost::shared_ptr<Delete> action]
{
	boost::shared_ptr<Function<Position> > pos;
	boost::shared_ptr<Function<Bool> > condition;
}
	: "delete" LPAREN
	(
		(position_operator [scope]) =>
		(
			pos = position_operator [scope] COMMA
			condition = bool_operator [scope] {
				// delete(position, condition); 
				action.reset(new Delete(condition, pos));
			}
		)
	|
		(
			condition = bool_operator [scope] {
				// delete(condition);
				action.reset(new Delete(condition));
			}
		)
	) 
	RPAREN
;

// ----------------------------------------------------------------------------
// Relabel action
// 	relabel(pos, symset, predicate) or relabel(symset, predicate)
// Returns boost::shared_ptr<Relabel>
// A syntactic predicate decides whether the argument list starts with a
// position operator; otherwise it starts with the replacement symset.
action_relabel
	[ParsingScope& scope]
	returns [boost::shared_ptr<Relabel> action]
{
	boost::shared_ptr<Function<Position> > pos;
	boost::shared_ptr<Function<Bool> > condition;
	boost::shared_ptr<Function<TSet> > replace_with;
}
	: "relabel" LPAREN
	(
		(position_operator [scope]) =>
		(
			pos = position_operator [scope] COMMA
			replace_with = symset_operator [scope] COMMA
			condition = bool_operator [scope] {
				// relabel(pos, symset, predicate)
				action.reset(new Relabel(replace_with, condition, pos));
			}
		)
	|
		(
			replace_with = symset_operator [scope] COMMA
			condition = bool_operator [scope] {
				// relabel(symset, predicate)
				action.reset(new Relabel(replace_with, condition));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// Unify action 
// 	unify(pos_begin, pos_end, symset)
// Returns boost::shared_ptr<Unify>
action_unify
	[ParsingScope& scope]
	returns [boost::shared_ptr<Unify> action]
{
	boost::shared_ptr<Function<Position> > range_first;
	boost::shared_ptr<Function<Position> > range_last;
	boost::shared_ptr<Function<TSet> > attributes;
}
	: "unify" LPAREN
			range_first = position_operator [scope] COMMA
			range_last  = position_operator [scope] COMMA
			attributes  = symset_operator   [scope]
		RPAREN
		{
			// All three arguments are mandatory.
			action.reset(new Unify(range_first, range_last, attributes));
		}
;

// ----------------------------------------------------------------------------
// Mark action
// 	mark(pos_begin, pos_end, "channel") or
// 	mark(pos_begin, pos_end, pos_head, "channel")
// Returns boost::shared_ptr<Mark>
action_mark
	[ParsingScope& scope]
	returns [boost::shared_ptr<Mark> action]
{
	boost::shared_ptr<Function<Position> > pos_begin, pos_end, pos_head;
}
	: "mark" LPAREN
			pos_begin    = position_operator [scope] COMMA
			pos_end      = position_operator [scope] COMMA
			(pos_head    = position_operator [scope] COMMA)?
			chan_name: STRING
		RPAREN  {
			// pos_head stays a null pointer when the optional head position
			// is omitted; it is passed to Mark as is.
			// The token reference is dereferenced directly instead of going
			// through a C-style cast to antlr::Token*.
			action.reset(new Mark(pos_begin, pos_end, pos_head, chan_name->getText()));
		}
;

// ----------------------------------------------------------------------------
// Unmark action
// 	unmark(pos, "channel")
// Returns boost::shared_ptr<Unmark>
action_unmark
	[ParsingScope& scope]
	returns [boost::shared_ptr<Unmark> action]
{
	boost::shared_ptr<Function<Position> > pos;
}
	: "unmark" LPAREN
			pos = position_operator [scope] COMMA
			chan_name: STRING
		RPAREN  {
			// The token reference is dereferenced directly instead of going
			// through a C-style cast to antlr::Token*.
			action.reset(new Unmark(pos, chan_name->getText()));
		}
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Match rules
// Wraps a parsed apply operator, together with the variables of the
// current scope, in a MatchRule.
// Returns boost::shared_ptr<MatchRule>
match_rule_operator
	[ParsingScope& scope]
	returns [boost::shared_ptr<MatchRule> ret_op]
{
	boost::shared_ptr<ApplyOperator> apply;
}
	: apply = match_apply_operator [scope] {
		ret_op = boost::make_shared<MatchRule>(scope.variables(), apply);
	}
;

// Match apply operator:
// 	apply(match(), cond(conditions), actions(actions)) 
// 	apply(match(), actions(actions))
// Returns boost::shared_ptr<ApplyOperator>
// The "cond" section is optional; "match" and "actions" are mandatory.
match_apply_operator
	[ParsingScope& scope]
	returns [boost::shared_ptr<ApplyOperator> ret_op]
{
	// Accessor for the Match variable named "_M" in the scope's variables.
	VariableAccessor<Match> matches = scope.variables().create_accessor<Match>("_M");
	boost::shared_ptr<ConjConditions> match_cond;
	boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > actions;
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > conditions;
}
	: "apply" LPAREN
		"match" LPAREN match_cond = match_condition [scope] RPAREN COMMA
		("cond"   LPAREN conditions = bool_operator_comma_sep [scope] RPAREN COMMA)?
		"actions" LPAREN actions    =  match_action_comma_sep [scope] RPAREN
	RPAREN {
		// conditions stays null when no "cond" section was given.
		if (conditions) {
			ret_op.reset(
				new ApplyOperator(matches, match_cond, actions, conditions)
			);
		}
		else {
			ret_op.reset(
				new ApplyOperator(matches, match_cond, actions)
			);
		}
	}
;

// Match conditions. Wrapper for vector of the match conditions
// Packs the condition list parsed by match_condition_in into one
// ConjConditions object.
// Returns boost::shared_ptr<ConjConditions>
match_condition
	[ParsingScope& scope]
	returns [boost::shared_ptr<ConjConditions> condition]
{
	std::vector<boost::shared_ptr<const MatchCondition> > parsed_conditions;
}
	: parsed_conditions = match_condition_in [scope]
	{
		condition.reset(new ConjConditions(parsed_conditions));
	}
;

// Match conditions - one or more, separated with commas.
// Returns std::vector< boost::shared_ptr<const MatchCondition> >
// Gathers the comma-separated conditions in input order.
match_condition_in
	[ParsingScope& scope]
	returns [std::vector< boost::shared_ptr<const MatchCondition> > ret]
{
	boost::shared_ptr<const MatchCondition> r_cond;
}
	: r_cond = match_cond_all[scope] {
		ret.push_back(r_cond);
	}
	(
		COMMA
		r_cond = match_cond_all[scope] {
			ret.push_back(r_cond);
		}
	)*
;

// Match variants: variant(v1), variant(v2), ...
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<ConjConditions> > >
// Gathers one or more comma-separated variant(...) groups in input order.
match_variants
	[ParsingScope& scope]
	returns [boost::shared_ptr<std::vector<boost::shared_ptr<ConjConditions> > > variants]
{
	// Allocate the result up front so the actions below can append to it.
	variants.reset(new std::vector<boost::shared_ptr<ConjConditions> >());

	boost::shared_ptr<ConjConditions> variant;
}
	: "variant" LPAREN variant = match_condition [scope] RPAREN {
		variants->push_back(variant);
	}
	(
		COMMA "variant" LPAREN variant = match_condition [scope] RPAREN {
			variants->push_back(variant);
		}
	)*
;

// Any one of the match conditions
// Returns boost::shared_ptr<const MatchCondition>
// Dispatches to the rule of one concrete match condition.
// Only match_cond_is and match_cond_text take no scope argument.
match_cond_all
	[ParsingScope& scope]
	returns [boost::shared_ptr<const MatchCondition> ret]
	: ret = match_cond_optional [scope]
	| ret = match_cond_repeate  [scope]
	| ret = match_cond_token    [scope]
	| ret = match_cond_oneof    [scope]
	| ret = match_cond_longest  [scope]
	| ret = match_cond_is
	| ret = match_cond_text
;

// Match condition - token (wraps a bool operator)
// Returns boost::shared_ptr<const TokenCondition>
// Wraps a single bool operator in a TokenCondition.
match_cond_token
	[ParsingScope& scope]
	returns [boost::shared_ptr<const TokenCondition> ret]
{
	boost::shared_ptr<Function<Bool> > predicate;
}
	: predicate = bool_operator [scope]
	{
		ret = boost::make_shared<TokenCondition>(predicate);
	}
;


// Returns boost::shared_ptr<OptionalMatch>
// Match condition - optional(conditions)
match_cond_optional
	[ParsingScope& scope]
	returns [boost::shared_ptr<OptionalMatch> mtch]
{
	boost::shared_ptr<ConjConditions> inner;
}
	: "optional" LPAREN inner = match_condition [scope] RPAREN
	{
		mtch.reset(new OptionalMatch(inner));
	}
;

// Match condition - repeat
// Returns boost::shared_ptr<RepeatedMatch>
// 	repeat(conditions)
match_cond_repeate
	[ParsingScope& scope]
	returns [boost::shared_ptr<RepeatedMatch> mtch]
{
	boost::shared_ptr<ConjConditions> m_cond;
}
	: "repeat" LPAREN m_cond = match_condition [scope] RPAREN {
		mtch.reset(new RepeatedMatch(m_cond));
	}
;

// Match condition - is(ann_name)
// Returns boost::shared_ptr<IsAnnotatedAs>
// 	is("annotation name")
match_cond_is
	returns [boost::shared_ptr<IsAnnotatedAs> mtch]
	: "is" LPAREN ann: STRING RPAREN
	{
		mtch.reset(new IsAnnotatedAs(token_ref_to_std_string(ann)));
	}
;

// Match condition - text(text)
// Returns boost::shared_ptr<MatchText>
// 	text("some text")
match_cond_text
	returns [boost::shared_ptr<MatchText> mtch]
	: "text" LPAREN literal: STRING RPAREN
	{
		mtch.reset(new MatchText(token_ref_to_ustring(literal)));
	}
;

// Match condition - oneof(variant1(v1), variant(v2), ...)
// Returns boost::shared_ptr<OneOf>
// Builds a OneOf condition from the variants parsed by match_variants.
match_cond_oneof
	[ParsingScope& scope]
	returns [boost::shared_ptr<OneOf> onf]
{
	boost::shared_ptr<std::vector<boost::shared_ptr<ConjConditions> > > variants;
}
	: "oneof" LPAREN variants = match_variants [scope] RPAREN {
		onf.reset(new OneOf(variants));
	}
;

// Match condition - longest(variant1(v1), variant(v2), ...)
// Returns boost::shared_ptr<Longest>
// Builds a Longest condition from the variants parsed by match_variants.
match_cond_longest
	[ParsingScope& scope]
	returns [boost::shared_ptr<Longest> lng]
{
	boost::shared_ptr<std::vector<boost::shared_ptr<ConjConditions> > > variants;
}
	: "longest" LPAREN variants = match_variants [scope] RPAREN {
		lng.reset(new Longest(variants));
	}
;

// ----------------------------------------------------------------------------

// Match actions. Match action can be mark or unmark
// Returns boost::shared_ptr<MatchAction>
// Dispatches to the mark or the unmark variant.
match_action
	[ParsingScope& scope]
	returns [boost::shared_ptr<MatchAction> m_act]
	: m_act = match_mark_action   [scope]
	| m_act = match_unmark_action [scope]
;

// Match mark action:
// 	mark(from, "ann"), mark(from, to, "ann") or mark(from, to, head, "ann")
// Returns boost::shared_ptr<MarkMatch>
match_mark_action
	[ParsingScope& scope]
	returns [boost::shared_ptr<MarkMatch> m_act]
{
	boost::shared_ptr<Function<Match> > match_to;
	boost::shared_ptr<Function<Match> > match_from;
	boost::shared_ptr<Function<Match> > head_match;
}
	: "mark" LPAREN 
			match_from = match_operator[scope] COMMA
			( match_to  = match_operator[scope] COMMA
				( head_match = match_operator[scope] COMMA )?
			)?
			annotation_name : STRING
		RPAREN {
			// Optional arguments that were not given leave their pointers
			// null; that selects the MarkMatch constructor to call.
			// The token reference is dereferenced directly instead of going
			// through a C-style cast to antlr::Token*.
			if (!match_to) {
				m_act.reset(
					new MarkMatch(
						match_from,
						annotation_name->getText()));
			} else if (!head_match) {
				m_act.reset(
					new MarkMatch(
						match_from,
						match_to,
						annotation_name->getText()));
			} else {
				m_act.reset(
					new MarkMatch(
						match_from,
						match_to,
						head_match,
						annotation_name->getText()));
			}
		}
;

// Match unmark action:
// 	unmark(match, "ann")
// Returns boost::shared_ptr<UnmarkMatch>
match_unmark_action
	[ParsingScope& scope]
	returns [boost::shared_ptr<UnmarkMatch> m_act]
{
	boost::shared_ptr<Function<Match> > match_at;
}
	: "unmark" LPAREN
				match_at = match_operator[scope] COMMA
				annotation_name : STRING
			RPAREN {
				// The token reference is dereferenced directly instead of
				// going through a C-style cast to antlr::Token*.
				m_act.reset(
					new UnmarkMatch(
							match_at,
							annotation_name->getText()));
			}
;

// Match action separated by comma
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > >
// Gathers one or more comma-separated match actions in input order.
match_action_comma_sep
	[ParsingScope& scope]
	returns [boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > r_vec]
{
	boost::shared_ptr<MatchAction> act;

	// Allocate the result up front so the actions below can append to it.
	r_vec.reset(
		new std::vector<boost::shared_ptr<MatchAction> >
	);
}
	: act = match_action [scope] {
		r_vec->push_back(act);
	}
	(
		COMMA act = match_action [scope] {
			r_vec->push_back(act);
		}
	)*
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
rk's avatar
rk committed
// ANTLR LEXER
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
rk's avatar
rk committed
// Lexer for the grammar above; exports its token vocabulary as ANTLRExpr.
// Literal testing is off by default and enabled per rule where needed.
// NOTE(review): no lookahead depth (k) is visible in this copy of the
// options block - confirm against the repository before regenerating.
class ANTLRLexer extends Lexer;
options {
	exportVocab    = ANTLRExpr;
	charVocabulary = '\3'..'\377';
	testLiterals   = false;
}

rk's avatar
rk committed
// String literal in double or single quotes. The quotes are dropped from
// the token text ('!' suffix); a string may not span lines and has no
// escape for its own quote character.
STRING
options {
	paraphrase = "a string";
}
	: '"'!  (~('"'  | '\n' | '\r'))* '"'!
	| '\''! (~('\'' | '\n' | '\r'))* '\''!
;

// Integer with a mandatory sign. Spaces and tabs between the sign and the
// digits are accepted but dropped from the token text ('!' suffix).
SIGNED_INT
options {
	paraphrase = "Signed integer";
}
	: ('-'|'+') (' '!|'\t'!)* ('0'..'9')+ 
;

// One or more decimal digits without a sign.
UNSIGNED_INT
options { paraphrase = "Unsigned integer"; }
	: ( '0'..'9' )+
;


rk's avatar
rk committed
QUOT_MARK
options {
	paraphrase = "Quote";
rk's avatar
rk committed

APOS_MARK
options {
	paraphrase = "Apostrophe";
rk's avatar
rk committed
// Single '?' character.
Q_MARK
options {
	paraphrase = "Question mark";
}
	: '?'
;

// Single '!' character.
E_MARK
options {
	paraphrase = "Exclamation mark";
}
	: '!'
;

STR_PREFIX
options {
	paraphrase = "String prefix";
}
	paraphrase = "Symset prefix";
;

BOOL_PREFIX
options {
	paraphrase = "Bool prefix";
}
rk's avatar
rk committed

POS_PREFIX
options {
// Literal "$m:" prefix.
MATCH_VECTOR_PREFIX
options { paraphrase = "Match vector prefix"; }
	: "$m:"
;

Adam Wardynski's avatar
Adam Wardynski committed
// Literal "@s:" prefix.
STR_SECTION_PREFIX
options { paraphrase = "Wccl file string operators section prefix"; }
	: "@s:"
;

// Literal "@p:" prefix.
POS_SECTION_PREFIX
options { paraphrase = "Wccl file position operators section prefix"; }
	: "@p:"
;

// Literal "@b:" prefix.
BOOL_SECTION_PREFIX
options { paraphrase = "Wccl file bool operators section prefix"; }
	: "@b:"
;

// Literal "@t:" prefix.
TST_SECTION_PREFIX
options { paraphrase = "Wccl file symset operators section prefix"; }
	: "@t:"
;

// Literal "@m:" prefix; match_operator_sequence starts with this token.
// The paraphrase used to be a copy of the symset one, which made error
// messages for this token name the wrong section.
MATCH_SECTION_PREFIX
options {
	paraphrase = "Wccl file match operators section prefix";
}
	: "@m:"
;

rk's avatar
rk committed
// Single '[' character.
LBRACKET 
options {
	paraphrase = "'['";
}
	: '[' 
;

// Single ']' character.
RBRACKET 
options {
	paraphrase = "']'";
}
	: ']' 
;

// Single '(' character.
LPAREN
options {
	paraphrase = "'('";
}   
	: '(' 
;

// Single ')' character.
RPAREN 
options {
	paraphrase = "')'";
} 
	: ')' 
;

// Single '{' character.
LCURLY 
options {
	paraphrase = "'{'";
} 
	: '{' 
;

// Single '}' character.
RCURLY 
options {
	paraphrase = "'}'";
} 
	: '}' 
;

// Single '@' character.
AT_MARK 
options {
	paraphrase = "'@'";
} 
	: '@' 
;

Adam Wardynski's avatar
Adam Wardynski committed

rk's avatar
rk committed
// Single ',' character.
COMMA
options { 
	paraphrase = "','";
}
	: ','
;

Adam Wardynski's avatar
Adam Wardynski committed
// Single ';' character.
// The paraphrase is quoted like those of COMMA and the bracket tokens so
// that error messages render all punctuation tokens the same way.
SEMI
options {
	paraphrase = "';'";
}
	: ';'
;

rk's avatar
rk committed
// Identifier: a letter or underscore followed by letters, digits and
// underscores, optionally wrapped in backquotes (the backquotes stay in
// the token text). testLiterals is switched on for this rule only, so
// matched text is checked against the literals table.
SYMBOL
options { 
	paraphrase = "Symbol"; 
	testLiterals = true; 
}
	: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
	| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
;

rk committed
// Whitespace is skipped. Line breaks also call newline() so that line
// numbers stay correct.
// NOTE(review): the first alternative was missing from this copy of the
// file; a plain space is assumed - confirm against the repository.
WS
	: ( ' '
	  	| '\t'
	  	| '\f'
	  	| 
			( "\r\n"
				| '\r'
				| '\n'
			) { newline(); } 
		) { $setType(antlr::Token::SKIP); } 
;

// "//" up to (not including) the end of the line; the token is skipped.
COMMENT
options { paraphrase = "Single line comment"; }
	: "//" ( ~('\n'|'\r') )*
		{ $setType(antlr::Token::SKIP); }
;

// Multi line comment; line breaks inside it call newline() and the token
// is skipped like COMMENT and WS.
// NOTE(review): the "/*" opener, the loop options, the "*/" closer and the
// skip action were missing from this copy of the file and are restored
// after the usual ANTLR 2 idiom - confirm against the repository.
ML_COMMENT
options {
	paraphrase = "Multi line comment";
}
	: "/*"
	(			// TODO: test it and add reference to the site it's taken from!
				/* This actually works OK despite the ambiguity that
				'\r' '\n' can be matched in one alternative or by matching
				'\r' in one iteration and '\n' in another.. But 
				this is really matched just by one rule per (...)* 
				loop iteration, so it's OK.
				This is exactly how they do it all over the web - just
				turn off the warning for this particular token.*/
		options {
			generateAmbigWarnings = false;
		}
      : { LA(2)!='/' }? '*'
      | '\r' '\n' { newline(); }
      | '\r' { newline(); }
      | '\n' { newline(); }
      | ~('*'|'\n'|'\r')
	)*
	"*/"
	{ $setType(antlr::Token::SKIP); }
;


// Single '#' character.
HASH
options { 
	paraphrase = "'#'"; 
}
	: '#' 
;


//DSEPARATOR
//options { 
//	paraphrase = "':-'"; 
//}
//	: ":-" 
//;