grammar.g


// Apply operator for match rules. Accepted forms:
// 	apply(match(...), cond(conditions), actions(actions))
// 	apply(match(...), actions(actions))
// The cond(...) section is optional; when it is absent the operator is built
// from the match operator and the actions only.
// Returns boost::shared_ptr<ApplyOperator>
match_apply_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<ApplyOperator> ret_op]
{
	// Accessor for the "_M" Match variable, handed over to the ApplyOperator.
	VariableAccessor<Match> match_accessor = vars.create_accessor<Match>("_M");
	boost::shared_ptr<const MatchOperator> matcher;
	boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > acts;
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > conds;
}
	: "apply" LPAREN
		matcher = match_operator [tagset, vars] COMMA
		(
			"cond" LPAREN conds = bool_operator_comma_sep [tagset, vars] RPAREN COMMA
		)?
		"actions" LPAREN acts = match_action_comma_sep [tagset, vars] RPAREN
	RPAREN {
		// conds is assigned only when the optional cond section was parsed.
		if (!conds) {
			ret_op.reset(new ApplyOperator(match_accessor, matcher, acts));
		} else {
			ret_op.reset(new ApplyOperator(match_accessor, matcher, acts, conds));
		}
	}
;

// Match operator: match(match_conditions)
// Parses the parenthesised conditions and wraps them in a MatchOperator.
// Returns boost::shared_ptr<MatchOperator>
match_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<MatchOperator> op]
{
	boost::shared_ptr<ConjConditions> conj;
}
	: "match" LPAREN
		conj = match_condition [tagset, vars]
	RPAREN {
		op.reset(new MatchOperator(conj));
	}
;

// Match conditions: gathers the comma-separated conditions parsed by
// match_condition_in into a single ConjConditions object.
// Returns boost::shared_ptr<ConjConditions>
match_condition
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<ConjConditions> condition]
{
	std::vector<boost::shared_ptr<const MatchCondition> > parsed;
}
	: parsed = match_condition_in [tagset, vars] {
		condition.reset(new ConjConditions(parsed));
	}
;

// Comma-separated list of one or more match conditions.
// Every condition is parsed by match_cond_all and appended in source order.
// Returns std::vector< boost::shared_ptr<const MatchCondition> >
match_condition_in
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [std::vector< boost::shared_ptr<const MatchCondition> > ret]
{
	boost::shared_ptr<const MatchCondition> item;
}
	:
	// first, mandatory condition
	item = match_cond_all [tagset, vars] { ret.push_back(item); }
	// any number of further conditions, each preceded by a comma
	(
		COMMA item = match_cond_all [tagset, vars] { ret.push_back(item); }
	)*
;

// Match variants: variant(v1), variant(v2), ...
// At least one variant is required; further ones are separated by commas.
// NOTE: storing the parsed variants is still a TODO - each variant is parsed
// and then dropped, so the returned vector is always empty.
// Returns boost::shared_ptr<std::vector<ConjConditions> >
match_variants
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<std::vector<ConjConditions> > variants]
{
	boost::shared_ptr<ConjConditions> parsed_variant;

	variants.reset(new std::vector<ConjConditions>());
}
	:
	"variant" LPAREN parsed_variant = match_condition [tagset, vars] RPAREN {
		// TODO
		// variants->push_back(parsed_variant);
	}
	(
		COMMA
		"variant" LPAREN parsed_variant = match_condition [tagset, vars] RPAREN {
			// TODO
			// variants->push_back(parsed_variant);
		}
	)*
;

// Any single match condition.
// Dispatches to exactly one of the specific match condition rules listed
// below and passes its result through unchanged. The "is" and "text"
// conditions take no rule arguments; all the others receive tagset and vars.
// Returns boost::shared_ptr<const MatchCondition>
match_cond_all
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<const MatchCondition> ret]
	: ret = match_cond_optional [tagset, vars]
	| ret = match_cond_repeate  [tagset, vars]
	| ret = match_cond_token    [tagset, vars]
	| ret = match_cond_oneof    [tagset, vars]
	| ret = match_cond_longest  [tagset, vars]
	| ret = match_cond_is
	| ret = match_cond_text
;

// Match condition - token (wraps a L0 predicate)
// Parses a single bool operator and wraps it in a TokenCondition.
// Returns boost::shared_ptr<const TokenCondition>
match_cond_token
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<const TokenCondition> ret]
{
	boost::shared_ptr<Function<Bool> > predicate;
}
	: predicate = bool_operator [tagset, vars] {
		ret = boost::make_shared<TokenCondition>(predicate);
	}
;


 // Match condition - optional
// Syntax: optional(match_conditions)
// The parsed conditions are wrapped in an OptionalMatch.
// Returns boost::shared_ptr<OptionalMatch>
match_cond_optional
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<OptionalMatch> opt_match]
{
	boost::shared_ptr<ConjConditions> inner;
}
	: "optional" LPAREN
		inner = match_condition [tagset, vars]
	RPAREN {
		opt_match.reset(new OptionalMatch(inner));
	}
;

// Match condition - repeat
// Syntax: repeat(match_conditions)
// The parsed conditions are wrapped in a RepeatedMatch.
// Returns boost::shared_ptr<RepeatedMatch>
match_cond_repeate
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<RepeatedMatch> rep_match]
{
	boost::shared_ptr<ConjConditions> inner;
}
	: "repeat" LPAREN
		inner = match_condition [tagset, vars]
	RPAREN {
		rep_match.reset(new RepeatedMatch(inner));
	}
;

// Match condition - is(ann_name)
// ann_name is a STRING token; its text is converted with
// token_ref_to_std_string and passed to IsAnnotatedAs.
// Returns boost::shared_ptr<IsAnnotatedAs>
match_cond_is
	returns [boost::shared_ptr<IsAnnotatedAs> is_cond]
	: "is" LPAREN name_tok : STRING RPAREN {
		is_cond.reset(
			new IsAnnotatedAs(token_ref_to_std_string(name_tok))
		);
	}
;

// Match condition - text(text)
// The argument is a STRING token; its text is converted with
// token_ref_to_ustring and passed to MatchText.
// Returns boost::shared_ptr<MatchText>
match_cond_text
	returns [boost::shared_ptr<MatchText> text_cond]
	: "text" LPAREN text_tok : STRING RPAREN {
		text_cond.reset(
			new MatchText(token_ref_to_ustring(text_tok))
		);
	}
;

// Match condition - oneof(variant(v1), variant(v2), ...)
// The variant list is parsed by match_variants and handed to OneOf.
// Returns boost::shared_ptr<OneOf>
match_cond_oneof
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<OneOf> onf]
{
	boost::shared_ptr<std::vector<ConjConditions> > parsed_variants;
}
	: "oneof" LPAREN
		parsed_variants = match_variants [tagset, vars]
	RPAREN {
		onf.reset(new OneOf(parsed_variants));
	}
;

// Match condition - longest(variant(v1), variant(v2), ...)
// The variant list is parsed by match_variants and handed to Longest.
// Returns boost::shared_ptr<Longest>
match_cond_longest
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Longest> lng]
{
	boost::shared_ptr<std::vector<ConjConditions> > parsed_variants;
}
	: "longest" LPAREN
		parsed_variants = match_variants [tagset, vars]
	RPAREN {
		lng.reset(new Longest(parsed_variants));
	}
;

// ----------------------------------------------------------------------------

// Single match action: either a mark(...) or an unmark(...) action.
// The result of the chosen sub-rule is passed through unchanged.
// Returns boost::shared_ptr<MatchAction>
match_action
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<MatchAction> m_act]
	: m_act = match_mark_action   [tagset, vars]
	| m_act = match_unmark_action [tagset, vars]
;

// Match mark action. Accepted forms:
// 	mark(match_from, annotation_name)
// 	mark(match_from, match_to, annotation_name)
// 	mark(match_from, match_to, head_match, annotation_name)
// annotation_name is a STRING token; the other arguments are match_fit
// expressions.
// Returns boost::shared_ptr<MarkMatch>
match_mark_action
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<MarkMatch> m_act]
{
	boost::shared_ptr<Function<Match> > match_to;
	boost::shared_ptr<Function<Match> > match_from;
	boost::shared_ptr<Function<Match> > head_match;
}
	: "mark" LPAREN
			match_from = match_fit[tagset, vars] COMMA
			( match_to  = match_fit[tagset, vars] COMMA
				( head_match = match_fit[tagset, vars] COMMA )?
			)?
			annotation_name : STRING
		RPAREN {
			// Read the token text once, through the same helper that
			// match_cond_is uses for annotation names, instead of a
			// C-style cast on the token handle.
			const std::string ann_name = token_ref_to_std_string(annotation_name);
			// match_to and head_match are assigned only when the matching
			// optional argument was parsed; choose the constructor accordingly.
			if (!match_to) {
				m_act.reset(new MarkMatch(match_from, ann_name));
			} else if (!head_match) {
				m_act.reset(new MarkMatch(match_from, match_to, ann_name));
			} else {
				m_act.reset(
					new MarkMatch(match_from, match_to, head_match, ann_name));
			}
		}
;

// Match unmark action: unmark(match_at, annotation_name)
// match_at is a match_fit expression and annotation_name is a STRING token.
// Returns boost::shared_ptr<UnmarkMatch>
match_unmark_action
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<UnmarkMatch> m_act]
{
	boost::shared_ptr<Function<Match> > match_at;
}
	: "unmark" LPAREN
				match_at = match_fit[tagset, vars] COMMA
				annotation_name : STRING
			RPAREN {
				// Use the same helper that match_cond_is uses for annotation
				// names instead of a C-style cast on the token handle.
				m_act.reset(
					new UnmarkMatch(
							match_at,
							token_ref_to_std_string(annotation_name)));
			}
;

// Comma-separated list of one or more match actions.
// The result vector is allocated before parsing starts and every parsed
// action is appended to it in source order.
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > >
match_action_comma_sep
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > r_vec]
{
	r_vec.reset(new std::vector<boost::shared_ptr<MatchAction> >);

	boost::shared_ptr<MatchAction> parsed_action;
}
	:
	// first, mandatory action
	parsed_action = match_action [tagset, vars] { r_vec->push_back(parsed_action); }
	// any number of further actions, each preceded by a comma
	(
		COMMA
		parsed_action = match_action [tagset, vars] { r_vec->push_back(parsed_action); }
	)*
;


// Match-valued expression. One of:
// 	anything accepted by match_var_val,
// 	M - a getter for the "_M" Match variable,
// 	a parenthesised match expression,
// followed by any number of "-> N" submatch references (N is an
// UNSIGNED_INT token).
// Returns boost::shared_ptr<Function<Match> >
match_fit
  [const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Match> > ret]
	:
	(
		ret = match_var_val [tagset, vars]
	|
		"M" {
			ret.reset(new VarGetter<Match>(vars.create_accessor<Match>("_M")));
		}
	|
		LPAREN ret = match_fit [tagset, vars] RPAREN
	)
	(
		// every arrow wraps the expression built so far in a Submatch
		ARROW idx : UNSIGNED_INT {
			ret.reset(new Submatch(ret, token_ref_to_int(idx)));
		}
	)*
;

// Match variable or match constant.
// Dispatches to match_vector_variable (which receives vars only) or to
// match_value_const (which takes no arguments); tagset is accepted but is
// not forwarded to either sub-rule.
// Returns boost::shared_ptr<Function<Match> >
match_var_val
  [const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Match> > ret]
	: ret = match_vector_variable [vars]
	| ret = match_value_const
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ANTLR LEXER
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Lexer definition. Options:
// 	exportVocab    - name under which the token vocabulary is exported
// 	charVocabulary - characters accepted by the lexer ('\3' through '\377')
// 	testLiterals   - off by default; individual rules opt in (see SYMBOL)
// 	k              - lookahead depth of two characters
class ANTLRLexer extends Lexer;
options {
	exportVocab    = ANTLRExpr;
	charVocabulary = '\3'..'\377';
	testLiterals   = false;
	k              = 2;
}


// String literal delimited by double or single quotes.
// The delimiters are dropped from the token text (the '!' suffix). The body
// may contain any character except the delimiter itself, '\n' and '\r';
// there is no escape-sequence handling.
// TODO
STRING
options {
	paraphrase = "a string";
}
	: '"'!  (~('"'  | '\n' | '\r'))* '"'!
	| '\''! (~('\'' | '\n' | '\r'))* '\''!
;

// Integer with a mandatory '-' or '+' sign followed by one or more digits.
// Spaces and tabs between the sign and the digits are accepted but dropped
// from the token text (the '!' suffix).
SIGNED_INT
options {
	paraphrase = "Signed integer";
}
	: ('-'|'+') (' '!|'\t'!)* ('0'..'9')+ 
;	

// One or more decimal digits without a sign.
UNSIGNED_INT
options {
	paraphrase = "Unsigned integer";
}
	: ('0'..'9')+ 
;	


// A single apostrophe character (').
// NOTE(review): the token name and paraphrase say "quote" although the
// matched character is an apostrophe, and STRING also starts with this
// character - confirm the naming and how the overlap is meant to resolve.
QUOT_MARK
options {
	paraphrase = "Quote";
} 
	: '\'' 
;

// A single double-quote character (").
// NOTE(review): the token name and paraphrase say "apostrophe" although the
// matched character is a double quote, and STRING also starts with this
// character - confirm the naming and how the overlap is meant to resolve.
APOS_MARK
options {
	paraphrase = "Apostrophe";
}
	: '"' 
;

// The '?' character.
Q_MARK
options {
	paraphrase = "Question mark";
}
	: '?'
;

// The '!' character.
E_MARK
options {
	paraphrase = "Exclamation mark";
}
	: '!'
;

// The three-character prefix "$s:" (paraphrased as the string prefix).
STR_PREFIX
options {
	paraphrase = "String prefix";
}
	: "$s:"
;

// The three-character prefix "$t:" (paraphrased as the symset prefix).
TST_PREFIX
options {
	paraphrase = "Symset prefix";
}
	: "$t:"
;

// The three-character prefix "$b:" (paraphrased as the bool prefix).
BOOL_PREFIX
options {
	paraphrase = "Bool prefix";
}
	: "$b:"
;

// A bare '$' character (paraphrased as the position prefix).
// The "$s:", "$t:", "$b:" and "$m:" prefix tokens start with the same
// character.
POS_PREFIX
options {
	paraphrase = "Position prefix";
}
	: '$'
;

// The three-character prefix "$m:" (paraphrased as the match vector prefix).
MATCH_VECTOR_PREFIX
options {
	paraphrase = "Match vector prefix";
}
	: "$m:"
;

// Opening square bracket.
LBRACKET 
options {
	paraphrase = "'['";
}
	: '[' 
;

// Closing square bracket.
RBRACKET 
options {
	paraphrase = "']'";
}
	: ']' 
;

// Opening parenthesis.
LPAREN
options {
	paraphrase = "'('";
}   
	: '(' 
;

// Closing parenthesis.
RPAREN 
options {
	paraphrase = "')'";
} 
	: ')' 
;

// Opening curly brace.
LCURLY 
options {
	paraphrase = "'{'";
} 
	: '{' 
;

// Closing curly brace.
RCURLY 
options {
	paraphrase = "'}'";
} 
	: '}' 
;

// The '@' character.
AT_MARK 
options {
	paraphrase = "'@'";
} 
	: '@' 
;

// The ',' character, used by the parser rules as the list separator.
COMMA
options { 
	paraphrase = "','";
}
	: ','
;

// The two-character arrow "->", used by match_fit for submatch references.
ARROW
options {
	paraphrase = "->";
}
	: "->"
;

// Identifier: a letter or underscore followed by letters, digits and
// underscores, optionally enclosed as a whole in a pair of backticks.
// testLiterals is switched on for this rule (it is off for the lexer as a
// whole), so the matched text is checked against the literals table.
SYMBOL
options { 
	paraphrase = "Symbol"; 
	testLiterals = true; 
}
	: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
	| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
;

// Whitespace: a single space, tab, form feed or line break.
// Line breaks ("\r\n", '\r' or '\n') call newline(); every whitespace token
// has its type set to SKIP.
WS
	: ( ' '
	  	| '\t'
	  	| '\f'
	  	| 
			( "\r\n"
				| '\r'
				| '\n'
			) { newline(); } 
		) { $setType(antlr::Token::SKIP); } 
;

// Single line comment: "//" followed by everything up to, but not including,
// the next '\n' or '\r'. The token type is set to SKIP.
COMMENT
options {
	paraphrase = "Single line comment";
}
	: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP);  }
;

// Multi line comment: everything between "/*" and the closing "*/".
// Inside the comment a '*' is consumed only when the character after it is
// not '/', line breaks call newline(), and any other character is consumed
// as is. The token type is set to SKIP.
ML_COMMENT
options {
	paraphrase = "Multi line comment";
}
  : "/*"
	(			// TODO: test it and add reference to the site it's taken from!
				/* This actually works OK despite the ambiguity that
				'\r' '\n' can be matched in one alternative or by matching
				'\r' in one iteration and '\n' in another.. But 
				this is really matched just by one rule per (...)* 
				loop iteration, so it's OK.
				This is exactly how they do it all over the web - just
				turn off the warning for this particular token.*/
		options { 
			generateAmbigWarnings = false; 
		}
      : { LA(2)!='/' }? '*'
      | '\r' '\n' { newline(); }
      | '\r' { newline(); }
      | '\n' { newline(); }
      | ~('*'|'\n'|'\r')
  	)*
    "*/"
    { $setType(antlr::Token::SKIP); }
;

// The '#' character.
HASH
options { 
	paraphrase = "'#'"; 
}
	: '#' 
;

//DSEPARATOR
//options { 
//	paraphrase = "':-'"; 
//}
//	: ":-" 
//;