Skip to content
Snippets Groups Projects
grammar.g 39.6 KiB
Newer Older
			p1 = position_operator [tagset, vars] COMMA 
			p2 = position_operator [tagset, vars] {
				op.reset(new Equals<Position>(p1, p2));
			t1 = symset_operator [tagset, vars] COMMA  
			t2 = symset_operator [tagset, vars] {
				op.reset(new Equals<TSet>(t1, t2));
			}
		)
	|
		(strset_operator [tagset, vars]) =>
			s1 = strset_operator [tagset, vars] COMMA  
			s2 = strset_operator [tagset, vars] {
				op.reset(new Equals<StrSet>(s1, s2));
			}
		)
	|
		(
			b1 = bool_operator [tagset, vars] COMMA
			b2 = bool_operator [tagset, vars] {
				op.reset(new Equals<Bool>(b1, b2));
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<TSet> > t1, t2;
	boost::shared_ptr<Function<StrSet> > s1, s2;
		(symset_operator [tagset, vars]) =>
			t1 = symset_operator [tagset, vars] COMMA 
			t2 = symset_operator [tagset, vars] {
				op.reset(new IsSubsetOf<TSet>(t1, t2));
			s1 = strset_operator [tagset, vars] COMMA
			s2 = strset_operator [tagset, vars] {
				op.reset(new IsSubsetOf<StrSet>(s1, s2));
;
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<TSet> > t1, t2;
	boost::shared_ptr<Function<StrSet> > s1, s2;
		(symset_operator [tagset, vars]) =>
			t1 = symset_operator [tagset, vars] COMMA  
			t2 = symset_operator [tagset, vars]  {
				op.reset(new Intersects<TSet>(t1, t2));
			s1 = strset_operator [tagset, vars] COMMA  
			s2 = strset_operator [tagset, vars]  {
				op.reset(new Intersects<StrSet>(s1, s2));
// ----------------------------------------------------------------------------
// Iteration operators: only, atleast, llook, rlook.
// Every alternative parses two position operators, a position variable
// accessor and a boolean operator (plus a number for "atleast"), and wraps
// them in the matching operator object.
// Returns boost::shared_ptr<Function<Bool> >
bool_iteration
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	// Operands shared by all alternatives; filled in while parsing.
	boost::shared_ptr<Function<Position> > pos_l, pos_r;
	boost::shared_ptr<VariableAccessor<Position> > iter_var;
	boost::shared_ptr<Function<Bool> > condition;
	// Only assigned by the "atleast" alternative.
	int required_matches = 0;
}
	// only(pos, pos, position-variable, bool-expr)
	:	"only" LPAREN
			pos_l     = position_operator     [tagset, vars] COMMA
			pos_r     = position_operator     [tagset, vars] COMMA
			iter_var  = position_variable_acc [vars]         COMMA
			condition = bool_operator         [tagset, vars]
		RPAREN
		{
			ret.reset(new Only(pos_l, pos_r, *iter_var, condition));
		}

	// atleast(pos, pos, position-variable, bool-expr, number)
	|	"atleast" LPAREN
			pos_l     = position_operator     [tagset, vars] COMMA
			pos_r     = position_operator     [tagset, vars] COMMA
			iter_var  = position_variable_acc [vars]         COMMA
			condition = bool_operator         [tagset, vars] COMMA
			required_matches = number
		RPAREN
		{
			ret.reset(new AtLeast(pos_l, pos_r, *iter_var, condition, required_matches));
		}

	// llook(pos, pos, position-variable, bool-expr)
	// Note the inverted order: the first position parsed is stored as the
	// right one and the second as the left one.
	|	"llook" LPAREN
			pos_r     = position_operator     [tagset, vars] COMMA
			pos_l     = position_operator     [tagset, vars] COMMA
			iter_var  = position_variable_acc [vars]         COMMA
			condition = bool_operator         [tagset, vars]
		RPAREN
		{
			ret.reset(new LeftLook(pos_l, pos_r, *iter_var, condition));
		}

	// rlook(pos, pos, position-variable, bool-expr)
	|	"rlook" LPAREN
			pos_l     = position_operator     [tagset, vars] COMMA
			pos_r     = position_operator     [tagset, vars] COMMA
			iter_var  = position_variable_acc [vars]         COMMA
			condition = bool_operator         [tagset, vars]
		RPAREN
		{
			ret.reset(new RightLook(pos_l, pos_r, *iter_var, condition));
		}
;

// ----------------------------------------------------------------------------
// Agreement operators: agr, agrpp, wagr.
// All three share one argument list -- two position operators followed by a
// symset operator -- and differ only in the object they construct. The tagset
// is handed to each constructor as well.
// Returns boost::shared_ptr<Function<Bool> >
bool_agreement
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	// Operands shared by all alternatives; filled in while parsing.
	boost::shared_ptr<Function<Position> > pos_l, pos_r;
	boost::shared_ptr<Function<TSet> > agr_attrs;
}
	// agr(pos, pos, symset)
	:	"agr" LPAREN
			pos_l     = position_operator [tagset, vars] COMMA
			pos_r     = position_operator [tagset, vars] COMMA
			agr_attrs = symset_operator   [tagset, vars]
		RPAREN
		{
			ret.reset(new StrongAgreement(pos_l, pos_r, agr_attrs, tagset));
		}

	// agrpp(pos, pos, symset)
	|	"agrpp" LPAREN
			pos_l     = position_operator [tagset, vars] COMMA
			pos_r     = position_operator [tagset, vars] COMMA
			agr_attrs = symset_operator   [tagset, vars]
		RPAREN
		{
			ret.reset(new PointAgreement(pos_l, pos_r, agr_attrs, tagset));
		}

	// wagr(pos, pos, symset)
	|	"wagr" LPAREN
			pos_l     = position_operator [tagset, vars] COMMA
			pos_r     = position_operator [tagset, vars] COMMA
			agr_attrs = symset_operator   [tagset, vars]
		RPAREN
		{
			ret.reset(new WeakAgreement(pos_l, pos_r, agr_attrs, tagset));
		}
;

// ----------------------------------------------------------------------------
// Setvar operator: "setvar" LPAREN <typed setvar body> RPAREN
// The part between the parentheses is delegated to one of the typed
// *_setvar rules (position, bool, strset, symset); whatever that rule
// returns is passed through unchanged as this rule's result.
// Returns boost::shared_ptr<Function<Bool> >
// ----------------------------------------------------------------------------
setvar_operator 
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
	: "setvar" LPAREN
	  (
		  // One alternative per variable type, listed in this order.
		  ret = position_setvar [tagset, vars]
		| ret = bool_setvar     [tagset, vars]
		| ret = strset_setvar   [tagset, vars]
		| ret = symset_setvar   [tagset, vars]
	  )
	  RPAREN
;

// ----------------------------------------------------------------------------
// Setvar for position
position_setvar 
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<Position> > ret_op;
	boost::shared_ptr<VariableAccessor<Position> > ret_acc;
	:	ret_acc = position_variable_acc [vars]
		COMMA
		ret_op  = position_operator [tagset, vars] {
			op.reset(new VarSetter<Position>(*ret_acc, ret_op));
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<Bool> > ret_op;
	boost::shared_ptr<VariableAccessor<Bool> > ret_acc;
	:	ret_acc = bool_variable_acc [vars]
		COMMA
		ret_op  = bool_operator [tagset, vars] {
			op.reset(new VarSetter<Bool>(*ret_acc, ret_op));
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<StrSet> > ret_op;
	boost::shared_ptr<VariableAccessor<StrSet> > ret_acc;
	: ret_acc = strset_variable_acc [vars]
	  COMMA 
		ret_op  = strset_operator [tagset, vars] {
			op.reset(new VarSetter<StrSet>(*ret_acc, ret_op));
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<TSet> > ret_op;
	boost::shared_ptr<VariableAccessor<TSet> > ret_acc;
	: ret_acc = symset_variable_acc [vars]
	  COMMA 
	  ret_op  = symset_operator [tagset, vars] {
			op.reset(new VarSetter<TSet>(*ret_acc, ret_op));
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
rk's avatar
rk committed
// ANTLR LEXER
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
rk's avatar
rk committed
class ANTLRLexer extends Lexer;
options {
	exportVocab    = ANTLRExpr;
	charVocabulary = '\3'..'\377';
	testLiterals   = false;
rk's avatar
rk committed
}

rk's avatar
rk committed
STRING
options {
	paraphrase = "a string";
}
	: '"'!  (~('"'  | '\n' | '\r'))* '"'!
	| '\''! (~('\'' | '\n' | '\r'))* '\''!
SIGNED_INT
rk's avatar
rk committed
options {
	paraphrase = "Signed integer";
rk's avatar
rk committed
}
	: ('-'|'+') (' '!|'\t'!)* ('0'..'9')+ 
rk's avatar
rk committed

// Unsigned integer literal: one or more decimal digits with no sign.
UNSIGNED_INT
options {
	paraphrase = "Unsigned integer";
}
	: ('0'..'9')+ 
;	


rk's avatar
rk committed
QUOT_MARK
options {
	paraphrase = "Quote";
rk's avatar
rk committed

APOS_MARK
options {
	paraphrase = "Apostrophe";
rk's avatar
rk committed
Q_MARK
options {
	paraphrase = "Question mark";
rk's avatar
rk committed
}
	: '?'
rk's avatar
rk committed

E_MARK
options {
	paraphrase = "Exclamation mark";
rk's avatar
rk committed
}
	: '!'
rk's avatar
rk committed

STR_PREFIX
options {
	paraphrase = "String prefix";
}
	paraphrase = "Symset prefix";
;

BOOL_PREFIX
options {
	paraphrase = "Bool prefix";
}
rk's avatar
rk committed

POS_PREFIX
options {
rk's avatar
rk committed
LBRACKET 
options {
	paraphrase = "'['";
}
	: '[' 
rk's avatar
rk committed

RBRACKET 
options {
	paraphrase = "']'";
}
	: ']' 
rk's avatar
rk committed

LPAREN
options {
	paraphrase = "'('";
}   
	: '(' 
rk's avatar
rk committed

RPAREN 
options {
	paraphrase = "')'";
} 
	: ')' 
rk's avatar
rk committed

LCURLY 
options {
	paraphrase = "'{'";
} 
	: '{' 
rk's avatar
rk committed

RCURLY 
options {
	paraphrase = "'}'";
} 
	: '}' 
rk's avatar
rk committed

AT_MARK 
options {
	paraphrase = "'@'";
} 
	: '@' 
rk's avatar
rk committed

COMMA
options { 
	paraphrase = "','";
rk's avatar
rk committed
}
	: ','
rk's avatar
rk committed

SYMBOL
options { 
	paraphrase = "Symbol"; 
rk's avatar
rk committed
	testLiterals = true; 
}
	: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
	| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
rk's avatar
rk committed
WS
	  	| '\t'
	  	| '\f'
	  	| 
			( "\r\n"
				| '\r'
				| '\n'
			) { newline(); } 
		) { $setType(antlr::Token::SKIP); } 
rk's avatar
rk committed

// Single-line comment: "//" followed by every character up to, but not
// including, the next '\n' or '\r'. The action sets the token type to
// antlr::Token::SKIP (ANTLR's marker for tokens the lexer should discard).
COMMENT
options {
	paraphrase = "Single line comment";
}
	: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP);  }
;

ML_COMMENT
options {
	paraphrase = "Multi line comment";
rk's avatar
rk committed
}
	(			// TODO: test it and add reference to the site it's taken from!
				/* This works OK despite the ambiguity that '\r' '\n' can
				be matched either by a single alternative or by matching
				'\r' in one loop iteration and '\n' in the next. Each
				iteration of the (...)* loop matches exactly one
				alternative, so it's OK.
				This is the commonly used approach: just turn off the
				ambiguity warning for this particular token. */
      : { LA(2)!='/' }? '*'
      | '\r' '\n' { newline(); }
      | '\r' { newline(); }
      | '\n' { newline(); }
      | ~('*'|'\n'|'\r')
rk's avatar
rk committed

HASH
options { 
	paraphrase = "'#'"; 
}
	: '#' 
rk's avatar
rk committed

//DSEPARATOR
//options { 
//	paraphrase = "':-'"; 
//}
//	: ":-" 
//;