header {
	//don't try to add all the headers inside our namespace
	ANTLR_END_NAMESPACE

	#include <libwccl/parser/ParserException.h>

	#include <cerrno>
	#include <climits>
	#include <cstdio>
	#include <cstdlib>
	#include <antlr/Token.hpp>
	#include <boost/lexical_cast.hpp>

	// values/variables
	#include <libwccl/variables.h>
	#include <libwccl/values/bool.h>
	#include <libwccl/values/tset.h>
	#include <libwccl/values/strset.h>
	#include <libwccl/values/position.h>
	
	// sentence context
	#include <libwccl/sentencecontext.h>

	// operators
	#include <libwccl/ops/operator.h>

	#include <libwccl/ops/functions/constant.h>
	#include <libwccl/ops/functions/vargetter.h>
	#include <libwccl/ops/functions/conditional.h>

	#include <libwccl/ops/functions/bool/varsetter.h>
	#include <libwccl/ops/functions/bool/predicates/or.h>
	#include <libwccl/ops/functions/bool/predicates/nor.h>
	#include <libwccl/ops/functions/bool/predicates/and.h>
	#include <libwccl/ops/functions/bool/predicates/regex.h>
	#include <libwccl/ops/functions/bool/predicates/intersects.h>
	#include <libwccl/ops/functions/bool/predicates/issubsetof.h>
	#include <libwccl/ops/functions/bool/predicates/isinside.h>
	#include <libwccl/ops/functions/bool/predicates/isoutside.h>
	#include <libwccl/ops/functions/bool/predicates/equals.h>
	#include <libwccl/ops/functions/bool/predicates/weakagreement.h>
	#include <libwccl/ops/functions/bool/predicates/pointagreement.h>
	#include <libwccl/ops/functions/bool/predicates/strongagreement.h>

	#include <libwccl/ops/functions/strset/affix.h>
	#include <libwccl/ops/functions/strset/getorth.h>
	#include <libwccl/ops/functions/strset/toupper.h>
	#include <libwccl/ops/functions/strset/tolower.h>
	#include <libwccl/ops/functions/strset/getlemmas.h>
	
	#include <libwccl/ops/functions/tset/agrfilter.h>
  #include <libwccl/ops/functions/tset/catfilter.h>
	#include <libwccl/ops/functions/tset/getsymbols.h>
	#include <libwccl/ops/functions/tset/getwordclass.h>
	#include <libwccl/ops/functions/tset/getsymbolsinrange.h>

	#include <libwccl/ops/functions/position/relativeposition.h>

	#include <libwccl/ops/functions/bool/iterations/only.h>
	#include <libwccl/ops/functions/bool/iterations/atleast.h>
	#include <libwccl/ops/functions/bool/iterations/leftlook.h>
	#include <libwccl/ops/functions/bool/iterations/rightlook.h>

	// Unicode String
	#include <unicode/uniset.h>
	#include <unicode/unistr.h>

	// start our namespace again
	ANTLR_BEGIN_NAMESPACE(Wccl)
}

// File-level ANTLR options: C++ is selected as the output language and
// "Wccl" as the namespace. genHashLines is set to false; the commented-out
// line below is the alternative setting.
options {
	language = "Cpp";
	genHashLines = false;
	namespace = "Wccl";
//	genHashLines = true;
}

// ----------------------------------------------------------------------------
// ANTLR PARSER
// ----------------------------------------------------------------------------
// Parser class of this grammar; the rules that follow belong to it.
// Options as configured below:
//   k = 1                        - lookahead depth
//   buildAST = false             - AST construction is switched off
//   exportVocab = ANTLRExpr      - name under which the vocabulary is exported
//   defaultErrorHandler = false  - default error handler is switched off
class ANTLRParser extends Parser;
options {
	k = 1;
	buildAST = false;
	exportVocab = ANTLRExpr;
	defaultErrorHandler = false;
}
{
private:
	// 
	// Returns the text of the token as a UnicodeString: the text is read as
	// UTF-8 and then passed through UnicodeString::unescape().
	const UnicodeString token_ref_to_ustring(antlr::RefToken& rstr) const {
		antlr::Token* const token = rstr;
		const UnicodeString decoded = UnicodeString::fromUTF8(token->getText());
		return decoded.unescape();
	}
	/*
	const UnicodeString str_token_ref_to_ustring(antlr::RefToken& rstr) const { 
		UnicodeString ret_ustr, ustr = token_ref_to_ustring(rstr);

		if (ustr.length() < 3) {
			return "";
		}

		ustr.extract(1, ustr.length() - 2, ret_ustr);

		return ret_ustr;
	}
	*/
	//
	// Returns the text of the token with one pair of enclosing grave accents
	// (`...`) removed. Text shorter than two characters, or not wrapped in
	// grave accents on both ends, is returned unchanged.
	const std::string str_token_rem_grav(antlr::RefToken& rstr) const {
		const std::string text = token_ref_to_std_string(rstr);
		const size_t length = text.length();

		const bool wrapped =
			length >= 2 && text[0] == '`' && text[length - 1] == '`';

		return wrapped ? text.substr(1, length - 2) : text;
	}
	//
	// Returns the raw text of the token as std::string.
	const std::string token_ref_to_std_string(antlr::RefToken& rstr) const {
		antlr::Token* const token = rstr;
		return token->getText();
	}
	//
	// Converts the text of a numeric token to int.
	// The text is parsed as a base-10 number with the same leniency as atoi
	// (optional sign, parsing stops at the first non-digit). Unlike atoi,
	// a value that does not fit into int is reported instead of being
	// undefined behaviour.
	// Throws ParserException when the value is out of the int range.
	int token_ref_to_int(antlr::RefToken& rstr) { 
		const std::string text = ((antlr::Token*)rstr)->getText();

		// strtol signals overflow of long through errno; the explicit bounds
		// check covers platforms where long is wider than int.
		errno = 0;
		const long value = std::strtol(text.c_str(), NULL, 10);
		if (errno == ERANGE || value < INT_MIN || value > INT_MAX) {
			throw ParserException("Integer value out of range: " + text);
		}

		return static_cast<int>(value);
	}
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// "GLOBAL" RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// ----------------------------------------------------------------------------
// Entry rule: parses one string set operator followed by EOF.
// A fresh Variables object is created for the expression and stored,
// together with the parsed function, in the resulting Operator.
// Returns boost::shared_ptr<Operator<StrSet> >
parse_strset_operator
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<Operator<StrSet> > res]
{
	Variables scope;
	boost::shared_ptr<Function<StrSet> > expr;
}
	: expr = strset_operator [tagset, scope]
		{
			res = boost::shared_ptr<Operator<StrSet> >(
				new Operator<StrSet>(expr, scope));
		}
		EOF
;

// ----------------------------------------------------------------------------
// Entry rule: parses one bool operator followed by EOF.
// A fresh Variables object is created for the expression and stored,
// together with the parsed function, in the resulting Operator.
// Returns boost::shared_ptr<Operator<Bool> >
parse_bool_operator
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<Operator<Bool> > res]
{
	Variables scope;
	boost::shared_ptr<Function<Bool> > expr;
}
	: expr = bool_operator [tagset, scope]
		{
			res = boost::shared_ptr<Operator<Bool> >(
				new Operator<Bool>(expr, scope));
		}
		EOF
;

// ----------------------------------------------------------------------------
// Entry rule: parses one symbol set operator followed by EOF.
// A fresh Variables object is created for the expression and stored,
// together with the parsed function, in the resulting Operator.
// Returns boost::shared_ptr<Operator<TSet> >
parse_symset_operator
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<Operator<TSet> > res]
{
	Variables scope;
	boost::shared_ptr<Function<TSet> > expr;
}
	: expr = symset_operator [tagset, scope]
		{
			res = boost::shared_ptr<Operator<TSet> >(
				new Operator<TSet>(expr, scope));
		}
		EOF
;

// ----------------------------------------------------------------------------
// Entry rule: parses one position operator followed by EOF.
// A fresh Variables object is created for the expression and stored,
// together with the parsed function, in the resulting Operator.
// Returns boost::shared_ptr<Operator<Position> >
parse_position_operator
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<Operator<Position> > res]
{
	Variables scope;
	boost::shared_ptr<Function<Position> > expr;
}
	: expr = position_operator [tagset, scope]
		{
			res = boost::shared_ptr<Operator<Position> >(
				new Operator<Position>(expr, scope));
		}
		EOF
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VALUES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// ----------------------------------------------------------------------------
// Single or multiple (comma separated) elements in a string set, may be:
//   'a' "a" [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"]
// Parses a strset literal and returns the plain strset value. The text of
// every STRING token is converted with token_ref_to_ustring before it is
// inserted into the set.
// Returns boost::shared_ptr<StrSet>
strset_literal
	returns [boost::shared_ptr<StrSet> s_set]
{
	s_set = boost::shared_ptr<StrSet>(new StrSet());
}
	: s0: STRING
		{
			const UnicodeString single = token_ref_to_ustring(s0);
			s_set->insert(single);
		}
	| LBRACKET
		(
			s1: STRING
			{
				const UnicodeString first = token_ref_to_ustring(s1);
				s_set->insert(first);
			}
			(
				COMMA s2: STRING
				{
					const UnicodeString next = token_ref_to_ustring(s2);
					s_set->insert(next);
				}
			)*
		)?
		RBRACKET
;
// String set value as a constant string set: wraps the parsed literal
// in a Constant<StrSet>.
// Returns boost::shared_ptr<Constant<StrSet> >
strset_value
	returns [boost::shared_ptr<Constant<StrSet> > val]
{
	boost::shared_ptr<StrSet> literal;
}
	: literal = strset_literal
		{
			val = boost::shared_ptr<Constant<StrSet> >(
				new Constant<StrSet>(*literal));
		}
;

// ----------------------------------------------------------------------------
// Single element of a symbol set. The SYMBOL text (with enclosing grave
// accents removed) is inserted into t_set using the given tagset.
// WARNING! This rule can throw ParserException: a Corpus2::TagParseError
// raised during insertion is rethrown as ParserException carrying e.info().
symset_elem
	[const Corpus2::Tagset& tagset, boost::shared_ptr<TSet>& t_set]
	: s1: SYMBOL
		{
			try {
				const std::string symbol = str_token_rem_grav(s1);
				t_set->insert_symbol(tagset, symbol);
			}
			catch(Corpus2::TagParseError &e) {
				throw ParserException(e.info());
			}
		}
;

// Symset literal. A symset literal may be:
//  a, `a ` (this is guaranteed by lexer rule - SYMBOL) or {a} {`a`} {a, b}
//  {`a`, `b`} {a, `b`} {`a`, b}
// The curly-brace form may also be empty ({}), because the element list is
// optional. Parses the literal and returns the plain symset value; every
// element is added to the result by symset_elem, which may throw
// ParserException.
// Returns boost::shared_ptr<TSet>
symset_literal
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<TSet> t_set]
{
	// start from an empty set; symset_elem fills it in place
	t_set.reset(new TSet());
}
	: symset_elem [tagset, t_set]
	| LCURLY 
		(
			symset_elem [tagset, t_set] (COMMA symset_elem [tagset, t_set])* 
		)?
	  RCURLY
;
// Symset value as a constant symbol set: wraps the parsed literal
// in a Constant<TSet>.
// Returns boost::shared_ptr<Constant<TSet> >
symset_value
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<Constant<TSet> > val]
{
	boost::shared_ptr<TSet> literal;
}
	: literal = symset_literal [tagset]
		{
			val = boost::shared_ptr<Constant<TSet> >(
				new Constant<TSet>(*literal));
		}
;

// ----------------------------------------------------------------------------
// Bool literal: the keyword True or False.
// Parses the literal and returns the plain bool value.
// Returns boost::shared_ptr<Bool>
bool_literal
	returns [boost::shared_ptr<Bool> val]
	: "True"
		{
			val = boost::shared_ptr<Bool>(new Bool(true));
		}
	| "False"
		{
			val = boost::shared_ptr<Bool>(new Bool(false));
		}
;
// Bool value as a constant bool value: wraps the parsed literal
// in a Constant<Bool>.
// Returns boost::shared_ptr<Constant<Bool> >
bool_value
	returns [boost::shared_ptr<Constant<Bool> > val]
{
	boost::shared_ptr<Bool> literal;
}
	: literal = bool_literal
		{
			val = boost::shared_ptr<Constant<Bool> >(
				new Constant<Bool>(*literal));
		}
;

// ----------------------------------------------------------------------------
// Position literal may be:
// 	(+|-)?(0-9)+ or  begin or end or nowhere
// Parses the literal and returns the plain position value: a number gives
// Position(number), the keywords map to Position::Begin, Position::End and
// Position::Nowhere.
// Returns boost::shared_ptr<Position>
position_literal
	returns [boost::shared_ptr<Position> val]
{
	int offset = 0;
}
	: offset = number
		{
			val = boost::shared_ptr<Position>(new Position(offset));
		}
	| "begin"
		{
			val = boost::shared_ptr<Position>(new Position(Position::Begin));
		}
	| "end"
		{
			val = boost::shared_ptr<Position>(new Position(Position::End));
		}
	| "nowhere"
		{
			val = boost::shared_ptr<Position>(new Position(Position::Nowhere));
		}
;

// Position as a constant position value: wraps the parsed literal
// in a Constant<Position>.
// Returns boost::shared_ptr<Constant<Position> >
position_value
	returns [boost::shared_ptr<Constant<Position> > val]
{
	boost::shared_ptr<Position> literal;
}
	: literal = position_literal
		{
			val = boost::shared_ptr<Constant<Position> >(
				new Constant<Position>(*literal));
		}
;

// ----------------------------------------------------------------------------
// Number may be unsigned or signed: 1, +1, -1
// Accepts either a SIGNED_INT or an UNSIGNED_INT token and returns its value
// as int, converted by token_ref_to_int. The result is 0 until a token has
// been converted.
number 
	returns [int ret]
{
	ret = 0;
}
	: s: SIGNED_INT   { ret = token_ref_to_int(s); }
	| u: UNSIGNED_INT { ret = token_ref_to_int(u); }
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VARIABLES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// ----------------------------------------------------------------------------
// Position: $Name
// Gets an accessor to a Position variable. The variable is first put into
// vars (get_put) under the SYMBOL name with grave accents removed, then an
// accessor for that name is created and copied into the result.
// Returns boost::shared_ptr<VariableAccessor<Position> >
position_variable_acc
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<Position> > pos_acc]
	: POS_PREFIX n: SYMBOL
		{
			const std::string name = str_token_rem_grav(n);

			// put first, so that the accessor below refers to an existing entry
			vars.get_put<Position>(name);

			pos_acc = boost::shared_ptr<VariableAccessor<Position> >(
				new VariableAccessor<Position>(
					vars.create_accessor<Position>(name)));
		}
;

// VarGetter for a Position variable. This rule wraps position_variable_acc:
// the accessor it returns is used to construct the VarGetter.
// Returns boost::shared_ptr<VarGetter<Position> >
position_variable
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<Position> > op]
{
	boost::shared_ptr<VariableAccessor<Position> > accessor;
}
	: accessor = position_variable_acc [vars]
		{
			op = boost::shared_ptr<VarGetter<Position> >(
				new VarGetter<Position>(*accessor));
		}
;

// ----------------------------------------------------------------------------
// String set, $s:name
// Gets an accessor to a StrSet variable. The variable is first put into
// vars (get_put) under the SYMBOL name with grave accents removed, then an
// accessor for that name is created and copied into the result.
// Returns boost::shared_ptr<VariableAccessor<StrSet> >
strset_variable_acc
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<StrSet> > strset_acc]
	: STR_PREFIX n: SYMBOL
		{
			const std::string name = str_token_rem_grav(n);

			// put first, so that the accessor below refers to an existing entry
			vars.get_put<StrSet>(name);

			strset_acc = boost::shared_ptr<VariableAccessor<StrSet> >(
				new VariableAccessor<StrSet>(
					vars.create_accessor<StrSet>(name)));
		}
;

// VarGetter for a StrSet variable. This rule wraps strset_variable_acc:
// the accessor it returns is used to construct the VarGetter.
// Returns boost::shared_ptr<VarGetter<StrSet> >
strset_variable
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<StrSet> > op]
{
	boost::shared_ptr<VariableAccessor<StrSet> > accessor;
}
	: accessor = strset_variable_acc [vars]
		{
			op = boost::shared_ptr<VarGetter<StrSet> >(
				new VarGetter<StrSet>(*accessor));
		}
;

// ----------------------------------------------------------------------------
// Symbol set: $t:name
// Gets an accessor to a TSet variable. The variable is first put into
// vars (get_put) under the SYMBOL name with grave accents removed, then an
// accessor for that name is created and copied into the result.
// Returns boost::shared_ptr<VariableAccessor<TSet> >
symset_variable_acc
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<TSet> > symset_acc]
	: TST_PREFIX n: SYMBOL
		{
			const std::string name = str_token_rem_grav(n);

			// put first, so that the accessor below refers to an existing entry
			vars.get_put<TSet>(name);

			symset_acc = boost::shared_ptr<VariableAccessor<TSet> >(
				new VariableAccessor<TSet>(
					vars.create_accessor<TSet>(name)));
		}
;

// VarGetter for a symbol set variable. This rule wraps symset_variable_acc:
// the accessor it returns is used to construct the VarGetter.
// Returns boost::shared_ptr<VarGetter<TSet> >
symset_variable
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<TSet> > op]
{
	boost::shared_ptr<VariableAccessor<TSet> > accessor;
}
	: accessor = symset_variable_acc [vars]
		{
			op = boost::shared_ptr<VarGetter<TSet> >(
				new VarGetter<TSet>(*accessor));
		}
;

// ----------------------------------------------------------------------------
// Bool: $b:name
// Gets an accessor to a Bool variable. The variable is first put into
// vars (get_put) under the SYMBOL name with grave accents removed, then an
// accessor for that name is created and copied into the result.
// Returns boost::shared_ptr<VariableAccessor<Bool> >
bool_variable_acc
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<Bool> > bool_acc]
	: BOOL_PREFIX n: SYMBOL
		{
			const std::string name = str_token_rem_grav(n);

			// put first, so that the accessor below refers to an existing entry
			vars.get_put<Bool>(name);

			bool_acc = boost::shared_ptr<VariableAccessor<Bool> >(
				new VariableAccessor<Bool>(
					vars.create_accessor<Bool>(name)));
		}
;

// VarGetter for a bool variable. This rule only wraps bool_variable_acc:
// the accessor it returns is used to construct the VarGetter.
// Returns boost::shared_ptr<VarGetter<Bool> >
bool_variable
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<Bool> > op]
{
	boost::shared_ptr<VariableAccessor<Bool> > accessor;
}
	: accessor = bool_variable_acc [vars]
		{
			op = boost::shared_ptr<VarGetter<Bool> >(
				new VarGetter<Bool>(*accessor));
		}
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// OPERATORS
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

///////////////////////////////////////////////////////////////////////////////
// Symbol set (tagset) operators
// Returns boost::shared_ptr<Function<TSet> >
///////////////////////////////////////////////////////////////////////////////
// Dispatches to one of the symbol set operator rules and returns whatever the
// chosen rule produced. An operator may also be wrapped in parentheses.
symset_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > ret]
	: ret = symset_condition [tagset, vars]
	// Both symset_getsymbol and a plain symbol literal (reached through
	// symset_var_val) begin with SYMBOL; the syntactic predicate selects
	// symset_getsymbol only when SYMBOL is followed by LBRACKET.
	| (SYMBOL LBRACKET) => (ret = symset_getsymbol [tagset, vars])
	| ret = symset_var_val [tagset, vars]
	| ret = symset_class   [tagset, vars]
	| ret = symset_range   [tagset, vars]
	| ret = symset_catflt  [tagset, vars]
	| ret = symset_agrflt  [tagset, vars]
	// parenthesised symbol set operator
	| LPAREN ret = symset_operator [tagset, vars] RPAREN
;

// ----------------------------------------------------------------------------
// Wrapper for a symset variable and a symset value: accepts either one and
// returns the result as Function<TSet>. The tagset is only needed by the
// value alternative.
symset_var_val
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > op]
	: op = symset_variable [vars]
	| op = symset_value    [tagset]
;

// ----------------------------------------------------------------------------
// Conditional yielding a symset value:
// 	if (Bool, TSet, TSet)   - the third argument is optional
// 	? TSet ? Bool
// When no "else" operand is given, the two-argument Conditional<TSet>
// constructor is used.
symset_condition
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > op]
{
	boost::shared_ptr<Function<Bool> > cond;
	boost::shared_ptr<Function<TSet> > when_true, when_false;
}
	: "if" LPAREN
			cond = bool_operator [tagset, vars] COMMA
			when_true = symset_operator [tagset, vars]
			(COMMA when_false = symset_operator [tagset, vars])?
		RPAREN
		{
			// when_false stays empty if the optional operand was not parsed
			if (!when_false) {
				op.reset(new Conditional<TSet>(cond, when_true));
			}
			else {
				op.reset(new Conditional<TSet>(cond, when_true, when_false));
			}
		}
	| Q_MARK when_true = symset_operator [tagset, vars]
		Q_MARK cond = bool_operator [tagset, vars]
		{
			op.reset(new Conditional<TSet>(cond, when_true));
		}
;

// ----------------------------------------------------------------------------
// GetSymbol operator: SYMBOL [ position ], where SYMBOL may be cas, m1, f,
// sg... The symbol text (grave accents removed) is parsed with
// tagset.parse_symbol after the whole construct has been matched.
// WARNING! This rule can throw ParserException: a Corpus2::TagParseError
// from parse_symbol is rethrown as ParserException carrying e.info().
symset_getsymbol
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > op]
{
	Corpus2::Tag symbol_tag;
	boost::shared_ptr<Wccl::Function<Position> > pos;
}
	: t: SYMBOL LBRACKET pos = position_operator [tagset, vars] RBRACKET
		{
			try {
				const std::string symbol = str_token_rem_grav(t);
				symbol_tag = tagset.parse_symbol(symbol);
			}
			catch(Corpus2::TagParseError &e) {
				throw ParserException(e.info());
			}

			op = boost::shared_ptr<Function<TSet> >(
				new Wccl::GetSymbols(symbol_tag, pos));
		}
;

// ----------------------------------------------------------------------------
// Class operator: class [ position ]. Builds a GetWordClass for the parsed
// position.
symset_class
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > ret]
{
	boost::shared_ptr<Function<Position> > position;
}
	: "class" LBRACKET position = position_operator [tagset, vars] RBRACKET
		{
			ret = boost::shared_ptr<Function<TSet> >(
				new GetWordClass(position));
		}
;

// ----------------------------------------------------------------------------
// Range operator: range(class, begin, end) or range({...}, begin, end)
// With a symset literal the literal's value is passed to GetSymbolsInRange;
// with the "class" keyword Corpus2::Tag(-1) is passed instead.
symset_range
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > ret]
{
	boost::shared_ptr<TSet> literal;
	boost::shared_ptr<Function<Position> > range_begin, range_end;
}
	: "range" LPAREN
			(literal = symset_literal [tagset] | tag_class: "class") COMMA
			range_begin = position_operator [tagset, vars] COMMA
			range_end = position_operator [tagset, vars]
		RPAREN
		{
			// tag_class is only set when the "class" keyword was matched
			if (!tag_class) {
				ret.reset(new GetSymbolsInRange(
					literal->get_value(), range_begin, range_end));
			}
			else {
				ret.reset(new GetSymbolsInRange(
					Corpus2::Tag(-1), range_begin, range_end));
			}
		}
;

// ----------------------------------------------------------------------------
// Catflt operator: catflt(position, selector, mask). Builds a CatFilter from
// the three parsed operands, in that order.
symset_catflt
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > ret]
{
	boost::shared_ptr<Function<Position> > pos;
	boost::shared_ptr<Function<TSet> > sel, msk;
}
	: "catflt" LPAREN
			pos = position_operator [tagset, vars] COMMA
			sel = symset_operator [tagset, vars] COMMA
			msk = symset_operator [tagset, vars]
		RPAREN
		{
			ret = boost::shared_ptr<Function<TSet> >(
				new CatFilter(pos, sel, msk));
		}
;

// ----------------------------------------------------------------------------
// Agrflt operator: agrflt(left position, right position, attributes, mask).
// Builds an AgrFilter from the four parsed operands plus the tagset.
symset_agrflt
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > ret]
{
	boost::shared_ptr<Function<Position> > left, right;
	boost::shared_ptr<Function<TSet> > attributes, msk;
}
	: "agrflt" LPAREN
			left = position_operator [tagset, vars] COMMA
			right = position_operator [tagset, vars] COMMA
			attributes = symset_operator [tagset, vars] COMMA
			msk = symset_operator [tagset, vars]
		RPAREN
		{
			ret = boost::shared_ptr<Function<TSet> >(
				new AgrFilter(left, right, attributes, msk, tagset));
		}
;


///////////////////////////////////////////////////////////////////////////////
// Position operator
// Returns boost::shared_ptr<Function<Position> >
///////////////////////////////////////////////////////////////////////////////
// Parses a position expression: a position value or variable, a position
// conditional, or a parenthesised position operator. Any of these may be
// followed by a SIGNED_INT, in which case the result is wrapped in a
// RelativePosition with that integer as the second constructor argument.
position_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Position> > ret]
	: 
	( ret = position_var_val   [vars]
	| ret = position_condition [tagset, vars]
	| LPAREN ret = position_operator [tagset, vars] RPAREN
	) 
	( // if there is SIGNED_INT after the position, it is actually a relative position
		i: SIGNED_INT {
			// the current ret is passed to the RelativePosition constructor
			// before ret itself is replaced by the new object
			ret.reset(new RelativePosition(ret, token_ref_to_int(i)));
		}
	)?
;

// ----------------------------------------------------------------------------
// Wrapper for a position value and a position variable: accepts either one
// and returns the result as Function<Position>.
position_var_val
	[Variables& vars]
	returns [boost::shared_ptr<Function<Position> > ret]
	: ret = position_value
	| ret = position_variable [vars]
;

// ----------------------------------------------------------------------------
// Conditional yielding a position value:
// 	if (Bool, Position, Position)   - the third argument is optional
// 	? Position ? Bool
// When no "else" operand is given, the two-argument Conditional<Position>
// constructor is used.
position_condition
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Position> > op]
{
	boost::shared_ptr<Function<Bool> > cond;
	boost::shared_ptr<Function<Position> > when_true, when_false;
}
	: "if" LPAREN
			cond = bool_operator [tagset, vars] COMMA
			when_true = position_operator [tagset, vars]
			(COMMA when_false = position_operator [tagset, vars])?
		RPAREN
		{
			// when_false stays empty if the optional operand was not parsed
			if (!when_false) {
				op.reset(new Conditional<Position>(cond, when_true));
			}
			else {
				op.reset(new Conditional<Position>(cond, when_true, when_false));
			}
		}
	| Q_MARK when_true = position_operator [tagset, vars]
		Q_MARK cond = bool_operator [tagset, vars]
		{
			op.reset(new Conditional<Position>(cond, when_true));
		}
;

///////////////////////////////////////////////////////////////////////////////
// String operator
// Returns boost::shared_ptr<Function<StrSet> >
///////////////////////////////////////////////////////////////////////////////
// Dispatches to one of the string set operator rules and returns whatever the
// chosen rule produced. An operator may also be wrapped in parentheses.
strset_operator [const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
	: ret = strset_orth      [tagset, vars] 
	| ret = strset_base      [tagset, vars]
	| ret = strset_lower     [tagset, vars] 
	| ret = strset_upper     [tagset, vars]
	| ret = strset_affix     [tagset, vars] 
	| ret = strset_var_val   [tagset, vars] 
	| ret = strset_condition [tagset, vars]
	// parenthesised string set operator
	| LPAREN ret = strset_operator [tagset, vars] RPAREN
;

// ----------------------------------------------------------------------------
// Orth operator: orth [ position ]. Builds a GetOrth for the parsed position.
strset_orth
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	boost::shared_ptr<Function<Position> > position;
}
	: "orth" LBRACKET position = position_operator [tagset, vars] RBRACKET
		{
			ret = boost::shared_ptr<Function<StrSet> >(new GetOrth(position));
		}
;

// ----------------------------------------------------------------------------
// Base operator: base [ position ]. Builds a GetLemmas for the parsed
// position.
strset_base
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	boost::shared_ptr<Function<Position> > position;
}
	: "base" LBRACKET position = position_operator [tagset, vars] RBRACKET
		{
			ret = boost::shared_ptr<Function<StrSet> >(new GetLemmas(position));
		}
;

// ----------------------------------------------------------------------------
// Lower operator: lower ( strset ). Wraps the parsed string set operator
// in a ToLower.
strset_lower
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	boost::shared_ptr<Function<StrSet> > inner;
}
	: "lower" LPAREN inner = strset_operator [tagset, vars] RPAREN
		{
			ret = boost::shared_ptr<Function<StrSet> >(new ToLower(inner));
		}
;

// ----------------------------------------------------------------------------
// Upper operator: upper ( strset ). Wraps the parsed string set operator
// in a ToUpper.
strset_upper
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	boost::shared_ptr<Function<StrSet> > inner;
}
	: "upper" LPAREN inner = strset_operator [tagset, vars] RPAREN
		{
			ret = boost::shared_ptr<Function<StrSet> >(new ToUpper(inner));
		}
;

// ----------------------------------------------------------------------------
// Affix operator: affix ( strset , number ). Builds an Affix from the parsed
// string set operator and the integer that follows it.
strset_affix
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	int length = 0;
	boost::shared_ptr<Function<StrSet> > inner;
}
	: "affix" LPAREN
			inner = strset_operator [tagset, vars] COMMA
			length = number
		RPAREN
		{
			ret = boost::shared_ptr<Function<StrSet> >(
				new Affix(inner, length));
		}
;

// ----------------------------------------------------------------------------
// Wrapper for a strset value and a strset variable: accepts either one and
// returns the result as Function<StrSet>. The tagset parameter is unused
// here (its name is commented out in the signature).
strset_var_val
	[const Corpus2::Tagset& /*tagset*/, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > op]
	: op = strset_value 
	| op = strset_variable [vars]
;

// ----------------------------------------------------------------------------
// Conditional yielding a strset value:
// 	if (Bool, StrSet, StrSet)   - the third argument is optional
// 	? StrSet ? Bool
// When no "else" operand is given, the two-argument Conditional<StrSet>
// constructor is used.
strset_condition
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > op]
{
	boost::shared_ptr<Function<Bool> > cond;
	boost::shared_ptr<Function<StrSet> > when_true, when_false;
}
	: "if" LPAREN
			cond = bool_operator [tagset, vars] COMMA
			when_true = strset_operator [tagset, vars]
			(COMMA when_false = strset_operator [tagset, vars])?
		RPAREN
		{
			// when_false stays empty if the optional operand was not parsed
			if (!when_false) {
				op.reset(new Conditional<StrSet>(cond, when_true));
			}
			else {
				op.reset(new Conditional<StrSet>(cond, when_true, when_false));
			}
		}
	| Q_MARK when_true = strset_operator [tagset, vars]
		Q_MARK cond = bool_operator [tagset, vars]
		{
			op.reset(new Conditional<StrSet>(cond, when_true));
		}
;

///////////////////////////////////////////////////////////////////////////////
// Bool operator
// Returns boost::shared_ptr<Function<Bool> >
///////////////////////////////////////////////////////////////////////////////
// Dispatches to one of the bool operator rules and returns whatever the
// chosen rule produced. An operator may also be wrapped in parentheses.
bool_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
	: ret = bool_and        [tagset, vars]  
	| ret = bool_or         [tagset, vars]  
	| ret = bool_nor        [tagset, vars]  
	| ret = bool_var_val    [tagset, vars]	
	| ret = bool_regex      [tagset, vars]
	| ret = bool_inout      [tagset, vars]
	| ret = bool_condition  [tagset, vars]
	// setvar:
	| ret = setvar_operator [tagset, vars]
	// equal/in/inter:
	| ret = equal_operator  [tagset, vars]
	| ret = in_operator     [tagset, vars]
	| ret = inter_operator  [tagset, vars]
	// iterations
	| ret = bool_iteration  [tagset, vars]
	// agreement
	| ret = bool_agreement  [tagset, vars]
	// phrase
	| ret = bool_phrase     [tagset, vars]
	// parenthesised bool operator
	| LPAREN ret = bool_operator [tagset, vars] RPAREN
;

// ----------------------------------------------------------------------------
// Comma-separated predicates (bool operators). At least one predicate is
// required; each parsed predicate is appended to the returned vector in the
// order it appears.
bool_operator_comma_sep
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns
		[boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v]
{
	typedef std::vector<boost::shared_ptr<Function<Bool> > > PredicateList;

	boost::shared_ptr<Function<Bool> > item;
	ret_v.reset(new PredicateList);
}
	: item = bool_operator [tagset, vars]
		{
			ret_v->push_back(item);
		}
		(
			COMMA item = bool_operator [tagset, vars]
			{
				ret_v->push_back(item);
			}
		)*
;

// ----------------------------------------------------------------------------
// And operator: and ( predicate, ... ). Builds an And from the parsed list
// of predicates.
bool_and
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > args;
}
	: "and" LPAREN args = bool_operator_comma_sep [tagset, vars] RPAREN
		{
			op = boost::shared_ptr<Function<Bool> >(new And(args));
		}
;

// ----------------------------------------------------------------------------
// Or operator: or ( predicate, ... ). Builds an Or from the parsed list of
// predicates.
bool_or
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > args;
}
	: "or" LPAREN args = bool_operator_comma_sep [tagset, vars] RPAREN
		{
			op = boost::shared_ptr<Function<Bool> >(new Or(args));
		}
;

// ----------------------------------------------------------------------------
// Nor/Not operator: not ( predicate, ... ). Builds a Nor from the parsed list
// of predicates.
bool_nor
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > args;
}
	: "not" LPAREN args = bool_operator_comma_sep [tagset, vars] RPAREN
		{
			op = boost::shared_ptr<Function<Bool> >(new Nor(args));
		}
;

// ----------------------------------------------------------------------------
// Wrapper for a bool value and a bool variable: accepts either one and
// returns the result as Function<Bool>. The tagset parameter is unused here
// (its name is commented out in the signature).
bool_var_val
	[const Corpus2::Tagset& /*tagset*/, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	: op = bool_value 
	| op = bool_variable [vars]
;

// ----------------------------------------------------------------------------
// Regex operator: regex ( strset , STRING ). Builds a Regex from the parsed
// string set operator and the STRING token converted by
// token_ref_to_ustring.
bool_regex
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<StrSet> > subject;
}
	: "regex" LPAREN
			subject = strset_operator [tagset, vars] COMMA
			reg: STRING
		RPAREN
		{
			const UnicodeString pattern = token_ref_to_ustring(reg);
			op = boost::shared_ptr<Function<Bool> >(
				new Regex(subject, pattern));
		}
;

// ----------------------------------------------------------------------------
// Inside/outside operators: inside ( position ) builds an IsInside,
// outside ( position ) builds an IsOutside, each for the parsed position.
bool_inout
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<Position> > position;
}
	: "inside" LPAREN position = position_operator [tagset, vars] RPAREN
		{
			op = boost::shared_ptr<Function<Bool> >(new IsInside(position));
		}
	| "outside" LPAREN position = position_operator [tagset, vars] RPAREN
		{
			op = boost::shared_ptr<Function<Bool> >(new IsOutside(position));
		}
;

// ----------------------------------------------------------------------------
// Conditional yielding a bool value:
// 	if (Bool, Bool, Bool)   - the third argument is optional
// 	? Bool ? Bool
// When no "else" operand is given, the two-argument Conditional<Bool>
// constructor is used.
bool_condition
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<Bool> > cond, when_true, when_false;
}
	: "if" LPAREN
			cond = bool_operator [tagset, vars] COMMA
			when_true = bool_operator [tagset, vars]
			(COMMA when_false = bool_operator [tagset, vars])?
		RPAREN
		{
			// when_false stays empty if the optional operand was not parsed
			if (!when_false) {
				op.reset(new Conditional<Bool>(cond, when_true));
			}
			else {
				op.reset(new Conditional<Bool>(cond, when_true, when_false));
			}
		}
	| Q_MARK when_true = bool_operator [tagset, vars]
		Q_MARK cond = bool_operator [tagset, vars]
		{
			op.reset(new Conditional<Bool>(cond, when_true));
		}
;

// ----------------------------------------------------------------------------
// Equal operator: equal ( a , b ), where both operands are position, symbol
// set, string set or bool operators. The operand type is chosen by syntactic
// predicates on the first operand, tried in the order position, symset,
// strset; bool operands are the fallback alternative without a predicate.
// The result is an Equals<T> instantiated for the chosen operand type.
equal_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	// one pair of operands per supported type; only one pair is filled in
	boost::shared_ptr<Function<TSet> > t1, t2;
	boost::shared_ptr<Function<Bool> > b1, b2;
	boost::shared_ptr<Function<StrSet>  > s1, s2;
	boost::shared_ptr<Function<Position> > p1, p2;
}
	: "equal" LPAREN
	(
		// position operands
		(position_operator [tagset, vars]) =>
		(
			p1 = position_operator [tagset, vars] COMMA 
			p2 = position_operator [tagset, vars] {
				op.reset(new Equals<Position>(p1, p2));
			}
		)
	|
		// symbol set operands
		(symset_operator [tagset, vars]) =>
		(
			t1 = symset_operator [tagset, vars] COMMA  
			t2 = symset_operator [tagset, vars] {
				op.reset(new Equals<TSet>(t1, t2));
			}
		)
	|
		// string set operands
		(strset_operator [tagset, vars]) =>
		(
			s1 = strset_operator [tagset, vars] COMMA  
			s2 = strset_operator [tagset, vars] {
				op.reset(new Equals<StrSet>(s1, s2));
			}
		)
	|
		// bool operands (no predicate: taken when none of the above matched)
		(
			b1 = bool_operator [tagset, vars] COMMA
			b2 = bool_operator [tagset, vars] {
				op.reset(new Equals<Bool>(b1, b2));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// In operator: in ( a , b ), where both operands are symbol set operators or
// both are string set operators. A syntactic predicate on the first operand
// selects the symbol set form; the string set form is the fallback.
// The result is an IsSubsetOf<T> instantiated for the chosen operand type.
in_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	// one pair of operands per supported type; only one pair is filled in
	boost::shared_ptr<Function<TSet> > t1, t2;
	boost::shared_ptr<Function<StrSet> > s1, s2;
}
:
	"in" LPAREN
	(
		// symbol set operands
		(symset_operator [tagset, vars]) =>
		(
			t1 = symset_operator [tagset, vars] COMMA 
			t2 = symset_operator [tagset, vars] {
				op.reset(new IsSubsetOf<TSet>(t1, t2));
			}
		)
		|
		// string set operands (no predicate: taken otherwise)
		(
			s1 = strset_operator [tagset, vars] COMMA
			s2 = strset_operator [tagset, vars] {
				op.reset(new IsSubsetOf<StrSet>(s1, s2));
			}
		)
	)
	RPAREN
;
// ----------------------------------------------------------------------------
// Inter operator
// Parses `inter(<a>, <b>)` and builds an Intersects<T> from the two
// arguments. A syntactic predicate selects the symset form; otherwise both
// arguments are parsed as strset operators.
inter_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<StrSet> > str_lhs, str_rhs;
	boost::shared_ptr<Function<TSet> > sym_lhs, sym_rhs;
}
	: "inter" LPAREN
	(
		// inter(symset, symset)
		(symset_operator [tagset, vars]) =>
		(
			sym_lhs = symset_operator [tagset, vars]
			COMMA
			sym_rhs = symset_operator [tagset, vars]
			{
				op.reset(new Intersects<TSet>(sym_lhs, sym_rhs));
			}
		)
	|
		// inter(strset, strset)
		(
			str_lhs = strset_operator [tagset, vars]
			COMMA
			str_rhs = strset_operator [tagset, vars]
			{
				op.reset(new Intersects<StrSet>(str_lhs, str_rhs));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// Iterations:
// Parses the iteration operators `only`, `atleast`, `llook` and `rlook`.
// Each takes two position operators, a position variable accessor and a
// bool operator; `atleast` takes an additional trailing number.
// The result is an Only / AtLeast / LeftLook / RightLook instance.
bool_iteration
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	// Only assigned (and only used) by the "atleast" alternative.
	int min_match = 0;
	boost::shared_ptr<Function<Bool> > expr;
	boost::shared_ptr<Function<Position> > lpos, rpos;
	boost::shared_ptr<VariableAccessor<Position> > pacc;
}
	// only(lpos, rpos, $Var, bool_expr)
	: "only" LPAREN 
			lpos = position_operator [tagset, vars] COMMA 
			rpos = position_operator [tagset, vars] COMMA
			pacc = position_variable_acc [vars]     COMMA
			expr = bool_operator     [tagset, vars]
		RPAREN {
			ret.reset(new Only(lpos, rpos, *pacc, expr));
		}

	// atleast(lpos, rpos, $Var, bool_expr, number)
	| "atleast" LPAREN
			lpos = position_operator [tagset, vars] COMMA 
			rpos = position_operator [tagset, vars] COMMA
			pacc = position_variable_acc [vars]     COMMA
			expr = bool_operator     [tagset, vars] COMMA
			min_match = number
		RPAREN {
			ret.reset(new AtLeast(lpos, rpos, *pacc, expr, min_match));
		}
	// llook(rpos, lpos, $Var, bool_expr): the first textual argument is
	// stored in rpos and the second in lpos, but the constructor is still
	// called as (lpos, rpos, ...).
	| "llook" LPAREN //note inverted rpos/lpos order
			rpos = position_operator [tagset, vars] COMMA 
			lpos = position_operator [tagset, vars] COMMA
			pacc = position_variable_acc [vars]     COMMA
			expr = bool_operator     [tagset, vars] 
		RPAREN {
			ret.reset(new LeftLook(lpos, rpos, *pacc, expr));
		}
	// rlook(lpos, rpos, $Var, bool_expr)
	| "rlook" LPAREN
			lpos = position_operator [tagset, vars] COMMA 
			rpos = position_operator [tagset, vars] COMMA
			pacc = position_variable_acc [vars]     COMMA
			expr = bool_operator     [tagset, vars] 
		RPAREN {
			ret.reset(new RightLook(lpos, rpos, *pacc, expr));
		}
;

// ----------------------------------------------------------------------------
// Agreement operator: agr, agrpp, wagr
// Parses the agreement operators. All three share the argument list
// (position, position, symset) and differ only in the class instantiated:
//   agr   -> StrongAgreement
//   agrpp -> PointAgreement
//   wagr  -> WeakAgreement
// The tagset is forwarded to the constructed object.
bool_agreement
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	boost::shared_ptr<Function<Position> > left_pos, right_pos;
	boost::shared_ptr<Function<TSet> > attribs;
}
	: "agr" LPAREN
			left_pos  = position_operator [tagset, vars] COMMA
			right_pos = position_operator [tagset, vars] COMMA
			attribs   = symset_operator   [tagset, vars]
		RPAREN
		{
			ret.reset(new StrongAgreement(left_pos, right_pos, attribs, tagset));
		}
	| "agrpp" LPAREN
			left_pos  = position_operator [tagset, vars] COMMA
			right_pos = position_operator [tagset, vars] COMMA
			attribs   = symset_operator   [tagset, vars]
		RPAREN
		{
			ret.reset(new PointAgreement(left_pos, right_pos, attribs, tagset));
		}
	| "wagr" LPAREN
			left_pos  = position_operator [tagset, vars] COMMA
			right_pos = position_operator [tagset, vars] COMMA
			attribs   = symset_operator   [tagset, vars]
		RPAREN
		{
			ret.reset(new WeakAgreement(left_pos, right_pos, attribs, tagset));
		}
;

// ----------------------------------------------------------------------------
// Parse operator on L1 level
// Dispatches to one of the two phrase operator families and returns
// whatever the selected sub-rule returns: annotation-style operators
// (bool_phrase_annotation) or phrase iteration operators
// (bool_phrase_iteration).
bool_phrase
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
	: ret = bool_phrase_annotation [tagset, vars]
	| ret = bool_phrase_iteration  [tagset, vars]
;

// ----------------------------------------------------------------------------
// Annotation operator: phrase, phrase_beg, phrase_end, phrase_whole, phrase_pp
// Recognises the syntax of the annotation operators:
//   phrase(pos, "name"), phrase_beg(pos, "name"), phrase_end(pos, "name"),
//   phrase_whole(pos, pos, "name"), phrase_pp(pos, pos, "name")
// NOTE(review): every action below is still a TODO, so no alternative
// assigns `ret`, and the STRING labels n1..n5 are captured but unused.
// Callers should expect `ret` to be unset until these are implemented.
bool_phrase_annotation
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	boost::shared_ptr<Function<Position> > lpos, rpos;
}
	// phrase(position, name)
	: "phrase" LPAREN 
			lpos = position_operator [tagset, vars] COMMA n1: STRING 
		RPAREN {
			// TODO
		}
	// phrase_beg(position, name)
	| "phrase_beg" LPAREN 
			lpos = position_operator [tagset, vars] COMMA n2: STRING 
		RPAREN {
			// TODO
		}
	// phrase_end(position, name)
	| "phrase_end" LPAREN 
			lpos = position_operator [tagset, vars] COMMA n3: STRING 
		RPAREN {
			// TODO
		}
	// phrase_whole(position, position, name)
	| "phrase_whole" LPAREN 
			lpos = position_operator [tagset, vars] COMMA 
			rpos = position_operator [tagset, vars] COMMA n4: STRING 
		RPAREN {
			// TODO
		}
	// phrase_pp(position, position, name)
	| "phrase_pp" LPAREN 
			lpos = position_operator [tagset, vars] COMMA 
			rpos = position_operator [tagset, vars] COMMA n5: STRING 
		RPAREN {
			// TODO
		}
;

// ----------------------------------------------------------------------------
// Phrase iteration operator: lphrase, rphrase
// Recognises the syntax of the phrase iteration operators:
//   lphrase(position, $Var, "name") and rphrase(position, $Var, "name")
// NOTE(review): both actions are still TODO, so `ret` is never assigned and
// the parsed position, variable and STRING labels (n1, n2) are unused.
// NOTE(review): the variable is parsed with position_variable (a VarGetter),
// whereas bool_iteration uses position_variable_acc -- confirm which one the
// final implementation needs.
bool_phrase_iteration
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	boost::shared_ptr<Function<Position> > position;
	boost::shared_ptr<VarGetter<Position> > var_position;
}
	// lphrase(position, $Var, name)
	: "lphrase" LPAREN
			position     = position_operator [tagset, vars] COMMA
			var_position = position_variable [vars]         COMMA
			n1: STRING
		RPAREN {
			// TODO
		}
	// rphrase(position, $Var, name)
	| "rphrase" LPAREN 
			position     = position_operator [tagset, vars] COMMA
			var_position = position_variable [vars]         COMMA
			n2: STRING
		RPAREN {
			// TODO
		}
;


// ----------------------------------------------------------------------------
// Setvar operator
// Returns boost::shared_ptr<Function<Bool> >
// ----------------------------------------------------------------------------
// Parses `setvar( ... )`. The keyword and parentheses are consumed here; the
// `<variable>, <value>` pair inside is delegated to one of the type-specific
// sub-rules (position, bool, strset, symset), whose result is returned as-is.
setvar_operator 
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
	: "setvar" LPAREN
	  (
		  ret = position_setvar [tagset, vars]
		| ret = bool_setvar     [tagset, vars]
		| ret = strset_setvar   [tagset, vars]
		| ret = symset_setvar   [tagset, vars]
	  )
	  RPAREN
;

// ----------------------------------------------------------------------------
// Setvar for position
// Parses the body of a position setvar: `<position variable>, <position
// operator>`, and builds a VarSetter<Position> from the variable accessor
// and the value operator.
position_setvar
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<VariableAccessor<Position> > var_acc;
	boost::shared_ptr<Function<Position> > value_op;
}
	: var_acc  = position_variable_acc [vars]
	  COMMA
	  value_op = position_operator [tagset, vars]
	  {
		op.reset(new VarSetter<Position>(*var_acc, value_op));
	  }
;

// ----------------------------------------------------------------------------
// Setvar for bool
// Parses the body of a bool setvar: `<bool variable>, <bool operator>`, and
// builds a VarSetter<Bool> from the variable accessor and the value operator.
bool_setvar
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<VariableAccessor<Bool> > var_acc;
	boost::shared_ptr<Function<Bool> > value_op;
}
	: var_acc  = bool_variable_acc [vars]
	  COMMA
	  value_op = bool_operator [tagset, vars]
	  {
		op.reset(new VarSetter<Bool>(*var_acc, value_op));
	  }
;

// ----------------------------------------------------------------------------
// Setvar for strset
// Parses the body of a strset setvar: `<strset variable>, <strset operator>`,
// and builds a VarSetter<StrSet> from the variable accessor and the value
// operator.
strset_setvar
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<VariableAccessor<StrSet> > var_acc;
	boost::shared_ptr<Function<StrSet> > value_op;
}
	: var_acc  = strset_variable_acc [vars]
	  COMMA
	  value_op = strset_operator [tagset, vars]
	  {
		op.reset(new VarSetter<StrSet>(*var_acc, value_op));
	  }
;

// ----------------------------------------------------------------------------
// Setvar for symset
// Parses the body of a symset setvar: `<symset variable>, <symset operator>`,
// and builds a VarSetter<TSet> from the variable accessor and the value
// operator.
symset_setvar
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<VariableAccessor<TSet> > var_acc;
	boost::shared_ptr<Function<TSet> > value_op;
}
	: var_acc  = symset_variable_acc [vars]
	  COMMA
	  value_op = symset_operator [tagset, vars]
	  {
		op.reset(new VarSetter<TSet>(*var_acc, value_op));
	  }
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ANTLR LEXER
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Lexer definition. Options:
//   exportVocab    - the token vocabulary is exported under the name ANTLRExpr
//   charVocabulary - input characters accepted by the lexer: '\3'..'\377'
//   testLiterals   - literal-table lookup is off by default; the SYMBOL rule
//                    re-enables it for itself
//   k              - two characters of lookahead
class ANTLRLexer extends Lexer;
options {
	exportVocab    = ANTLRExpr;
	charVocabulary = '\3'..'\377';
	testLiterals   = false;
	k              = 2;
}


// TODO
// String literal delimited by either double or single quotes.
// The content may not contain the delimiter itself or a line break; no
// escape sequences are recognised. The `!` suffix drops the surrounding
// quotes from the token text.
STRING
options {
	paraphrase = "a string";
}
	: '"'!  (~('"'  | '\n' | '\r'))* '"'!
	| '\''! (~('\'' | '\n' | '\r'))* '\''!
;

// Integer with a mandatory leading '-' or '+'. Spaces and tabs between the
// sign and the digits are accepted but removed from the token text (`!`).
SIGNED_INT
options {
	paraphrase = "Signed integer";
}
	: ('-'|'+') (' '!|'\t'!)* ('0'..'9')+ 
;	

// One or more decimal digits with no sign.
UNSIGNED_INT
options {
	paraphrase = "Unsigned integer";
}
	: ('0'..'9')+ 
;	


// A lone single-quote character (').
// NOTE(review): STRING also starts with this character; confirm how the
// generated lexer disambiguates the two.
QUOT_MARK
options {
	paraphrase = "Quote";
} 
	: '\'' 
;

// A lone double-quote character (").
// The paraphrase is the text shown for this token in error messages, so it
// describes the character actually matched; the token name is kept as-is
// because other rules may reference it.
// NOTE(review): STRING also starts with this character; confirm how the
// generated lexer disambiguates the two.
APOS_MARK
options {
	paraphrase = "Double quote";
}
	: '"' 
;

// The '?' character.
Q_MARK
options {
	paraphrase = "Question mark";
}
	: '?'
;

// The '!' character.
E_MARK
options {
	paraphrase = "Exclamation mark";
}
	: '!'
;

// The three-character prefix "$s:" (paraphrased as the string prefix).
STR_PREFIX
options {
	paraphrase = "String prefix";
}
	: "$s:"
;

// The three-character prefix "$t:" (paraphrased as the symset prefix).
TST_PREFIX
options {
	paraphrase = "Symset prefix";
}
	: "$t:"
;

// The three-character prefix "$b:" (paraphrased as the bool prefix).
BOOL_PREFIX
options {
	paraphrase = "Bool prefix";
}
	: "$b:"
;

// A bare '$' (paraphrased as the position prefix). The other prefix tokens
// also begin with '$' but continue with a type letter and ':'.
POS_PREFIX
options {
	paraphrase = "Position prefix";
}
	: '$'
;

// The '[' character.
LBRACKET 
options {
	paraphrase = "'['";
}
	: '[' 
;

// The ']' character.
RBRACKET 
options {
	paraphrase = "']'";
}
	: ']' 
;

// The '(' character; opens every operator's argument list in the parser.
LPAREN
options {
	paraphrase = "'('";
}   
	: '(' 
;

// The ')' character; closes every operator's argument list in the parser.
RPAREN 
options {
	paraphrase = "')'";
} 
	: ')' 
;

// The '{' character.
LCURLY 
options {
	paraphrase = "'{'";
} 
	: '{' 
;

// The '}' character.
RCURLY 
options {
	paraphrase = "'}'";
} 
	: '}' 
;

// The '@' character.
AT_MARK 
options {
	paraphrase = "'@'";
} 
	: '@' 
;

// The ',' character; separates operator arguments in the parser.
COMMA
options { 
	paraphrase = "','";
}
	: ','
;

// Identifier: a letter or '_' followed by letters, digits or '_'.
// The second alternative accepts the same shape wrapped in backticks; the
// backticks are kept in the token text.
// testLiterals is enabled for this rule only, so matched text is checked
// against the literals table (the quoted keywords used by the parser).
SYMBOL
options { 
	paraphrase = "Symbol"; 
	testLiterals = true; 
}
	: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
	| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
;

// A single whitespace character (space, tab, form feed) or a line break.
// "\r\n", '\r' and '\n' each count as one line break and call newline() so
// the lexer's line counter stays correct. The token is always skipped.
WS
	: ( ' '
	  	| '\t'
	  	| '\f'
	  	| 
			( "\r\n"
				| '\r'
				| '\n'
			) { newline(); } 
		) { $setType(antlr::Token::SKIP); } 
;

// Single-line comment: "//" up to, but not including, the line break.
// The token is skipped; the line break itself is left for WS to consume.
COMMENT
options {
	paraphrase = "Single line comment";
}
	: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP);  }
;

// Multi-line comment: everything between "/*" and the first "*/".
// Inside the comment a '*' is consumed only when the next character is not
// '/' (semantic predicate on LA(2)), line breaks call newline() to keep the
// line counter correct, and any other character is consumed as-is.
// The whole token is skipped.
ML_COMMENT
options {
	paraphrase = "Multi line comment";
}
  : "/*"
	(			// TODO: test it and add reference to the site it's taken from!
				/* This actually works OK despite the ambiguity that
				'\r' '\n' can be matched in one alternative or by matching
				'\r' in one iteration and '\n' in another.. But 
				this is really matched just by one rule per (...)* 
				loop iteration, so it's OK.
				This is exactly how they do it all over the web - just
				turn off the warning for this particular token.*/
		options { 
			generateAmbigWarnings = false; 
		}
      : { LA(2)!='/' }? '*'
      | '\r' '\n' { newline(); }
      | '\r' { newline(); }
      | '\n' { newline(); }
      | ~('*'|'\n'|'\r')
  	)*
    "*/"
    { $setType(antlr::Token::SKIP); }
;

// The '#' character.
HASH
options { 
	paraphrase = "'#'"; 
}
	: '#' 
;

//DSEPARATOR
//options { 
//	paraphrase = "':-'"; 
//}
//	: ":-" 
//;