Skip to content
Snippets Groups Projects
grammar.g 59.9 KiB
Newer Older
rk's avatar
rk committed
header {
Paweł Kędzia's avatar
Paweł Kędzia committed
	//don't try to add all the headers inside our namespace
	#include <libwccl/parser/ParserException.h>
rk's avatar
rk committed

	#include <cstdio>
	#include <antlr/Token.hpp>
	#include <boost/lexical_cast.hpp>

	// values/variables
	#include <libwccl/variables.h>
	#include <libwccl/values/bool.h>
	#include <libwccl/values/tset.h>
	#include <libwccl/values/strset.h>
	#include <libwccl/values/position.h>
	
	// sentence context
	#include <libwccl/sentencecontext.h>

	// operators
	#include <libwccl/ops/operator.h>

	#include <libwccl/ops/functions/constant.h>
	#include <libwccl/ops/functions/vargetter.h>
	#include <libwccl/ops/functions/conditional.h>

	#include <libwccl/ops/functions/bool/varsetter.h>
	#include <libwccl/ops/functions/bool/predicates/debug.h>
	#include <libwccl/ops/functions/bool/predicates/or.h>
	#include <libwccl/ops/functions/bool/predicates/nor.h>
	#include <libwccl/ops/functions/bool/predicates/and.h>
	#include <libwccl/ops/functions/bool/predicates/regex.h>
	#include <libwccl/ops/functions/bool/predicates/intersects.h>
	#include <libwccl/ops/functions/bool/predicates/issubsetof.h>
	#include <libwccl/ops/functions/bool/predicates/isinside.h>
	#include <libwccl/ops/functions/bool/predicates/isoutside.h>
	#include <libwccl/ops/functions/bool/predicates/equals.h>
	#include <libwccl/ops/functions/bool/predicates/weakagreement.h>
	#include <libwccl/ops/functions/bool/predicates/pointagreement.h>
	#include <libwccl/ops/functions/bool/predicates/strongagreement.h>

	#include <libwccl/ops/functions/strset/affix.h>
	#include <libwccl/ops/functions/strset/getorth.h>
	#include <libwccl/ops/functions/strset/toupper.h>
	#include <libwccl/ops/functions/strset/tolower.h>
	#include <libwccl/ops/functions/strset/getlemmas.h>
Paweł Kędzia's avatar
Paweł Kędzia committed
	#include <libwccl/ops/functions/tset/agrfilter.h>
Adam Radziszewski's avatar
Adam Radziszewski committed
	#include <libwccl/ops/functions/tset/catfilter.h>
	#include <libwccl/ops/functions/tset/getsymbols.h>
	#include <libwccl/ops/functions/tset/getwordclass.h>
Paweł Kędzia's avatar
Paweł Kędzia committed
	#include <libwccl/ops/functions/tset/getsymbolsinrange.h>
	#include <libwccl/ops/functions/position/relativeposition.h>

	#include <libwccl/ops/functions/bool/iterations/only.h>
	#include <libwccl/ops/functions/bool/iterations/atleast.h>
	#include <libwccl/ops/functions/bool/iterations/leftlook.h>
	#include <libwccl/ops/functions/bool/iterations/rightlook.h>
Adam Wardynski's avatar
Adam Wardynski committed
	#include <libwccl/ops/tagrule.h>
	#include <libwccl/ops/rulesequence.h>
	//
	#include <libwccl/ops/tagactions/unify.h>
	#include <libwccl/ops/tagactions/delete.h>
	#include <libwccl/ops/tagactions/select.h>
	#include <libwccl/ops/tagactions/relabel.h>
	#include <libwccl/ops/tagactions/mark.h>
	#include <libwccl/ops/tagactions/unmark.h>
	// Match operators
	#include <libwccl/values/tokenmatch.h>
	#include <libwccl/values/annotationmatch.h>
	#include <libwccl/values/matchvector.h>
	#include <libwccl/ops/match/applyoperator.h>
	#include <libwccl/ops/match/conditions/optionalmatch.h>
	#include <libwccl/ops/match/conditions/repeatedmatch.h>
	#include <libwccl/ops/match/conditions/conjconditions.h>
	#include <libwccl/ops/match/actions/markmatch.h>
rk's avatar
rk committed
	// Unicode String
	#include <unicode/uniset.h>
	#include <unicode/unistr.h>
Paweł Kędzia's avatar
Paweł Kędzia committed
	// start our namespace again
	ANTLR_BEGIN_NAMESPACE(Wccl)
rk's avatar
rk committed
}

options {
	language = "Cpp";
//	genHashLines = true;
rk's avatar
rk committed
}

// ----------------------------------------------------------------------------
rk's avatar
rk committed
// ANTLR PARSER
// ----------------------------------------------------------------------------
rk's avatar
rk committed
class ANTLRParser extends Parser;
options {
rk's avatar
rk committed
	buildAST = false;
rk's avatar
rk committed
	defaultErrorHandler = false;
}
{
private:
	// 
rk's avatar
rk committed
	// Converts the text of a token, read as UTF-8, into a UnicodeString and
	// resolves backslash escape sequences with UnicodeString::unescape().
	const UnicodeString token_ref_to_ustring(antlr::RefToken& rstr) const {
		return UnicodeString::fromUTF8(rstr->getText().c_str()).unescape();
	}
	// Returns the (unescaped) token text without its first and last character
	// -- presumably the quotes enclosing a string literal. Text shorter than
	// three characters yields an empty string.
	const UnicodeString str_token_ref_to_ustring(antlr::RefToken& rstr) const {
		const UnicodeString quoted = token_ref_to_ustring(rstr);
		const int32_t total = quoted.length();

		if (total <= 2) {
			return "";
		}

		// Copy everything between the first and the last character.
		UnicodeString inner;
		quoted.extract(1, total - 2, inner);
		return inner;
	}
Paweł Kędzia's avatar
Paweł Kędzia committed
	// Returns the token text with one pair of enclosing grave accents (`...`)
	// removed. Text that is shorter than two characters, or that is not both
	// preceded and followed by a grave accent, is returned unchanged.
	const std::string str_token_rem_grav(antlr::RefToken& rstr) const {
		const std::string text = token_ref_to_std_string(rstr);
		const size_t n = text.length();
		const bool wrapped = n >= 2 && text[0] == '`' && text[n - 1] == '`';

		return wrapped ? text.substr(1, n - 2) : text;
	}
	//
rk's avatar
rk committed
	// Returns the text of the token as a std::string, unmodified.
	const std::string token_ref_to_std_string(antlr::RefToken& rstr) const {
		return rstr->getText();
	}
	//
	// Converts the token text to int with atoi(). atoi() reports no errors:
	// text that does not start with a number converts to 0.
	int token_ref_to_int(antlr::RefToken& rstr) const {
		return atoi(rstr->getText().c_str());
	}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// "GLOBAL" RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Rule for parsing string set operator with scope. 
// Returns boost::shared_ptr<Operator<StrSet> >
parse_strset_operator
	returns [boost::shared_ptr<Operator<StrSet> > res]
	Variables vars;
	boost::shared_ptr<Function<StrSet> > body;
	: body = strset_operator [tagset, vars] {
			res.reset(new Operator<StrSet>(body, vars));
// ----------------------------------------------------------------------------
// Rule for parsing bool operator with scope. 
// Returns boost::shared_ptr<Operator<Bool> > 
	returns [boost::shared_ptr<Operator<Bool> > res]
	Variables vars;
	boost::shared_ptr<Function<Bool> > body;
	: body = bool_operator [tagset, vars] {
			res.reset(new Operator<Bool>(body, vars));

// ----------------------------------------------------------------------------
// Rule for parsing symbol set operator with scope.
// Returns boost::shared_ptr<Operator<TSet> >
parse_symset_operator
	returns [boost::shared_ptr<Operator<TSet> > res]
	Variables vars;
	boost::shared_ptr<Function<TSet> > body;
	: body = symset_operator [tagset, vars] {
			res.reset(new Operator<TSet>(body, vars));
ilor's avatar
ilor committed
// ----------------------------------------------------------------------------
// Rule for parsing position operator with scope.
// Returns boost::shared_ptr<Operator<Position> >
parse_position_operator
ilor's avatar
ilor committed
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<Operator<Position> > res]
	Variables vars;
	boost::shared_ptr<Function<Position> > body;
	: body = position_operator [tagset, vars] {
			res.reset(new Operator<Position>(body, vars));
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Rule for parsing single WCCL Rule
Adam Wardynski's avatar
Adam Wardynski committed
// Returns boost::shared_ptr<TagRule>
// Entry rule: parses one tag rule by delegating to `rule` with the given
// tagset and a newly created Variables object.
parse_single_rule
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<TagRule> rle]
{
	// Variable scope passed down to `rule`
	Variables vars;
}
	: rle = rule [tagset, vars]
;

// Entry rule for the rules section of a WCCL file. Delegates to `rules`
// with the given tagset and a newly created Variables object.
// Returns boost::shared_ptr<RuleSequence>
parse_rule_sequence
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<RuleSequence> rule_seq]
{
	// Variable scope passed down to `rules`
	Variables vars;
}
	: rule_seq = rules[tagset, vars]
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Entry rule for a match rule. Delegates to `match_rule_operator` with the
// given tagset and a newly created Variables object.
// Returns boost::shared_ptr<Expression>
parse_match_rule
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<Expression> ret_match]
{
	// Variable scope passed down to `match_rule_operator`
	Variables vars;
}
	: ret_match = match_rule_operator[tagset, vars] 
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VALUES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Single or multiple (comma separated) elements in string set, may be:
//   'a' "a" [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"]
// Parsing strset literal and returning plain strset value.
// Returns boost::shared_ptr<StrSet> 
strset_literal 
	returns [boost::shared_ptr<StrSet> s_set]
	s_set.reset(new StrSet());
			s_set->insert(token_ref_to_ustring(s0)); 
Paweł Kędzia's avatar
Paweł Kędzia committed
		}
	| LBRACKET 
		(
			s1: STRING { 
				s_set->insert(token_ref_to_ustring(s1)); 
			}
	  	(
				COMMA s2: STRING { 
					s_set->insert(token_ref_to_ustring(s2)); 
				}
			)*
	  )? 
		RBRACKET
;
// String set value as a constant string set:
// Returns boost::shared_ptr<Constant<StrSet> >
	returns [boost::shared_ptr<Constant<StrSet> > val]
	boost::shared_ptr<StrSet> set;
		val.reset(new Constant<StrSet>(*set));
// ----------------------------------------------------------------------------
// Element of sym set. This rule, inserts element into symbol set 
// with corresponding tagset. 
// WARNING! This rule can throw ParserException! Be careful!
symset_elem
	[const Corpus2::Tagset& tagset, boost::shared_ptr<TSet>& t_set]
	: s1: SYMBOL {
Paweł Kędzia's avatar
Paweł Kędzia committed
		try {
Paweł Kędzia's avatar
Paweł Kędzia committed
			t_set->insert_symbol(tagset, str_token_rem_grav(s1));
Paweł Kędzia's avatar
Paweł Kędzia committed
		}
		catch(Corpus2::TagParseError &e) {
			throw(ParserException(e.info()));
		}
// Symset literal. Symset element may be: 
//  a, `a ` (this is guaranteed by lexer rule - SYMBOL) or {a} {`a`} {a, b} 
//  {`a`, `b`} {a, `b`} {`a`, b}
// Parsing symset literal and returning plain symset value.
// Returns boost::shared_ptr<TSet>
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<TSet> t_set]
	t_set.reset(new TSet());
Paweł Kędzia's avatar
Paweł Kędzia committed
	| LCURLY 
		(
			symset_elem [tagset, t_set] (COMMA symset_elem [tagset, t_set])* 
		)?
// Symset value, as constant symbol set
// Returns boost::shared_ptr<Constant<TSet> >
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<Constant<TSet> > val]
	boost::shared_ptr<TSet> set;
	: set = symset_literal [tagset] {
		val.reset(new Constant<TSet>(*set));
// ----------------------------------------------------------------------------
// Bool literal. May be True or False. Parsing bool literal and returning 
// plain bool value.
// Returns boost::shared_ptr<Bool>
	returns [boost::shared_ptr<Bool> val]
	: "True"  { val.reset(new Bool(Bool(true ))); }
	| "False" { val.reset(new Bool(Bool(false))); }
// Bool value, as a constant bool value
// Returns boost::shared_ptr<Constant<Bool> >
	returns [boost::shared_ptr<Constant<Bool> > val]
	boost::shared_ptr<Bool> bool_lit;
		val.reset(new Constant<Bool>(*bool_lit));
// ----------------------------------------------------------------------------
// Position literal may be:
// 	(+|-)?(0-9)+ or  begin or end or nowhere
// Parsing position literal and returning plain position value.
// returns boost::shared_ptr<Position>
	returns [boost::shared_ptr<Position> val]
Paweł Kędzia's avatar
Paweł Kędzia committed
{
	int i = 0;
}
	: i = number {
		val.reset(new Position(Position(i)));
		val.reset(new Position(Position(Position::Begin)));
		val.reset(new Position(Position(Position::End)));
		val.reset(new Position(Position(Position::Nowhere)));

// Position as constant position value
// Returns boost::shared_ptr<Constant<Position> >
	returns [boost::shared_ptr<Constant<Position> > val]
	boost::shared_ptr<Position> pos_lit;
		val.reset(new Constant<Position>(*pos_lit));
// ----------------------------------------------------------------------------
// Match value: wraps the MatchData parsed by match_data_value (for example
// TOK[position], ANN[position, name] or MATCH(...)) in a new Match object.
// Returns boost::shared_ptr<Match>
match_value
	returns [boost::shared_ptr<Match> val]
{
	// Parsed match data that the resulting Match is built from
	boost::shared_ptr<MatchData> m;
}
	: m = match_data_value {
		val.reset(new Match(m));
	}
;

// Constant match value: parses a match_value and wraps a copy of it in a
// Constant<Match>.
// Returns boost::shared_ptr<Constant<Match> >
match_value_const
	returns [boost::shared_ptr<Constant<Match> > val]
{
	// Parsed match value; dereferenced when building the constant
	boost::shared_ptr<Match> m;
}
	: m = match_value {
		val.reset(new Constant<Match>(*m));
	}
;

// ----------------------------------------------------------------------------
// Match data used in a match operator. Exactly one of three forms is parsed:
// a token match (TOK[position]), an annotation match (ANN[position, name])
// or a match vector (MATCH(...)).
// Returns boost::shared_ptr<MatchData>
match_data_value
	returns [boost::shared_ptr<MatchData> val]
	: val = token_match_value
	| val = ann_match_value
	| val = match_vector_value
;

// Token match value: TOK[position], where position is a position literal.
// Returns boost::shared_ptr<TokenMatch>
token_match_value
	returns [boost::shared_ptr<TokenMatch> val]
{
	// Position literal found between the brackets
	boost::shared_ptr<Position> p;
}
	: "TOK" LBRACKET p = position_literal RBRACKET {
		val.reset(new TokenMatch(*p));
	}
;

// Annotation match value: ANN[position, channel], where position is a
// position literal and channel is a STRING token whose text is passed to
// AnnotationMatch through token_ref_to_std_string.
// Returns boost::shared_ptr<AnnotationMatch>
ann_match_value
	returns [boost::shared_ptr<AnnotationMatch> val]
{
	// Position literal found before the comma
	boost::shared_ptr<Position> p;
}
	: "ANN" LBRACKET p = position_literal COMMA channel : STRING RBRACKET {
		val.reset(new AnnotationMatch(*p, token_ref_to_std_string(channel)));
	}
;

// Match vector: MATCH() or MATCH(item, item, ...), e.g.
// MATCH(token, ann, MATCH()). The vector is created empty before parsing and
// filled by match_vector_value_item when items are present.
// Returns boost::shared_ptr<MatchVector>
match_vector_value
	returns [boost::shared_ptr<MatchVector> val]
{
	// Start with an empty vector, so MATCH() yields a valid, empty result
	val.reset(new MatchVector());
}
	: "MATCH" LPAREN (match_vector_value_item[val])? RPAREN
;

// Body of the MATCH value. Parses one or more comma-separated match values
// and appends each of them, in order, to the given MatchVector. The rule has
// no return value; its result is the modified mvector.
match_vector_value_item [boost::shared_ptr<MatchVector>& mvector]
{
	// Most recently parsed item
	boost::shared_ptr<Match> m_val;
}
	: m_val = match_value {
		mvector->append(m_val);
	} 
	(
		COMMA
		m_val = match_value {
			mvector->append(m_val);
		}
	)*
;

// ----------------------------------------------------------------------------
// Number may be unsigned or signed: 1, +1, -1
// Returns the value as int, converted from the token text with
// token_ref_to_int.
number 
	returns [int ret]
{
	ret = 0;
}
	: s: SIGNED_INT   { ret = token_ref_to_int(s); }
	| u: UNSIGNED_INT { ret = token_ref_to_int(u); }
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VARIABLES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Position: $Name
// Get position variable (however, before put into) from variables
// Returns boost::shared_ptr<VariableAccessor<Position> > 
position_variable_acc
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<Position> > pos_acc]
		vars.get_put<Position>(str_token_rem_grav(n));
		VariableAccessor<Position> acc = 
			vars.create_accessor<Position>(str_token_rem_grav(n));
		pos_acc.reset(new VariableAccessor<Position>(acc));

// VarGetter for Position variable. This rule wrapped position_variable_acc.
// Returns boost::shared_ptr<VarGetter<Position> >
position_variable
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<Position> > op]
	boost::shared_ptr<VariableAccessor<Position> > pos_acc;
}
	: pos_acc = position_variable_acc [vars] {
		op.reset(new VarGetter<Position>(*pos_acc));
// ----------------------------------------------------------------------------
// This expression gets (however, before put into) variable of the type StrSet 
// from scope -- variables.
// Returns boost::shared_ptr<VariableAccessor<StrSet> >
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<StrSet> > strset_acc]
		vars.get_put<StrSet>(str_token_rem_grav(n));
		VariableAccessor<StrSet> acc = 
			vars.create_accessor<StrSet>(str_token_rem_grav(n));
		strset_acc.reset(new VariableAccessor<StrSet>(acc));

// Vargetter for StrSet variable. This rule wrapped strset_variable_acc.
// Returns boost::shared_ptr<VarGetter<StrSet> > 
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<StrSet> > op]
	boost::shared_ptr<VariableAccessor<StrSet> > strset_acc;
	: strset_acc = strset_variable_acc [vars] {
		op.reset(new VarGetter<StrSet>(*strset_acc));
// ----------------------------------------------------------------------------
// Get symset variable (however, before put into) from variables
// Returns boost::shared_ptr<VariableAccessor<TSet> >
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<TSet> > symset_acc]
			vars.get_put<TSet>(str_token_rem_grav(n)); 	
			VariableAccessor<TSet> acc = 
				vars.create_accessor<TSet>(str_token_rem_grav(n));
			symset_acc.reset(new VariableAccessor<TSet>(acc));

// Vargetter for symbol set variable. This rule wrapped symset_variable_acc
// Returns boost::shared_ptr<VarGetter<TSet> > 
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<TSet> > op]
	boost::shared_ptr<VariableAccessor<TSet> > symset_acc;
	: symset_acc = symset_variable_acc [vars] {
			op.reset(new VarGetter<TSet>(*symset_acc));
// ----------------------------------------------------------------------------
// Get bool variable (however, before put into) from variables
// Returns boost::shared_ptr<VariableAccessor<Bool> >
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<Bool> > bool_acc]
			vars.get_put<Bool>(str_token_rem_grav(n));
			VariableAccessor<Bool> acc = 
				vars.create_accessor<Bool>(str_token_rem_grav(n));
			bool_acc.reset(new VariableAccessor<Bool>(acc));

// Vargetter for bool variable. It is only wrapper for bool_variable_acc
// Returns boost::shared_ptr<VarGetter<Bool> >
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<Bool> > op]
	boost::shared_ptr<VariableAccessor<Bool> > bool_acc;
	: bool_acc = bool_variable_acc [vars] {
			op.reset(new VarGetter<Bool>(*bool_acc));
rk's avatar
rk committed

// ----------------------------------------------------------------------------
// Match: $m:name
// Puts a Match variable with the given name into the variables (get_put)
// and then creates an accessor for it.
// Returns boost::shared_ptr<VariableAccessor<Match> >
match_vector_variable_acc
	[Variables& vars]
	returns [boost::shared_ptr<VariableAccessor<Match> > mvv_acc]
	: MATCH_VECTOR_PREFIX n: SYMBOL {
			// Variable name with enclosing grave accents removed
			const std::string var_name = str_token_rem_grav(n);

			vars.get_put<Match>(var_name);
			mvv_acc.reset(
				new VariableAccessor<Match>(vars.create_accessor<Match>(var_name)));
	}
;

// VarGetter for the match vector variable. Wraps the accessor produced by
// match_vector_variable_acc in a VarGetter<Match>.
// Returns boost::shared_ptr<VarGetter<Match> >
match_vector_variable
	[Variables& vars]
	returns [boost::shared_ptr<VarGetter<Match> > mvv]
{
	// Accessor for the variable; dereferenced when building the getter
	boost::shared_ptr<VariableAccessor<Match> > mvv_acc;
}
	: mvv_acc = match_vector_variable_acc [vars] {
		mvv.reset(new VarGetter<Match>(*mvv_acc));
	}
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
rk's avatar
rk committed
// OPERATORS
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Symbol set (tagset) operators. Dispatches to one of the symset
// sub-rules, or to a parenthesised symset operator.
// Returns boost::shared_ptr<Function<TSet> >
///////////////////////////////////////////////////////////////////////////////
symset_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > ret]
	: ret = symset_condition [tagset, vars]
	// Semantic predicate: a SYMBOL directly followed by '[' is parsed as a
	// getsymbol operator (presumably to tell it apart from a bare SYMBOL
	// literal reached through symset_var_val).
	| {LA(1)==SYMBOL && LA(2)==LBRACKET}? (ret = symset_getsymbol [tagset, vars])
	| ret = symset_var_val [tagset, vars]
	| ret = symset_class   [tagset, vars]
	| ret = symset_range   [tagset, vars]
	| ret = symset_catflt  [tagset, vars]
	| ret = symset_agrflt  [tagset, vars]
	//
	| LPAREN ret = symset_operator [tagset, vars] RPAREN
;
// ----------------------------------------------------------------------------
// Wrapper for a symset variable and a symset value: parses either one and
// returns it as a Function<TSet>.
symset_var_val
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > op]
	: op = symset_variable [vars]
	| op = symset_value    [tagset]
;
// ----------------------------------------------------------------------------
// Condition of the symset value:
// 	if (Bool, TSet, TSet)
// 	? TSet ? Bool : {}
symset_condition
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > op]
	boost::shared_ptr<Function<Bool> > test;
	boost::shared_ptr<Function<TSet> > p_true, p_false;
	: "if" LPAREN test  = bool_operator [tagset, vars] COMMA 
							p_true  = symset_operator  [tagset, vars] 
							(COMMA p_false = symset_operator [tagset, vars])? 
				op.reset(new Conditional<TSet>(test, p_true, p_false));
				op.reset(new Conditional<TSet>(test, p_true));
			(p_true = symset_operator [tagset, vars])
			(test = bool_operator [tagset, vars]) {
			op.reset(new Conditional<TSet>(test, p_true));
// ----------------------------------------------------------------------------
// GetSymbol operator: symbol[position], where symbol may be cas, m1, f, sg...
// The symbol text (grave accents removed) is parsed against the tagset and
// the resulting tag is combined with the position into a GetSymbols operator.
// WARNING! This rule can throw ParserException! Be careful!
symset_getsymbol
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > op]
{
	Corpus2::Tag tag;
	boost::shared_ptr<Wccl::Function<Position> > position;
}
	: t: SYMBOL LBRACKET position = position_operator [tagset, vars] RBRACKET {
			try {
				tag = tagset.parse_symbol(str_token_rem_grav(t));
			}
			// Rethrow tagset errors as ParserException carrying the same info text
			catch(Corpus2::TagParseError &e) {
				throw(ParserException(e.info()));
			}
				
			op.reset(new Wccl::GetSymbols(tag, position));
		}
;

// ----------------------------------------------------------------------------
// Class operator: class[position]. Builds a GetWordClass operator from the
// parsed position operator.
symset_class 
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > ret]
{
	// Position operator found between the brackets
	boost::shared_ptr<Function<Position> > pos;
}
	: "class" LBRACKET pos = position_operator [tagset, vars] RBRACKET { 
		ret.reset(new GetWordClass(pos));
	}
;

Paweł Kędzia's avatar
Paweł Kędzia committed
// ----------------------------------------------------------------------------
// Range operator: range(class, begin, end) or range({...}, begin, end)
Paweł Kędzia's avatar
Paweł Kędzia committed
symset_range
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > ret]
{
	boost::shared_ptr<TSet> tset;
Paweł Kędzia's avatar
Paweł Kędzia committed
	boost::shared_ptr<Function<Position> > p1, p2;
}
	: "range" LPAREN
			(tset = symset_literal [tagset] | tag_class: "class") COMMA
			p1  = position_operator [tagset, vars] COMMA 
			p2  = position_operator [tagset, vars] 
Paweł Kędzia's avatar
Paweł Kędzia committed
		RPAREN {
			if (tag_class) {
				ret.reset(new GetSymbolsInRange(Corpus2::Tag(-1), p1, p2));
Paweł Kędzia's avatar
Paweł Kędzia committed
			}
			else {
		 		ret.reset(new GetSymbolsInRange(tset->get_value(), p1, p2));
// ----------------------------------------------------------------------------
// Catflt operator: catflt(position, selector, mask). The three arguments are
// parsed in that order and passed to CatFilter.
symset_catflt
  [const Corpus2::Tagset& tagset, Variables& vars]
  returns [boost::shared_ptr<Function<TSet> > ret]
{
  // Second and third argument (symset operators)
  boost::shared_ptr<Function<TSet> > selector, mask;
  // First argument (position operator)
  boost::shared_ptr<Function<Position> > position;
}
  : "catflt" LPAREN 
			position = position_operator [tagset, vars] COMMA
			selector = symset_operator   [tagset, vars] COMMA
			mask    = symset_operator    [tagset, vars] 
  	RPAREN {
  	  ret.reset(new CatFilter(position, selector, mask));
  	}
;

Paweł Kędzia's avatar
Paweł Kędzia committed
// ----------------------------------------------------------------------------
// Agrflt operator: agrflt(lpos, rpos, attr, mask). The four arguments are
// parsed in that order and passed, together with the tagset, to AgrFilter.
symset_agrflt
  [const Corpus2::Tagset& tagset, Variables& vars]
  returns [boost::shared_ptr<Function<TSet> > ret]
{
  // Third and fourth argument (symset operators)
  boost::shared_ptr<Function<TSet> > attr, mask;
  // First and second argument (position operators)
  boost::shared_ptr<Function<Position> > lpos, rpos;
}
  : "agrflt" LPAREN 
			lpos = position_operator [tagset, vars] COMMA
			rpos = position_operator [tagset, vars] COMMA
			attr = symset_operator   [tagset, vars] COMMA
			mask = symset_operator   [tagset, vars] 
  	RPAREN {
  	  ret.reset(new AgrFilter(lpos, rpos, attr, mask, tagset));
  	}
;


///////////////////////////////////////////////////////////////////////////////
// Position operator: a position value/variable, a position condition or a
// parenthesised position operator, optionally followed by a SIGNED_INT
// offset.
// Returns boost::shared_ptr<Function<Position> >
///////////////////////////////////////////////////////////////////////////////
position_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Position> > ret]
	:
	( ret = position_var_val   [vars]
	| ret = position_condition [tagset, vars]
	| LPAREN ret = position_operator [tagset, vars] RPAREN
	)
	( // if there is SIGNED_INT after the position, it is actually a relative position
		i: SIGNED_INT {
			// Wrap the position parsed so far together with the offset
			ret.reset(new RelativePosition(ret, token_ref_to_int(i)));
		}
	)?
;
// ----------------------------------------------------------------------------
// Wrapper for a position value and a position variable: parses either one
// and returns it as a Function<Position>.
position_var_val
	[Variables& vars]
	returns [boost::shared_ptr<Function<Position> > ret]
	: ret = position_value
	| ret = position_variable [vars]
;

// ----------------------------------------------------------------------------
// Condition of the position value. Two notations are accepted:
//   if (Bool, Position, Position)  -- the third argument is optional
//   ? Position ? Bool
// Both build a Conditional<Position>; the false branch is passed only when
// it was given explicitly in the "if" form.
position_condition
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Position> > op]
{
	boost::shared_ptr<Function<Bool> > test;
	boost::shared_ptr<Function<Position> > p_true, p_false;
}
	: "if" LPAREN test  = bool_operator [tagset, vars] COMMA
							p_true  = position_operator [tagset, vars]
							(COMMA p_false = position_operator [tagset, vars])?
	RPAREN {
		// p_false stays null when the optional third argument is absent
		if (p_false) {
			op.reset(new Conditional<Position>(test, p_true, p_false));
		}
		else {
			op.reset(new Conditional<Position>(test, p_true));
		}
	}
	| Q_MARK
			p_true = position_operator [tagset, vars]
		Q_MARK
			test = bool_operator [tagset, vars] {
			op.reset(new Conditional<Position>(test, p_true));
		}
;
///////////////////////////////////////////////////////////////////////////////
// String set operator. Dispatches to one of the strset sub-rules, or to a
// parenthesised strset operator.
// Returns boost::shared_ptr<Function<StrSet> >
///////////////////////////////////////////////////////////////////////////////
strset_operator [const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
	: ret = strset_orth      [tagset, vars]
	| ret = strset_base      [tagset, vars]
	| ret = strset_lower     [tagset, vars]
	| ret = strset_upper     [tagset, vars]
	| ret = strset_affix     [tagset, vars]
	| ret = strset_var_val   [tagset, vars]
	| ret = strset_condition [tagset, vars]
	| LPAREN ret = strset_operator [tagset, vars] RPAREN
;
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
	boost::shared_ptr<Function<Position> > pos;
	: "orth" LBRACKET pos = position_operator [tagset, vars] RBRACKET { 
			ret.reset(new GetOrth(pos));
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
	boost::shared_ptr<Function<Position> > pos;
	: "base" LBRACKET pos = position_operator [tagset, vars] RBRACKET { 
		ret.reset(new GetLemmas(pos));
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
	boost::shared_ptr<Function<StrSet> > o_ret;
	: "lower" LPAREN o_ret = strset_operator [tagset, vars] RPAREN {
		ret.reset(new ToLower(o_ret));
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
	boost::shared_ptr<Function<StrSet> > o_ret;
	: "upper" LPAREN o_ret = strset_operator [tagset, vars] RPAREN {
		ret.reset(new ToUpper(o_ret));
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
	int offset = 0;
	boost::shared_ptr<Function<StrSet> > o_ret;
			o_ret = strset_operator [tagset, vars] COMMA offset = number 
		RPAREN {
			ret.reset(new Affix(o_ret, offset));
		}
// ----------------------------------------------------------------------------
// Wrapper for a strset value and a strset variable: parses either one and
// returns it as a Function<StrSet>. The tagset parameter is unused.
strset_var_val
	[const Corpus2::Tagset& /*tagset*/, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > op]
	: op = strset_value
	| op = strset_variable [vars]
;
// ----------------------------------------------------------------------------
// if (Bool, StrSet, StrSet)
// ? StrSet ? Bool : []
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > op]
	boost::shared_ptr<Function<Bool> > test;
	boost::shared_ptr<Function<StrSet> > p_true, p_false;
	: "if" LPAREN test  = bool_operator [tagset, vars] COMMA 
							p_true  = strset_operator   [tagset, vars] 
							(COMMA p_false = strset_operator [tagset, vars])? 
			op.reset(new Conditional<StrSet>(test, p_true, p_false));
			op.reset(new Conditional<StrSet>(test, p_true));
			p_true = strset_operator [tagset, vars]
			test = bool_operator [tagset, vars] {
			op.reset(new Conditional<StrSet>(test, p_true));
///////////////////////////////////////////////////////////////////////////////
// Bool operator. Dispatches to one of the bool sub-rules, or to a
// parenthesised bool operator.
// Returns boost::shared_ptr<Function<Bool> >
///////////////////////////////////////////////////////////////////////////////
bool_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
	: ret = bool_and        [tagset, vars]
	| ret = bool_or         [tagset, vars]
	| ret = bool_nor        [tagset, vars]
	| ret = bool_var_val    [tagset, vars]
	| ret = bool_regex      [tagset, vars]
	| ret = bool_inout      [tagset, vars]
	| ret = bool_condition  [tagset, vars]
	// setvar:
	| ret = setvar_operator [tagset, vars]
	// equal/in/inter:
	| ret = equal_operator  [tagset, vars]
	| ret = in_operator     [tagset, vars]
	| ret = inter_operator  [tagset, vars]
	// iterations
	| ret = bool_iteration  [tagset, vars]
	// agreement
	| ret = bool_agreement  [tagset, vars]
	| ret = bool_phrase     [tagset, vars]
	// debug operators
	| ret = debug_print_operator [tagset, vars]
	//
	| LPAREN ret = bool_operator [tagset, vars] RPAREN
;
// ----------------------------------------------------------------------------
// comma-separated predicates (bool operators)
bool_operator_comma_sep
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns 
		[boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v]
	boost::shared_ptr<Function<Bool> > pred;
	ret_v.reset(
		new std::vector<boost::shared_ptr<Function<Bool> > >
	: pred = bool_operator [tagset, vars] { 
		ret_v->push_back(pred);
	} 
	(
		COMMA pred = bool_operator [tagset, vars] {
			ret_v->push_back(pred);
		}
	)*
// ----------------------------------------------------------------------------
// "and" operator: and(pred1, pred2, ...)
// Builds an And predicate from the comma-separated list of bool operators.
bool_and
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v;
}
	: "and" LPAREN ret_v = bool_operator_comma_sep [tagset, vars] RPAREN {
			op.reset(new And(ret_v));
		}
;

// ----------------------------------------------------------------------------
// "or" operator: or(pred1, pred2, ...)
// Builds an Or predicate from the comma-separated list of bool operators.
bool_or
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v;
}
	: "or" LPAREN ret_v = bool_operator_comma_sep [tagset, vars] RPAREN {
			op.reset(new Or(ret_v));
		}
;

// ----------------------------------------------------------------------------
// "not" operator: not(pred1, pred2, ...)
// The keyword is "not", but the parsed predicates are wrapped in a Nor object.
bool_nor
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v;
}
	: "not" LPAREN ret_v = bool_operator_comma_sep [tagset, vars] RPAREN {
			op.reset(new Nor(ret_v));
		}
;

// ----------------------------------------------------------------------------
// Wrapper for bool value and bool variable
// Accepts either a bool literal (bool_value) or a bool variable
// (bool_variable). The tagset parameter is unused and kept only so that the
// rule has the same signature as the other bool rules.
bool_var_val
	[const Corpus2::Tagset& /*tagset*/, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	: op = bool_value 
	| op = bool_variable [vars]
;

// ----------------------------------------------------------------------------
// Regex operator: regex(strset_operator, STRING)
// Parses a string-set expression followed by a STRING token and builds a
// Regex predicate from them; the token text is converted with
// token_ref_to_ustring before being handed to Regex.
bool_regex
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<StrSet> > expr;
}
	: "regex" 
		LPAREN 
			expr = strset_operator [tagset, vars] COMMA reg: STRING 
		RPAREN {
			op.reset(new Regex(expr, token_ref_to_ustring(reg)));
		}
;

// ----------------------------------------------------------------------------
// Inside/outside operators: inside(position) and outside(position)
// Each alternative parses a single position operator and wraps it in an
// IsInside or IsOutside predicate respectively.
bool_inout
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<Position> > ret_pos;
}
	: "inside"  LPAREN ret_pos = position_operator [tagset, vars] RPAREN {
		op.reset(new IsInside(ret_pos));
	}
	| "outside" LPAREN ret_pos = position_operator [tagset, vars] RPAREN {
		op.reset(new IsOutside(ret_pos));
	}
;

// ----------------------------------------------------------------------------
// Conditional operator for Bool, in two syntaxes:
//   if (Bool, Bool, Bool)   - test, value when true, optional value when false
//   ? Bool ? Bool : False   - value when true, then the test
// When no "false" branch was parsed, the two-argument Conditional<Bool>
// constructor is used.
bool_condition
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<Bool> > test, p_true, p_false;
}
	: "if" LPAREN test = bool_operator [tagset, vars] COMMA 
							p_true = bool_operator [tagset, vars] 
							(COMMA p_false = bool_operator [tagset, vars])? 
	RPAREN {
		// p_false stays empty when the optional third argument is absent
		if (p_false) {
			op.reset(new Conditional<Bool>(test, p_true, p_false));
		}
		else {
			op.reset(new Conditional<Bool>(test, p_true));
		}
	}
	// Short form: note that the first operand is the value, the second the test
	| Q_MARK 
			p_true = bool_operator [tagset, vars]
		Q_MARK 
			test = bool_operator [tagset, vars] {
			op.reset(new Conditional<Bool>(test, p_true));
		}
;

// ----------------------------------------------------------------------------
// Equal operator
// Builds an Equals<T> predicate from two operands of the same kind. The
// visible alternatives cover Position, TSet (symset), StrSet and Bool operands;
// syntactic predicates are used to pick the operand kind, with Bool as the
// unguarded fallback.
// NOTE(review): this rule looks truncated in this copy of the file - the
// braces around the local declarations, the rule's leading keyword and
// parentheses, the predicate guarding the symset alternative, several closing
// braces and the terminating ';' are not present. Restore them from version
// control before building.
equal_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<TSet> > t1, t2;
	boost::shared_ptr<Function<Bool> > b1, b2;
	boost::shared_ptr<Function<StrSet>  > s1, s2;
	boost::shared_ptr<Function<Position> > p1, p2;
		(position_operator [tagset, vars]) =>
			p1 = position_operator [tagset, vars] COMMA 
			p2 = position_operator [tagset, vars] {
				op.reset(new Equals<Position>(p1, p2));
			t1 = symset_operator [tagset, vars] COMMA  
			t2 = symset_operator [tagset, vars] {
				op.reset(new Equals<TSet>(t1, t2));
			}
		)
	|
		(strset_operator [tagset, vars]) =>
			s1 = strset_operator [tagset, vars] COMMA  
			s2 = strset_operator [tagset, vars] {
				op.reset(new Equals<StrSet>(s1, s2));
			}
		)
	|
		(
			b1 = bool_operator [tagset, vars] COMMA
			b2 = bool_operator [tagset, vars] {
				op.reset(new Equals<Bool>(b1, b2));
// ----------------------------------------------------------------------------
// Subset test: builds IsSubsetOf<TSet> for two symset operands or
// IsSubsetOf<StrSet> for two strset operands.
// NOTE(review): the rule header is missing in this copy of the file;
// presumably this is in_operator (referenced from bool_operator) - confirm.
// The init-action braces, the rule keyword/parentheses and several closing
// braces are also absent and need to be restored from version control.
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<TSet> > t1, t2;
	boost::shared_ptr<Function<StrSet> > s1, s2;
		(symset_operator [tagset, vars]) =>
			t1 = symset_operator [tagset, vars] COMMA 
			t2 = symset_operator [tagset, vars] {
				op.reset(new IsSubsetOf<TSet>(t1, t2));
			s1 = strset_operator [tagset, vars] COMMA
			s2 = strset_operator [tagset, vars] {
				op.reset(new IsSubsetOf<StrSet>(s1, s2));
;
// ----------------------------------------------------------------------------
// Intersection test: builds Intersects<TSet> for two symset operands or
// Intersects<StrSet> for two strset operands.
// NOTE(review): the rule header is missing in this copy of the file;
// presumably this is inter_operator (referenced from bool_operator) - confirm.
// The init-action braces, the rule keyword/parentheses, several closing
// braces and the terminating ';' are also absent and need to be restored.
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<TSet> > t1, t2;
	boost::shared_ptr<Function<StrSet> > s1, s2;
		(symset_operator [tagset, vars]) =>
			t1 = symset_operator [tagset, vars] COMMA  
			t2 = symset_operator [tagset, vars]  {
				op.reset(new Intersects<TSet>(t1, t2));
			s1 = strset_operator [tagset, vars] COMMA  
			s2 = strset_operator [tagset, vars]  {
				op.reset(new Intersects<StrSet>(s1, s2));
// ----------------------------------------------------------------------------
// Debug printing: debug(expression)
// Wraps any single operator in a DebugPrint predicate. Syntactic predicates
// try the argument as a position, then a symset, then a strset operator; when
// none of them matches, the argument is parsed as a bool operator.
debug_print_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	// Common base pointer so that every operand kind can share one variable
	boost::shared_ptr<FunctionBase> v;
}
	: "debug" LPAREN
	(
		(position_operator [tagset, vars]) =>
		(
			v = position_operator [tagset, vars] {
				ret.reset(new DebugPrint(v));
			}
		)
	|
		(symset_operator [tagset, vars]) =>
		(
			v = symset_operator [tagset, vars] {
				ret.reset(new DebugPrint(v));
			}
		)
	|
		(strset_operator [tagset, vars]) =>
		(
			v = strset_operator [tagset, vars] {
				ret.reset(new DebugPrint(v));
			}
		)
	|
		(
			v = bool_operator [tagset, vars] {
				ret.reset(new DebugPrint(v));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// Iterations: only, atleast, llook, rlook
// Every form takes two position operators, a position variable accessor and
// a bool operator; "atleast" additionally takes a number (min_match).
// The accessor is dereferenced and passed by value/reference to the operator
// constructors, the other arguments are passed as shared pointers.
bool_iteration
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	int min_match = 0;
	boost::shared_ptr<Function<Bool> > expr;
	boost::shared_ptr<Function<Position> > lpos, rpos;
	boost::shared_ptr<VariableAccessor<Position> > pacc;
}
	: "only" LPAREN 
			lpos = position_operator [tagset, vars] COMMA 
			rpos = position_operator [tagset, vars] COMMA
			pacc = position_variable_acc [vars]     COMMA
			expr = bool_operator     [tagset, vars]
		RPAREN {
			ret.reset(new Only(lpos, rpos, *pacc, expr));
		}

	| "atleast" LPAREN
			lpos = position_operator [tagset, vars] COMMA 
			rpos = position_operator [tagset, vars] COMMA
			pacc = position_variable_acc [vars]     COMMA
			expr = bool_operator     [tagset, vars] COMMA
			min_match = number
		RPAREN {
			ret.reset(new AtLeast(lpos, rpos, *pacc, expr, min_match));
		}
	// For llook the first parsed position is stored in rpos and the second in
	// lpos; the constructor still receives them as (lpos, rpos).
	| "llook" LPAREN //note inverted rpos/lpos order
			rpos = position_operator [tagset, vars] COMMA 
			lpos = position_operator [tagset, vars] COMMA
			pacc = position_variable_acc [vars]     COMMA
			expr = bool_operator     [tagset, vars] 
		RPAREN {
			ret.reset(new LeftLook(lpos, rpos, *pacc, expr));
		}
	| "rlook" LPAREN
			lpos = position_operator [tagset, vars] COMMA 
			rpos = position_operator [tagset, vars] COMMA
			pacc = position_variable_acc [vars]     COMMA
			expr = bool_operator     [tagset, vars] 
		RPAREN {
			ret.reset(new RightLook(lpos, rpos, *pacc, expr));
		}
;

// ----------------------------------------------------------------------------
// Agreement operator: agr, agrpp, wagr
// All three forms take (position, position, symset) and differ only in the
// predicate class that is built: StrongAgreement, PointAgreement and
// WeakAgreement respectively. The tagset is forwarded to each constructor.
bool_agreement
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	boost::shared_ptr<Function<TSet> > expr;
	boost::shared_ptr<Function<Position> > lpos, rpos;
}
	: "agr"   LPAREN 
			lpos = position_operator [tagset, vars] COMMA 
			rpos = position_operator [tagset, vars] COMMA 
			expr = symset_operator [tagset, vars]
		RPAREN {
			ret.reset(new StrongAgreement(lpos, rpos, expr, tagset));
		}
	| "agrpp" LPAREN
			lpos = position_operator [tagset, vars] COMMA 
			rpos = position_operator [tagset, vars] COMMA 
			expr = symset_operator [tagset, vars]
		RPAREN {
			ret.reset(new PointAgreement(lpos, rpos, expr, tagset));
		}
	| "wagr"  LPAREN
			lpos = position_operator [tagset, vars] COMMA 
			rpos = position_operator [tagset, vars] COMMA 
			expr = symset_operator [tagset, vars]
		RPAREN {
			ret.reset(new WeakAgreement(lpos, rpos, expr, tagset));
		}
;

// ----------------------------------------------------------------------------
// Parse operator on L1 level
// Dispatches to the phrase annotation operators or the phrase iteration
// operators and forwards their result.
bool_phrase
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
	: ret = bool_phrase_annotation [tagset, vars]
	| ret = bool_phrase_iteration  [tagset, vars]
;

// ----------------------------------------------------------------------------
// Annotation operator: phrase, phrase_beg, phrase_end, phrase_whole, phrase_pp
// The first three forms take (position, STRING); phrase_whole and phrase_pp
// take (position, position, STRING).
// NOTE: the semantic actions are still TODO, so the rule only validates the
// syntax and `ret` is never assigned here.
bool_phrase_annotation
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	boost::shared_ptr<Function<Position> > lpos, rpos;
}
	: "phrase" LPAREN 
			lpos = position_operator [tagset, vars] COMMA n1: STRING 
		RPAREN {
			// TODO
		}
	| "phrase_beg" LPAREN 
			lpos = position_operator [tagset, vars] COMMA n2: STRING 
		RPAREN {
			// TODO
		}
	| "phrase_end" LPAREN 
			lpos = position_operator [tagset, vars] COMMA n3: STRING 
		RPAREN {
			// TODO
		}
	| "phrase_whole" LPAREN 
			lpos = position_operator [tagset, vars] COMMA 
			rpos = position_operator [tagset, vars] COMMA n4: STRING 
		RPAREN {
			// TODO
		}
	| "phrase_pp" LPAREN 
			lpos = position_operator [tagset, vars] COMMA 
			rpos = position_operator [tagset, vars] COMMA n5: STRING 
		RPAREN {
			// TODO
		}
;

// ----------------------------------------------------------------------------
// Phrase iteration operator: lphrase, rphrase
// Both forms take (position operator, position variable, STRING).
// NOTE: the semantic actions are still TODO, so the rule only validates the
// syntax and `ret` is never assigned here.
bool_phrase_iteration
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	boost::shared_ptr<Function<Position> > position;
	boost::shared_ptr<VarGetter<Position> > var_position;
}
	: "lphrase" LPAREN
			position     = position_operator [tagset, vars] COMMA
			var_position = position_variable [vars]         COMMA
			n1: STRING
		RPAREN {
			// TODO
		}
	| "rphrase" LPAREN 
			position     = position_operator [tagset, vars] COMMA
			var_position = position_variable [vars]         COMMA
			n2: STRING
		RPAREN {
			// TODO
		}
;


// ----------------------------------------------------------------------------
// Setvar operator: setvar(variable, value)
// Consumes the "setvar" keyword and the parentheses, then delegates the
// argument pair to the type-specific rule (position, bool, strset or symset).
// Returns boost::shared_ptr<Function<Bool> >
// ----------------------------------------------------------------------------
setvar_operator 
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
	: "setvar" LPAREN
	  (
		  ret = position_setvar [tagset, vars]
		| ret = bool_setvar     [tagset, vars]
		| ret = strset_setvar   [tagset, vars]
		| ret = symset_setvar   [tagset, vars]
	  )
	  RPAREN
;

// ----------------------------------------------------------------------------
// Setvar for position
// Parses "<position variable>, <position operator>" (the surrounding keyword
// and parentheses are consumed by setvar_operator) and builds a
// VarSetter<Position> from the variable accessor and the value expression.
position_setvar 
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<Position> > ret_op;
	boost::shared_ptr<VariableAccessor<Position> > ret_acc;
}
	:	ret_acc = position_variable_acc [vars]
		COMMA
		ret_op  = position_operator [tagset, vars] {
			op.reset(new VarSetter<Position>(*ret_acc, ret_op));
		}
;

// ----------------------------------------------------------------------------
// Setvar for bool
// Parses "<bool variable>, <bool operator>" and builds a VarSetter<Bool>
// from the variable accessor and the value expression.
bool_setvar
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<Bool> > ret_op;
	boost::shared_ptr<VariableAccessor<Bool> > ret_acc;
}
	:	ret_acc = bool_variable_acc [vars]
		COMMA
		ret_op  = bool_operator [tagset, vars] {
			op.reset(new VarSetter<Bool>(*ret_acc, ret_op));
		}
;

// ----------------------------------------------------------------------------
// Setvar for strset
// Parses "<strset variable>, <strset operator>" and builds a
// VarSetter<StrSet> from the variable accessor and the value expression.
strset_setvar
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<StrSet> > ret_op;
	boost::shared_ptr<VariableAccessor<StrSet> > ret_acc;
}
	: ret_acc = strset_variable_acc [vars]
	  COMMA 
	  ret_op  = strset_operator [tagset, vars] {
			op.reset(new VarSetter<StrSet>(*ret_acc, ret_op));
		}
;

// ----------------------------------------------------------------------------
// Setvar for symset
// Parses "<symset variable>, <symset operator>" and builds a VarSetter<TSet>
// from the variable accessor and the value expression.
symset_setvar
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<TSet> > ret_op;
	boost::shared_ptr<VariableAccessor<TSet> > ret_acc;
}
	: ret_acc = symset_variable_acc [vars]
	  COMMA 
	  ret_op  = symset_operator [tagset, vars] {
			op.reset(new VarSetter<TSet>(*ret_acc, ret_op));
		}
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Tagging actions and rules:
// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------
// Single action such as select, delete, relabel, unify, mark or unmark.
// Delegates to the rule of the matching action and forwards its result as a
// boost::shared_ptr<TagAction>.
action
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<TagAction> act]
	: act = action_select  [tagset, vars]
	| act = action_delete  [tagset, vars]
	| act = action_relabel [tagset, vars]
	| act = action_unify   [tagset, vars]
	| act = action_mark    [tagset, vars]
	| act = action_unmark  [tagset, vars]
;

// Action sequence - the actions are separated with commas:
// 	select(...), select(...), delete(...)
// Returns a shared vector holding the parsed actions in source order.
action_sequence
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<std::vector<boost::shared_ptr<TagAction> > > v_act]
{
	boost::shared_ptr<TagAction> act;
	// The result vector is allocated up front so the actions can append to it.
	v_act.reset(new std::vector<boost::shared_ptr<TagAction> >);
}
	: act = action[tagset, vars] {
		v_act->push_back(act);
	} 
	(
		COMMA act = action[tagset, vars] {
			v_act->push_back(act);
		}
	)*
;

// ----------------------------------------------------------------------------
// Single rule:
// 	rule(NAME, ACTIONS) or rule(NAME, COND, ACTIONS)
// Builds a TagRule from the rule name (STRING token converted with
// token_ref_to_std_string), the variables, the action sequence and the
// optional condition.
// NOTE(review): this copy of the file interleaves two variants of the rule
// body (one using an optional "condition COMMA" subrule, one using a
// syntactic predicate on bool_operator), together with blame-view text that
// is not part of the grammar; the init-action is never closed and several
// braces are missing. Only one variant should remain - restore the rule from
// version control before building.
rule
	[const Corpus2::Tagset& tagset, Variables& vars]
Adam Wardynski's avatar
Adam Wardynski committed
	returns [boost::shared_ptr<TagRule> rle]
{
	boost::shared_ptr<Function<Bool> > condition;
	boost::shared_ptr<std::vector<boost::shared_ptr<TagAction> > > actions;
	: "rule" LPAREN name: STRING COMMA 
				(condition = bool_operator [tagset, vars] COMMA)?
				actions = action_sequence [tagset, vars] 
		RPAREN {
			if (condition) {
				rle.reset(
Adam Wardynski's avatar
Adam Wardynski committed
					new TagRule(token_ref_to_std_string(name), vars, actions, condition));
Adam Wardynski's avatar
Adam Wardynski committed
					new TagRule(token_ref_to_std_string(name), vars, actions));
	: "rule" LPAREN name: STRING COMMA
	(
		(bool_operator[tagset, vars]) =>
		(
			condition = bool_operator [tagset, vars] COMMA
			actions = action_sequence [tagset, vars] {
				// rule(NAME, COND, ACTIONS)
				rle.reset(
Adam Wardynski's avatar
Adam Wardynski committed
					new TagRule(token_ref_to_std_string(name), vars, actions, condition));
			}
		)
	|
		(
			actions = action_sequence [tagset, vars] {
				// rule(NAME, ACTIONS)
Adam Wardynski's avatar
Adam Wardynski committed
				rle.reset(new TagRule(token_ref_to_std_string(name), vars, actions));
;

// Rule sequence: one or more rules separated with commas.
// Returns a boost::shared_ptr<RuleSequence>; every parsed rule is
// dereferenced and handed to RuleSequence::push_back.
rule_sequence
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<RuleSequence> rule_seq]
{
	// FIXME: shouldn't this be a shared_ptr by any chance?
	boost::shared_ptr<TagRule> rle;

	rule_seq.reset(new RuleSequence());
}
	: rle = rule [tagset, vars] {
		rule_seq->push_back(*rle);
	}
	(
		COMMA rle = rule [tagset, vars] {
			rule_seq->push_back(*rle);
		}
	)*
;

// Temporary name. 
// This is wrapper for rule_sequence in rules section in the wccl file:
// 	rules(rule(...), rule(...), ...)
// The result of rule_sequence is forwarded unchanged; the action is empty.
rules
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<RuleSequence> rule_seq]
	: "rules" LPAREN rule_seq = rule_sequence [tagset, vars] RPAREN {
		//
	}
;

// ----------------------------------------------------------------------------
// Select action: 
//	select(position, predicate) or select(predicate);
// A syntactic predicate checks whether the first argument parses as a
// position operator; if not, the single argument is parsed as the predicate.
// Note that Select takes the condition first and the position second.
action_select 
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Select> action]
{
	boost::shared_ptr<Function<Position> > pos;
	boost::shared_ptr<Function<Bool> > condition;
}
	: "select" LPAREN
	(
		(position_operator [tagset, vars]) =>
		(
			pos = position_operator [tagset, vars] COMMA
			condition = bool_operator [tagset, vars] {
				// select(position, condition); 
				action.reset(new Select(condition, pos));
			}
		)
	|
		(
			condition = bool_operator [tagset, vars] {
				// select(condition); 
				action.reset(new Select(condition));
			}
		)
	) 
	RPAREN
;

// ----------------------------------------------------------------------------
// Delete action
//	delete(position, predicate) or delete(predicate);
// A syntactic predicate checks whether the first argument parses as a
// position operator; if not, the single argument is parsed as the predicate.
// Note that Delete takes the condition first and the position second.
action_delete
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Delete> action]
{
	boost::shared_ptr<Function<Position> > pos;
	boost::shared_ptr<Function<Bool> > condition;
}
	: "delete" LPAREN
	(
		(position_operator [tagset, vars]) =>
		(
			pos = position_operator [tagset, vars] COMMA
			condition = bool_operator [tagset, vars] {
				// delete(position, condition); 
				action.reset(new Delete(condition, pos));
			}
		)
	|
		(
			condition = bool_operator [tagset, vars] {
				// delete(condition); 
				action.reset(new Delete(condition));
			}
		)
	) 
	RPAREN
;

// ----------------------------------------------------------------------------
// Relabel action
// 	relabel(pos, symset, predicate) or relabel(symset, predicate)
// A syntactic predicate checks whether the first argument parses as a
// position operator; otherwise the arguments are parsed as (symset, predicate).
// Relabel receives the replacement symset first, then the condition and,
// when given, the position.
action_relabel
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Relabel> action]
{
	boost::shared_ptr<Function<Position> > pos;
	boost::shared_ptr<Function<Bool> > condition;
	boost::shared_ptr<Function<TSet> > replace_with;
}
	: "relabel" LPAREN
	(
		(position_operator [tagset, vars]) =>
		(
			pos = position_operator [tagset, vars] COMMA
			replace_with = symset_operator [tagset, vars] COMMA
			condition = bool_operator [tagset, vars] {
				// relabel(pos, symset, predicate)
				action.reset(new Relabel(replace_with, condition, pos));
			}
		)
	|
		(
			replace_with = symset_operator [tagset, vars] COMMA
			condition = bool_operator [tagset, vars] {
				// relabel(symset, predicate)
				action.reset(new Relabel(replace_with, condition));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// Unify action 
// 	unify(pos_begin, pos_end, symset)
// Parses two position operators and a symset operator and builds a Unify
// action from them, in that order.
action_unify
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Unify> action]
{
	boost::shared_ptr<Function<TSet> > attribs_expr;
	boost::shared_ptr<Function<Position> > pos_begin, pos_end;
}
	: "unify" LPAREN
			pos_begin    = position_operator [tagset, vars] COMMA
			pos_end      = position_operator [tagset, vars] COMMA
			attribs_expr = symset_operator   [tagset, vars]
		RPAREN  {
			action.reset(new Unify(pos_begin, pos_end, attribs_expr));
		}
;

// ----------------------------------------------------------------------------
// Mark action
// 	mark(pos_begin, pos_end, "channel") or
// 	mark(pos_begin, pos_end, pos_head, "channel")
// The head position is optional; when it is omitted, pos_head is passed to
// Mark as an empty shared pointer. The channel name is the text of the
// STRING token.
action_mark
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Mark> action]
{
	boost::shared_ptr<Function<Position> > pos_begin, pos_end, pos_head;
}
	: "mark" LPAREN
			pos_begin    = position_operator [tagset, vars] COMMA
			pos_end      = position_operator [tagset, vars] COMMA
			(pos_head    = position_operator [tagset, vars] COMMA)?
			chan_name: STRING
		RPAREN  {
			// RefToken::operator-> yields the underlying antlr::Token*, so no
			// C-style pointer cast is needed to read the token text.
			action.reset(new Mark(pos_begin, pos_end, pos_head, chan_name->getText()));
		}
;

// ----------------------------------------------------------------------------
// Unmark action
// 	unmark(pos, "channel")
// Parses a position operator and a STRING token and builds an Unmark action
// from the position and the token text.
action_unmark
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Unmark> action]
{
	boost::shared_ptr<Function<Position> > pos;
}
	: "unmark" LPAREN
			pos = position_operator [tagset, vars] COMMA
			chan_name: STRING
		RPAREN  {
			// RefToken::operator-> yields the underlying antlr::Token*, so no
			// C-style pointer cast is needed to read the token text.
			action.reset(new Unmark(pos, chan_name->getText()));
		}
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Match rules
// Entry rule for match rules; currently the only alternative is the apply
// operator, whose result is forwarded as a generic Expression pointer.
// Returns boost::shared_ptr<Expression>
match_rule_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Expression> ret_op]
{
	//
}
	: ret_op = match_apply_operator [tagset, vars]
;

// Match apply operator:
// 	apply(match(), cond(conditions), actions(actions)) 
// 	apply(match(), actions(actions))
// The "cond(...)" section is optional; when it is absent, `conditions` stays
// empty and the three-argument ApplyOperator constructor is used.
// Returns boost::shared_ptr<ApplyOperator>
match_apply_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<ApplyOperator> ret_op]
{
	// Accessor for the variable named "_M", created on entry to the rule
	VariableAccessor<Match> matches = vars.create_accessor<Match>("_M");
	boost::shared_ptr<const MatchOperator> match_op;
	boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > actions;
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > conditions;
}
	: "apply" LPAREN 
		match_op = match_operator[tagset, vars] COMMA
		("cond"   LPAREN conditions = bool_operator_comma_sep [tagset, vars] RPAREN COMMA)?
		"actions" LPAREN actions    =  match_action_comma_sep [tagset, vars] RPAREN
		RPAREN {
			if (conditions) {
				ret_op.reset(
					new ApplyOperator(matches, match_op, actions, conditions)
				);
			}
			else {
				ret_op.reset(
					new ApplyOperator(matches, match_op, actions)
				);
			}
		}
;

// Match operator: match(match_conditions)
// Wraps the parsed conjunction of match conditions in a MatchOperator.
// Returns boost::shared_ptr<MatchOperator>
match_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<MatchOperator> op]
{
	boost::shared_ptr<ConjConditions> match_cond;
}
	: "match" LPAREN match_cond = match_condition [tagset,vars] RPAREN {
		op.reset(new MatchOperator(match_cond));
	}
;

// Match conditions. Wrapper for vector of the match conditions:
// the vector returned by match_condition_in is used to construct a
// ConjConditions object.
// Returns boost::shared_ptr<ConjConditions>
match_condition
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<ConjConditions> condition]
{
	std::vector<boost::shared_ptr<const MatchCondition> > m_cond;
}
	: m_cond = match_condition_in [tagset, vars] {
		condition.reset(new ConjConditions(m_cond));
	}
;

// Match conditions: one or more conditions separated with commas, collected
// in source order.
// Returns std::vector< boost::shared_ptr<const MatchCondition> >
match_condition_in
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [std::vector< boost::shared_ptr<const MatchCondition> > ret]
{
	boost::shared_ptr<const MatchCondition> r_cond;
}
	: r_cond = match_cond_all[tagset, vars] {
		ret.push_back(r_cond);
	}
	(
		COMMA
		r_cond = match_cond_all[tagset, vars] {
			ret.push_back(r_cond);
		}
	)*
;

// One of the match conditions: either optional(...) or repeat(...).
// Returns boost::shared_ptr<const MatchCondition>
match_cond_all
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<const MatchCondition> ret]
	: ret = match_cond_optional [tagset, vars]
	| ret = match_cond_repeate  [tagset, vars]
;

// Match condition - optional: optional(match_conditions)
// Wraps the nested conjunction of conditions in an OptionalMatch.
// Returns boost::shared_ptr<OptionalMatch>
match_cond_optional
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<OptionalMatch> mtch]
{
	boost::shared_ptr<ConjConditions> m_cond;
}
	: "optional" LPAREN m_cond = match_condition [tagset, vars] RPAREN {
		mtch.reset(new OptionalMatch(m_cond));
	}
;

// Match condition - repeat: repeat(match_conditions)
// Wraps the nested conjunction of conditions in a RepeatedMatch.
// Returns boost::shared_ptr<RepeatedMatch>
match_cond_repeate
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<RepeatedMatch> mtch]
{
	boost::shared_ptr<ConjConditions> m_cond;
}
	: "repeat" LPAREN m_cond = match_condition [tagset, vars] RPAREN {
		mtch.reset(new RepeatedMatch(m_cond));
	}
;

// ----------------------------------------------------------------------------

// Match actions. Match action can be mark or unmark; the result of the
// chosen alternative is forwarded as a generic MatchAction pointer.
// Returns boost::shared_ptr<MatchAction>
match_action
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<MatchAction> m_act]
	: m_act = match_mark_action   [tagset, vars]
	| m_act = match_unmark_action [tagset, vars]
;

// Match mark action
// 	mark(match_from, "annotation") or mark(match_from, match_to, "annotation")
// The second match argument is optional; when it is absent, the two-argument
// MarkMatch constructor is used. The annotation name is the text of the
// STRING token.
// Returns boost::shared_ptr<MarkMatch>
match_mark_action
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<MarkMatch> m_act]
{
	boost::shared_ptr<Function<Match> > match_to;
	boost::shared_ptr<Function<Match> > match_from;
}
	: "mark" LPAREN 
			match_from = match_fit[tagset, vars] COMMA
			(match_to  = match_fit[tagset, vars] COMMA) ?
			annotation_name : STRING
		RPAREN {
			// RefToken::operator-> yields the underlying antlr::Token*, so no
			// C-style pointer cast is needed to read the token text.
			if (!match_to) {
				m_act.reset(
					new MarkMatch(
						match_from,
						annotation_name->getText()));
			} else {
				m_act.reset(
					new MarkMatch(
						match_from,
						match_to,
						annotation_name->getText()));
			}
		}
;

// Match unmark action
// NOTE: not implemented yet - the rule only accepts "unmark()" with an empty
// argument list and has no semantic action, so m_act is never assigned.
// Returns boost::shared_ptr<MatchAction> (left empty for now)
match_unmark_action
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<MatchAction> m_act]
	: "unmark" LPAREN /* TODO */ RPAREN
;

// Match actions separated by commas (at least one action is required),
// collected in source order.
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > >
match_action_comma_sep
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > r_vec]
{
	boost::shared_ptr<MatchAction> act;

	// The result vector is allocated up front so the actions can append to it.
	r_vec.reset(
		new std::vector<boost::shared_ptr<MatchAction> >
	);
}
	: act = match_action [tagset, vars] {
			r_vec->push_back(act);
	}
	(
		COMMA act = match_action [tagset, vars] {
			r_vec->push_back(act);
		}
	)*
;

// Match argument used by the match actions: delegates either to
// match_vector_variable or to match_value_const and forwards the result.
// The tagset parameter is not used by either alternative.
// Returns boost::shared_ptr<Function<Match> >
match_fit
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Match> > ret]
{
	//
}
	: ret = match_vector_variable [vars]
	| ret = match_value_const
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ANTLR LEXER
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Lexer definition. The token vocabulary is exported under the name
// ANTLRExpr, the accepted character range is '\3'..'\377', and literal
// testing is disabled by default (individual token rules may enable it).
class ANTLRLexer extends Lexer;
options {
	exportVocab    = ANTLRExpr;
	charVocabulary = '\3'..'\377';
	testLiterals   = false;
}

// String literal delimited by double or single quotes. The delimiters are
// dropped from the token text ('!' suffix); a string may not contain its own
// delimiter or a line break.
STRING
options {
	paraphrase = "a string";
}
	: '"'!  (~('"'  | '\n' | '\r'))* '"'!
	| '\''! (~('\'' | '\n' | '\r'))* '\''!
;

// Integer with a mandatory sign. Spaces and tabs between the sign and the
// digits are accepted but dropped from the token text ('!' suffix).
SIGNED_INT
options {
	paraphrase = "Signed integer";
}
	: ('-'|'+') (' '!|'\t'!)* ('0'..'9')+ 
;

// Integer without a sign: one or more decimal digits.
UNSIGNED_INT
options {
	paraphrase = "Unsigned integer";
}
	: ('0'..'9')+ 
;	


rk's avatar
rk committed
// Quote token.
// NOTE(review): the closing '}' of the options block, the rule body and the
// terminating ';' are missing in this copy of the file - restore them from
// version control.
QUOT_MARK
options {
	paraphrase = "Quote";
rk's avatar
rk committed

// Apostrophe token.
// NOTE(review): the closing '}' of the options block, the rule body and the
// terminating ';' are missing in this copy of the file - restore them from
// version control.
APOS_MARK
options {
	paraphrase = "Apostrophe";
rk's avatar
rk committed
// Question mark token; used by the short form of the conditional operators.
Q_MARK
options {
	paraphrase = "Question mark";
}
	: '?'
;

// Exclamation mark token.
E_MARK
options {
	paraphrase = "Exclamation mark";
}
	: '!'
;

// Prefix token for string-set variables.
// NOTE(review): this span looks truncated - the body of STR_PREFIX is
// missing, and the "Symset prefix" paraphrase line presumably belongs to a
// separate symset-prefix token rule whose header and body were lost. Restore
// both rules from version control.
STR_PREFIX
options {
	paraphrase = "String prefix";
}
	paraphrase = "Symset prefix";
;

// Prefix token for bool variables.
// NOTE(review): the rule body and the terminating ';' are missing in this
// copy of the file - restore them from version control.
BOOL_PREFIX
options {
	paraphrase = "Bool prefix";
}
rk's avatar
rk committed

// Prefix token for position variables.
// NOTE(review): only the rule header survives in this copy of the file; the
// options content, the rule body and the terminating ';' are missing -
// restore them from version control.
POS_PREFIX
options {
// Prefix token for match vector variables: the literal "$m:".
MATCH_VECTOR_PREFIX
options {
	paraphrase = "Match vector prefix";
}
	: "$m:"
;

// Left square bracket.
LBRACKET 
options {
	paraphrase = "'['";
}
	: '[' 
;

// Right square bracket.
RBRACKET 
options {
	paraphrase = "']'";
}
	: ']' 
;

// Left parenthesis; opens the argument list of every operator.
LPAREN
options {
	paraphrase = "'('";
}   
	: '(' 
;

// Right parenthesis; closes the argument list of every operator.
RPAREN 
options {
	paraphrase = "')'";
} 
	: ')' 
;

// Left curly brace.
LCURLY 
options {
	paraphrase = "'{'";
} 
	: '{' 
;

// Right curly brace.
RCURLY 
options {
	paraphrase = "'}'";
} 
	: '}' 
;

// At sign.
AT_MARK 
options {
	paraphrase = "'@'";
} 
	: '@' 
;

// Comma; separates operator arguments and list elements.
COMMA
options { 
	paraphrase = "','";
}
	: ','
;

// Identifier: a letter or underscore followed by letters, digits or
// underscores, optionally enclosed in backticks. testLiterals is enabled
// for this token, so matched text is checked against the literals table.
SYMBOL
options { 
	paraphrase = "Symbol"; 
	testLiterals = true; 
}
	: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
	| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
;

// Whitespace: skipped by the lexer. Line breaks additionally call newline()
// so that line numbers in error messages stay correct.
// NOTE(review): the first alternative (a single space) was reconstructed
// from the usual ANTLR 2 whitespace rule - confirm against version control.
WS
	: ( ' '
	  	| '\t'
	  	| '\f'
	  	| 
			( "\r\n"
				| '\r'
				| '\n'
			) { newline(); } 
		) { $setType(antlr::Token::SKIP); } 
;

// Single line comment: "//" up to (not including) the end of line.
// The token is skipped, so the parser never sees it.
COMMENT
options {
	paraphrase = "Single line comment";
}
	: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP);  }
;

// Multi line comment. Inside the loop a '*' is accepted only when it is not
// followed by '/', and every line break calls newline().
// NOTE(review): this rule looks truncated in this copy of the file - the
// opening delimiter before the loop, any subrule options, the loop's closing
// ")*", the closing delimiter, the skip action and the terminating ';' are
// not present, and blame-view text is embedded in the options block. Restore
// the rule from version control.
ML_COMMENT
options {
	paraphrase = "Multi line comment";
rk's avatar
rk committed
}
	(			// TODO: test it and add reference to the site it's taken from!
				/* This actually works OK despite the ambiguity that
				'\r' '\n' can be matched in one alternative or by matching
				'\r' in one iteration and '\n' in another.. But 
				this is really matched just by one rule per (...)* 
				loop iteration, so it's OK.
				This is exactly how they do it all over the web - just
				turn off the warning for this particular token.*/
      : { LA(2)!='/' }? '*'
      | '\r' '\n' { newline(); }
      | '\r' { newline(); }
      | '\n' { newline(); }
      | ~('*'|'\n'|'\r')
rk's avatar
rk committed

// Hash sign.
HASH
options { 
	paraphrase = "'#'"; 
}
	: '#' 
;

//DSEPARATOR
//options { 
//	paraphrase = "':-'"; 
//}
//	: ":-" 
//;