Skip to content
Snippets Groups Projects
grammar.g 64.9 KiB
Newer Older
rk's avatar
rk committed
header {
Paweł Kędzia's avatar
Paweł Kędzia committed
	//don't try to add all the headers inside our namespace
	#include <libwccl/parser/ParserException.h>
rk's avatar
rk committed

	#include <cstdio>
	#include <antlr/Token.hpp>
	#include <boost/lexical_cast.hpp>

	// values/variables
	#include <libwccl/variables.h>
	#include <libwccl/values/bool.h>
	#include <libwccl/values/tset.h>
	#include <libwccl/values/strset.h>
	#include <libwccl/values/position.h>
	
	// sentence context
	#include <libwccl/sentencecontext.h>

	// operators
	#include <libwccl/ops/operator.h>

	#include <libwccl/ops/functions/constant.h>
	#include <libwccl/ops/functions/vargetter.h>
	#include <libwccl/ops/functions/conditional.h>

	#include <libwccl/ops/functions/bool/varsetter.h>
	#include <libwccl/ops/functions/bool/predicates/debug.h>
	#include <libwccl/ops/functions/bool/predicates/or.h>
	#include <libwccl/ops/functions/bool/predicates/nor.h>
	#include <libwccl/ops/functions/bool/predicates/and.h>
	#include <libwccl/ops/functions/bool/predicates/regex.h>
	#include <libwccl/ops/functions/bool/predicates/intersects.h>
	#include <libwccl/ops/functions/bool/predicates/issubsetof.h>
	#include <libwccl/ops/functions/bool/predicates/isinside.h>
	#include <libwccl/ops/functions/bool/predicates/isoutside.h>
	#include <libwccl/ops/functions/bool/predicates/equals.h>
	#include <libwccl/ops/functions/bool/predicates/weakagreement.h>
	#include <libwccl/ops/functions/bool/predicates/pointagreement.h>
	#include <libwccl/ops/functions/bool/predicates/strongagreement.h>
ilor's avatar
ilor committed
	#include <libwccl/ops/functions/bool/predicates/annsub.h>

	#include <libwccl/ops/functions/strset/affix.h>
	#include <libwccl/ops/functions/strset/getorth.h>
	#include <libwccl/ops/functions/strset/toupper.h>
	#include <libwccl/ops/functions/strset/tolower.h>
	#include <libwccl/ops/functions/strset/getlemmas.h>
Paweł Kędzia's avatar
Paweł Kędzia committed
	#include <libwccl/ops/functions/tset/agrfilter.h>
Adam Radziszewski's avatar
Adam Radziszewski committed
	#include <libwccl/ops/functions/tset/catfilter.h>
	#include <libwccl/ops/functions/tset/getsymbols.h>
	#include <libwccl/ops/functions/tset/getwordclass.h>
Paweł Kędzia's avatar
Paweł Kędzia committed
	#include <libwccl/ops/functions/tset/getsymbolsinrange.h>
	#include <libwccl/ops/functions/position/relativeposition.h>
	#include <libwccl/ops/functions/position/lasttoken.h>
	#include <libwccl/ops/functions/position/firsttoken.h>
	#include <libwccl/ops/functions/bool/iterations/only.h>
	#include <libwccl/ops/functions/bool/iterations/atleast.h>
	#include <libwccl/ops/functions/bool/iterations/leftlook.h>
	#include <libwccl/ops/functions/bool/iterations/rightlook.h>
Adam Wardynski's avatar
Adam Wardynski committed
	#include <libwccl/ops/tagrule.h>
	#include <libwccl/ops/rulesequence.h>
	//
	#include <libwccl/ops/tagactions/unify.h>
	#include <libwccl/ops/tagactions/delete.h>
	#include <libwccl/ops/tagactions/select.h>
	#include <libwccl/ops/tagactions/relabel.h>
	#include <libwccl/ops/tagactions/mark.h>
	#include <libwccl/ops/tagactions/unmark.h>
	// Match operators
	#include <libwccl/values/tokenmatch.h>
	#include <libwccl/values/annotationmatch.h>
	#include <libwccl/values/matchvector.h>
	#include <libwccl/ops/match/applyoperator.h>
	#include <libwccl/ops/match/conditions/optionalmatch.h>
	#include <libwccl/ops/match/conditions/repeatedmatch.h>
	#include <libwccl/ops/match/conditions/isannotatedas.h>
	#include <libwccl/ops/match/conditions/matchtext.h>
	#include <libwccl/ops/match/conditions/conjconditions.h>
	#include <libwccl/ops/match/conditions/tokencondition.h>
	#include <libwccl/ops/match/actions/markmatch.h>
	#include <libwccl/ops/match/actions/unmarkmatch.h>
	#include <libwccl/ops/functions/match/submatch.h>
rk's avatar
rk committed
	// Unicode String
	#include <unicode/uniset.h>
	#include <unicode/unistr.h>
Paweł Kędzia's avatar
Paweł Kędzia committed
	// start our namespace again
	ANTLR_BEGIN_NAMESPACE(Wccl)
rk's avatar
rk committed
}

options {
	language = "Cpp";
//	genHashLines = true;
rk's avatar
rk committed
}

// ----------------------------------------------------------------------------
rk's avatar
rk committed
// ANTLR PARSER
// ----------------------------------------------------------------------------
rk's avatar
rk committed
class ANTLRParser extends Parser;
options {
rk's avatar
rk committed
	buildAST = false;
rk's avatar
rk committed
	defaultErrorHandler = false;
}
{
private:
	// 
rk's avatar
rk committed
	// Converts the text of a token into a UnicodeString.
	// The token text is interpreted as UTF-8 and the result is passed through
	// UnicodeString::unescape() before being returned.
	const UnicodeString token_ref_to_ustring(antlr::RefToken& rstr) const { 
		return UnicodeString::fromUTF8(((antlr::Token*)rstr)->getText().c_str()).unescape();
	}
	// Converts a token to a UnicodeString and drops its first and last
	// character (the surrounding delimiters of a string token).
	// Texts shorter than three characters yield an empty string.
	const UnicodeString str_token_ref_to_ustring(antlr::RefToken& rstr) const {
		const UnicodeString full_text = token_ref_to_ustring(rstr);
		UnicodeString inner;

		// Only texts with at least one character between the delimiters
		// produce a non-empty result.
		if (full_text.length() >= 3) {
			full_text.extract(1, full_text.length() - 2, inner);
		}

		return inner;
	}
Paweł Kędzia's avatar
Paweł Kędzia committed
	// Returns the token text with one pair of enclosing grave accents (`)
	// removed. Text that is shorter than two characters, or that is not both
	// prefixed and suffixed with a grave accent, is returned unchanged.
	const std::string str_token_rem_grav(antlr::RefToken& rstr) const {
		const std::string text = token_ref_to_std_string(rstr);
		const size_t n = text.length();

		const bool is_quoted = n >= 2 && text[0] == '`' && text[n - 1] == '`';

		return is_quoted ? text.substr(1, n - 2) : text;
	}
	//
rk's avatar
rk committed
	// Returns the text of the token exactly as reported by getText(),
	// without any unquoting or unescaping.
	const std::string token_ref_to_std_string(antlr::RefToken& rstr) const { 
		return (((antlr::Token*)rstr)->getText());
	}
	//
	// Converts the token text to an int with atoi().
	// atoi() accepts an optional leading sign and yields 0 when the text
	// does not start with a number.
	int token_ref_to_int(antlr::RefToken& rstr) { 
		return atoi(((antlr::Token*)rstr)->getText().c_str());
	}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// "GLOBAL" RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Rule for parsing string set operator with scope. 
// Returns boost::shared_ptr<Operator<StrSet> >
parse_strset_operator
	returns [boost::shared_ptr<Operator<StrSet> > res]
	Variables vars;
	boost::shared_ptr<Function<StrSet> > body;
	: body = strset_operator [tagset, vars] {
			res.reset(new Operator<StrSet>(body, vars));
// ----------------------------------------------------------------------------
// Rule for parsing bool operator with scope. 
// Returns boost::shared_ptr<Operator<Bool> > 
	returns [boost::shared_ptr<Operator<Bool> > res]
	Variables vars;
	boost::shared_ptr<Function<Bool> > body;
	: body = bool_operator [tagset, vars] {
			res.reset(new Operator<Bool>(body, vars));

// ----------------------------------------------------------------------------
// Rule for parsing symbol set operator with scope.
// Returns boost::shared_ptr<Operator<TSet> >
parse_symset_operator
	returns [boost::shared_ptr<Operator<TSet> > res]
	Variables vars;
	boost::shared_ptr<Function<TSet> > body;
	: body = symset_operator [tagset, vars] {
			res.reset(new Operator<TSet>(body, vars));
ilor's avatar
ilor committed
// ----------------------------------------------------------------------------
// Rule for parsing position operator with scope.
// Returns boost::shared_ptr<Operator<Position> >
parse_position_operator
ilor's avatar
ilor committed
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<Operator<Position> > res]
	Variables vars;
	boost::shared_ptr<Function<Position> > body;
	: body = position_operator [tagset, vars] {
			res.reset(new Operator<Position>(body, vars));
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Rule for parsing single WCCL Rule
Adam Wardynski's avatar
Adam Wardynski committed
// Returns boost::shared_ptr<TagRule>
parse_single_rule
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<TagRule> rle]
{	
	// Variables object handed to the `rule` sub-rule while parsing.
	Variables vars;
}
	: rle = rule [tagset, vars]
;

// Rule for parsing rules section in the wccl file
// Returns boost::shared_ptr<RuleSequence>
parse_rule_sequence
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<RuleSequence> rule_seq]
{
	// Variables object passed down to the `rules` sub-rule.
	Variables scope;
}
	: rule_seq = rules [tagset, scope]
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Rule for parsing the match rules
parse_match_rule
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<MatchRule> ret_match]
{
	// Variables object passed down to match_rule_operator; declared here in
	// the same way as in the other parse_* entry rules.
	Variables vars;
}
	: ret_match = match_rule_operator[tagset, vars] 
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VALUES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Single or multiple (comma separated) elements in string set, may be:
//   'a' "a" [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"]
// Parsing strset literal and returning plain strset value.
// Returns boost::shared_ptr<StrSet> 
strset_literal 
	returns [boost::shared_ptr<StrSet> s_set]
	s_set.reset(new StrSet());
			s_set->insert(token_ref_to_ustring(s0)); 
Paweł Kędzia's avatar
Paweł Kędzia committed
		}
	| LBRACKET 
		(
			s1: STRING { 
				s_set->insert(token_ref_to_ustring(s1)); 
			}
	  	(
				COMMA s2: STRING { 
					s_set->insert(token_ref_to_ustring(s2)); 
				}
			)*
	  )? 
		RBRACKET
;
// String set value as constant string set:
// Returns boost::shared_ptr<Constant<StrSet> >
	returns [boost::shared_ptr<Constant<StrSet> > val]
	boost::shared_ptr<StrSet> set;
		val.reset(new Constant<StrSet>(*set));
// ----------------------------------------------------------------------------
// Element of sym set. This rule, inserts element into symbol set 
// with corresponding tagset. 
// WARNING! This rule can throw ParserException! Be careful!
symset_elem
	[const Corpus2::Tagset& tagset, boost::shared_ptr<TSet>& t_set]
	: s1: SYMBOL {
Paweł Kędzia's avatar
Paweł Kędzia committed
		try {
Paweł Kędzia's avatar
Paweł Kędzia committed
			t_set->insert_symbol(tagset, str_token_rem_grav(s1));
Paweł Kędzia's avatar
Paweł Kędzia committed
		}
		catch(Corpus2::TagParseError &e) {
			throw(ParserException(e.info()));
		}
// Symset literal. Symset element may be: 
//  a, `a ` (this is guaranteed by lexer rule - SYMBOL) or {a} {`a`} {a, b} 
//  {`a`, `b`} {a, `b`} {`a`, b}
// Parsing symset literal and returning plain symset value.
// Returns boost::shared_ptr<TSet>
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<TSet> t_set]
	t_set.reset(new TSet());
Paweł Kędzia's avatar
Paweł Kędzia committed
	| LCURLY 
		(
			symset_elem [tagset, t_set] (COMMA symset_elem [tagset, t_set])* 
		)?
// Symset value, as constant symbol set
// Returns boost::shared_ptr<Constant<TSet> >
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<Constant<TSet> > val]
	boost::shared_ptr<TSet> set;
	: set = symset_literal [tagset] {
		val.reset(new Constant<TSet>(*set));
// ----------------------------------------------------------------------------
// Bool literal. May be True or False. Parsing bool literal and returning 
// plain bool value.
// Returns boost::shared_ptr<Bool>
	returns [boost::shared_ptr<Bool> val]
	: "True"  { val.reset(new Bool(Bool(true ))); }
	| "False" { val.reset(new Bool(Bool(false))); }
// Bool value, as constant bool value
// Returns boost::shared_ptr<Constant<Bool> >
	returns [boost::shared_ptr<Constant<Bool> > val]
	boost::shared_ptr<Bool> bool_lit;
		val.reset(new Constant<Bool>(*bool_lit));
// ----------------------------------------------------------------------------
// Position literal may be:
// 	(+|-)?(0-9)+ or  begin or end or nowhere
// Parsing position literal and returning plain position value.
// returns boost::shared_ptr<Position>
	returns [boost::shared_ptr<Position> val]
Paweł Kędzia's avatar
Paweł Kędzia committed
{
	int i = 0;
}
	: i = number {
		val.reset(new Position(Position(i)));
		val.reset(new Position(Position(Position::Begin)));
		val.reset(new Position(Position(Position::End)));
		val.reset(new Position(Position(Position::Nowhere)));

// Position as constant position value
// Returns boost::shared_ptr<Constant<Position> >
	returns [boost::shared_ptr<Constant<Position> > val]
	boost::shared_ptr<Position> pos_lit;
		val.reset(new Constant<Position>(*pos_lit));
// ----------------------------------------------------------------------------
// Value used into match operator such as TOK[position] and ANN[position, name]
// Returns boost::shared_ptr<Match>
	returns [boost::shared_ptr<Match> val]
{
	boost::shared_ptr<MatchData> m;
}
// Constant match value
// Returns boost::shared_ptr<Constant<Match> >
match_value_const
	returns [boost::shared_ptr<Constant<Match> > val]
{
	boost::shared_ptr<Match> m;
}
		val.reset(new Constant<Match>(*m));
	}
;

// ----------------------------------------------------------------------------
// Value used into match operator such as TOK[position] and ANN[position, name]
// Returns boost::shared_ptr<MatchData>
	returns [boost::shared_ptr<MatchData> val]
	: val = token_match_literal
	| val = ann_match_literal
	| val = match_vector_literal
// Returns boost::shared_ptr<TokenMatch>
	returns [boost::shared_ptr<TokenMatch> val]
{
	boost::shared_ptr<Position> p;
}
	: "TOK" LBRACKET p = position_literal RBRACKET {
		val.reset(new TokenMatch(*p));
	}
;

// annotation match literal - ANN[position, name]
// Returns boost::shared_ptr<AnnotationMatch> 
	returns [boost::shared_ptr<AnnotationMatch> val]
{
	boost::shared_ptr<Position> p;
}
	: "ANN" LBRACKET p = position_literal COMMA channel : STRING RBRACKET {
		val.reset(new AnnotationMatch(*p, token_ref_to_std_string(channel)));
	}
;

// annotation match vector literal: MATCH() or MATCH(token, ann, MATCH())
// Returns boost::shared_ptr<MatchVector>
	returns [boost::shared_ptr<MatchVector> val]
{
	val.reset(new MatchVector());
}
	: "MATCH" LPAREN (match_vector_literal_item[val])? RPAREN
;

// Body of the MATCH value. It only adds vector items to the MatchVector
// Item may be single or multiple
match_vector_literal_item [boost::shared_ptr<MatchVector>& mvector]
{
	boost::shared_ptr<Match> m_val;
}
		mvector->append(m_val);
	} 
	(
		COMMA
// ----------------------------------------------------------------------------
// Number may be unsigned or signed: 1, +1, -1
number
	returns [int ret]
{
	// Start from zero; the matched alternative overwrites it.
	ret = 0;
}
	: signed_tok: SIGNED_INT {
		ret = token_ref_to_int(signed_tok);
	}
	| unsigned_tok: UNSIGNED_INT {
		ret = token_ref_to_int(unsigned_tok);
	}
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VARIABLES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Position: $Name
// Get position variable (however, before put into) from variables
// Returns boost::shared_ptr<VariableAccessor<Position> > 
position_variable_acc
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<Position> > pos_acc]
		vars.get_put<Position>(str_token_rem_grav(n));
		VariableAccessor<Position> acc = 
			vars.create_accessor<Position>(str_token_rem_grav(n));
		pos_acc.reset(new VariableAccessor<Position>(acc));

// VarGetter for Position variable. This rule wraps position_variable_acc.
// Returns boost::shared_ptr<VarGetter<Position> >
position_variable
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<Position> > op]
	boost::shared_ptr<VariableAccessor<Position> > pos_acc;
}
	: pos_acc = position_variable_acc [vars] {
		op.reset(new VarGetter<Position>(*pos_acc));
// ----------------------------------------------------------------------------
// This expression gets (however, before put into) variable of the type StrSet 
// from scope -- variables.
// Returns boost::shared_ptr<VariableAccessor<StrSet> >
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<StrSet> > strset_acc]
		vars.get_put<StrSet>(str_token_rem_grav(n));
		VariableAccessor<StrSet> acc = 
			vars.create_accessor<StrSet>(str_token_rem_grav(n));
		strset_acc.reset(new VariableAccessor<StrSet>(acc));

// Vargetter for StrSet variable. This rule wrapped strset_variable_acc.
// Returns boost::shared_ptr<VarGetter<StrSet> > 
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<StrSet> > op]
	boost::shared_ptr<VariableAccessor<StrSet> > strset_acc;
	: strset_acc = strset_variable_acc [vars] {
		op.reset(new VarGetter<StrSet>(*strset_acc));
// ----------------------------------------------------------------------------
// Get symset variable (however, before put into) from variables
// Returns boost::shared_ptr<VariableAccessor<TSet> >
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<TSet> > symset_acc]
			vars.get_put<TSet>(str_token_rem_grav(n)); 	
			VariableAccessor<TSet> acc = 
				vars.create_accessor<TSet>(str_token_rem_grav(n));
			symset_acc.reset(new VariableAccessor<TSet>(acc));

// Vargetter for symbol set variable. This rule wrapped symset_variable_acc
// Returns boost::shared_ptr<VarGetter<TSet> > 
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<TSet> > op]
	boost::shared_ptr<VariableAccessor<TSet> > symset_acc;
	: symset_acc = symset_variable_acc [vars] {
			op.reset(new VarGetter<TSet>(*symset_acc));
// ----------------------------------------------------------------------------
// Get bool variable (however, before put into) from variables
// Returns boost::shared_ptr<VariableAccessor<Bool> >
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<Bool> > bool_acc]
			vars.get_put<Bool>(str_token_rem_grav(n));
			VariableAccessor<Bool> acc = 
				vars.create_accessor<Bool>(str_token_rem_grav(n));
			bool_acc.reset(new VariableAccessor<Bool>(acc));

// Vargetter for bool variable. It is only wrapper for bool_variable_acc
// Returns boost::shared_ptr<VarGetter<Bool> >
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<Bool> > op]
	boost::shared_ptr<VariableAccessor<Bool> > bool_acc;
	: bool_acc = bool_variable_acc [vars] {
			op.reset(new VarGetter<Bool>(*bool_acc));
rk's avatar
rk committed

// ----------------------------------------------------------------------------
// Match: $m:name
// Get the match vector variable from variables (after first putting it into variables)
// Returns boost::shared_ptr<VariableAccessor<Match> >
match_vector_variable_acc
	[Variables& vars]
	returns [boost::shared_ptr<VariableAccessor<Match> > mvv_acc]
	: MATCH_VECTOR_PREFIX name_tok: SYMBOL {
			// Variable name with any enclosing grave accents stripped.
			const std::string var_name = str_token_rem_grav(name_tok);

			// get_put is called before create_accessor.
			// NOTE(review): presumably this makes sure the variable exists
			// before an accessor to it is created - confirm against Variables.
			vars.get_put<Match>(var_name);

			mvv_acc.reset(
				new VariableAccessor<Match>(vars.create_accessor<Match>(var_name)));
	}
;

// Vargetter for the match vector variable. Wrapper for match_vector_variable_acc
// Returns boost::shared_ptr<VarGetter<Match> >
match_vector_variable
	[Variables& vars]
	returns [boost::shared_ptr<VarGetter<Match> > mvv]
{
	// Accessor produced by the wrapped rule.
	boost::shared_ptr<VariableAccessor<Match> > accessor;
}
	: accessor = match_vector_variable_acc [vars] {
		// Wrap the accessor in a VarGetter so it can be used as a function.
		mvv.reset(new VarGetter<Match>(*accessor));
	}
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
rk's avatar
rk committed
// OPERATORS
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Symbol set (tagset) operators
// Returns boost::shared_ptr<Function<TSet> >
///////////////////////////////////////////////////////////////////////////////
symset_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > ret]
	: ret = symset_condition [tagset, vars]
	// A SYMBOL directly followed by '[' is a get-symbol operator; the
	// semantic predicate distinguishes it from the other SYMBOL-led forms.
	| {LA(1)==SYMBOL && LA(2)==LBRACKET}? (ret = symset_getsymbol [tagset, vars])
	| ret = symset_var_val [tagset, vars]
	| ret = symset_class   [tagset, vars]
	| ret = symset_range   [tagset, vars]
	| ret = symset_catflt  [tagset, vars]
	| ret = symset_agrflt  [tagset, vars]
	//
	| LPAREN ret = symset_operator [tagset, vars] RPAREN
;
// ----------------------------------------------------------------------------
ilor's avatar
ilor committed
// A wrapper for symset variable and symset value.
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > op]
	: op = symset_variable [vars]
	| op = symset_value    [tagset]
// ----------------------------------------------------------------------------
// Condition of the symset value:
// 	if (Bool, TSet, TSet)
// 	? TSet ? Bool : {}
symset_condition
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > op]
	boost::shared_ptr<Function<Bool> > test;
	boost::shared_ptr<Function<TSet> > p_true, p_false;
	: "if" LPAREN test  = bool_operator [tagset, vars] COMMA 
							p_true  = symset_operator  [tagset, vars] 
							(COMMA p_false = symset_operator [tagset, vars])? 
				op.reset(new Conditional<TSet>(test, p_true, p_false));
				op.reset(new Conditional<TSet>(test, p_true));
			(p_true = symset_operator [tagset, vars])
			(test = bool_operator [tagset, vars]) {
			op.reset(new Conditional<TSet>(test, p_true));
// ----------------------------------------------------------------------------
// GetSymbol operator may be cas, m1, f, sg...
// WARNING! This rule can throw ParserException! Be careful!
symset_getsymbol
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > op]
{
	boost::shared_ptr<Wccl::Function<Position> > pos;
	Corpus2::Tag parsed_tag;
}
	: sym: SYMBOL LBRACKET pos = position_operator [tagset, vars] RBRACKET {
			// Resolve the symbol against the tagset; a tagset parse failure
			// is re-thrown as a ParserException carrying the same info text.
			try {
				parsed_tag = tagset.parse_symbol(str_token_rem_grav(sym));
			}
			catch(Corpus2::TagParseError &e) {
				throw(ParserException(e.info()));
			}

			op.reset(new Wccl::GetSymbols(parsed_tag, pos));
		}
;

// ----------------------------------------------------------------------------
// Class operator.
symset_class
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > ret]
{
	// Position expression found between the brackets.
	boost::shared_ptr<Function<Position> > at;
}
	: "class" LBRACKET at = position_operator [tagset, vars] RBRACKET {
		ret.reset(new GetWordClass(at));
	}
;

Paweł Kędzia's avatar
Paweł Kędzia committed
// ----------------------------------------------------------------------------
// Range operator: range(class, begin, end) or range({...}, begin, end)
Paweł Kędzia's avatar
Paweł Kędzia committed
symset_range
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<TSet> > ret]
{
	// Set only when the first argument is a symset literal.
	boost::shared_ptr<TSet> tset;
	// Begin and end positions of the range.
	boost::shared_ptr<Function<Position> > p1, p2;
}
	: "range" LPAREN
			(tset = symset_literal [tagset] | tag_class: "class") COMMA
			p1  = position_operator [tagset, vars] COMMA 
			p2  = position_operator [tagset, vars] 
		RPAREN {
			if (tag_class) {
				// The "class" keyword was used instead of a symset literal.
				// NOTE(review): Corpus2::Tag(-1) presumably acts as the mask
				// for the word class - confirm against GetSymbolsInRange.
				ret.reset(new GetSymbolsInRange(Corpus2::Tag(-1), p1, p2));
			}
			else {
		 		ret.reset(new GetSymbolsInRange(tset->get_value(), p1, p2));
			}
		}
;
// ----------------------------------------------------------------------------
// Catflt operator
symset_catflt
  [const Corpus2::Tagset& tagset, Variables& vars]
  returns [boost::shared_ptr<Function<TSet> > ret]
{
  // Arguments in the order they appear: catflt(position, selector, mask)
  boost::shared_ptr<Function<Position> > pos;
  boost::shared_ptr<Function<TSet> > sel_set, mask_set;
}
  : "catflt" LPAREN
      pos      = position_operator [tagset, vars] COMMA
      sel_set  = symset_operator   [tagset, vars] COMMA
      mask_set = symset_operator   [tagset, vars]
    RPAREN {
      ret.reset(new CatFilter(pos, sel_set, mask_set));
    }
;

Paweł Kędzia's avatar
Paweł Kędzia committed
// ----------------------------------------------------------------------------
// Agrflt operator
symset_agrflt
  [const Corpus2::Tagset& tagset, Variables& vars]
  returns [boost::shared_ptr<Function<TSet> > ret]
{
  // Arguments in the order they appear:
  // agrflt(left position, right position, attributes, mask)
  boost::shared_ptr<Function<Position> > left_pos, right_pos;
  boost::shared_ptr<Function<TSet> > attr_set, mask_set;
}
  : "agrflt" LPAREN
      left_pos  = position_operator [tagset, vars] COMMA
      right_pos = position_operator [tagset, vars] COMMA
      attr_set  = symset_operator   [tagset, vars] COMMA
      mask_set  = symset_operator   [tagset, vars]
    RPAREN {
      ret.reset(new AgrFilter(left_pos, right_pos, attr_set, mask_set, tagset));
    }
;


///////////////////////////////////////////////////////////////////////////////
// Position operator
// Returns boost::shared_ptr<Function<Position> >
///////////////////////////////////////////////////////////////////////////////
position_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Position> > ret]
	:
	// Base position expression.
	( ret = position_var_val     [vars]
	| ret = position_condition   [tagset, vars]
	| ret = position_first_token [tagset, vars]
	| ret = position_last_token  [tagset, vars]
	| LPAREN ret = position_operator [tagset, vars] RPAREN
	) 
	( // if there is SIGNED_INT after the position, it is actually a relative position
		i: SIGNED_INT {
			// Wrap the base position together with the parsed offset.
			ret.reset(new RelativePosition(ret, token_ref_to_int(i)));
		}
	)?
;
// ----------------------------------------------------------------------------
// Wrapper for position variable and position value
// Matches either a position value or a position variable and returns it
// as a Function<Position>. Only the variable alternative needs `vars`.
position_var_val
	[Variables& vars]
	returns [boost::shared_ptr<Function<Position> > ret]
	// constant position value
	: ret = position_value
	// position variable looked up through vars
	| ret = position_variable [vars]
;

// ----------------------------------------------------------------------------
// Condition of the position value
// if (Bool, Position, Position)
// ? Position ? Bool : 0
position_condition
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Position> > op]
{
	// Condition being tested.
	boost::shared_ptr<Function<Bool> > test;
	// Results for the true / false outcome; p_false stays empty when the
	// optional third argument is not given.
	boost::shared_ptr<Function<Position> > p_true, p_false;
}
	// Form: if (Bool, Position [, Position])
	: "if" LPAREN test  = bool_operator [tagset, vars] COMMA 
							p_true  = position_operator [tagset, vars] 
							(COMMA p_false = position_operator [tagset, vars])? 
	RPAREN {
		if (p_false) {
			op.reset(new Conditional<Position>(test, p_true, p_false));
		}
		else {
			// No explicit false branch: use the two-argument Conditional.
			op.reset(new Conditional<Position>(test, p_true));
		}
	}
	// Form: ? Position ? Bool
	| Q_MARK 
			p_true = position_operator [tagset, vars]
		Q_MARK 
			test = bool_operator [tagset, vars] {
			op.reset(new Conditional<Position>(test, p_true));
		}
;
// ----------------------------------------------------------------------------
// Taking position of a first token in a match
// first(Match)
position_first_token [const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Position> > ret]
{
	// Match expression given as the argument of first(...).
	boost::shared_ptr<Function<Match> > match_arg;
}
	: "first" LPAREN match_arg = match_fit [tagset, vars] RPAREN {
		ret.reset(new FirstToken(match_arg));
	}
;

// ----------------------------------------------------------------------------
// Taking position of the last token in a match
// last(Match)
position_last_token [const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Position> > ret]
{
	// Match expression given as the argument of last(...).
	boost::shared_ptr<Function<Match> > match_arg;
}
	: "last" LPAREN match_arg = match_fit [tagset, vars] RPAREN {
		ret.reset(new LastToken(match_arg));
	}
;

///////////////////////////////////////////////////////////////////////////////
// String operator
// Returns boost::shared_ptr<Function<StrSet> >
///////////////////////////////////////////////////////////////////////////////
strset_operator [const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
	// Each alternative delegates to one string-set sub-rule.
	: ret = strset_orth      [tagset, vars] 
	| ret = strset_base      [tagset, vars]
	| ret = strset_lower     [tagset, vars] 
	| ret = strset_upper     [tagset, vars]
	| ret = strset_affix     [tagset, vars] 
	| ret = strset_var_val   [tagset, vars] 
	| ret = strset_condition [tagset, vars]
	// parenthesised string-set expression
	| LPAREN ret = strset_operator [tagset, vars] RPAREN
;
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
	boost::shared_ptr<Function<Position> > pos;
	: "orth" LBRACKET pos = position_operator [tagset, vars] RBRACKET { 
			ret.reset(new GetOrth(pos));
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
	boost::shared_ptr<Function<Position> > pos;
	: "base" LBRACKET pos = position_operator [tagset, vars] RBRACKET { 
		ret.reset(new GetLemmas(pos));
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
	boost::shared_ptr<Function<StrSet> > o_ret;
	: "lower" LPAREN o_ret = strset_operator [tagset, vars] RPAREN {
		ret.reset(new ToLower(o_ret));
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
	boost::shared_ptr<Function<StrSet> > o_ret;
	: "upper" LPAREN o_ret = strset_operator [tagset, vars] RPAREN {
		ret.reset(new ToUpper(o_ret));
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > ret]
	int offset = 0;
	boost::shared_ptr<Function<StrSet> > o_ret;
			o_ret = strset_operator [tagset, vars] COMMA offset = number 
		RPAREN {
			ret.reset(new Affix(o_ret, offset));
		}
// ----------------------------------------------------------------------------
// Wrapper for strset value and strset variable
strset_var_val
	[const Corpus2::Tagset& /*tagset*/, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > op]
	// The tagset argument is unused here (its name is commented out); only
	// the variable alternative needs `vars`.
	: op = strset_value 
	| op = strset_variable [vars]
;
// ----------------------------------------------------------------------------
// if (Bool, StrSet, StrSet)
// ? StrSet ? Bool : []
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<StrSet> > op]
	boost::shared_ptr<Function<Bool> > test;
	boost::shared_ptr<Function<StrSet> > p_true, p_false;
	: "if" LPAREN test  = bool_operator [tagset, vars] COMMA 
							p_true  = strset_operator   [tagset, vars] 
							(COMMA p_false = strset_operator [tagset, vars])? 
			op.reset(new Conditional<StrSet>(test, p_true, p_false));
			op.reset(new Conditional<StrSet>(test, p_true));
			p_true = strset_operator [tagset, vars]
			test = bool_operator [tagset, vars] {
			op.reset(new Conditional<StrSet>(test, p_true));
///////////////////////////////////////////////////////////////////////////////
// Bool operator
// Returns boost::shared_ptr<Function<Bool> >
///////////////////////////////////////////////////////////////////////////////
// Top-level dispatch for every predicate: each alternative delegates to a
// more specific rule and forwards its result unchanged.
bool_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
	: ret = bool_and        [tagset, vars]  
	| ret = bool_or         [tagset, vars]  
	| ret = bool_nor        [tagset, vars]  
	| ret = bool_var_val    [tagset, vars]	
	| ret = bool_regex      [tagset, vars]
	| ret = bool_inout      [tagset, vars]
	| ret = bool_condition  [tagset, vars]
	// setvar:
	| ret = setvar_operator [tagset, vars]
	// equal/in/inter:
	| ret = equal_operator  [tagset, vars]
	| ret = in_operator     [tagset, vars]
	| ret = inter_operator  [tagset, vars]
	// iterations
	| ret = bool_iteration  [tagset, vars]
	// agreement
	| ret = bool_agreement  [tagset, vars]
	// phrase
	| ret = bool_phrase     [tagset, vars]
	// annotation
	| ret = bool_ann        [tagset, vars]
	| ret = bool_annsub     [tagset, vars]
	// debug operators
	| ret = debug_print_operator [tagset, vars]
	// predicate wrapped in parentheses
	| LPAREN ret = bool_operator [tagset, vars] RPAREN
;
// ----------------------------------------------------------------------------
// Comma-separated predicates (bool operators).
// Returns a shared vector with the parsed predicates, in the order they
// appear in the input. At least one predicate is required.
bool_operator_comma_sep
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns 
		[boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v]
{
	boost::shared_ptr<Function<Bool> > pred;

	// allocate the result up front so the actions below can append to it
	ret_v.reset(
		new std::vector<boost::shared_ptr<Function<Bool> > >
	);
}
	: pred = bool_operator [tagset, vars] { 
		ret_v->push_back(pred);
	} 
	(
		COMMA pred = bool_operator [tagset, vars] {
			ret_v->push_back(pred);
		}
	)*
;
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v;
	: "and" LPAREN ret_v = bool_operator_comma_sep [tagset, vars] RPAREN {
			op.reset(new And(ret_v));
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v;
	: "or" LPAREN ret_v = bool_operator_comma_sep [tagset, vars] RPAREN {
			op.reset(new Or(ret_v));
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v;
	: "not" LPAREN ret_v = bool_operator_comma_sep [tagset, vars] RPAREN {
			op.reset(new Nor(ret_v));
// ----------------------------------------------------------------------------
// Wrapper for a bool value and a bool variable.
// Returns boost::shared_ptr<Function<Bool> >
bool_var_val
	[const Corpus2::Tagset& /*tagset*/, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	: op = bool_value 
	| op = bool_variable [vars]
;
// ----------------------------------------------------------------------------
// Regex predicate: regex(strset-expression, "pattern")
// Builds a Regex from the parsed string-set expression and the STRING token.
bool_regex
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<StrSet> > subject;
}
	: "regex" LPAREN
			subject = strset_operator [tagset, vars]
			COMMA
			pattern : STRING
		RPAREN
		{
			op.reset(new Regex(subject, token_ref_to_ustring(pattern)));
		}
;

// ----------------------------------------------------------------------------
// Inside/outside predicates: inside(position) and outside(position)
// Builds IsInside or IsOutside around the parsed position expression.
bool_inout
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<Position> > pos_expr;
}
	: "inside" LPAREN
			pos_expr = position_operator [tagset, vars]
		RPAREN
		{
			op.reset(new IsInside(pos_expr));
		}
	| "outside" LPAREN
			pos_expr = position_operator [tagset, vars]
		RPAREN
		{
			op.reset(new IsOutside(pos_expr));
		}
;

// ----------------------------------------------------------------------------
// Conditional predicate. Two surface forms are accepted:
// 	if (test, then [, else])   - the else branch is optional
// 	? then ? test              - short form without an else branch
bool_condition
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<Bool> > cond;
	boost::shared_ptr<Function<Bool> > when_true;
	boost::shared_ptr<Function<Bool> > when_false;
}
	: "if" LPAREN
			cond      = bool_operator [tagset, vars] COMMA
			when_true = bool_operator [tagset, vars]
			(COMMA when_false = bool_operator [tagset, vars])?
		RPAREN
		{
			// when_false stays empty if the optional third argument is absent
			if (!when_false) {
				op.reset(new Conditional<Bool>(cond, when_true));
			} else {
				op.reset(new Conditional<Bool>(cond, when_true, when_false));
			}
		}
	| Q_MARK when_true = bool_operator [tagset, vars]
	  Q_MARK cond      = bool_operator [tagset, vars]
		{
			op.reset(new Conditional<Bool>(cond, when_true));
		}
;

// ----------------------------------------------------------------------------
// Equal operator
equal_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<TSet> > t1, t2;
	boost::shared_ptr<Function<Bool> > b1, b2;
	boost::shared_ptr<Function<StrSet>  > s1, s2;
	boost::shared_ptr<Function<Position> > p1, p2;
		(position_operator [tagset, vars]) =>
			p1 = position_operator [tagset, vars] COMMA 
			p2 = position_operator [tagset, vars] {
				op.reset(new Equals<Position>(p1, p2));
			t1 = symset_operator [tagset, vars] COMMA  
			t2 = symset_operator [tagset, vars] {
				op.reset(new Equals<TSet>(t1, t2));
			}
		)
	|
		(strset_operator [tagset, vars]) =>
			s1 = strset_operator [tagset, vars] COMMA  
			s2 = strset_operator [tagset, vars] {
				op.reset(new Equals<StrSet>(s1, s2));
			}
		)
	|
		(
			b1 = bool_operator [tagset, vars] COMMA
			b2 = bool_operator [tagset, vars] {
				op.reset(new Equals<Bool>(b1, b2));
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<TSet> > t1, t2;
	boost::shared_ptr<Function<StrSet> > s1, s2;
		(symset_operator [tagset, vars]) =>
			t1 = symset_operator [tagset, vars] COMMA 
			t2 = symset_operator [tagset, vars] {
				op.reset(new IsSubsetOf<TSet>(t1, t2));
			s1 = strset_operator [tagset, vars] COMMA
			s2 = strset_operator [tagset, vars] {
				op.reset(new IsSubsetOf<StrSet>(s1, s2));
;
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<TSet> > t1, t2;
	boost::shared_ptr<Function<StrSet> > s1, s2;
		(symset_operator [tagset, vars]) =>
			t1 = symset_operator [tagset, vars] COMMA  
			t2 = symset_operator [tagset, vars]  {
				op.reset(new Intersects<TSet>(t1, t2));
			s1 = strset_operator [tagset, vars] COMMA  
			s2 = strset_operator [tagset, vars]  {
				op.reset(new Intersects<StrSet>(s1, s2));
// ----------------------------------------------------------------------------
// Annotation operator:
// 	ann(match, "channel") or ann(match_from, match_to, "channel")
// NOTE: unfinished. Both branches of the action are TODO stubs, so `op` is
// never assigned by this rule, and neither `chan_name` nor the `name` token
// is used yet.
bool_ann
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr< Function<Match> > match_from;
	boost::shared_ptr< Function<Match> > match_to;
	std::string chan_name;
}
	: "ann" LPAREN
			match_from = match_fit [tagset, vars] COMMA
			(match_to  = match_fit [tagset, vars] COMMA)?
			name : STRING
		RPAREN {
			// match_to is non-empty only when the optional second match was parsed
			if (match_to) {
				// TODO
				// op.reset(new Ann(match_from, match_to, chan_name));
			} else {
				// TODO
				// op.reset(new Ann(match_from, chan_name));
			}
		}
;


ilor's avatar
ilor committed
// ----------------------------------------------------------------------------
// Annotation-sub operator:
// 	annsub(match, "channel") or annsub(match_from, match_to, "channel")
// The channel name is taken from the trailing STRING token.
bool_annsub
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr< Function<Match> > match_from;
	boost::shared_ptr< Function<Match> > match_to;
}
	: "annsub" LPAREN
		match_from = match_fit [tagset, vars] COMMA
		(match_to = match_fit [tagset, vars] COMMA)?
		name : STRING
		RPAREN
		{
			// Read the channel from the parsed token; a default-constructed
			// string here would make every annsub() refer to an empty channel.
			const std::string chan_name = token_ref_to_std_string(name);
			// match_to is non-empty only when the optional second match was parsed
			if (match_to) {
				op.reset(new AnnSub(match_from, match_to, chan_name));
			} else {
				op.reset(new AnnSub(match_from, chan_name));
			}
		}
;

// ----------------------------------------------------------------------------
// Debug printing: debug(expression)
// The argument may be a position, symset, strset, bool or match expression.
// Syntactic predicates try these in that order and the first one that
// parses is wrapped in a DebugPrint.
debug_print_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	boost::shared_ptr<FunctionBase> v;
}
	: "debug" LPAREN
	(
		(position_operator [tagset, vars]) =>
		(
			v = position_operator [tagset, vars] {
				ret.reset(new DebugPrint(v));
			}
		)
	|
		(symset_operator [tagset, vars]) =>
		(
			v = symset_operator [tagset, vars] {
				ret.reset(new DebugPrint(v));
			}
		)
	|
		(strset_operator [tagset, vars]) =>
		(
			v = strset_operator [tagset, vars] {
				ret.reset(new DebugPrint(v));
			}
		)
	|
		(bool_operator [tagset, vars]) =>
		(
			v = bool_operator [tagset, vars] {
				ret.reset(new DebugPrint(v));
			}
		)
	|
		(
			v = match_fit [tagset, vars] {
				ret.reset(new DebugPrint(v));
			}
		)
	)
	RPAREN
;
// ----------------------------------------------------------------------------
// Iteration predicates. Each takes two position expressions, a position
// variable accessor and a predicate; "atleast" additionally takes a number:
// 	only(pos, pos, pos_var, pred)
// 	atleast(pos, pos, pos_var, pred, number)
// 	llook(pos, pos, pos_var, pred)
// 	rlook(pos, pos, pos_var, pred)
bool_iteration
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	int required = 0;
	boost::shared_ptr<Function<Bool> > pred;
	boost::shared_ptr<Function<Position> > left;
	boost::shared_ptr<Function<Position> > right;
	boost::shared_ptr<VariableAccessor<Position> > iter_var;
}
	: "only" LPAREN
			left     = position_operator [tagset, vars] COMMA
			right    = position_operator [tagset, vars] COMMA
			iter_var = position_variable_acc [vars] COMMA
			pred     = bool_operator [tagset, vars]
		RPAREN
		{
			ret.reset(new Only(left, right, *iter_var, pred));
		}
	| "atleast" LPAREN
			left     = position_operator [tagset, vars] COMMA
			right    = position_operator [tagset, vars] COMMA
			iter_var = position_variable_acc [vars] COMMA
			pred     = bool_operator [tagset, vars] COMMA
			required = number
		RPAREN
		{
			ret.reset(new AtLeast(left, right, *iter_var, pred, required));
		}
	| "llook" LPAREN
			// the first argument is stored as the right bound and the second
			// as the left bound; the constructor still receives (left, right)
			right    = position_operator [tagset, vars] COMMA
			left     = position_operator [tagset, vars] COMMA
			iter_var = position_variable_acc [vars] COMMA
			pred     = bool_operator [tagset, vars]
		RPAREN
		{
			ret.reset(new LeftLook(left, right, *iter_var, pred));
		}
	| "rlook" LPAREN
			left     = position_operator [tagset, vars] COMMA
			right    = position_operator [tagset, vars] COMMA
			iter_var = position_variable_acc [vars] COMMA
			pred     = bool_operator [tagset, vars]
		RPAREN
		{
			ret.reset(new RightLook(left, right, *iter_var, pred));
		}
;

// ----------------------------------------------------------------------------
// Agreement predicates: agr, agrpp and wagr.
// All three take (position, position, symset) and differ only in the class
// they build: StrongAgreement, PointAgreement and WeakAgreement respectively.
bool_agreement
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	boost::shared_ptr<Function<TSet> > attribs;
	boost::shared_ptr<Function<Position> > left;
	boost::shared_ptr<Function<Position> > right;
}
	: "agr" LPAREN
			left    = position_operator [tagset, vars] COMMA
			right   = position_operator [tagset, vars] COMMA
			attribs = symset_operator [tagset, vars]
		RPAREN
		{
			ret.reset(new StrongAgreement(left, right, attribs, tagset));
		}
	| "agrpp" LPAREN
			left    = position_operator [tagset, vars] COMMA
			right   = position_operator [tagset, vars] COMMA
			attribs = symset_operator [tagset, vars]
		RPAREN
		{
			ret.reset(new PointAgreement(left, right, attribs, tagset));
		}
	| "wagr" LPAREN
			left    = position_operator [tagset, vars] COMMA
			right   = position_operator [tagset, vars] COMMA
			attribs = symset_operator [tagset, vars]
		RPAREN
		{
			ret.reset(new WeakAgreement(left, right, attribs, tagset));
		}
;

Paweł Kędzia's avatar
Paweł Kędzia committed
// ----------------------------------------------------------------------------
// Parse operator on L1 level.
// Pure dispatch: forwards the result of either the phrase annotation rule
// or the phrase iteration rule.
bool_phrase
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
	: ret = bool_phrase_annotation [tagset, vars]
	| ret = bool_phrase_iteration  [tagset, vars]
;

// ----------------------------------------------------------------------------
// Annotation operator: phrase, phrase_beg, phrase_end, phrase_whole, phrase_pp
// 	phrase / phrase_beg / phrase_end take (position, STRING)
// 	phrase_whole / phrase_pp         take (position, position, STRING)
// NOTE: unfinished. Every action is a TODO stub, so the syntax is accepted
// but `ret` is never assigned by this rule.
bool_phrase_annotation
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	boost::shared_ptr<Function<Position> > lpos, rpos;
}
	: "phrase" LPAREN 
			lpos = position_operator [tagset, vars] COMMA n1: STRING 
		RPAREN {
			// TODO
		}
	| "phrase_beg" LPAREN 
			lpos = position_operator [tagset, vars] COMMA n2: STRING 
		RPAREN {
			// TODO
		}
	| "phrase_end" LPAREN 
			lpos = position_operator [tagset, vars] COMMA n3: STRING 
		RPAREN {
			// TODO
		}
	| "phrase_whole" LPAREN 
			lpos = position_operator [tagset, vars] COMMA 
			rpos = position_operator [tagset, vars] COMMA n4: STRING 
		RPAREN {
			// TODO
		}
	| "phrase_pp" LPAREN 
			lpos = position_operator [tagset, vars] COMMA 
			rpos = position_operator [tagset, vars] COMMA n5: STRING 
		RPAREN {
			// TODO
		}
;

// ----------------------------------------------------------------------------
// Phrase iteration operator: lphrase, rphrase
// Both take (position expression, position variable, STRING).
// NOTE: unfinished. Both actions are TODO stubs, so the syntax is accepted
// but `ret` is never assigned by this rule.
bool_phrase_iteration
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	boost::shared_ptr<Function<Position> > position;
	boost::shared_ptr<VarGetter<Position> > var_position;
}
	: "lphrase" LPAREN
			position     = position_operator [tagset, vars] COMMA
			var_position = position_variable [vars]         COMMA
			n1: STRING
		RPAREN {
			// TODO
		}
	| "rphrase" LPAREN 
			position     = position_operator [tagset, vars] COMMA
			var_position = position_variable [vars]         COMMA
			n2: STRING
		RPAREN {
			// TODO
		}
;


// ----------------------------------------------------------------------------
// Setvar operator: setvar(variable, expression)
// Consumes the "setvar" keyword and the surrounding parentheses, then
// delegates the (variable, expression) pair to the type-specific rule for
// position, bool, strset or symset variables.
// Returns boost::shared_ptr<Function<Bool> >
// ----------------------------------------------------------------------------
setvar_operator 
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > ret]
	: "setvar" LPAREN
	  (
		  ret = position_setvar [tagset, vars]
		| ret = bool_setvar     [tagset, vars]
		| ret = strset_setvar   [tagset, vars]
		| ret = symset_setvar   [tagset, vars]
	  )
	  RPAREN
;

// ----------------------------------------------------------------------------
// Setvar for position: parses "position_variable , position_expression"
// (the enclosing "setvar(" ... ")" is consumed by setvar_operator) and
// builds a VarSetter<Position> from the two parts.
position_setvar 
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<Position> > ret_op;
	boost::shared_ptr<VariableAccessor<Position> > ret_acc;
}
	:	ret_acc = position_variable_acc [vars]
		COMMA
		ret_op  = position_operator [tagset, vars] {
			op.reset(new VarSetter<Position>(*ret_acc, ret_op));
		}
;
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<Bool> > ret_op;
	boost::shared_ptr<VariableAccessor<Bool> > ret_acc;
	:	ret_acc = bool_variable_acc [vars]
		COMMA
		ret_op  = bool_operator [tagset, vars] {
			op.reset(new VarSetter<Bool>(*ret_acc, ret_op));
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<StrSet> > ret_op;
	boost::shared_ptr<VariableAccessor<StrSet> > ret_acc;
	: ret_acc = strset_variable_acc [vars]
	  COMMA 
		ret_op  = strset_operator [tagset, vars] {
			op.reset(new VarSetter<StrSet>(*ret_acc, ret_op));
// ----------------------------------------------------------------------------
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<TSet> > ret_op;
	boost::shared_ptr<VariableAccessor<TSet> > ret_acc;
	: ret_acc = symset_variable_acc [vars]
	  COMMA 
	  ret_op  = symset_operator [tagset, vars] {
			op.reset(new VarSetter<TSet>(*ret_acc, ret_op));
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Tagging actions and rules:
// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------
// Single action: select, delete, relabel, unify, mark or unmark.
// Pure dispatch: forwards the result of the matching action_* rule.
action
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<TagAction> act]
	: act = action_select  [tagset, vars]
	| act = action_delete  [tagset, vars]
	| act = action_relabel [tagset, vars]
	| act = action_unify [tagset, vars]
	| act = action_mark [tagset, vars]
	| act = action_unmark [tagset, vars]
;

// Action sequence - the actions are separated with commas:
// 	select(...), select(...), delete(...)
// Returns a shared vector with the parsed actions in input order.
action_sequence
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<std::vector<boost::shared_ptr<TagAction> > > v_act]
{
	boost::shared_ptr<TagAction> act;
	// allocate the result up front so the actions below can append to it
	v_act.reset(new std::vector<boost::shared_ptr<TagAction> >);
}
	: act = action[tagset, vars] {
		v_act->push_back(act);
	} 
	(
		COMMA act = action[tagset, vars] {
			v_act->push_back(act);
		}
	)*
;

// ----------------------------------------------------------------------------
// Single rule:
// 	rule(NAME, ACTIONS) or rule(NAME, COND, ACTIONS)
rule
	[const Corpus2::Tagset& tagset, Variables& vars]
Adam Wardynski's avatar
Adam Wardynski committed
	returns [boost::shared_ptr<TagRule> rle]
{
	boost::shared_ptr<Function<Bool> > condition;
	boost::shared_ptr<std::vector<boost::shared_ptr<TagAction> > > actions;
	: "rule" LPAREN name: STRING COMMA 
				(condition = bool_operator [tagset, vars] COMMA)?
				actions = action_sequence [tagset, vars] 
		RPAREN {
			if (condition) {
				rle.reset(
Adam Wardynski's avatar
Adam Wardynski committed
					new TagRule(token_ref_to_std_string(name), vars, actions, condition));
Adam Wardynski's avatar
Adam Wardynski committed
					new TagRule(token_ref_to_std_string(name), vars, actions));
	: "rule" LPAREN name: STRING COMMA
	(
		(bool_operator[tagset, vars]) =>
		(
			condition = bool_operator [tagset, vars] COMMA
			actions = action_sequence [tagset, vars] {
				// rule(NAME, COND, ACTIONS)
				rle.reset(
Adam Wardynski's avatar
Adam Wardynski committed
					new TagRule(token_ref_to_std_string(name), vars, actions, condition));
			}
		)
	|
		(
			actions = action_sequence [tagset, vars] {
				// rule(NAME, ACTIONS)
Adam Wardynski's avatar
Adam Wardynski committed
				rle.reset(new TagRule(token_ref_to_std_string(name), vars, actions));
;

// Rule sequence: one or more rules separated by commas.
// Each parsed rule is copied into the returned RuleSequence.
rule_sequence
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<RuleSequence> rule_seq]
{
	// FIXME: shouldn't this perhaps be a shared_ptr here?
	boost::shared_ptr<TagRule> rle;
	rule_seq.reset(new RuleSequence());
}
	: rle = rule [tagset, vars] {
		rule_seq->push_back(*rle);
	}
	(
		COMMA rle = rule [tagset, vars] {
			rule_seq->push_back(*rle);
		}
	)*
;

// Temporary name.
// Wrapper for the rules section of a wccl file: rules(rule, rule, ...)
// The sequence built by rule_sequence is returned as-is.
rules
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<RuleSequence> rule_seq]
	: "rules" LPAREN
			rule_seq = rule_sequence [tagset, vars]
		RPAREN
;

// ----------------------------------------------------------------------------
// Select action:
// 	select(position, predicate) or select(predicate)
// A syntactic predicate picks the two-argument form when the first argument
// parses as a position expression.
action_select
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Select> action]
{
	boost::shared_ptr<Function<Position> > at_pos;
	boost::shared_ptr<Function<Bool> > pred;
}
	: "select" LPAREN
	(
		(position_operator [tagset, vars]) =>
		(
			at_pos = position_operator [tagset, vars] COMMA
			pred   = bool_operator [tagset, vars]
			{
				// select(position, predicate)
				action.reset(new Select(pred, at_pos));
			}
		)
	|
		(
			pred = bool_operator [tagset, vars]
			{
				// select(predicate)
				action.reset(new Select(pred));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// Delete action:
//	delete(position, predicate) or delete(predicate)
// A syntactic predicate picks the two-argument form when the first argument
// parses as a position expression.
action_delete
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Delete> action]
{
	boost::shared_ptr<Function<Position> > pos;
	boost::shared_ptr<Function<Bool> > condition;
}
	: "delete" LPAREN
	(
		(position_operator [tagset, vars]) =>
		(
			pos = position_operator [tagset, vars] COMMA
			condition = bool_operator [tagset, vars] {
				// delete(position, condition); 
				action.reset(new Delete(condition, pos));
			}
		)
	|
		(
			condition = bool_operator [tagset, vars] {
				// delete(condition); 
				action.reset(new Delete(condition));
			}
		)
	) 
	RPAREN
;

// ----------------------------------------------------------------------------
// Relabel action:
// 	relabel(pos, symset, predicate) or relabel(symset, predicate)
// A syntactic predicate picks the three-argument form when the first
// argument parses as a position expression.
action_relabel
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Relabel> action]
{
	boost::shared_ptr<Function<Position> > at_pos;
	boost::shared_ptr<Function<Bool> > pred;
	boost::shared_ptr<Function<TSet> > new_label;
}
	: "relabel" LPAREN
	(
		(position_operator [tagset, vars]) =>
		(
			at_pos    = position_operator [tagset, vars] COMMA
			new_label = symset_operator [tagset, vars] COMMA
			pred      = bool_operator [tagset, vars]
			{
				action.reset(new Relabel(new_label, pred, at_pos));
			}
		)
	|
		(
			new_label = symset_operator [tagset, vars] COMMA
			pred      = bool_operator [tagset, vars]
			{
				action.reset(new Relabel(new_label, pred));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// Unify action: unify(position, position, symset)
action_unify
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Unify> action]
{
	boost::shared_ptr<Function<Position> > range_begin;
	boost::shared_ptr<Function<Position> > range_end;
	boost::shared_ptr<Function<TSet> > attribs;
}
	: "unify" LPAREN
			range_begin = position_operator [tagset, vars] COMMA
			range_end   = position_operator [tagset, vars] COMMA
			attribs     = symset_operator [tagset, vars]
		RPAREN
		{
			action.reset(new Unify(range_begin, range_end, attribs));
		}
;

// ----------------------------------------------------------------------------
// Mark action:
// 	mark(position, position, "channel")
// 	mark(position, position, head_position, "channel")
// When the head position is omitted an empty pointer is passed for it.
action_mark
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Mark> action]
{
	boost::shared_ptr<Function<Position> > range_begin;
	boost::shared_ptr<Function<Position> > range_end;
	boost::shared_ptr<Function<Position> > head;
}
	: "mark" LPAREN
			range_begin = position_operator [tagset, vars] COMMA
			range_end   = position_operator [tagset, vars] COMMA
			(head = position_operator [tagset, vars] COMMA)?
			channel : STRING
		RPAREN
		{
			const std::string channel_name = ((antlr::Token*)channel)->getText();
			action.reset(new Mark(range_begin, range_end, head, channel_name));
		}
;

// ----------------------------------------------------------------------------
// Unmark action: unmark(position, "channel")
action_unmark
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Unmark> action]
{
	boost::shared_ptr<Function<Position> > at_pos;
}
	: "unmark" LPAREN
			at_pos = position_operator [tagset, vars] COMMA
			channel : STRING
		RPAREN
		{
			const std::string channel_name = ((antlr::Token*)channel)->getText();
			action.reset(new Unmark(at_pos, channel_name));
		}
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Match rules: wraps a parsed apply(...) operator in a MatchRule.
// Returns boost::shared_ptr<MatchRule>
match_rule_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<MatchRule> ret_op]
{
	// receives the result of match_apply_operator before it is wrapped
	boost::shared_ptr<ApplyOperator> apply;
}
	: apply = match_apply_operator [tagset, vars] {
		ret_op = boost::make_shared<MatchRule>(vars, apply);
	}
;

// Match apply operator:
// 	apply(match(), cond(conditions), actions(actions)) 
// 	apply(match(), actions(actions))
// The cond(...) section is optional; when it is absent the ApplyOperator is
// built without conditions.
// Returns boost::shared_ptr<ApplyOperator>
match_apply_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<ApplyOperator> ret_op]
{
	// accessor for the Match variable named "_M"
	VariableAccessor<Match> matches = vars.create_accessor<Match>("_M");
	boost::shared_ptr<const MatchOperator> match_op;
	boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > actions;
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > conditions;
}
	: "apply" LPAREN 
		match_op = match_operator[tagset, vars] COMMA
		("cond"   LPAREN conditions = bool_operator_comma_sep [tagset, vars] RPAREN COMMA)?
		"actions" LPAREN actions    =  match_action_comma_sep [tagset, vars] RPAREN
		RPAREN {
			// conditions is non-empty only when the cond(...) section was parsed
			if (conditions) {
				ret_op.reset(
					new ApplyOperator(matches, match_op, actions, conditions)
				);
			}
			else {
				ret_op.reset(
					new ApplyOperator(matches, match_op, actions)
				);
			}
		}
;

// Match operator: match(match_conditions)
// Wraps the parsed conjunction of conditions in a MatchOperator.
// Returns boost::shared_ptr<MatchOperator>
match_operator
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<MatchOperator> op]
{
	boost::shared_ptr<ConjConditions> conds;
}
	: "match" LPAREN
			conds = match_condition [tagset, vars]
		RPAREN
		{
			op.reset(new MatchOperator(conds));
		}
;

// Match conditions: wraps the vector produced by match_condition_in in a
// ConjConditions object.
// Returns boost::shared_ptr<ConjConditions>
match_condition
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<ConjConditions> condition]
{
	std::vector<boost::shared_ptr<const MatchCondition> > parsed;
}
	: parsed = match_condition_in [tagset, vars]
		{
			condition.reset(new ConjConditions(parsed));
		}
;

// Match conditions: one or more conditions separated by commas, collected
// in input order.
// Returns std::vector< boost::shared_ptr<const MatchCondition> >
match_condition_in
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [std::vector< boost::shared_ptr<const MatchCondition> > ret]
{
	boost::shared_ptr<const MatchCondition> cond;
}
	: cond = match_cond_all [tagset, vars]
		{
			ret.push_back(cond);
		}
	(
		COMMA cond = match_cond_all [tagset, vars]
		{
			ret.push_back(cond);
		}
	)*
;

// One of the match conditions: optional(...), repeat(...), a token-level
// predicate, is(...) or text(...).
// Returns boost::shared_ptr<const MatchCondition>
match_cond_all
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<const MatchCondition> ret]
	: ret = match_cond_optional [tagset, vars]
	| ret = match_cond_repeate  [tagset, vars]
	| ret = match_cond_token    [tagset, vars]
	| ret = match_cond_is
	| ret = match_cond_text
;

// Match condition - token: wraps an L0 predicate in a TokenCondition.
// Returns boost::shared_ptr<const TokenCondition>
match_cond_token
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<const TokenCondition> ret]
{
	boost::shared_ptr<Function<Bool> > pred;
}
	: pred = bool_operator [tagset, vars]
		{
			ret = boost::make_shared<TokenCondition>(pred);
		}
;


// Match condition - optional: optional(match_conditions)
// Returns boost::shared_ptr<OptionalMatch>
match_cond_optional
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<OptionalMatch> mtch]
{
	boost::shared_ptr<ConjConditions> inner;
}
	: "optional" LPAREN
			inner = match_condition [tagset, vars]
		RPAREN
		{
			mtch.reset(new OptionalMatch(inner));
		}
;

// Match condition - repeat: repeat(match_conditions)
// Returns boost::shared_ptr<RepeatedMatch>
match_cond_repeate
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<RepeatedMatch> mtch]
{
	boost::shared_ptr<ConjConditions> m_cond;
}
	: "repeat" LPAREN m_cond = match_condition [tagset, vars] RPAREN {
		mtch.reset(new RepeatedMatch(m_cond));
	}
;

// Match condition - is: is("annotation name")
// Returns boost::shared_ptr<IsAnnotatedAs>
match_cond_is
	returns [boost::shared_ptr<IsAnnotatedAs> mtch]
	: "is" LPAREN
			ann_name : STRING
		RPAREN
		{
			mtch.reset(new IsAnnotatedAs(token_ref_to_std_string(ann_name)));
		}
;

// Match condition - text: text("some text")
// Returns boost::shared_ptr<MatchText>
match_cond_text
	returns [boost::shared_ptr<MatchText> mtch]
	: "text" LPAREN
			literal : STRING
		RPAREN
		{
			mtch.reset(new MatchText(token_ref_to_ustring(literal)));
		}
;

// ----------------------------------------------------------------------------

// Match actions. A match action is either mark(...) or unmark(...).
// Pure dispatch: forwards the result of the matching rule.
// Returns boost::shared_ptr<MatchAction>
match_action
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<MatchAction> m_act]
	: m_act = match_mark_action   [tagset, vars]
	| m_act = match_unmark_action [tagset, vars]
;

// Match mark action:
// 	mark(match, "annotation")
// 	mark(match_from, match_to, "annotation")
// 	mark(match_from, match_to, head_match, "annotation")
// Returns boost::shared_ptr<MarkMatch>
match_mark_action
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<MarkMatch> m_act]
{
	boost::shared_ptr<Function<Match> > match_to;
	boost::shared_ptr<Function<Match> > match_from;
	boost::shared_ptr<Function<Match> > head_match;
}
	: "mark" LPAREN 
			match_from = match_fit[tagset, vars] COMMA
			( match_to  = match_fit[tagset, vars] COMMA
				( head_match = match_fit[tagset, vars] COMMA )?
			)?
			annotation_name : STRING
		RPAREN {
			// match_to / head_match stay empty when their optional argument
			// was not present; pick the matching constructor accordingly
			if (!match_to) {
				m_act.reset(
					new MarkMatch(
						match_from,
						((antlr::Token*)annotation_name)->getText()));
			} else {
				if (!head_match) {
					m_act.reset(
						new MarkMatch(
							match_from,
							match_to,
							((antlr::Token*)annotation_name)->getText()));
				} else {
					m_act.reset(
						new MarkMatch(
							match_from,
							match_to,
							head_match,
							((antlr::Token*)annotation_name)->getText()));
				}
			}
		}
;

// Match unmark action: unmark(match, "annotation")
// Returns boost::shared_ptr<UnmarkMatch>
match_unmark_action
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<UnmarkMatch> m_act]
{
	boost::shared_ptr<Function<Match> > target;
}
	: "unmark" LPAREN
			target = match_fit [tagset, vars] COMMA
			ann_name : STRING
		RPAREN
		{
			const std::string ann_text = ((antlr::Token*)ann_name)->getText();
			m_act.reset(new UnmarkMatch(target, ann_text));
		}
;

// Match actions separated by commas; at least one action is required.
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > >
match_action_comma_sep
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > r_vec]
{
	// allocate the result first so the actions below can append to it
	r_vec.reset(new std::vector<boost::shared_ptr<MatchAction> >);

	boost::shared_ptr<MatchAction> parsed;
}
	: parsed = match_action [tagset, vars]
		{
			r_vec->push_back(parsed);
		}
	(
		COMMA parsed = match_action [tagset, vars]
		{
			r_vec->push_back(parsed);
		}
	)*
;

Paweł Kędzia's avatar
Paweł Kędzia committed
// Match expression: a match variable or constant, the keyword "M" (a getter
// for the Match variable named "_M"), or a parenthesised match expression.
// Each trailing "ARROW number" wraps the expression built so far in a
// Submatch with that index.
// Returns boost::shared_ptr<Function<Match> >
match_fit
  [const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Match> > ret]
{
	//
}
	:
	( ret = match_var_val [tagset, vars]
	| "M" { ret.reset(new VarGetter<Match>(vars.create_accessor<Match>("_M")));	}
	| LPAREN ret = match_fit [tagset, vars] RPAREN
	)
	( // if there's an arrow after the match, we have a submatch reference
		ARROW i: UNSIGNED_INT { ret.reset(new Submatch(ret, token_ref_to_int(i))); }
	)*
;

// Wrapper for a match vector variable and a match constant value.
// Returns boost::shared_ptr<Function<Match> >
match_var_val
  [const Corpus2::Tagset& /*tagset*/, Variables& vars]
	returns [boost::shared_ptr<Function<Match> > ret]
	: ret = match_vector_variable [vars]
	| ret = match_value_const
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
rk's avatar
rk committed
// ANTLR LEXER
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
rk's avatar
rk committed
class ANTLRLexer extends Lexer;
options {
	exportVocab    = ANTLRExpr;
	charVocabulary = '\3'..'\377';
	testLiterals   = false;
rk's avatar
rk committed
}

rk's avatar
rk committed
STRING
options {
Loading full blame...