Skip to content
Snippets Groups Projects
grammar.g 83.4 KiB
Newer Older
rk's avatar
rk committed
header {
Paweł Kędzia's avatar
Paweł Kędzia committed
	//don't try to add all the headers inside our namespace
	#include <libwccl/parser/ParserException.h>
	#include <libwccl/parser/parsingscope.h>
rk's avatar
rk committed

	#include <cstdio>
	#include <antlr/Token.hpp>
	#include <boost/lexical_cast.hpp>
	// values/variables
	#include <libwccl/variables.h>
	#include <libwccl/values/bool.h>
	#include <libwccl/values/tset.h>
	#include <libwccl/values/strset.h>
	#include <libwccl/values/position.h>
	
	// sentence context
	#include <libwccl/sentencecontext.h>

	// operators
	#include <libwccl/ops/operator.h>

	#include <libwccl/ops/functions/constant.h>
	#include <libwccl/ops/functions/vargetter.h>
	#include <libwccl/ops/functions/conditional.h>

	#include <libwccl/ops/functions/setops.h>
	#include <libwccl/ops/functions/bool/varsetter.h>
	#include <libwccl/ops/functions/bool/predicates/debug.h>
	#include <libwccl/ops/functions/bool/predicates/ambiguous.h>
	#include <libwccl/ops/functions/bool/predicates/issingular.h>
	#include <libwccl/ops/functions/bool/predicates/or.h>
	#include <libwccl/ops/functions/bool/predicates/nor.h>
	#include <libwccl/ops/functions/bool/predicates/and.h>
	#include <libwccl/ops/functions/bool/predicates/regex.h>
	#include <libwccl/ops/functions/bool/predicates/intersects.h>
	#include <libwccl/ops/functions/bool/predicates/issubsetof.h>
	#include <libwccl/ops/functions/bool/predicates/isinside.h>
	#include <libwccl/ops/functions/bool/predicates/isoutside.h>
	#include <libwccl/ops/functions/bool/predicates/isempty.h>
	#include <libwccl/ops/functions/bool/predicates/equals.h>
	#include <libwccl/ops/functions/bool/predicates/weakagreement.h>
	#include <libwccl/ops/functions/bool/predicates/pointagreement.h>
	#include <libwccl/ops/functions/bool/predicates/strongagreement.h>
ilor's avatar
ilor committed
	#include <libwccl/ops/functions/bool/predicates/annsub.h>
ilor's avatar
ilor committed
	#include <libwccl/ops/functions/bool/predicates/ann.h>
	#include <libwccl/ops/functions/bool/predicates/anntoken.h>

	#include <libwccl/ops/functions/strset/affix.h>
	#include <libwccl/ops/functions/strset/getorth.h>
	#include <libwccl/ops/functions/strset/toupper.h>
	#include <libwccl/ops/functions/strset/tolower.h>
	#include <libwccl/ops/functions/strset/getlemmas.h>
	#include <libwccl/ops/functions/strset/lextranslator.h>
	#include <libwccl/ops/functions/strset/propval.h>
	#include <libwccl/ops/functions/strset/anninter.h>
Paweł Kędzia's avatar
Paweł Kędzia committed
	#include <libwccl/ops/functions/tset/agrfilter.h>
Adam Radziszewski's avatar
Adam Radziszewski committed
	#include <libwccl/ops/functions/tset/catfilter.h>
	#include <libwccl/ops/functions/tset/getsymbols.h>
	#include <libwccl/ops/functions/tset/getwordclass.h>
Paweł Kędzia's avatar
Paweł Kędzia committed
	#include <libwccl/ops/functions/tset/getsymbolsinrange.h>
	#include <libwccl/ops/functions/position/relativeposition.h>
	#include <libwccl/ops/functions/position/lasttoken.h>
	#include <libwccl/ops/functions/position/firsttoken.h>
	#include <libwccl/ops/functions/bool/iterations/only.h>
	#include <libwccl/ops/functions/bool/iterations/atleast.h>
	#include <libwccl/ops/functions/bool/iterations/leftlook.h>
	#include <libwccl/ops/functions/bool/iterations/rightlook.h>
	#include <libwccl/ops/functions/bool/iterations/skip.h>
Adam Wardynski's avatar
Adam Wardynski committed
	#include <libwccl/ops/tagrule.h>
	#include <libwccl/ops/tagrulesequence.h>
	#include <libwccl/ops/tagactions/unify.h>
	#include <libwccl/ops/tagactions/delete.h>
	#include <libwccl/ops/tagactions/select.h>
	#include <libwccl/ops/tagactions/relabel.h>
	#include <libwccl/ops/tagactions/mark.h>
	#include <libwccl/ops/tagactions/unmark.h>
	// Match operators
	#include <libwccl/values/tokenmatch.h>
	#include <libwccl/values/annotationmatch.h>
	#include <libwccl/values/matchvector.h>
	#include <libwccl/ops/match/applyoperator.h>
	#include <libwccl/ops/match/conditions/optionalmatch.h>
	#include <libwccl/ops/match/conditions/repeatedmatch.h>
	#include <libwccl/ops/match/conditions/isannotatedas.h>
	#include <libwccl/ops/match/conditions/matchtext.h>
	#include <libwccl/ops/match/conditions/conjconditions.h>
	#include <libwccl/ops/match/conditions/tokencondition.h>
	#include <libwccl/ops/match/conditions/oneof.h>
	#include <libwccl/ops/match/conditions/longest.h>
	#include <libwccl/ops/match/actions/markmatch.h>
	#include <libwccl/ops/match/actions/unmarkmatch.h>
	#include <libwccl/ops/match/actions/setpropmatch.h>
	#include <libwccl/ops/match/actions/overwritematch.h>
	#include <libwccl/ops/functions/match/submatch.h>
Adam Wardynski's avatar
Adam Wardynski committed
	// Wccl whole file syntax
	#include <libwccl/wcclfile.h>
	
	#include <libwccl/lexicon/lexiconparser.h>

rk's avatar
rk committed
	// Unicode String
	#include <unicode/uniset.h>
	#include <unicode/unistr.h>
Paweł Kędzia's avatar
Paweł Kędzia committed
	// start our namespace again
	ANTLR_BEGIN_NAMESPACE(Wccl)
rk's avatar
rk committed
}

options {
	language = "Cpp";
//	genHashLines = true;
rk's avatar
rk committed
}

// ----------------------------------------------------------------------------
rk's avatar
rk committed
// ANTLR PARSER
// ----------------------------------------------------------------------------
rk's avatar
rk committed
class ANTLRParser extends Parser;
options {
rk's avatar
rk committed
	buildAST = false;
rk's avatar
rk committed
	defaultErrorHandler = false;
}
{
private:
	// 
rk's avatar
rk committed
	// Converts the text of a token into a UnicodeString: the text is decoded
	// as UTF-8 and the escape sequences in it are resolved with unescape().
	const UnicodeString token_ref_to_ustring(antlr::RefToken& rstr) const { 
		return UnicodeString::fromUTF8(((antlr::Token*)rstr)->getText().c_str()).unescape();
	}
	// Returns the token text (decoded and unescaped by token_ref_to_ustring)
	// with its first and last character removed -- presumably the quote marks
	// around a string literal. Text shorter than three characters gives an
	// empty string.
	const UnicodeString str_token_ref_to_ustring(antlr::RefToken& rstr) const { 
		const UnicodeString quoted = token_ref_to_ustring(rstr);
		UnicodeString inner;

		if (quoted.length() >= 3) {
			// copy everything between the first and the last character
			quoted.extract(1, quoted.length() - 2, inner);
		}

		return inner;
	}
Paweł Kędzia's avatar
Paweł Kędzia committed
	// Returns the token text with one pair of enclosing grave accents
	// (`text` -> text) stripped. Text that is shorter than two characters or
	// is not wrapped in grave accents on both sides is returned unchanged.
	const std::string str_token_rem_grav(antlr::RefToken& rstr) const {
		const std::string text = token_ref_to_std_string(rstr);
		const size_t n = text.length();
		const bool in_graves = n >= 2 && text[0] == '`' && text[n - 1] == '`';

		return in_graves ? text.substr(1, n - 2) : text;
	}
	//
rk's avatar
rk committed
	// Returns the raw text of the token as a std::string, without any
	// decoding, unescaping or trimming.
	const std::string token_ref_to_std_string(antlr::RefToken& rstr) const { 
		return (((antlr::Token*)rstr)->getText());
	}
	//
	// Converts the text of a token to int with atoi(). atoi() reports no
	// errors (it yields 0 when the text does not start with a number), so this
	// is meant for tokens that the lexer already restricted to integers.
	// Declared const like the other token helpers; it reads no parser state.
	int token_ref_to_int(antlr::RefToken& rstr) const { 
		return atoi(((antlr::Token*)rstr)->getText().c_str());
	}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// "GLOBAL" RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Rule for parsing string set operator with scope. 
// Returns boost::shared_ptr<Operator<StrSet> >
parse_strset_operator
	returns [boost::shared_ptr<Operator<StrSet> > res]
	Lexicons empty_lex;
	ParsingScope scope(tagset, empty_lex);
	: res = functional_operator_strset [scope]
	EOF
// ----------------------------------------------------------------------------
// Rule for parsing bool operator with scope. 
// Returns boost::shared_ptr<Operator<Bool> > 
	returns [boost::shared_ptr<Operator<Bool> > res]
	Lexicons empty_lex;
	ParsingScope scope(tagset, empty_lex);
	: res = functional_operator_bool [scope]
	EOF

// ----------------------------------------------------------------------------
// Rule for parsing symbol set operator with scope.
// Returns boost::shared_ptr<Operator<TSet> >
parse_symset_operator
	returns [boost::shared_ptr<Operator<TSet> > res]
	Lexicons empty_lex;
	ParsingScope scope(tagset, empty_lex);
	: res = functional_operator_symset [scope]
	EOF
ilor's avatar
ilor committed
// ----------------------------------------------------------------------------
// Rule for parsing position operator with scope.
// Returns boost::shared_ptr<Operator<Position> >
// Entry rule: parses exactly one position operator followed by EOF.
//   tagset - tagset handed to the parsing scope
//   res    - the parsed operator
parse_position_operator
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<Operator<Position> > res]
{
	// The scope is built from the given tagset and a default-constructed
	// Lexicons object; both locals live for the duration of the rule.
	Lexicons empty_lex;
	ParsingScope scope(tagset, empty_lex);
}
	: res = functional_operator_position [scope]
	EOF
;

// ----------------------------------------------------------------------------
// Rule for parsing match operator with scope.
// Returns boost::shared_ptr<Operator<Match> >
// Entry rule: parses exactly one match operator followed by EOF.
//   tagset - tagset handed to the parsing scope
//   res    - the parsed operator, a boost::shared_ptr<Operator<Match> >
parse_match_operator
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<Operator<Match> > res]
{	
	// The scope is built from the given tagset and a default-constructed
	// Lexicons object.
	Lexicons empty_lex;
	ParsingScope scope(tagset, empty_lex);
}
	: res = functional_operator_match [scope]
	EOF
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Rule for parsing single WCCL Rule
Adam Wardynski's avatar
Adam Wardynski committed
// Returns boost::shared_ptr<TagRule>
	[const Corpus2::Tagset &tagset]
Adam Wardynski's avatar
Adam Wardynski committed
	returns [boost::shared_ptr<TagRule> rle]
	Lexicons empty_lex;
	ParsingScope scope(tagset, empty_lex);
// Rule for parsing tag rule section in the wccl file
// Returns boost::shared_ptr<TagRuleSequence>
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<TagRuleSequence> rule_seq]
	Lexicons empty_lex;
	ParsingScope scope(tagset, empty_lex);
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Rule for parsing the match rules
// Returns boost::shared_ptr<Matching::MatchRule>
parse_match_rule
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<Matching::MatchRule> ret_match]
	Lexicons empty_lex;
	ParsingScope scope(tagset, empty_lex);
	: ret_match = match_rule_operator[scope] 
Adam Wardynski's avatar
Adam Wardynski committed
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Rule for parsing wccl files
// Entry rule for a whole WCCL file: an optional imports section followed by
// one or more file sections, then EOF.
//   tagset      - passed to the WcclFile constructor
//   search_path - passed to the WcclFile constructor
//   wccl_file   - the WcclFile object that the sections are parsed into
parse_wccl_file
	[const Corpus2::Tagset& tagset, const std::string search_path]
	returns [boost::shared_ptr<WcclFile> wccl_file]
{
	// Created up front so that every section rule can receive it by reference.
	wccl_file = boost::make_shared<WcclFile>(tagset, search_path);
}
	: (imports_section [*wccl_file])?
	  (wccl_file_section [*wccl_file])+
	  EOF
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VALUES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
Adam Wardynski's avatar
Adam Wardynski committed
// Single or multiple (comma separated) elements in string set, may be:
//   'a' "a" [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"]
// Parsing strset literal and returning plain strset value.
// Returns boost::shared_ptr<StrSet> 
strset_literal 
	returns [boost::shared_ptr<StrSet> s_set]
	s_set.reset(new StrSet());
			s_set->insert(token_ref_to_ustring(s0)); 
Paweł Kędzia's avatar
Paweł Kędzia committed
		}
	| LBRACKET 
		(
			s1: STRING { 
				s_set->insert(token_ref_to_ustring(s1)); 
			}
	  	(
				COMMA s2: STRING { 
					s_set->insert(token_ref_to_ustring(s2)); 
				}
			)*
	  )? 
		RBRACKET
;
Adam Wardynski's avatar
Adam Wardynski committed
// String set value as a constant string set:
// Returns boost::shared_ptr<Constant<StrSet> >
	returns [boost::shared_ptr<Constant<StrSet> > val]
	boost::shared_ptr<StrSet> set;
		val.reset(new Constant<StrSet>(*set));
// ----------------------------------------------------------------------------
Adam Wardynski's avatar
Adam Wardynski committed
// Element of sym set. This rule inserts element into symbol set 
// using corresponding tagset. 
// WARNING! This rule can throw ParserException! Be careful!
// Parses one SYMBOL and inserts it into t_set using the given tagset.
// Grave accents around the symbol are stripped first.
// Throws ParserException when the tagset rejects the symbol.
symset_elem
	[const Corpus2::Tagset& tagset, boost::shared_ptr<TSet>& t_set]
	: s1: SYMBOL {
		try {
			t_set->insert_symbol(tagset, str_token_rem_grav(s1));
		}
		catch(Corpus2::TagParseError &e) {
			// re-throw as the parser's own exception type
			throw(ParserException(e.info()));
		}
	}
;

// Symset literal. Symset element may be: 
//  a, `a ` (this is guaranteed by lexer rule - SYMBOL) or {a} {`a`} {a, b} 
//  {`a`, `b`} {a, `b`} {`a`, b}
// Parsing symset literal and returning plain symset value.
// Returns boost::shared_ptr<TSet>
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<TSet> t_set]
	t_set.reset(new TSet());
Paweł Kędzia's avatar
Paweł Kędzia committed
	| LCURLY 
		(
			symset_elem [tagset, t_set] (COMMA symset_elem [tagset, t_set])* 
		)?
// Symset value, as constant symbol set
// Returns boost::shared_ptr<Constant<TSet> >
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<Constant<TSet> > val]
	boost::shared_ptr<TSet> set;
	: set = symset_literal [tagset] {
		val.reset(new Constant<TSet>(*set));
// ----------------------------------------------------------------------------
// Bool literal. May be True or False. Parsing bool literal and returning 
// plain bool value.
// Returns boost::shared_ptr<Bool>
	returns [boost::shared_ptr<Bool> val]
	: "True"  { val.reset(new Bool(Bool(true ))); }
	| "False" { val.reset(new Bool(Bool(false))); }
// Bool value, as a constant Bool value
// Returns boost::shared_ptr<Constant<Bool> >
	returns [boost::shared_ptr<Constant<Bool> > val]
	boost::shared_ptr<Bool> bool_lit;
		val.reset(new Constant<Bool>(*bool_lit));
// ----------------------------------------------------------------------------
// Position literal may be:
// 	(+|-)?(0-9)+ or  begin or end or nowhere
// Parsing position literal and returning plain position value.
// returns boost::shared_ptr<Position>
	returns [boost::shared_ptr<Position> val]
Paweł Kędzia's avatar
Paweł Kędzia committed
{
	int i = 0;
}
	: i = number {
		val.reset(new Position(Position(i)));
		val.reset(new Position(Position(Position::Begin)));
		val.reset(new Position(Position(Position::End)));
		val.reset(new Position(Position(Position::Nowhere)));

// Position as constant position value
// Returns boost::shared_ptr<Constant<Position> >
	returns [boost::shared_ptr<Constant<Position> > val]
	boost::shared_ptr<Position> pos_lit;
		val.reset(new Constant<Position>(*pos_lit));
// ----------------------------------------------------------------------------
// Value used into match operator such as TOK[position] and ANN[position, name]
// Returns boost::shared_ptr<Match>
	returns [boost::shared_ptr<Match> val]
{
	boost::shared_ptr<MatchData> m;
}
// Constant match value
// Returns boost::shared_ptr<Constant<Match> >
match_value_const
	returns [boost::shared_ptr<Constant<Match> > val]
{
	boost::shared_ptr<Match> m;
}
		val.reset(new Constant<Match>(*m));
	}
;

// ----------------------------------------------------------------------------
// Value used into match operator such as TOK[position] and ANN[position, name]
// Returns boost::shared_ptr<MatchData>
	returns [boost::shared_ptr<MatchData> val]
	: val = token_match_literal
	| val = ann_match_literal
	| val = match_vector_literal
// Returns boost::shared_ptr<TokenMatch>
	returns [boost::shared_ptr<TokenMatch> val]
{
}
	: "TOK" LBRACKET u: UNSIGNED_INT RBRACKET {
		val.reset(new TokenMatch(token_ref_to_int(u)));
// annotation match literal - ANN[position, name]
// Returns boost::shared_ptr<AnnotationMatch> 
	returns [boost::shared_ptr<AnnotationMatch> val]
{
}
	: "ANN" LBRACKET u : UNSIGNED_INT COMMA channel : STRING RBRACKET {
		val.reset(new AnnotationMatch(token_ref_to_int(u), token_ref_to_std_string(channel)));
// annotation match vector literal: MATCH() or MATCH(token, ann, MATCH())
// Returns boost::shared_ptr<MatchVector>
	returns [boost::shared_ptr<MatchVector> val]
{
	val.reset(new MatchVector());
}
	: "MATCH" LPAREN (match_vector_literal_item[val])? RPAREN
;

// Body of the MATCH value. It only adds vector items to the MatchVector
// Item may be single or multiple
match_vector_literal_item [boost::shared_ptr<MatchVector>& mvector]
{
	boost::shared_ptr<Match> m_val;
}
		mvector->append(m_val);
	} 
	(
		COMMA
// ----------------------------------------------------------------------------
// Number may be unsigned or signed: 1, +1, -1
// Integer literal, with or without a sign (1, +1, -1); yields its int value.
number 
	returns [int ret]
{
	// value reported if neither alternative assigns one
	ret = 0;
}
	: signed_tok: SIGNED_INT {
		ret = token_ref_to_int(signed_tok);
	}
	| unsigned_tok: UNSIGNED_INT {
		ret = token_ref_to_int(unsigned_tok);
	}
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VARIABLES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Position: $Name
// Gets the position variable from the Variables; the variable is first put
// there (via get_put) if needed.
// Returns boost::shared_ptr<VariableAccessor<Position> > 
position_variable_acc
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<Position> > pos_acc]
		vars.get_put<Position>(str_token_rem_grav(n));
		VariableAccessor<Position> acc = 
			vars.create_accessor<Position>(str_token_rem_grav(n));
		pos_acc.reset(new VariableAccessor<Position>(acc));

// VarGetter for Position variable. This rule wraps position_variable_acc.
// Returns boost::shared_ptr<VarGetter<Position> >
position_variable
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<Position> > op]
	boost::shared_ptr<VariableAccessor<Position> > pos_acc;
}
	: pos_acc = position_variable_acc [vars] {
		op.reset(new VarGetter<Position>(*pos_acc));
// ----------------------------------------------------------------------------
Adam Wardynski's avatar
Adam Wardynski committed
// This expression gets variable of StrSet type from the scope's Variables.
// Creates one first if it's not yet there.
// 
// Returns boost::shared_ptr<VariableAccessor<StrSet> >
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<StrSet> > strset_acc]
		vars.get_put<StrSet>(str_token_rem_grav(n));
		VariableAccessor<StrSet> acc = 
			vars.create_accessor<StrSet>(str_token_rem_grav(n));
		strset_acc.reset(new VariableAccessor<StrSet>(acc));

// Vargetter for StrSet variable. This rule wrapped strset_variable_acc.
// Returns boost::shared_ptr<VarGetter<StrSet> > 
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<StrSet> > op]
	boost::shared_ptr<VariableAccessor<StrSet> > strset_acc;
	: strset_acc = strset_variable_acc [vars] {
		op.reset(new VarGetter<StrSet>(*strset_acc));
// ----------------------------------------------------------------------------
Adam Wardynski's avatar
Adam Wardynski committed
// This expression gets variable of TSet type from the scope's Variables.
// Creates one first if it's not there yet.
// 
// Returns boost::shared_ptr<VariableAccessor<TSet> >
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<TSet> > symset_acc]
			vars.get_put<TSet>(str_token_rem_grav(n)); 	
			VariableAccessor<TSet> acc = 
				vars.create_accessor<TSet>(str_token_rem_grav(n));
			symset_acc.reset(new VariableAccessor<TSet>(acc));

// Vargetter for symbol set variable. This rule wrapped symset_variable_acc
// Returns boost::shared_ptr<VarGetter<TSet> > 
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<TSet> > op]
	boost::shared_ptr<VariableAccessor<TSet> > symset_acc;
	: symset_acc = symset_variable_acc [vars] {
			op.reset(new VarGetter<TSet>(*symset_acc));
// ----------------------------------------------------------------------------
Adam Wardynski's avatar
Adam Wardynski committed
// This expression gets variable of Bool type from the scope's Variables.
// Creates one first if it's not there yet.
// Returns boost::shared_ptr<VariableAccessor<Bool> >
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<Bool> > bool_acc]
			vars.get_put<Bool>(str_token_rem_grav(n));
			VariableAccessor<Bool> acc = 
				vars.create_accessor<Bool>(str_token_rem_grav(n));
			bool_acc.reset(new VariableAccessor<Bool>(acc));

// Vargetter for bool variable. It is only wrapper for bool_variable_acc
// Returns boost::shared_ptr<VarGetter<Bool> >
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<Bool> > op]
	boost::shared_ptr<VariableAccessor<Bool> > bool_acc;
	: bool_acc = bool_variable_acc [vars] {
			op.reset(new VarGetter<Bool>(*bool_acc));
rk's avatar
rk committed

// ----------------------------------------------------------------------------
// Match: $m:name
Adam Wardynski's avatar
Adam Wardynski committed
// This expression gets variable of Match type from the scope's Variables.
// Creates one first if it's not there yet.
// Returns boost::shared_ptr<VariableAccessor<Match> >
// Accessor for a Match variable: MATCH_VECTOR_PREFIX followed by a SYMBOL
// that names the variable (enclosing grave accents are stripped).
match_vector_variable_acc
	[Variables& vars]
	returns [boost::shared_ptr<VariableAccessor<Match> > mvv_acc]
	: MATCH_VECTOR_PREFIX name_tok: SYMBOL {
			const std::string var_name = str_token_rem_grav(name_tok);

			// get_put is called before the accessor is created, so the
			// variable is registered in vars first
			vars.get_put<Match>(var_name);

			mvv_acc.reset(
				new VariableAccessor<Match>(vars.create_accessor<Match>(var_name)));
	}
;

Adam Wardynski's avatar
Adam Wardynski committed
// Vargetter for the match vector variable. Wrapper for match_vector_variable_acc
// Returns boost::shared_ptr<VarGetter<Match> >
// Wraps the accessor produced by match_vector_variable_acc in a
// VarGetter<Match>.
match_vector_variable
	[Variables& vars]
	returns [boost::shared_ptr<VarGetter<Match> > mvv]
{
	boost::shared_ptr<VariableAccessor<Match> > accessor;
}
	: accessor = match_vector_variable_acc [vars] {
		mvv = boost::shared_ptr<VarGetter<Match> >(
			new VarGetter<Match>(*accessor));
	}
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
rk's avatar
rk committed
// OPERATORS
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Symbol set (tagset) operators
// Returns boost::shared_ptr<Function<TSet> >
///////////////////////////////////////////////////////////////////////////////
// Dispatches to one of the symbol-set operator rules, or to a parenthesised
// symbol-set operator. The result of the chosen alternative is returned as is.
symset_operator
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<TSet> > ret]
	: ret = symset_condition [scope]
	// Semantic predicate: take symset_getsymbol only when a SYMBOL is directly
	// followed by '['; other SYMBOL-led input is left to the alternatives below.
	| {LA(1)==SYMBOL && LA(2)==LBRACKET}? (ret = symset_getsymbol [scope])
	| ret = symset_var_val [scope]
	| ret = symset_class   [scope]
	| ret = symset_range   [scope]
	| ret = symset_catflt  [scope]
	| ret = symset_agrflt  [scope]
	| ret = symset_union   [scope]
	| ret = symset_intersection  [scope]
	| LPAREN ret = symset_operator [scope] RPAREN
;

// ----------------------------------------------------------------------------
// comma-separated symset operators
// One or more symset operators separated by commas, collected in source
// order into a newly allocated vector.
symset_operator_comma_sep
	[ParsingScope& scope]
	returns
		[boost::shared_ptr<std::vector<boost::shared_ptr<Function<TSet> > > > ret_v]
{
	boost::shared_ptr<Function<TSet> > item;
	ret_v = boost::shared_ptr<std::vector<boost::shared_ptr<Function<TSet> > > >(
		new std::vector<boost::shared_ptr<Function<TSet> > >
	);
}
	: item = symset_operator [scope] { ret_v->push_back(item); }
	  ( COMMA item = symset_operator [scope] { ret_v->push_back(item); } )*
;

// ----------------------------------------------------------------------------
// Wrapper from Function<TSet> to Operator<TSet>
// Parses a symset operator and wraps it, together with the scope's
// variables, in an Operator<TSet>.
functional_operator_symset
	[ParsingScope& scope]
	returns [boost::shared_ptr<Operator<TSet> > op]
{
	boost::shared_ptr<Function<TSet> > fn;
}
	: fn = symset_operator [scope] {
		op = boost::shared_ptr<Operator<TSet> >(
			new Operator<TSet>(fn, scope.variables()));
	}
;

// ----------------------------------------------------------------------------
ilor's avatar
ilor committed
// A wrapper for symset variable and symset value.
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<TSet> > op]
	: op = symset_variable [scope.variables()]
	| op = symset_value    [scope.tagset()]
// ----------------------------------------------------------------------------
// Condition of the symset value:
// 	if (Bool, TSet, TSet)
// 	? TSet ? Bool : {}
symset_condition
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<TSet> > op]
	boost::shared_ptr<Function<Bool> > test;
	boost::shared_ptr<Function<TSet> > p_true, p_false;
	: "if" LPAREN test  = bool_operator [scope] COMMA 
							p_true  = symset_operator  [scope] 
							(COMMA p_false = symset_operator [scope])? 
				op.reset(new Conditional<TSet>(test, p_true, p_false));
				op.reset(new Conditional<TSet>(test, p_true));
			(p_true = symset_operator [scope])
			(test = bool_operator [scope]) {
			op.reset(new Conditional<TSet>(test, p_true));
// ----------------------------------------------------------------------------
// GetSymbol operator may be cas, m1, f, sg...
// WARNING! This rule can throw ParserException! Be careful!
// SYMBOL '[' position ']' : builds a GetSymbols function from the tag that
// the scope's tagset parses out of SYMBOL, and from the position operator.
// Throws ParserException when the tagset cannot parse the symbol.
symset_getsymbol
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<TSet> > op]
{
	Corpus2::Tag tag;
	boost::shared_ptr<Wccl::Function<Position> > position;
}
	: t: SYMBOL LBRACKET position = position_operator [scope] RBRACKET {
			try {
				tag = scope.tagset().parse_symbol(str_token_rem_grav(t));
			}
			catch(Corpus2::TagParseError &e) {
				// re-throw as the parser's own exception type
				throw(ParserException(e.info()));
			}
				
			op.reset(new Wccl::GetSymbols(tag, position));
		}
;

// ----------------------------------------------------------------------------
// Class operator.
// class[position] : builds a GetWordClass function for the parsed position.
symset_class 
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<TSet> > ret]
{
	boost::shared_ptr<Function<Position> > where;
}
	: "class" LBRACKET where = position_operator [scope] RBRACKET { 
		ret = boost::shared_ptr<Function<TSet> >(new GetWordClass(where));
	}
;

Paweł Kędzia's avatar
Paweł Kędzia committed
// ----------------------------------------------------------------------------
// Range operator: range(class, begin, end) or range({...}, begin, end)
Paweł Kędzia's avatar
Paweł Kędzia committed
// range(class, begin, end) or range({...}, begin, end)
// The first argument is either the keyword "class" or a symset literal; the
// other two are position operators. Builds a GetSymbolsInRange function.
symset_range
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<TSet> > ret]
{
	boost::shared_ptr<TSet> tset;
	boost::shared_ptr<Function<Position> > p1, p2;
}
	: "range" LPAREN
			(tset = symset_literal [scope.tagset()] | tag_class: "class") COMMA
			p1  = position_operator [scope] COMMA 
			p2  = position_operator [scope] 
		RPAREN {
			// tag_class is only set when the "class" keyword was matched
			if (tag_class) {
				ret.reset(new GetSymbolsInRange(Corpus2::Tag::all_pos_mask, p1, p2));
			}
			else {
		 		ret.reset(new GetSymbolsInRange(tset->get_value(), p1, p2));
			}
		}
;

// ----------------------------------------------------------------------------
// Catflt operator
// catflt(position, selector, mask) : builds a CatFilter function from a
// position operator and two symset operators, in that order.
symset_catflt
  [ParsingScope& scope]
  returns [boost::shared_ptr<Function<TSet> > ret]
{
  boost::shared_ptr<Function<Position> > pos;
  boost::shared_ptr<Function<TSet> > sel;
  boost::shared_ptr<Function<TSet> > msk;
}
  : "catflt" LPAREN 
			pos = position_operator [scope] COMMA
			sel = symset_operator   [scope] COMMA
			msk = symset_operator   [scope] 
  	RPAREN {
  	  ret = boost::shared_ptr<Function<TSet> >(new CatFilter(pos, sel, msk));
  	}
;

Paweł Kędzia's avatar
Paweł Kędzia committed
// ----------------------------------------------------------------------------
// Agrflt operator
// agrflt(lpos, rpos, attr, mask) : builds an AgrFilter function from two
// position operators, two symset operators and the scope's tagset.
symset_agrflt
  [ParsingScope& scope]
  returns [boost::shared_ptr<Function<TSet> > ret]
{
  boost::shared_ptr<Function<TSet> > attr, mask;
  boost::shared_ptr<Function<Position> > lpos, rpos;
}
  : "agrflt" LPAREN 
			lpos = position_operator [scope] COMMA
			rpos = position_operator [scope] COMMA
			attr = symset_operator   [scope] COMMA
			mask = symset_operator   [scope] 
  	RPAREN {
  	  ret.reset(new AgrFilter(lpos, rpos, attr, mask, scope.tagset()));
  	}
;

// ----------------------------------------------------------------------------
// Union operator
// union(s1, s2, ...) : builds a SetUnion<TSet> over the comma-separated
// symset operators.
symset_union
  [ParsingScope& scope]
  returns [boost::shared_ptr<Function<TSet> > ret]
{
  boost::shared_ptr< std::vector< boost::shared_ptr<Function<TSet> > > > operands;
}
  : "union" LPAREN operands = symset_operator_comma_sep [scope] RPAREN {
  	  ret = boost::shared_ptr<Function<TSet> >(new SetUnion<TSet>(operands));
  	}
;

// ----------------------------------------------------------------------------
// Intersection operator
// intersection(s1, s2, ...) : builds a SetIntersection<TSet> over the
// comma-separated symset operators.
symset_intersection
  [ParsingScope& scope]
  returns [boost::shared_ptr<Function<TSet> > ret]
{
  boost::shared_ptr< std::vector< boost::shared_ptr<Function<TSet> > > > operands;
}
  : "intersection" LPAREN operands = symset_operator_comma_sep [scope] RPAREN {
  	  ret = boost::shared_ptr<Function<TSet> >(
  	    new SetIntersection<TSet>(operands));
  	}
;
///////////////////////////////////////////////////////////////////////////////
// Position operator
// Returns boost::shared_ptr<Function<Position> >
///////////////////////////////////////////////////////////////////////////////
// A position expression: one of the basic position alternatives, optionally
// followed by a SIGNED_INT offset that turns it into a RelativePosition.
position_operator
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Position> > ret]
	:
	( ret = position_var_val     [scope.variables()]
	| ret = position_condition   [scope]
	| ret = position_first_token [scope]
	| ret = position_last_token  [scope]
	| LPAREN ret = position_operator [scope] RPAREN
	) 
	( // if there is SIGNED_INT after the position, it is actually a relative position
		i: SIGNED_INT {
			// wrap the position parsed so far together with the offset
			ret.reset(new RelativePosition(ret, token_ref_to_int(i)));
		}
	)?
;

// ----------------------------------------------------------------------------
// Wrapper from Function<Position> to Operator<Position>
// Parses a position operator and wraps it, together with the scope's
// variables, in an Operator<Position>.
functional_operator_position
	[ParsingScope& scope]
	returns [boost::shared_ptr<Operator<Position> > op]
{
	boost::shared_ptr<Function<Position> > fn;
}
	: fn = position_operator [scope] {
		op = boost::shared_ptr<Operator<Position> >(
			new Operator<Position>(fn, scope.variables()));
	}
;

// ----------------------------------------------------------------------------
// Wrapper for position variable and position value
// Either a position value (position_value) or a position variable
// (position_variable, looked up in vars); the result of the matched
// alternative is returned as a Function<Position>.
position_var_val
	[Variables& vars]
	returns [boost::shared_ptr<Function<Position> > ret]
	: ret = position_value
	| ret = position_variable [vars]
;

// ----------------------------------------------------------------------------
// Condition of the position value
// if (Bool, Position, Position)
// ? Position ? Bool : 0
// Conditional position, in two spellings:
//   if (Bool, Position)  or  if (Bool, Position, Position)
//   ? Position ? Bool
// Builds a Conditional<Position>; the two-argument constructor is used
// whenever no "false" branch was parsed.
position_condition
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Position> > op]
{
	boost::shared_ptr<Function<Bool> > test;
	boost::shared_ptr<Function<Position> > p_true, p_false;
}
	: "if" LPAREN test  = bool_operator [scope] COMMA 
							p_true  = position_operator [scope] 
							(COMMA p_false = position_operator [scope])? 
	RPAREN {
		// p_false stays null when the optional third argument is absent
		if (p_false) {
			op.reset(new Conditional<Position>(test, p_true, p_false));
		}
		else {
			op.reset(new Conditional<Position>(test, p_true));
		}
	}
	// NOTE(review): the second Q_MARK follows the documented short form
	// "? Position ? Bool"; confirm against the WCCL language reference.
	| Q_MARK 
			p_true = position_operator [scope]
		Q_MARK 
			test = bool_operator [scope] {
			op.reset(new Conditional<Position>(test, p_true));
		}
;

// ----------------------------------------------------------------------------
// Taking position of a first token in a match
// first(Match)
// first(Match) : builds a FirstToken function over the parsed match operator.
position_first_token [ParsingScope& scope]
	returns [boost::shared_ptr<Function<Position> > ret]
{
	boost::shared_ptr<Function<Match> > match_fn;
}
	: "first" LPAREN match_fn = match_operator [scope] RPAREN {
		ret = boost::shared_ptr<Function<Position> >(new FirstToken(match_fn));
	}
;

// ----------------------------------------------------------------------------
// Taking position of a last token in a match
// last(Match)
position_last_token [ParsingScope& scope]
	returns [boost::shared_ptr<Function<Position> > ret]
{
	// Match expression given as the argument of last(...).
	boost::shared_ptr<Function<Match> > match_fn;
}
	: "last" LPAREN
			match_fn = match_operator [scope]
		RPAREN
		{
			ret.reset(new LastToken(match_fn));
		}
;

///////////////////////////////////////////////////////////////////////////////
// String operator
// Returns boost::shared_ptr<Function<StrSet> >
///////////////////////////////////////////////////////////////////////////////
// Dispatches to the concrete strset sub-rules; the chosen sub-rule's result is
// returned unchanged.
// NOTE(review): strset_anninter is defined in this file but is not listed
// among the alternatives below -- confirm whether that is intentional.
strset_operator [ParsingScope& scope]
	returns [boost::shared_ptr<Function<StrSet> > ret]
	: ret = strset_orth      [scope] 
	| ret = strset_base      [scope]
	| ret = strset_prop      [scope]
	| ret = strset_lower     [scope] 
	| ret = strset_upper     [scope]
	| ret = strset_affix     [scope] 
	| ret = strset_var_val   [scope] 
	| ret = strset_condition [scope]
	| ret = strset_union     [scope]
	| ret = strset_intersection [scope]
	| ret = strset_lex       [scope]
	// A parenthesised strset expression yields the inner expression.
	| LPAREN ret = strset_operator [scope] RPAREN
;

// ----------------------------------------------------------------------------
// comma-separated strset operators
strset_operator_comma_sep
	[ParsingScope& scope]
	returns
		[boost::shared_ptr< std::vector<boost::shared_ptr<Function<StrSet> > > > ret_v]
{
	// The result vector is allocated up front, before any element is parsed.
	ret_v.reset(
		new std::vector<boost::shared_ptr<Function<StrSet> > >
	);
	boost::shared_ptr<Function<StrSet> > item;
}
	// At least one operator, then any number of COMMA-separated ones,
	// appended in source order.
	: item = strset_operator [scope] { ret_v->push_back(item); }
	  ( COMMA item = strset_operator [scope] { ret_v->push_back(item); } )*
;


// ----------------------------------------------------------------------------
// Wrapper from Function<StrSet> to Operator<StrSet>
functional_operator_strset
	[ParsingScope& scope]
	returns [boost::shared_ptr<Operator<StrSet> > op]
{
	// Function produced by the wrapped strset_operator rule.
	boost::shared_ptr<Function<StrSet> > fn;
}
	: fn = strset_operator [scope]
	{
		// Bind the parsed function to the variables of the current scope.
		op.reset(new Operator<StrSet>(fn, scope.variables()));
	}
;
// ----------------------------------------------------------------------------
// orth[Position]: builds a GetOrth over the given position expression.
strset_orth
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	boost::shared_ptr<Function<Position> > pos;
}
	: "orth" LBRACKET pos = position_operator [scope] RBRACKET { 
			ret.reset(new GetOrth(pos));
		}
;

// ----------------------------------------------------------------------------
// base[Position]: builds a GetLemmas over the given position expression.
strset_base
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	boost::shared_ptr<Function<Position> > pos;
}
	: "base" LBRACKET pos = position_operator [scope] RBRACKET { 
		ret.reset(new GetLemmas(pos));
	}
;

// ----------------------------------------------------------------------------
// Token-level property value operator.
strset_prop
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	boost::shared_ptr<Function<Position> > at;
	boost::shared_ptr<Function<StrSet> > prop_keys;
}
	// prop(Position, StrSet)
	: "prop" LPAREN
			at = position_operator [scope]
		COMMA
			prop_keys = strset_operator [scope]
		RPAREN
		{
			ret.reset(new PropVal(at, prop_keys));
		}
;

// ----------------------------------------------------------------------------
// lower(StrSet): wraps the operand in a ToLower function.
strset_lower
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	boost::shared_ptr<Function<StrSet> > o_ret;
}
	: "lower" LPAREN o_ret = strset_operator [scope] RPAREN {
		ret.reset(new ToLower(o_ret));
	}
;

// ----------------------------------------------------------------------------
// upper(StrSet): wraps the operand in a ToUpper function.
strset_upper
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	boost::shared_ptr<Function<StrSet> > o_ret;
}
	: "upper" LPAREN o_ret = strset_operator [scope] RPAREN {
		ret.reset(new ToUpper(o_ret));
	}
;

// ----------------------------------------------------------------------------
// Affix operator: takes a strset expression and an integer parsed by `number`
// and builds an Affix function from them.
strset_affix
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	int offset = 0;
	boost::shared_ptr<Function<StrSet> > o_ret;
}
	// NOTE(review): the opening `"affix" LPAREN` was restored by analogy with
	// the sibling operators -- confirm the keyword against version control.
	: "affix" LPAREN
			o_ret = strset_operator [scope] COMMA offset = number 
		RPAREN {
			ret.reset(new Affix(o_ret, offset));
		}
;

// ----------------------------------------------------------------------------
Adam Wardynski's avatar
Adam Wardynski committed
// A wrapper for strset value and strset variable
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<StrSet> > op]
	| op = strset_variable [scope.variables()]
// ----------------------------------------------------------------------------
// Condition of the strset value
// if (Bool, StrSet, StrSet)
// ? StrSet ? Bool : []
strset_condition
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<StrSet> > op]
{
	boost::shared_ptr<Function<Bool> > test;
	// p_false stays null when the optional third argument is absent.
	boost::shared_ptr<Function<StrSet> > p_true, p_false;
}
	// Long form: if(test, then [, else])
	: "if" LPAREN test  = bool_operator [scope] COMMA 
							p_true  = strset_operator   [scope] 
							(COMMA p_false = strset_operator [scope])? 
	RPAREN {
		if (p_false) {
			op.reset(new Conditional<StrSet>(test, p_true, p_false));
		}
		else {
			op.reset(new Conditional<StrSet>(test, p_true));
		}
	}
	// Short form introduced by Q_MARK; it never carries an else-branch.
	| Q_MARK 
			p_true = strset_operator [scope]
			test = bool_operator [scope] {
			op.reset(new Conditional<StrSet>(test, p_true));
		}
;

// ----------------------------------------------------------------------------
// Union operator, strset
strset_union
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	// Operands of the union, in source order.
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<StrSet> > > > operands;
}
	: "union" LPAREN operands = strset_operator_comma_sep [scope] RPAREN
	{
		ret.reset(new SetUnion<StrSet>(operands));
	}
;

// ----------------------------------------------------------------------------
// Intersection operator, strset
strset_intersection
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<StrSet> > ret]
{
	// Operands of the intersection, in source order.
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<StrSet> > > > operands;
}
	: "intersection" LPAREN operands = strset_operator_comma_sep [scope] RPAREN
	{
		ret.reset(new SetIntersection<StrSet>(operands));
	}
;

// anninter(Position, StrSet): builds an AnnInter from both arguments.
strset_anninter
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<StrSet> > op]
{
	boost::shared_ptr<Function<Position> > at;
	boost::shared_ptr<Function<StrSet> > names;
}
	: "anninter" LPAREN
			at = position_operator [scope] COMMA
			names = strset_operator [scope]
		RPAREN {
			op.reset(new AnnInter(at, names));
		}
;

// lex(StrSet, "lexicon name"): translates a strset through a lexicon that is
// looked up by name in the lexicons of the current scope.
// A WcclError raised while building the operator is rethrown as a
// ParserException carrying the same message.
strset_lex
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<StrSet> > op]
{
	boost::shared_ptr<Function<StrSet> > s;
}
	: "lex" LPAREN s = strset_operator [scope] COMMA name : STRING RPAREN 
		{
			op.reset(new LexTranslator(
				s,
				scope.lexicons().get_ptr(token_ref_to_std_string(name))));
		}
		// Catch by const reference: catching by value copies the exception and
		// slices any class derived from WcclError.
		exception catch [const WcclError& ex] {
			throw ParserException(ex.what());
		}
;
///////////////////////////////////////////////////////////////////////////////
// Bool operator 
// Returns boost::shared_ptr<Function<Bool> >
///////////////////////////////////////////////////////////////////////////////
// Dispatches to the concrete bool sub-rules; the chosen sub-rule's result is
// returned unchanged.
bool_operator
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > ret]
	: ret = bool_and        [scope]  
	| ret = bool_or         [scope]  
	| ret = bool_nor        [scope]  
	| ret = bool_var_val    [scope]	
	| ret = bool_regex      [scope]
	| ret = bool_inout      [scope]
	| ret = bool_condition  [scope]
	| ret = setvar_operator [scope]
	| ret = empty_operator  [scope]
	| ret = equal_operator  [scope]
	| ret = in_operator     [scope]
	| ret = inter_operator  [scope]
	| ret = bool_iteration  [scope]
	| ret = bool_agreement  [scope]
	| ret = bool_phrase     [scope]
	// annotation
	| ret = bool_ann        [scope]
	| ret = bool_annsub     [scope]
	// singular/amb
	| ret = bool_ambiguous  [scope]
	| ret = bool_singular   [scope]
	| ret = debug_print_operator [scope]
	// A parenthesised bool expression yields the inner expression.
	| LPAREN ret = bool_operator [scope] RPAREN
;

// ----------------------------------------------------------------------------
// wrapper from Function<Bool> to Operator<Bool>
functional_operator_bool
	[ParsingScope& scope]
	returns [boost::shared_ptr<Operator<Bool> > op]
{
	// Function produced by the wrapped bool_operator rule.
	boost::shared_ptr<Function<Bool> > fn;
}
	: fn = bool_operator [scope]
	{
		// Bind the parsed function to the variables of the current scope.
		op.reset(new Operator<Bool>(fn, scope.variables()));
	}
;

// ----------------------------------------------------------------------------
// comma-separated predicates (bool operators)
bool_operator_comma_sep
	[ParsingScope& scope]
	returns 
		[boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v]
{
	boost::shared_ptr<Function<Bool> > pred;
	// The result vector is allocated up front, before any element is parsed.
	ret_v.reset(
		new std::vector<boost::shared_ptr<Function<Bool> > >
	);
}
	// At least one predicate, then any number of COMMA-separated ones,
	// appended in source order.
	: pred = bool_operator [scope] { 
		ret_v->push_back(pred);
	}
	(
		COMMA pred = bool_operator [scope] {
			ret_v->push_back(pred);
		}
	)*
;

// ----------------------------------------------------------------------------
// and(Bool, ...): builds an And over all comma-separated predicates.
bool_and
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v;
}
	: "and" LPAREN ret_v = bool_operator_comma_sep [scope] RPAREN {
			op.reset(new And(ret_v));
		}
;

// ----------------------------------------------------------------------------
// or(Bool, ...): builds an Or over all comma-separated predicates.
bool_or
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v;
}
	: "or" LPAREN ret_v = bool_operator_comma_sep [scope] RPAREN {
			op.reset(new Or(ret_v));
		}
;

// ----------------------------------------------------------------------------
// not(Bool, ...): builds a Nor over all comma-separated predicates.
bool_nor
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v;
}
	: "not" LPAREN ret_v = bool_operator_comma_sep [scope] RPAREN {
			op.reset(new Nor(ret_v));
		}
;

// ----------------------------------------------------------------------------
// Wrapper for bool value and bool variable
// NOTE(review): this rule looks truncated -- its alternative list starts with
// `|` and there is no terminating `;`. A leading alternative (presumably the
// bool literal) appears to be missing; restore it from version control.
bool_var_val
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
	| op = bool_variable [scope.variables()]
// ----------------------------------------------------------------------------
// regex(StrSet, "pattern"): builds a Regex over the strset expression, with
// the STRING token converted by token_ref_to_ustring as the pattern.
bool_regex
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<StrSet> > expr;
}
	: "regex" 
		LPAREN 
			expr = strset_operator [scope] COMMA reg: STRING 
		RPAREN {
			op.reset(new Regex(expr, token_ref_to_ustring(reg)));
		}
;

// ----------------------------------------------------------------------------
// Input/output operator
// inside(Position) / outside(Position): each keyword wraps the position
// expression in the correspondingly named predicate.
bool_inout
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<Position> > ret_pos;
}
	: "inside"  LPAREN ret_pos = position_operator [scope] RPAREN {
		op.reset(new IsInside(ret_pos));
	}
	| "outside" LPAREN ret_pos = position_operator [scope] RPAREN {
		op.reset(new IsOutside(ret_pos));
	}
;

// ----------------------------------------------------------------------------
// if (Bool, Bool, Bool)
// ? Bool ? Bool : False
bool_condition
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	// when_false stays null when the optional third argument is absent.
	boost::shared_ptr<Function<Bool> > cond, when_true, when_false;
}
	// Long form: if(cond, then [, else])
	: "if" LPAREN
			cond = bool_operator [scope] COMMA
			when_true = bool_operator [scope]
			( COMMA when_false = bool_operator [scope] )?
		RPAREN
		{
			if (!when_false) {
				op.reset(new Conditional<Bool>(cond, when_true));
			} else {
				op.reset(new Conditional<Bool>(cond, when_true, when_false));
			}
		}
	// Short form introduced by Q_MARK; it never carries an else-branch.
	| Q_MARK
			when_true = bool_operator [scope]
			cond = bool_operator [scope]
		{
			op.reset(new Conditional<Bool>(cond, when_true));
		}
;

// ----------------------------------------------------------------------------
// Equal operator
// Chooses the operand type (Position, TSet, StrSet, then Bool as fallback)
// with syntactic predicates and builds the matching Equals<T>.
// NOTE(review): this rule is visibly damaged -- the init-action braces, the
// opening keyword/LPAREN, the symset and strset actions, the alternative
// separators and the closing RPAREN/`;` are all absent. Restore it from
// version control rather than from this text.
equal_operator
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<TSet> > t1, t2;
	boost::shared_ptr<Function<Bool> > b1, b2;
	boost::shared_ptr<Function<StrSet>  > s1, s2;
	boost::shared_ptr<Function<Position> > p1, p2;
		(position_operator [scope]) =>
			p1 = position_operator [scope] COMMA 
			p2 = position_operator [scope] {
				op.reset(new Equals<Position>(p1, p2));
		(symset_operator [scope]) =>
			t1 = symset_operator [scope] COMMA  
			t2 = symset_operator [scope] {
		(strset_operator [scope]) =>
			s1 = strset_operator [scope] COMMA  
			s2 = strset_operator [scope] {
			b1 = bool_operator [scope] COMMA
			b2 = bool_operator [scope] {
				op.reset(new Equals<Bool>(b1, b2));
// ----------------------------------------------------------------------------
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<TSet> > t1, t2;
	boost::shared_ptr<Function<StrSet> > s1, s2;
		(symset_operator [scope]) =>
			t1 = symset_operator [scope] COMMA 
			t2 = symset_operator [scope] {
				op.reset(new IsSubsetOf<TSet>(t1, t2));
			s1 = strset_operator [scope] COMMA
			s2 = strset_operator [scope] {
				op.reset(new IsSubsetOf<StrSet>(s1, s2));
;
// ----------------------------------------------------------------------------
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<TSet> > t1, t2;
	boost::shared_ptr<Function<StrSet> > s1, s2;
		(symset_operator [scope]) =>
			t1 = symset_operator [scope] COMMA  
			t2 = symset_operator [scope]  {
				op.reset(new Intersects<TSet>(t1, t2));
			s1 = strset_operator [scope] COMMA  
			s2 = strset_operator [scope]  {
				op.reset(new Intersects<StrSet>(s1, s2));
// ----------------------------------------------------------------------------
// Annotation operator.
// ann(Match [, Match], "channel"): builds an Ann from one or two match
// expressions and the channel name.
bool_ann
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr< Function<Match> > match_from;
	// match_to stays null when only one match expression is given.
	boost::shared_ptr< Function<Match> > match_to;
}
	: "ann" LPAREN
			match_from = match_operator [scope] COMMA
			(match_to  = match_operator [scope] COMMA)?
			// NOTE(review): the `channel` token line was restored from its
			// uses in the action below -- confirm against version control.
			channel : STRING
		RPAREN {
			if (match_to) {
				op.reset(new Ann(match_from, match_to, token_ref_to_std_string(channel)));
			} else {
				op.reset(new Ann(match_from, token_ref_to_std_string(channel)));
			}
		}
;

// ----------------------------------------------------------------------------
// Annotation-sub operator.
// annsub(Match [, Match], "channel"): builds an AnnSub from one or two match
// expressions and the channel name.
bool_annsub
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr< Function<Match> > match_from;
	// match_to stays null when only one match expression is given.
	boost::shared_ptr< Function<Match> > match_to;
}
	: "annsub" LPAREN
		match_from = match_operator [scope] COMMA
		(match_to = match_operator [scope] COMMA)?
		// NOTE(review): the `channel` token line was restored from its uses
		// in the action below -- confirm against version control.
		channel : STRING
		RPAREN
		{
			if (match_to) {
				op.reset(new AnnSub(match_from, match_to, token_ref_to_std_string(channel)));
			} else {
				op.reset(new AnnSub(match_from, token_ref_to_std_string(channel)));
			}
		}
;

// ----------------------------------------------------------------------------
// Ambiguity checking operator
bool_ambiguous
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	boost::shared_ptr<Function<Position> > pos_arg;
	boost::shared_ptr<Function<TSet> > sym_arg;
	boost::shared_ptr<Function<StrSet> > str_arg;
}
	: "ambiguous" LPAREN
	(
		// The syntactic predicates select the argument type by trial parse,
		// tried in this order: position, symset, strset.
		(position_operator [scope]) =>
			( pos_arg = position_operator [scope]
				{ ret.reset(new IsAmbiguous<Position>(pos_arg)); } )
	|	(symset_operator [scope]) =>
			( sym_arg = symset_operator [scope]
				{ ret.reset(new IsAmbiguous<TSet>(sym_arg)); } )
	|	(strset_operator [scope]) =>
			( str_arg = strset_operator [scope]
				{ ret.reset(new IsAmbiguous<StrSet>(str_arg)); } )
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// Tag singularity checking operator
bool_singular
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	boost::shared_ptr< Function<TSet> > sym_arg;
}
	// singular(SymSet); the predicate also receives the tagset of the scope.
	: "singular" LPAREN sym_arg = symset_operator [scope] RPAREN
	{
		ret.reset(new IsSingular(sym_arg, scope.tagset()));
	}
;

// ----------------------------------------------------------------------------
// Debug printing:
// debug(expr): accepts an expression of any type and wraps it in DebugPrint.
// Syntactic predicates try position, symset, strset and bool in that order;
// a match expression is the fallback.
// NOTE(review): the alternative separators and all but the last action were
// restored from the surviving match alternative -- confirm against version
// control.
debug_print_operator
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	boost::shared_ptr<FunctionBase> v;
}
	: "debug" LPAREN
	(
		(position_operator [scope]) =>
		(
			v = position_operator [scope] {
				ret.reset(new DebugPrint(v));
			}
		)
	|
		(symset_operator [scope]) =>
		(
			v = symset_operator [scope] {
				ret.reset(new DebugPrint(v));
			}
		)
	|
		(strset_operator [scope]) =>
		(
			v = strset_operator [scope] {
				ret.reset(new DebugPrint(v));
			}
		)
	|
		(bool_operator [scope]) =>
		(
			v = bool_operator [scope] {
				ret.reset(new DebugPrint(v));
			}
		)
	|
		(
			v = match_operator [scope] {
				ret.reset(new DebugPrint(v));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// Iterations:
// Iteration predicates. Every form takes position expression(s), an accessor
// to a position variable and a bool expression; "atleast" and "skip" take one
// extra integer parsed by `number`.
bool_iteration
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	int min_match = 0;
	int offset = 0;
	boost::shared_ptr<Function<Bool> > expr;
	boost::shared_ptr<Function<Position> > lpos, rpos;
	// Dereferenced in every action below: the constructors take the accessor
	// itself, not the pointer.
	boost::shared_ptr<VariableAccessor<Position> > pacc;
}
	: "only" LPAREN 
			lpos = position_operator [scope] COMMA 
			rpos = position_operator [scope] COMMA
			pacc = position_variable_acc [scope.variables()]     COMMA
			expr = bool_operator     [scope]
		RPAREN {
			ret.reset(new Only(lpos, rpos, *pacc, expr));
		}

	| "atleast" LPAREN
			lpos = position_operator [scope] COMMA 
			rpos = position_operator [scope] COMMA
			pacc = position_variable_acc [scope.variables()]     COMMA
			expr = bool_operator     [scope] COMMA
			min_match = number
		RPAREN {
			ret.reset(new AtLeast(lpos, rpos, *pacc, expr, min_match));
		}
	| "llook" LPAREN //note inverted rpos/lpos order
			rpos = position_operator [scope] COMMA 
			lpos = position_operator [scope] COMMA
			pacc = position_variable_acc [scope.variables()]     COMMA
			expr = bool_operator     [scope] 
		RPAREN {
			ret.reset(new LeftLook(lpos, rpos, *pacc, expr));
		}
	| "rlook" LPAREN
			lpos = position_operator [scope] COMMA 
			rpos = position_operator [scope] COMMA
			pacc = position_variable_acc [scope.variables()]     COMMA
			expr = bool_operator     [scope] 
		RPAREN {
			ret.reset(new RightLook(lpos, rpos, *pacc, expr));
		}
	| "skip" LPAREN
			lpos = position_operator [scope] COMMA 
			pacc = position_variable_acc [scope.variables()]     COMMA
			expr = bool_operator     [scope] COMMA
			offset = number
		RPAREN {
			ret.reset(new Skip(lpos, *pacc, expr, offset));
		}
;

// ----------------------------------------------------------------------------
// Agreement operator: agr, agrpp, wagr
// Agreement predicates over two positions and a symset expression; each
// predicate also receives the tagset of the scope.
// NOTE(review): the "agrpp" and "wagr" openers and all RPAREN/brace lines
// were restored from the rule's header comment and the surviving "agr"
// opener -- confirm the keyword-to-class pairing against version control.
bool_agreement
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	boost::shared_ptr<Function<TSet> > expr;
	boost::shared_ptr<Function<Position> > lpos, rpos;
}
	: "agr"   LPAREN 
			lpos = position_operator [scope] COMMA 
			rpos = position_operator [scope] COMMA 
			expr = symset_operator [scope]
		RPAREN {
			ret.reset(new StrongAgreement(lpos, rpos, expr, scope.tagset()));
		}
	| "agrpp" LPAREN 
			lpos = position_operator [scope] COMMA 
			rpos = position_operator [scope] COMMA 
			expr = symset_operator [scope]
		RPAREN {
			ret.reset(new PointAgreement(lpos, rpos, expr, scope.tagset()));
		}
	| "wagr"  LPAREN 
			lpos = position_operator [scope] COMMA 
			rpos = position_operator [scope] COMMA 
			expr = symset_operator [scope]
		RPAREN {
			ret.reset(new WeakAgreement(lpos, rpos, expr, scope.tagset()));
		}
;

// ----------------------------------------------------------------------------
// Parse operator on L1 level
// Dispatches to the annotation and iteration phrase sub-rules.
bool_phrase
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > ret]
	: ret = bool_phrase_annotation [scope]
	| ret = bool_phrase_iteration  [scope]
;

// ----------------------------------------------------------------------------
// Annotation operator: phrase, phrase_beg, phrase_end, phrase_whole, phrase_pp
// Token-in-annotation predicates. The first four forms take a position and a
// channel-name STRING and build an AnnToken with the matching AnnToken::O_*
// selector.
// NOTE(review): "isannwhole" and "isannpp" are parsed but their actions are
// still TODO, so `ret` is left unset (null) for them.
bool_phrase_annotation
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	boost::shared_ptr<Function<Position> > lpos, rpos;
}
	: "isannpart" LPAREN 
			lpos = position_operator [scope] COMMA n1: STRING 
		RPAREN {
			ret.reset(new AnnToken(lpos, token_ref_to_std_string(n1), AnnToken::O_ANY));
		}
	| "isannbeg" LPAREN 
			lpos = position_operator [scope] COMMA n2: STRING 
		RPAREN {
			ret.reset(new AnnToken(lpos, token_ref_to_std_string(n2), AnnToken::O_FIRST));
		}
	| "isannend" LPAREN 
			lpos = position_operator [scope] COMMA n3: STRING 
		RPAREN {
			ret.reset(new AnnToken(lpos, token_ref_to_std_string(n3), AnnToken::O_LAST));
		}
	| "isannhead" LPAREN 
			lpos = position_operator [scope] COMMA n4: STRING 
		RPAREN {
			ret.reset(new AnnToken(lpos, token_ref_to_std_string(n4), AnnToken::O_HEAD));
		}
	| "isannwhole" LPAREN 
			lpos = position_operator [scope] COMMA 
			rpos = position_operator [scope] COMMA n5: STRING 
		RPAREN {
			// TODO
		}
	| "isannpp" LPAREN 
			lpos = position_operator [scope] COMMA 
			rpos = position_operator [scope] COMMA n6: STRING 
		RPAREN {
			// TODO
		}
;

// ----------------------------------------------------------------------------
// Phrase iteration operator: lphrase, rphrase
// Annotation iteration forms: a position expression, a position variable and
// a channel-name STRING.
// NOTE(review): both actions are still TODO, so `ret` is left unset (null)
// for either keyword.
bool_phrase_iteration
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > ret]
{
	boost::shared_ptr<Function<Position> > position;
	boost::shared_ptr<VarGetter<Position> > var_position;
}
	: "lanniter" LPAREN
			position     = position_operator [scope] COMMA
			var_position = position_variable [scope.variables()]         COMMA
			n1: STRING
		RPAREN {
			// TODO
		}
	| "ranniter" LPAREN 
			position     = position_operator [scope] COMMA
			var_position = position_variable [scope.variables()]         COMMA
			n2: STRING
		RPAREN {
			// TODO
		}
;


// ----------------------------------------------------------------------------
// Setvar operator
// Returns boost::shared_ptr<Function<Bool> >
// ----------------------------------------------------------------------------
// Matches the "setvar" keyword and the surrounding parentheses; the operands
// are parsed by one of the four typed *_setvar sub-rules, whose result is
// returned unchanged.
setvar_operator 
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > ret]
	: "setvar" LPAREN
	  (
		  ret = position_setvar [scope]
		| ret = bool_setvar     [scope]
		| ret = strset_setvar   [scope]
		| ret = symset_setvar   [scope]
	  )
	  RPAREN
;

// ----------------------------------------------------------------------------
// Setvar for position
// Parses a position variable accessor followed by a position expression and
// builds a VarSetter<Position> from them.
// NOTE(review): this rule is visibly damaged -- the init-action braces, the
// action's closing brace and the terminating `;` are absent, and a separator
// token between the two operands may be missing too. Restore it from version
// control.
position_setvar 
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<Position> > ret_op;
	boost::shared_ptr<VariableAccessor<Position> > ret_acc;
	:	ret_acc = position_variable_acc [scope.variables()]
		ret_op  = position_operator [scope] {
			op.reset(new VarSetter<Position>(*ret_acc, ret_op));
// ----------------------------------------------------------------------------
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<Bool> > ret_op;
	boost::shared_ptr<VariableAccessor<Bool> > ret_acc;
	:	ret_acc = bool_variable_acc [scope.variables()]
		ret_op  = bool_operator [scope] {
			op.reset(new VarSetter<Bool>(*ret_acc, ret_op));
// ----------------------------------------------------------------------------
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<StrSet> > ret_op;
	boost::shared_ptr<VariableAccessor<StrSet> > ret_acc;
	: ret_acc = strset_variable_acc [scope.variables()]
		ret_op  = strset_operator [scope] {
			op.reset(new VarSetter<StrSet>(*ret_acc, ret_op));
// ----------------------------------------------------------------------------
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
	boost::shared_ptr<Function<TSet> > ret_op;
	boost::shared_ptr<VariableAccessor<TSet> > ret_acc;
	: ret_acc = symset_variable_acc [scope.variables()]
	  ret_op  = symset_operator [scope] {
			op.reset(new VarSetter<TSet>(*ret_acc, ret_op));
// ----------------------------------------------------------------------------
// empty() operator
// Returns boost::shared_ptr<Function<Bool> > 
//----------------------------------------------------------------------------
// empty(expr): syntactic predicates try a match argument first, then a
// symset; a strset is the fallback.
empty_operator
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
	: "empty" LPAREN
		(
			  (match_empty[scope]) => 
					op = match_empty  [scope]
			| (symset_empty [scope]) => 
					op = symset_empty [scope]
			| op = strset_empty [scope]
		)
		RPAREN
;

// Earlier predicate-free version, kept commented out.
/*
empty_operator
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
	: "empty" LPAREN
	(
		  op = match_empty  [scope]
		| op = symset_empty [scope]
		| op = strset_empty [scope]
	)
	RPAREN
;
*/

//----------------------------------------------------------------------------
// match empty() operator
// Returns boost::shared_ptr<Function<Bool> > 
match_empty
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	// Match expression to be tested for emptiness.
	boost::shared_ptr<Function<Match> > operand;
}
	: operand = match_operator [scope]
	{
		op.reset(new IsEmpty<Match>(operand));
	}
;

//----------------------------------------------------------------------------
// SymSet empty() operator
// Returns boost::shared_ptr<Function<Bool> > 
symset_empty
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	// Symset expression to be tested for emptiness.
	boost::shared_ptr<Function<TSet> > operand;
}
	: operand = symset_operator [scope]
	{
		op.reset(new IsEmpty<TSet>(operand));
	}
;

//----------------------------------------------------------------------------
// Strset empty() operator
// Returns boost::shared_ptr<Function<Bool> >
strset_empty
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Bool> > op]
{
	boost::shared_ptr<Function<StrSet> > arg;
}
	: arg = strset_operator [scope] {
		op.reset(new IsEmpty<StrSet>(arg));
	}
;

///////////////////////////////////////////////////////////////////////////////
// Match functional operators,
// which return boost::shared_ptr<Function<Match> >
///////////////////////////////////////////////////////////////////////////////

// ----------------------------------------------------------------------------
// A wrapper for match variable and match value.
match_var_val [ParsingScope& scope]
	returns [boost::shared_ptr<Function<Match> > ret]
	// Only the variable alternative receives the scope's variables.
	: ret = match_vector_variable [scope.variables()]
	| ret = match_value_const
;

///////////////////////////////////////////////////////////////////////////////
// Match operators.
// Returns boost::shared_ptr<Function<Match> >
///////////////////////////////////////////////////////////////////////////////
// Parses a match expression (variable/value, the implicit "_M" match, or a
// parenthesised match) followed by any number of `:N` submatch references.
// NOTE(review): this rule is visibly damaged -- the `returns` clause, the
// leading `:`, the closing braces of the "_M" action(s), the `)` closing the
// alternative group and possibly a whole alternative between the two
// identical "_M" lines are absent. Only stray non-grammar text was removed
// here; restore the rest from version control.
match_operator
  [ParsingScope& scope]
	( ret = match_var_val [scope]
	| {LA(1)==LITERAL_M || LA(1)==COLON}? ("M")? {
			ret.reset(new VarGetter<Match>(scope.variables().create_accessor<Match>("_M")));
			ret.reset(new VarGetter<Match>(scope.variables().create_accessor<Match>("_M")));
	| LPAREN ret = match_operator [scope] RPAREN
	( // if there's a colon after the match, we have a submatch reference
		COLON i: UNSIGNED_INT { ret.reset(new Submatch(ret, token_ref_to_int(i))); }
	)*
;

// ----------------------------------------------------------------------------
// Wrapper from Function<Match> to Operator<Match>
functional_operator_match
	[ParsingScope& scope]
	returns [boost::shared_ptr<Operator<Match> > op]
{
	// Function produced by the wrapped match_operator rule.
	boost::shared_ptr<Function<Match> > fn;
}
	: fn = match_operator [scope]
	{
		// Bind the parsed function to the variables of the current scope.
		op.reset(new Operator<Match>(fn, scope.variables()));
	}
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// WCCL FILE PARSING RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// One or more consecutive import statements, each applied to wccl_file.
imports_section [WcclFile& wccl_file]
	: (import [wccl_file])+
;

// import("file path", "lexicon name"): parses the lexicon file and registers
// the result in wccl_file.
import [WcclFile& wccl_file]
	: "import" LPAREN
			file_path : STRING COMMA
			lexicon_name : STRING
		RPAREN
		{
			// parse_lexicon takes the lexicon name first and the path second,
			// the reverse of the order in which they are written in the file.
			wccl_file.import_lexicon(
				LexiconParser::parse_lexicon(
					token_ref_to_std_string(lexicon_name),
					token_ref_to_std_string(file_path)
				)
			);
		}
;

// A single section of a WCCL file: an operator section of any type, the
// tag-rules section or the match-rules section. Each sub-rule stores its
// result in wccl_file itself.
wccl_file_section [WcclFile& wccl_file]
	: any_operator_section [wccl_file]
	| tag_rules_section [wccl_file]
	| match_rules_section [wccl_file]
;

tag_rules_section [WcclFile& wccl_file]
{
	boost::shared_ptr<TagRuleSequence> parsed_rules;
}
	: parsed_rules = parse_tag_rule_sequence [wccl_file.tagset()]
	{
		// A file may carry at most one tag_rules section; the check runs
		// after the whole sequence has been parsed.
		if (!wccl_file.has_tag_rules()) {
			wccl_file.set_tag_rules(parsed_rules);
		} else {
			throw ParserException("Only one tag_rules section allowed in a WCCL file.");
		}
	}
;

// match_rules( rule ; rule ; ... ): parses one or more match rules and stores
// them in wccl_file as a single sequence.
match_rules_section [WcclFile& wccl_file]
{
	ParsingScope scope(wccl_file);
	boost::shared_ptr<Matching::MatchRule> match_rule;
	boost::shared_ptr<Matching::MatchRuleSequence> rule_seq = boost::make_shared<Matching::MatchRuleSequence>();
}
	: "match_rules" {
		// Rejected right after the keyword, before any rule is parsed.
		if (wccl_file.has_match_rules()) {
			throw ParserException("Only one match_rules section allowed in a WCCL file.");
		}
	}
	LPAREN
		match_rule = match_rule_operator [scope] {
			rule_seq->push_back(*match_rule);
			// The scope's variables are reset after every rule.
			scope.reset_variables();
		}
		(
			SEMI match_rule = match_rule_operator [scope] {
				rule_seq->push_back(*match_rule);
				scope.reset_variables();
			}
		)* 
	RPAREN { wccl_file.set_match_rules(rule_seq); }
;

// Parses one named operator section of any type and adds it to wccl_file.
// Untyped sections go through add_untyped_section, typed ones through
// add_section.
any_operator_section
	[WcclFile& wccl_file]
{
	boost::shared_ptr<UntypedOpSequence> untyped_seq;
	boost::shared_ptr<OpSequence<Bool> > bool_seq;
	boost::shared_ptr<OpSequence<TSet> > symset_seq;
	boost::shared_ptr<OpSequence<StrSet> > strset_seq;
	boost::shared_ptr<OpSequence<Position> > pos_seq;
	boost::shared_ptr<OpSequence<Match> > m_seq;
}
	: untyped_seq = untyped_operator_sequence [wccl_file] {
			wccl_file.add_untyped_section(untyped_seq);
		}
	| bool_seq = bool_operator_sequence [wccl_file] {
			wccl_file.add_section(bool_seq);
		}
	| symset_seq = symset_operator_sequence [wccl_file] {
			wccl_file.add_section(symset_seq);
		}
	| strset_seq = strset_operator_sequence [wccl_file] {
			wccl_file.add_section(strset_seq);
		}
	| pos_seq = position_operator_sequence [wccl_file] {
			wccl_file.add_section(pos_seq);
		}
	| m_seq = match_operator_sequence [wccl_file] {
			wccl_file.add_section(m_seq);
		}
;

// Named section of bool operators: PREFIX "name" ( op ; op ; ... ).
// The scope's variables are reset after every operator.
bool_operator_sequence
	[const WcclFile& wccl_file]
	returns [boost::shared_ptr<OpSequence<Bool> > seq]
{
	ParsingScope scope(wccl_file);
	boost::shared_ptr<Operator<Bool> > op;
}
	: BOOL_SECTION_PREFIX name: STRING {
			seq.reset(new OpSequence<Bool>(token_ref_to_std_string(name)));
		}
		LPAREN
			op = functional_operator_bool [scope] { seq->append(op); scope.reset_variables(); }
			(SEMI op = functional_operator_bool [scope] { seq->append(op); scope.reset_variables(); })*
		RPAREN
;

// Named section of symset operators: PREFIX "name" ( op ; op ; ... ).
// The scope's variables are reset after every operator.
symset_operator_sequence
	[const WcclFile& wccl_file]
	returns [boost::shared_ptr<OpSequence<TSet> > seq]
{
	ParsingScope scope(wccl_file);
	boost::shared_ptr<Operator<TSet> > op;
}
	: TST_SECTION_PREFIX name: STRING {
			seq.reset(new OpSequence<TSet>(token_ref_to_std_string(name)));
		}
		LPAREN
			op = functional_operator_symset [scope] { seq->append(op); scope.reset_variables(); }
			(SEMI op = functional_operator_symset [scope] { seq->append(op); scope.reset_variables(); })*
		RPAREN
;

// Named section of strset operators: PREFIX "name" ( op ; op ; ... ).
// The scope's variables are reset after every operator.
strset_operator_sequence
	[const WcclFile& wccl_file]
	returns [boost::shared_ptr<OpSequence<StrSet> > seq]
{
	ParsingScope scope(wccl_file);
	boost::shared_ptr<Operator<StrSet> > op;
}
	: STR_SECTION_PREFIX name: STRING {
			seq.reset(new OpSequence<StrSet>(token_ref_to_std_string(name)));
		}
		LPAREN
			op = functional_operator_strset [scope] { seq->append(op); scope.reset_variables(); }
			(SEMI op = functional_operator_strset [scope] { seq->append(op); scope.reset_variables(); })*
		RPAREN
;

// Named section of position operators: PREFIX "name" ( op ; op ; ... ).
// The scope's variables are reset after every operator.
position_operator_sequence
	[const WcclFile& wccl_file]
	returns [boost::shared_ptr<OpSequence<Position> > seq]
{
	ParsingScope scope(wccl_file);
	boost::shared_ptr<Operator<Position> > op;
}
	: POS_SECTION_PREFIX name: STRING {
			seq.reset(new OpSequence<Position>(token_ref_to_std_string(name)));
		}
		LPAREN
			op = functional_operator_position [scope] { seq->append(op); scope.reset_variables(); }
			(SEMI op = functional_operator_position [scope] { seq->append(op); scope.reset_variables(); })*
		RPAREN
;

// Named section of operators of any type: AT_MARK "name" ( op ; op ; ... ).
// The scope's variables are reset after every operator.
untyped_operator_sequence
	[const WcclFile& wccl_file]
	returns [boost::shared_ptr<UntypedOpSequence> seq]
{
	ParsingScope scope(wccl_file);
	boost::shared_ptr<FunctionalOperator> op;
}
	: AT_MARK name: STRING { 
			seq.reset(new UntypedOpSequence(token_ref_to_std_string(name)));
		}
		LPAREN
			op = functional_operator_any [scope] { seq->append(op); scope.reset_variables(); }
			(SEMI op = functional_operator_any [scope] { seq->append(op); scope.reset_variables(); })*
		RPAREN
;

// Named section of match operators: PREFIX "name" ( op ; op ; ... ).
// The scope's variables are reset after every operator.
match_operator_sequence
	[const WcclFile& wccl_file]
	returns [boost::shared_ptr<OpSequence<Match> > seq]
{
	ParsingScope scope(wccl_file);
	boost::shared_ptr<Operator<Match> > op;
}
	: MATCH_SECTION_PREFIX name: STRING {
			seq.reset(new OpSequence<Match>(token_ref_to_std_string(name)));
		}
		LPAREN
			op = functional_operator_match [scope] { seq->append(op); scope.reset_variables(); }
			(SEMI op = functional_operator_match [scope] { seq->append(op); scope.reset_variables();  })*
		RPAREN
;

// Parses an operator of any type. Syntactic predicates try position, symset,
// strset and match in that order; bool is the fallback.
functional_operator_any 
	[ParsingScope& scope]
	returns [boost::shared_ptr<FunctionalOperator> op]
{
}
	: (position_operator [scope]) => op = functional_operator_position [scope]
	| (symset_operator [scope]) => op = functional_operator_symset [scope]
	| (strset_operator [scope]) => op = functional_operator_strset [scope]
	| (match_operator [scope]) => op = functional_operator_match [scope]
	| op = functional_operator_bool [scope]
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Tagging actions and rules:
// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------
// Single tagging action: select, delete, relabel, unify, mark or unmark.
// Returns boost::shared_ptr<TagAction>
action
	[ParsingScope& scope]
	returns [boost::shared_ptr<TagAction> act]
	: act = action_select  [scope]
	| act = action_delete  [scope]
	| act = action_relabel [scope]
	| act = action_unify   [scope]
	| act = action_mark    [scope]
	| act = action_unmark  [scope]
;

// Action sequence - one or more actions separated with commas:
// 	select(...), select(...), delete(...)
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<TagAction> > >
action_sequence
	[ParsingScope& scope]
	returns [boost::shared_ptr<std::vector<boost::shared_ptr<TagAction> > > v_act]
{
	boost::shared_ptr<TagAction> act;
	v_act.reset(new std::vector<boost::shared_ptr<TagAction> >);
}
	: act = action[scope] {
		v_act->push_back(act);
	}
	(
		COMMA act = action[scope] {
			v_act->push_back(act);
		}
	)*
;

// ----------------------------------------------------------------------------
// Single rule:
// 	rule(NAME, ACTIONS) or rule(NAME, COND, ACTIONS)
	[ParsingScope& scope]
Adam Wardynski's avatar
Adam Wardynski committed
	returns [boost::shared_ptr<TagRule> rle]
{
	boost::shared_ptr<Function<Bool> > condition;
	boost::shared_ptr<std::vector<boost::shared_ptr<TagAction> > > actions;
	: "rule" LPAREN name: STRING COMMA 
				(condition = bool_operator [scope] COMMA)?
				actions = action_sequence [scope] 
		RPAREN {
			if (condition) {
				rle.reset(
					new TagRule(token_ref_to_std_string(name), scope.variables(), actions, condition));
					new TagRule(token_ref_to_std_string(name), scope.variables(), actions));
	returns [boost::shared_ptr<TagRuleSequence> rule_seq]
Adam Wardynski's avatar
Adam Wardynski committed
	boost::shared_ptr<TagRule> rle;
	rule_seq.reset(new TagRuleSequence());
ilor's avatar
ilor committed
		rule_seq->push_back(*rle);
ilor's avatar
ilor committed
			rule_seq->push_back(*rle);
			scope.reset_variables();
// ----------------------------------------------------------------------------
// This is a wrapper for tag_rule_sequence in the rules section of the WCCL file
tag_rules
	returns [boost::shared_ptr<TagRuleSequence> rule_seq]
	: "tag_rules" LPAREN rule_seq = tag_rule_sequence [scope] RPAREN {

// ----------------------------------------------------------------------------
// Select action:
//	select(position, predicate) or select(predicate);
// The syntactic predicate picks the two-argument form when the input starts
// with a position operator.
// Returns boost::shared_ptr<Select>
action_select
	[ParsingScope& scope]
	returns [boost::shared_ptr<Select> action]
{
	boost::shared_ptr<Function<Position> > pos;
	boost::shared_ptr<Function<Bool> > condition;
}
	: "select" LPAREN
	(
		(position_operator [scope]) =>
		(
			pos = position_operator [scope] COMMA
			condition = bool_operator [scope] {
				// select(position, condition);
				action.reset(new Select(condition, pos));
			}
		)
	|
		(
			condition = bool_operator [scope] {
				// select(condition);
				action.reset(new Select(condition));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// Delete action:
//	delete(position, predicate) or delete(predicate);
// The syntactic predicate picks the two-argument form when the input starts
// with a position operator.
// Returns boost::shared_ptr<Delete>
action_delete
	[ParsingScope& scope]
	returns [boost::shared_ptr<Delete> action]
{
	boost::shared_ptr<Function<Position> > pos;
	boost::shared_ptr<Function<Bool> > condition;
}
	: "delete" LPAREN
	(
		(position_operator [scope]) =>
		(
			pos = position_operator [scope] COMMA
			condition = bool_operator [scope] {
				// delete(position, condition);
				action.reset(new Delete(condition, pos));
			}
		)
	|
		(
			condition = bool_operator [scope] {
				// delete(condition);
				action.reset(new Delete(condition));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// Relabel action:
// 	relabel(pos, symset, predicate) or relabel(symset, predicate)
// The syntactic predicate picks the three-argument form when the input starts
// with a position operator.
// Returns boost::shared_ptr<Relabel>
action_relabel
	[ParsingScope& scope]
	returns [boost::shared_ptr<Relabel> action]
{
	boost::shared_ptr<Function<Position> > pos;
	boost::shared_ptr<Function<Bool> > condition;
	boost::shared_ptr<Function<TSet> > replace_with;
}
	: "relabel" LPAREN
	(
		(position_operator [scope]) =>
		(
			pos = position_operator [scope] COMMA
			replace_with = symset_operator [scope] COMMA
			condition = bool_operator [scope] {
				// relabel(pos, symset, predicate)
				action.reset(new Relabel(replace_with, condition, pos));
			}
		)
	|
		(
			replace_with = symset_operator [scope] COMMA
			condition = bool_operator [scope] {
				// relabel(symset, predicate)
				action.reset(new Relabel(replace_with, condition));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
// Unify action:
//	unify(pos_begin, pos_end, attribs)
// Parses two position operators followed by a symset operator and wraps them
// in a Unify action.
// Returns boost::shared_ptr<Unify>
action_unify
	[ParsingScope& scope]
	returns [boost::shared_ptr<Unify> action]
{
	boost::shared_ptr<Function<Position> > range_from;
	boost::shared_ptr<Function<Position> > range_to;
	boost::shared_ptr<Function<TSet> > attrs;
}
	: "unify" LPAREN
			range_from = position_operator [scope] COMMA
			range_to   = position_operator [scope] COMMA
			attrs      = symset_operator   [scope]
		RPAREN
		{
			action = boost::shared_ptr<Unify>(new Unify(range_from, range_to, attrs));
		}
;

// ----------------------------------------------------------------------------
// Mark action:
//	mark(pos_begin, pos_end, "channel") or
//	mark(pos_begin, pos_end, pos_head, "channel")
// When the head position is omitted, an empty pointer is passed to Mark.
// Returns boost::shared_ptr<Mark>
action_mark
	[ParsingScope& scope]
	returns [boost::shared_ptr<Mark> action]
{
	boost::shared_ptr<Function<Position> > range_from;
	boost::shared_ptr<Function<Position> > range_to;
	boost::shared_ptr<Function<Position> > head;
}
	: "mark" LPAREN
			range_from = position_operator [scope] COMMA
			range_to   = position_operator [scope] COMMA
			( head     = position_operator [scope] COMMA )?
			chan_name: STRING
		RPAREN
		{
			// RefToken dereferences to the underlying antlr::Token.
			const std::string channel = chan_name->getText();
			action.reset(new Mark(range_from, range_to, head, channel));
		}
;

// ----------------------------------------------------------------------------
// Unmark action:
//	unmark(pos, "channel")
// Returns boost::shared_ptr<Unmark>
action_unmark
	[ParsingScope& scope]
	returns [boost::shared_ptr<Unmark> action]
{
	boost::shared_ptr<Function<Position> > where;
}
	: "unmark" LPAREN
			where = position_operator [scope] COMMA
			chan_name: STRING
		RPAREN
		{
			// RefToken dereferences to the underlying antlr::Token.
			const std::string channel = chan_name->getText();
			action.reset(new Unmark(where, channel));
		}
;

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Match rules
// Wraps a single apply(...) operator, together with the variables of the
// current scope, into a MatchRule.
// Returns boost::shared_ptr<Matching::MatchRule>
match_rule_operator
	[ParsingScope& scope]
	returns [boost::shared_ptr<Matching::MatchRule> ret_op]
{
	boost::shared_ptr<Matching::ApplyOperator> apply;
	// Touch the "_M" Match variable before parsing the body
	// (presumably get_put creates it when absent - confirm).
	scope.variables().get_put<Match>("_M");
}
	: apply = match_apply_operator [scope] {
		ret_op = boost::make_shared<Matching::MatchRule>(scope.variables(), apply);
	}
;

// Match apply operator:
// 	apply(match(), cond(conditions), actions(actions))
// 	apply(match(), actions(actions))
// The cond(...) section is optional; when it is absent the ApplyOperator is
// built without a condition list.
// Returns boost::shared_ptr<Matching::ApplyOperator>
match_apply_operator
	[ParsingScope& scope]
	returns [boost::shared_ptr<Matching::ApplyOperator> ret_op]
{
	VariableAccessor<Match> matches = scope.variables().create_accessor<Match>("_M");
	boost::shared_ptr<Matching::ConjConditions> match_cond;
	boost::shared_ptr<std::vector<boost::shared_ptr<Matching::MatchAction> > > actions;
	boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > conditions;
}
	: "apply" LPAREN
		"match" LPAREN match_cond = match_condition [scope] RPAREN COMMA
		("cond"   LPAREN conditions = bool_operator_comma_sep [scope] RPAREN COMMA)?
		"actions" LPAREN actions    =  match_action_comma_sep [scope] RPAREN
	RPAREN {
		if (conditions) {
			ret_op.reset(
				new Matching::ApplyOperator(matches, match_cond, actions, conditions)
			);
		} else {
			ret_op.reset(
				new Matching::ApplyOperator(matches, match_cond, actions)
			);
		}
	}
;

// Match conditions. Wraps the vector of parsed match conditions into a
// single ConjConditions object.
// Returns boost::shared_ptr<Matching::ConjConditions>
match_condition
	[ParsingScope& scope]
	returns [boost::shared_ptr<Matching::ConjConditions> condition]
{
	std::vector<boost::shared_ptr<const Matching::MatchCondition> > m_cond;
}
	: m_cond = match_condition_in [scope] {
		condition.reset(new Matching::ConjConditions(m_cond));
	}
;

// Comma-separated list of one or more match conditions.
// Returns std::vector< boost::shared_ptr<const Matching::MatchCondition> >
match_condition_in
	[ParsingScope& scope]
	returns [std::vector< boost::shared_ptr<const Matching::MatchCondition> > ret]
{
	boost::shared_ptr<const Matching::MatchCondition> r_cond;
}
	: r_cond = match_cond_all[scope] {
		ret.push_back(r_cond);
	}
	(
		COMMA
		r_cond = match_cond_all[scope] {
			ret.push_back(r_cond);
		}
	)*
;

// Match variants: variant(v1), variant(v2), ...
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<Matching::ConjConditions> > >
match_variants
	[ParsingScope& scope]
	returns [boost::shared_ptr<std::vector<boost::shared_ptr<Matching::ConjConditions> > > variants]
{
	variants.reset(new std::vector<boost::shared_ptr<Matching::ConjConditions> >());
	boost::shared_ptr<Matching::ConjConditions> variant;
}
	: "variant" LPAREN variant = match_condition [scope] RPAREN {
		variants->push_back(variant);
	}
	(
		COMMA "variant" LPAREN variant = match_condition [scope] RPAREN {
			variants->push_back(variant);
		}
	)*
;

// Any single match condition: optional, repeat, token predicate, oneof,
// longest, is or text.
// Returns boost::shared_ptr<const Matching::MatchCondition>
match_cond_all
	[ParsingScope& scope]
	returns [boost::shared_ptr<const Matching::MatchCondition> ret]
	: ret = match_cond_optional [scope]
	| ret = match_cond_repeate  [scope]
	| ret = match_cond_token    [scope]
	| ret = match_cond_oneof    [scope]
	| ret = match_cond_longest  [scope]
	| ret = match_cond_is
	| ret = match_cond_text
;

// Match condition - token (wraps a L0 predicate)
// Returns boost::shared_ptr<const Matching::TokenCondition>
match_cond_token
	[ParsingScope& scope]
	returns [boost::shared_ptr<const Matching::TokenCondition> ret]
{
	boost::shared_ptr<Function<Bool> > bool_op;
}
	: bool_op = bool_operator [scope] {
		ret = boost::make_shared<Matching::TokenCondition>(bool_op);
	}
;

// Match condition - optional(conditions)
// Returns boost::shared_ptr<Matching::OptionalMatch>
match_cond_optional
	[ParsingScope& scope]
	returns [boost::shared_ptr<Matching::OptionalMatch> mtch]
{
	boost::shared_ptr<Matching::ConjConditions> m_cond;
}
	: "optional" LPAREN m_cond = match_condition [scope] RPAREN {
		mtch.reset(new Matching::OptionalMatch(m_cond));
	}
;

// Match condition - repeat(conditions)
// Returns boost::shared_ptr<Matching::RepeatedMatch>
match_cond_repeate
	[ParsingScope& scope]
	returns [boost::shared_ptr<Matching::RepeatedMatch> mtch]
{
	boost::shared_ptr<Matching::ConjConditions> m_cond;
}
	: "repeat" LPAREN m_cond = match_condition [scope] RPAREN {
		mtch.reset(new Matching::RepeatedMatch(m_cond));
	}
;

// Match condition - is(ann_name)
// Returns boost::shared_ptr<Matching::IsAnnotatedAs>
match_cond_is
	returns [boost::shared_ptr<Matching::IsAnnotatedAs> mtch]
	: "is" LPAREN annotation_name: STRING RPAREN {
		mtch.reset(new Matching::IsAnnotatedAs(token_ref_to_std_string(annotation_name)));
	}
;

// Match condition - text(text)
// Returns boost::shared_ptr<Matching::MatchText>
match_cond_text
	returns [boost::shared_ptr<Matching::MatchText> mtch]
	: "text" LPAREN txt: STRING RPAREN {
		mtch.reset(new Matching::MatchText(token_ref_to_ustring(txt)));
	}
;

// Match condition - oneof(variant(v1), variant(v2), ...)
// Returns boost::shared_ptr<Matching::OneOf>
match_cond_oneof
	[ParsingScope& scope]
	returns [boost::shared_ptr<Matching::OneOf> onf]
{
	boost::shared_ptr<std::vector<boost::shared_ptr<Matching::ConjConditions> > > variants;
}
	: "oneof" LPAREN variants = match_variants [scope] RPAREN {
		onf.reset(new Matching::OneOf(variants));
	}
;

// Match condition - longest(variant(v1), variant(v2), ...)
// Returns boost::shared_ptr<Matching::Longest>
match_cond_longest
	[ParsingScope& scope]
	returns [boost::shared_ptr<Matching::Longest> lng]
{
	boost::shared_ptr<std::vector<boost::shared_ptr<Matching::ConjConditions> > > variants;
}
	: "longest" LPAREN variants = match_variants [scope] RPAREN {
		lng.reset(new Matching::Longest(variants));
	}
;

// ----------------------------------------------------------------------------

// Match actions. A match action is one of: mark, unmark, remark, setprop.
// Returns boost::shared_ptr<Matching::MatchAction>
match_action
	[ParsingScope& scope]
	returns [boost::shared_ptr<Matching::MatchAction> m_act]
	: m_act = match_mark_action    [scope]
	| m_act = match_unmark_action  [scope]
	| m_act = match_remark_action  [scope]
	| m_act = match_setprop_action [scope]
;

// Match mark action:
//	mark(from, "ann"), mark(from, to, "ann") or mark(from, to, head, "ann")
// The MarkMatch constructor overload is chosen by which optional match
// operators were present in the input.
// Returns boost::shared_ptr<Matching::MarkMatch>
match_mark_action
	[ParsingScope& scope]
	returns [boost::shared_ptr<Matching::MarkMatch> m_act]
{
	boost::shared_ptr<Function<Match> > match_to;
	boost::shared_ptr<Function<Match> > match_from;
	boost::shared_ptr<Function<Match> > head_match;
}
	: "mark" LPAREN
			match_from = match_operator[scope] COMMA
			( match_to  = match_operator[scope] COMMA
				( head_match = match_operator[scope] COMMA )?
			)?
			annotation_name : STRING
		RPAREN {
			if (!match_to) {
				m_act.reset(
					new Matching::MarkMatch(
						match_from,
						((antlr::Token*)annotation_name)->getText()));
			} else {
				if (!head_match) {
					m_act.reset(
						new Matching::MarkMatch(
							match_from,
							match_to,
							((antlr::Token*)annotation_name)->getText()));
				} else {
					m_act.reset(
						new Matching::MarkMatch(
							match_from,
							match_to,
							head_match,
							((antlr::Token*)annotation_name)->getText()));
				}
			}
		}
;

// Match unmark action:
//	unmark(match, "ann")
// Returns boost::shared_ptr<Matching::UnmarkMatch>
match_unmark_action
	[ParsingScope& scope]
	returns [boost::shared_ptr<Matching::UnmarkMatch> m_act]
{
	boost::shared_ptr<Function<Match> > match_at;
}
	: "unmark" LPAREN
				match_at = match_operator[scope] COMMA
				annotation_name : STRING
			RPAREN {
				m_act.reset(
					new Matching::UnmarkMatch(
							match_at,
							((antlr::Token*)annotation_name)->getText()));
			}
;

// Overwrite-match mark action ("remark"):
//	remark(from, "ann"), remark(from, to, "ann") or
//	remark(from, to, head, "ann")
// The OverwriteMatch constructor overload is chosen by which optional match
// operators were present in the input.
// Returns boost::shared_ptr<Matching::OverwriteMatch>
match_remark_action
	[ParsingScope& scope]
	returns [boost::shared_ptr<Matching::OverwriteMatch> m_act]
{
	boost::shared_ptr<Function<Match> > match_from;
	boost::shared_ptr<Function<Match> > match_to;
	boost::shared_ptr<Function<Match> > head_match;
}
	: "remark" LPAREN
			match_from = match_operator[scope] COMMA
			(
				match_to = match_operator[scope] COMMA
				( head_match = match_operator[scope] COMMA )?
			)?
			annotation_name : STRING
		RPAREN
		{
			// RefToken dereferences to the underlying antlr::Token.
			const std::string ann = annotation_name->getText();
			if (match_to && head_match) {
				m_act.reset(new Matching::OverwriteMatch(match_from, match_to, head_match, ann));
			} else if (match_to) {
				m_act.reset(new Matching::OverwriteMatch(match_from, match_to, ann));
			} else {
				m_act.reset(new Matching::OverwriteMatch(match_from, ann));
			}
		}
;

// Match property action:
//	setprop(match, "key", "value")
// Returns boost::shared_ptr<Matching::PropMatch>
match_setprop_action
	[ParsingScope& scope]
	returns [boost::shared_ptr<Matching::PropMatch> m_act]
{
	boost::shared_ptr<Function<Match> > target;
}
	: "setprop" LPAREN
			target = match_operator[scope] COMMA
			key_name : STRING COMMA
			value_name : STRING
		RPAREN
		{
			// RefToken dereferences to the underlying antlr::Token.
			const std::string key = key_name->getText();
			const std::string value = value_name->getText();
			m_act.reset(new Matching::PropMatch(target, key, value));
		}
;

// One or more match actions separated by commas.
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<Matching::MatchAction> > >
match_action_comma_sep
	[ParsingScope& scope]
	returns [boost::shared_ptr<std::vector<boost::shared_ptr<Matching::MatchAction> > > r_vec]
{
	boost::shared_ptr<Matching::MatchAction> act;
	r_vec.reset(
		new std::vector<boost::shared_ptr<Matching::MatchAction> >
	);
}
	: act = match_action [scope] {
		r_vec->push_back(act);
	}
	(
		COMMA act = match_action [scope] {
			r_vec->push_back(act);
		}
	)*
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ANTLR LEXER
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
class ANTLRLexer extends Lexer;
options {
	exportVocab    = ANTLRExpr;
	charVocabulary = '\3'..'\377';
	// Literal testing is off by default; SYMBOL turns it on for itself.
	testLiterals   = false;
}

// String literal in double or single quotes. The surrounding quotes are
// dropped from the token text ('!'); a string cannot span lines.
STRING
options {
	paraphrase = "a string";
}
	: '"'!  (~('"'  | '\n' | '\r'))* '"'!
	| '\''! (~('\'' | '\n' | '\r'))* '\''!
;

// Integer with an explicit sign. Spaces and tabs between the sign and the
// digits are accepted but dropped from the token text ('!').
SIGNED_INT
options {
	paraphrase = "Signed integer";
}
	: ('-'|'+') (' '!|'\t'!)* ('0'..'9')+
;

// Integer without a sign: one or more decimal digits.
UNSIGNED_INT
options { paraphrase = "Unsigned integer"; }
	: ( '0'..'9' )+
;


rk's avatar
rk committed
QUOT_MARK
options {
	paraphrase = "Quote";
rk's avatar
rk committed

APOS_MARK
options {
	paraphrase = "Apostrophe";
rk's avatar
rk committed
// Question mark character.
Q_MARK
options {
	paraphrase = "Question mark";
}
	: '?'
;

// Exclamation mark character.
E_MARK
options {
	paraphrase = "Exclamation mark";
}
	: '!'
;

STR_PREFIX
options {
	paraphrase = "String prefix";
}
	paraphrase = "Symset prefix";
;

BOOL_PREFIX
options {
	paraphrase = "Bool prefix";
}
rk's avatar
rk committed

POS_PREFIX
options {
// Prefix "$m:".
MATCH_VECTOR_PREFIX
options { paraphrase = "Match vector prefix"; }
	: "$m:"
;

// Prefix "@s:" of a string operators section in a WCCL file.
STR_SECTION_PREFIX
options {
	paraphrase = "Wccl file string operators section prefix";
}
	: "@s:"
;

// Prefix "@p:" of a position operators section in a WCCL file.
POS_SECTION_PREFIX
options { paraphrase = "Wccl file position operators section prefix"; }
	: "@p:"
;

// Prefix "@b:" of a bool operators section in a WCCL file.
BOOL_SECTION_PREFIX
options { paraphrase = "Wccl file bool operators section prefix"; }
	: "@b:"
;

// Prefix "@t:" of a symset operators section in a WCCL file.
TST_SECTION_PREFIX
options { paraphrase = "Wccl file symset operators section prefix"; }
	: "@t:"
;

// Prefix "@m:" of a match operators section in a WCCL file.
// The paraphrase used to be a copy of the symset section's text, which made
// error messages for this token misleading.
MATCH_SECTION_PREFIX
options {
	paraphrase = "Wccl file match operators section prefix";
}
	: "@m:"
;

// Opening square bracket.
LBRACKET
options {
	paraphrase = "'['";
}
	: '['
;

// Closing square bracket.
RBRACKET
options {
	paraphrase = "']'";
}
	: ']'
;

// Opening parenthesis.
LPAREN
options {
	paraphrase = "'('";
}
	: '('
;

// Closing parenthesis.
RPAREN
options {
	paraphrase = "')'";
}
	: ')'
;

// Opening curly brace.
LCURLY
options {
	paraphrase = "'{'";
}
	: '{'
;

// Closing curly brace.
RCURLY
options {
	paraphrase = "'}'";
}
	: '}'
;

// At sign.
AT_MARK
options {
	paraphrase = "'@'";
}
	: '@'
;

// Comma.
COMMA
options {
	paraphrase = "','";
}
	: ','
;

// Semicolon.
SEMI
options {
	paraphrase = ";";
}
	: ';'
;

// Identifier: a letter or underscore followed by letters, digits or
// underscores, optionally wrapped in backticks (the backticks stay in the
// token text). testLiterals is enabled for this rule.
SYMBOL
options {
	paraphrase = "Symbol";
	testLiterals = true;
}
	: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
	| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
;

WS
	  	| '\t'
	  	| '\f'
	  	| 
			( "\r\n"
				| '\r'
				| '\n'
			) { newline(); } 
		) { $setType(antlr::Token::SKIP); } 
rk's avatar
rk committed

// Single line comment: "//" up to (not including) the end of line.
// The token type is set to SKIP.
COMMENT
options { paraphrase = "Single line comment"; }
	: "//" ( ~( '\n' | '\r' ) )*
		{ $setType(antlr::Token::SKIP); }
;

ML_COMMENT
options {
	paraphrase = "Multi line comment";
rk's avatar
rk committed
}
	(			// TODO: test it and add reference to the site it's taken from!
				/* This actually works OK despite the ambiguity that
				'\r' '\n' can be matched in one alternative or by matching
				'\r' in one iteration and '\n' in another.. But 
				this is really matched just by one rule per (...)* 
				loop iteration, so it's OK.
				This is exactly how they do it all over the web - just
				turn off the warning for this particular token.*/
      : { LA(2)!='/' }? '*'
      | '\r' '\n' { newline(); }
      | '\r' { newline(); }
      | '\n' { newline(); }
      | ~('*'|'\n'|'\r')
rk's avatar
rk committed

// Hash sign.
HASH
options {
	paraphrase = "'#'";
}
	: '#'
;

//DSEPARATOR
//options { 
//	paraphrase = "':-'"; 
//}
//	: ":-" 
//;