Skip to content
Snippets Groups Projects
grammar.g 83.4 KiB
Newer Older
rk's avatar
rk committed
header {
Paweł Kędzia's avatar
Paweł Kędzia committed
	//don't try to add all the headers inside our namespace
	#include <libwccl/parser/ParserException.h>
	#include <libwccl/parser/parsingscope.h>
rk's avatar
rk committed

	#include <climits>
	#include <cstdio>
	#include <cstdlib>
	#include <antlr/Token.hpp>
	#include <boost/lexical_cast.hpp>
	// values/variables
	#include <libwccl/variables.h>
	#include <libwccl/values/bool.h>
	#include <libwccl/values/tset.h>
	#include <libwccl/values/strset.h>
	#include <libwccl/values/position.h>
	
	// sentence context
	#include <libwccl/sentencecontext.h>

	// operators
	#include <libwccl/ops/operator.h>

	#include <libwccl/ops/functions/constant.h>
	#include <libwccl/ops/functions/vargetter.h>
	#include <libwccl/ops/functions/conditional.h>

	#include <libwccl/ops/functions/setops.h>
	#include <libwccl/ops/functions/bool/varsetter.h>
	#include <libwccl/ops/functions/bool/predicates/debug.h>
	#include <libwccl/ops/functions/bool/predicates/ambiguous.h>
	#include <libwccl/ops/functions/bool/predicates/issingular.h>
	#include <libwccl/ops/functions/bool/predicates/or.h>
	#include <libwccl/ops/functions/bool/predicates/nor.h>
	#include <libwccl/ops/functions/bool/predicates/and.h>
	#include <libwccl/ops/functions/bool/predicates/regex.h>
	#include <libwccl/ops/functions/bool/predicates/intersects.h>
	#include <libwccl/ops/functions/bool/predicates/issubsetof.h>
	#include <libwccl/ops/functions/bool/predicates/isinside.h>
	#include <libwccl/ops/functions/bool/predicates/isoutside.h>
	#include <libwccl/ops/functions/bool/predicates/isempty.h>
	#include <libwccl/ops/functions/bool/predicates/equals.h>
	#include <libwccl/ops/functions/bool/predicates/weakagreement.h>
	#include <libwccl/ops/functions/bool/predicates/pointagreement.h>
	#include <libwccl/ops/functions/bool/predicates/strongagreement.h>
ilor's avatar
ilor committed
	#include <libwccl/ops/functions/bool/predicates/annsub.h>
ilor's avatar
ilor committed
	#include <libwccl/ops/functions/bool/predicates/ann.h>
	#include <libwccl/ops/functions/bool/predicates/anntoken.h>

	#include <libwccl/ops/functions/strset/affix.h>
	#include <libwccl/ops/functions/strset/getorth.h>
	#include <libwccl/ops/functions/strset/toupper.h>
	#include <libwccl/ops/functions/strset/tolower.h>
	#include <libwccl/ops/functions/strset/getlemmas.h>
	#include <libwccl/ops/functions/strset/lextranslator.h>
	#include <libwccl/ops/functions/strset/propval.h>
	#include <libwccl/ops/functions/strset/anninter.h>
Paweł Kędzia's avatar
Paweł Kędzia committed
	#include <libwccl/ops/functions/tset/agrfilter.h>
Adam Radziszewski's avatar
Adam Radziszewski committed
	#include <libwccl/ops/functions/tset/catfilter.h>
	#include <libwccl/ops/functions/tset/getsymbols.h>
	#include <libwccl/ops/functions/tset/getwordclass.h>
Paweł Kędzia's avatar
Paweł Kędzia committed
	#include <libwccl/ops/functions/tset/getsymbolsinrange.h>
	#include <libwccl/ops/functions/position/relativeposition.h>
	#include <libwccl/ops/functions/position/lasttoken.h>
	#include <libwccl/ops/functions/position/firsttoken.h>
	#include <libwccl/ops/functions/bool/iterations/only.h>
	#include <libwccl/ops/functions/bool/iterations/atleast.h>
	#include <libwccl/ops/functions/bool/iterations/leftlook.h>
	#include <libwccl/ops/functions/bool/iterations/rightlook.h>
	#include <libwccl/ops/functions/bool/iterations/skip.h>
Adam Wardynski's avatar
Adam Wardynski committed
	#include <libwccl/ops/tagrule.h>
	#include <libwccl/ops/tagrulesequence.h>
	#include <libwccl/ops/tagactions/unify.h>
	#include <libwccl/ops/tagactions/delete.h>
	#include <libwccl/ops/tagactions/select.h>
	#include <libwccl/ops/tagactions/relabel.h>
	#include <libwccl/ops/tagactions/mark.h>
	#include <libwccl/ops/tagactions/unmark.h>
	// Match operators
	#include <libwccl/values/tokenmatch.h>
	#include <libwccl/values/annotationmatch.h>
	#include <libwccl/values/matchvector.h>
	#include <libwccl/ops/match/applyoperator.h>
	#include <libwccl/ops/match/conditions/optionalmatch.h>
	#include <libwccl/ops/match/conditions/repeatedmatch.h>
	#include <libwccl/ops/match/conditions/isannotatedas.h>
	#include <libwccl/ops/match/conditions/matchtext.h>
	#include <libwccl/ops/match/conditions/conjconditions.h>
	#include <libwccl/ops/match/conditions/tokencondition.h>
	#include <libwccl/ops/match/conditions/oneof.h>
	#include <libwccl/ops/match/conditions/longest.h>
	#include <libwccl/ops/match/actions/markmatch.h>
	#include <libwccl/ops/match/actions/unmarkmatch.h>
	#include <libwccl/ops/match/actions/setpropmatch.h>
	#include <libwccl/ops/match/actions/overwritematch.h>
	#include <libwccl/ops/functions/match/submatch.h>
Adam Wardynski's avatar
Adam Wardynski committed
	// Wccl whole file syntax
	#include <libwccl/wcclfile.h>
	
	#include <libwccl/lexicon/lexiconparser.h>

rk's avatar
rk committed
	// Unicode String
	#include <unicode/uniset.h>
	#include <unicode/unistr.h>
Paweł Kędzia's avatar
Paweł Kędzia committed
	// start our namespace again
	ANTLR_BEGIN_NAMESPACE(Wccl)
rk's avatar
rk committed
}

options {
	language = "Cpp";
//	genHashLines = true;
rk's avatar
rk committed
}

// ----------------------------------------------------------------------------
rk's avatar
rk committed
// ANTLR PARSER
// ----------------------------------------------------------------------------
rk's avatar
rk committed
class ANTLRParser extends Parser;
options {
rk's avatar
rk committed
	buildAST = false;
rk's avatar
rk committed
	defaultErrorHandler = false;
}
{
private:
	// 
rk's avatar
rk committed
	const UnicodeString token_ref_to_ustring(antlr::RefToken& rstr) const { 
		return UnicodeString::fromUTF8(((antlr::Token*)rstr)->getText().c_str()).unescape();
	// Returns the text of a token with its first and last character removed
	// (presumably the quotes surrounding a STRING token -- confirm against the
	// lexer). The token text is first decoded by token_ref_to_ustring().
	// Text shorter than three characters has nothing between its delimiters,
	// so an empty string is returned for it.
	const UnicodeString str_token_ref_to_ustring(antlr::RefToken& rstr) const { 
		const UnicodeString ustr = token_ref_to_ustring(rstr);
		UnicodeString ret_ustr;

		if (ustr.length() < 3) {
			// Hand back the default-constructed (empty) string instead of "" so
			// this does not rely on the implicit const char* -> UnicodeString
			// conversion, which ICU lets a build turn off
			// (UNISTR_FROM_STRING_EXPLICIT).
			return ret_ustr;
		}

		// copy everything between the first and the last character
		ustr.extract(1, ustr.length() - 2, ret_ustr);

		return ret_ustr;
	}
Paweł Kędzia's avatar
Paweł Kędzia committed
	// Returns the token text with one enclosing pair of grave accents (`)
	// stripped. Text that is shorter than two characters, or that is not
	// delimited by a grave accent on both ends, is returned unchanged.
	const std::string str_token_rem_grav(antlr::RefToken& rstr) const {
		const std::string text = token_ref_to_std_string(rstr);
		const std::string::size_type n = text.length();

		const bool in_graves =
			n >= 2 && text[0] == '`' && text[n - 1] == '`';

		return in_graves ? text.substr(1, n - 2) : text;
	}
	//
rk's avatar
rk committed
	// Returns the text carried by the referenced token as a std::string.
	const std::string token_ref_to_std_string(antlr::RefToken& rstr) const { 
		antlr::Token* const token = static_cast<antlr::Token*>(rstr);
		return token->getText();
	}
	//
	// Parses the text of the referenced token as a base-10 integer, accepting
	// the same input atoi() does (optional sign followed by digits).
	// atoi() has undefined behaviour when the value does not fit in an int;
	// here such values are clamped to INT_MIN / INT_MAX instead.
	int token_ref_to_int(antlr::RefToken& rstr) { 
		const std::string text = static_cast<antlr::Token*>(rstr)->getText();

		// strtol() saturates at LONG_MIN / LONG_MAX on overflow, so clamping
		// its result to the int range makes every input well-defined.
		const long parsed = std::strtol(text.c_str(), NULL, 10);

		if (parsed > INT_MAX) {
			return INT_MAX;
		}
		if (parsed < INT_MIN) {
			return INT_MIN;
		}
		return static_cast<int>(parsed);
	}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// "GLOBAL" RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Rule for parsing string set operator with scope. 
// Returns boost::shared_ptr<Operator<StrSet> >
parse_strset_operator
	returns [boost::shared_ptr<Operator<StrSet> > res]
	Lexicons empty_lex;
	ParsingScope scope(tagset, empty_lex);
	: res = functional_operator_strset [scope]
	EOF
// ----------------------------------------------------------------------------
// Rule for parsing bool operator with scope. 
// Returns boost::shared_ptr<Operator<Bool> > 
	returns [boost::shared_ptr<Operator<Bool> > res]
	Lexicons empty_lex;
	ParsingScope scope(tagset, empty_lex);
	: res = functional_operator_bool [scope]
	EOF

// ----------------------------------------------------------------------------
// Rule for parsing symbol set operator with scope.
// Returns boost::shared_ptr<Operator<TSet> >
parse_symset_operator
	returns [boost::shared_ptr<Operator<TSet> > res]
	Lexicons empty_lex;
	ParsingScope scope(tagset, empty_lex);
	: res = functional_operator_symset [scope]
	EOF
ilor's avatar
ilor committed
// ----------------------------------------------------------------------------
// Rule for parsing position operator with scope.
// Returns boost::shared_ptr<Operator<Position> >
parse_position_operator
ilor's avatar
ilor committed
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<Operator<Position> > res]
	Lexicons empty_lex;
	ParsingScope scope(tagset, empty_lex);
	: res = functional_operator_position [scope]
	EOF
Adam Wardynski's avatar
Adam Wardynski committed
;

// ----------------------------------------------------------------------------
// Rule for parsing match operator with scope.
// Returns boost::shared_ptr<Operator<Match> >
parse_match_operator
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<Operator<Match> > res]
{	
	Lexicons empty_lex;
	ParsingScope scope(tagset, empty_lex);
Adam Wardynski's avatar
Adam Wardynski committed
}
	: res = functional_operator_match [scope]
	EOF
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Rule for parsing single WCCL Rule
Adam Wardynski's avatar
Adam Wardynski committed
// Returns boost::shared_ptr<TagRule>
	[const Corpus2::Tagset &tagset]
Adam Wardynski's avatar
Adam Wardynski committed
	returns [boost::shared_ptr<TagRule> rle]
	Lexicons empty_lex;
	ParsingScope scope(tagset, empty_lex);
// Rule for parsing tag rule section in the wccl file
// Returns boost::shared_ptr<TagRuleSequence>
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<TagRuleSequence> rule_seq]
	Lexicons empty_lex;
	ParsingScope scope(tagset, empty_lex);
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Rule for parsing the match rules
// Returns boost::shared_ptr<Matching::MatchRule>
parse_match_rule
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<Matching::MatchRule> ret_match]
	Lexicons empty_lex;
	ParsingScope scope(tagset, empty_lex);
	: ret_match = match_rule_operator[scope] 
Adam Wardynski's avatar
Adam Wardynski committed
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Rule for parsing wccl files
parse_wccl_file
	[const Corpus2::Tagset& tagset, const std::string search_path]
Adam Wardynski's avatar
Adam Wardynski committed
	returns [boost::shared_ptr<WcclFile> wccl_file]
{
	wccl_file = boost::make_shared<WcclFile>(tagset, search_path);
Adam Wardynski's avatar
Adam Wardynski committed
}
	: (imports_section [*wccl_file])?
	  (wccl_file_section [*wccl_file])+
	  EOF
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VALUES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
Adam Wardynski's avatar
Adam Wardynski committed
// Single or multiple (comma separated) elements in string set, may be:
//   'a' "a" [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"]
// Parsing strset literal and returning plain strset value.
// Returns boost::shared_ptr<StrSet> 
strset_literal 
	returns [boost::shared_ptr<StrSet> s_set]
	s_set.reset(new StrSet());
			s_set->insert(token_ref_to_ustring(s0)); 
Paweł Kędzia's avatar
Paweł Kędzia committed
		}
	| LBRACKET 
		(
			s1: STRING { 
				s_set->insert(token_ref_to_ustring(s1)); 
			}
	  	(
				COMMA s2: STRING { 
					s_set->insert(token_ref_to_ustring(s2)); 
				}
			)*
	  )? 
		RBRACKET
;
Adam Wardynski's avatar
Adam Wardynski committed
// String set value as a constant string set:
// Returns boost::shared_ptr<Constant<StrSet> >
	returns [boost::shared_ptr<Constant<StrSet> > val]
	boost::shared_ptr<StrSet> set;
		val.reset(new Constant<StrSet>(*set));
// ----------------------------------------------------------------------------
Adam Wardynski's avatar
Adam Wardynski committed
// Element of sym set. This rule inserts element into symbol set 
// using corresponding tagset. 
// WARNING! This rule can throw ParserException! Be careful!
symset_elem
	[const Corpus2::Tagset& tagset, boost::shared_ptr<TSet>& t_set]
	: s1: SYMBOL {
Paweł Kędzia's avatar
Paweł Kędzia committed
		try {
Paweł Kędzia's avatar
Paweł Kędzia committed
			t_set->insert_symbol(tagset, str_token_rem_grav(s1));
Paweł Kędzia's avatar
Paweł Kędzia committed
		}
		catch(Corpus2::TagParseError &e) {
			throw(ParserException(e.info()));
		}
// Symset literal. Symset element may be: 
//  a, `a` (this is guaranteed by the lexer rule SYMBOL) or {a} {`a`} {a, b}
//  {`a`, `b`} {a, `b`} {`a`, b}
// Parsing symset literal and returning plain symset value.
// Returns boost::shared_ptr<TSet>
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<TSet> t_set]
	t_set.reset(new TSet());
Paweł Kędzia's avatar
Paweł Kędzia committed
	| LCURLY 
		(
			symset_elem [tagset, t_set] (COMMA symset_elem [tagset, t_set])* 
		)?
// Symset value, as constant symbol set
// Returns boost::shared_ptr<Constant<TSet> >
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<Constant<TSet> > val]
	boost::shared_ptr<TSet> set;
	: set = symset_literal [tagset] {
		val.reset(new Constant<TSet>(*set));
// ----------------------------------------------------------------------------
// Bool literal. May be True or False. Parsing bool literal and returning 
// plain bool value.
// Returns boost::shared_ptr<Bool>
	returns [boost::shared_ptr<Bool> val]
	: "True"  { val.reset(new Bool(Bool(true ))); }
	| "False" { val.reset(new Bool(Bool(false))); }
// Bool value, as constant bool Value
// Returns boost::shared_ptr<Constant<Bool> >
	returns [boost::shared_ptr<Constant<Bool> > val]
	boost::shared_ptr<Bool> bool_lit;
		val.reset(new Constant<Bool>(*bool_lit));
// ----------------------------------------------------------------------------
// Position literal may be:
// 	(+|-)?(0-9)+ or  begin or end or nowhere
// Parsing position literal and returning plain position value.
// returns boost::shared_ptr<Position>
	returns [boost::shared_ptr<Position> val]
Paweł Kędzia's avatar
Paweł Kędzia committed
{
	int i = 0;
}
	: i = number {
		val.reset(new Position(Position(i)));
		val.reset(new Position(Position(Position::Begin)));
		val.reset(new Position(Position(Position::End)));
		val.reset(new Position(Position(Position::Nowhere)));

// Position as constant position value
// Returns boost::shared_ptr<Constant<Position> >
	returns [boost::shared_ptr<Constant<Position> > val]
	boost::shared_ptr<Position> pos_lit;
		val.reset(new Constant<Position>(*pos_lit));
// ----------------------------------------------------------------------------
// Value used in match operators, such as TOK[position] and ANN[position, name]
// Returns boost::shared_ptr<Match>
	returns [boost::shared_ptr<Match> val]
{
	boost::shared_ptr<MatchData> m;
}
// Constant match value
// Returns boost::shared_ptr<Constant<Match> >
match_value_const
	returns [boost::shared_ptr<Constant<Match> > val]
{
	boost::shared_ptr<Match> m;
}
		val.reset(new Constant<Match>(*m));
	}
;

// ----------------------------------------------------------------------------
// Value used in match operators, such as TOK[position] and ANN[position, name]
// Returns boost::shared_ptr<MatchData>
	returns [boost::shared_ptr<MatchData> val]
	: val = token_match_literal
	| val = ann_match_literal
	| val = match_vector_literal
// Returns boost::shared_ptr<TokenMatch>
	returns [boost::shared_ptr<TokenMatch> val]
{
}
	: "TOK" LBRACKET u: UNSIGNED_INT RBRACKET {
		val.reset(new TokenMatch(token_ref_to_int(u)));
// annotation match literal - ANN[position, name]
// Returns boost::shared_ptr<AnnotationMatch> 
	returns [boost::shared_ptr<AnnotationMatch> val]
{
}
	: "ANN" LBRACKET u : UNSIGNED_INT COMMA channel : STRING RBRACKET {
		val.reset(new AnnotationMatch(token_ref_to_int(u), token_ref_to_std_string(channel)));
// annotation match vector literal: MATCH() or MATCH(token, ann, MATCH())
// Returns boost::shared_ptr<MatchVector>
	returns [boost::shared_ptr<MatchVector> val]
{
	val.reset(new MatchVector());
}
	: "MATCH" LPAREN (match_vector_literal_item[val])? RPAREN
;

// Body of the MATCH value. It only adds vector items to the MatchVector
// Item may be single or multiple
match_vector_literal_item [boost::shared_ptr<MatchVector>& mvector]
{
	boost::shared_ptr<Match> m_val;
}
		mvector->append(m_val);
	} 
	(
		COMMA
// ----------------------------------------------------------------------------
// Integer literal, with or without a sign: 1, +1, -1
// Returns the value of the literal as an int.
number 
	returns [int value]
{
	value = 0;
}
	: signed_tok: SIGNED_INT {
		value = token_ref_to_int(signed_tok);
	}
	| unsigned_tok: UNSIGNED_INT {
		value = token_ref_to_int(unsigned_tok);
	}
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VARIABLES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Position: $Name
// Gets the position variable from variables (creating it first if it is not there yet)
// Returns boost::shared_ptr<VariableAccessor<Position> > 
position_variable_acc
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<Position> > pos_acc]
		vars.get_put<Position>(str_token_rem_grav(n));
		VariableAccessor<Position> acc = 
			vars.create_accessor<Position>(str_token_rem_grav(n));
		pos_acc.reset(new VariableAccessor<Position>(acc));

// VarGetter for Position variable. This rule wraps position_variable_acc.
// Returns boost::shared_ptr<VarGetter<Position> >
position_variable
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<Position> > op]
	boost::shared_ptr<VariableAccessor<Position> > pos_acc;
}
	: pos_acc = position_variable_acc [vars] {
		op.reset(new VarGetter<Position>(*pos_acc));
// ----------------------------------------------------------------------------
Adam Wardynski's avatar
Adam Wardynski committed
// This expression gets variable of StrSet type from the scope's Variables.
// Creates one first if it's not yet there.
// 
// Returns boost::shared_ptr<VariableAccessor<StrSet> >
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<StrSet> > strset_acc]
		vars.get_put<StrSet>(str_token_rem_grav(n));
		VariableAccessor<StrSet> acc = 
			vars.create_accessor<StrSet>(str_token_rem_grav(n));
		strset_acc.reset(new VariableAccessor<StrSet>(acc));

// Vargetter for StrSet variable. This rule wraps strset_variable_acc.
// Returns boost::shared_ptr<VarGetter<StrSet> > 
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<StrSet> > op]
	boost::shared_ptr<VariableAccessor<StrSet> > strset_acc;
	: strset_acc = strset_variable_acc [vars] {
		op.reset(new VarGetter<StrSet>(*strset_acc));
// ----------------------------------------------------------------------------
Adam Wardynski's avatar
Adam Wardynski committed
// This expression gets variable of TSet type from the scope's Variables.
// Creates one first if it's not there yet.
// 
// Returns boost::shared_ptr<VariableAccessor<TSet> >
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<TSet> > symset_acc]
			vars.get_put<TSet>(str_token_rem_grav(n)); 	
			VariableAccessor<TSet> acc = 
				vars.create_accessor<TSet>(str_token_rem_grav(n));
			symset_acc.reset(new VariableAccessor<TSet>(acc));

// Vargetter for symbol set variable. This rule wraps symset_variable_acc.
// Returns boost::shared_ptr<VarGetter<TSet> > 
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<TSet> > op]
	boost::shared_ptr<VariableAccessor<TSet> > symset_acc;
	: symset_acc = symset_variable_acc [vars] {
			op.reset(new VarGetter<TSet>(*symset_acc));
// ----------------------------------------------------------------------------
Adam Wardynski's avatar
Adam Wardynski committed
// This expression gets variable of Bool type from the scope's Variables.
// Creates one first if it's not there yet.
// Returns boost::shared_ptr<VariableAccessor<Bool> >
	[Variables& vars] 
	returns [boost::shared_ptr<VariableAccessor<Bool> > bool_acc]
			vars.get_put<Bool>(str_token_rem_grav(n));
			VariableAccessor<Bool> acc = 
				vars.create_accessor<Bool>(str_token_rem_grav(n));
			bool_acc.reset(new VariableAccessor<Bool>(acc));

// Vargetter for bool variable. It is only a wrapper for bool_variable_acc.
// Returns boost::shared_ptr<VarGetter<Bool> >
	[Variables& vars] 
	returns [boost::shared_ptr<VarGetter<Bool> > op]
	boost::shared_ptr<VariableAccessor<Bool> > bool_acc;
	: bool_acc = bool_variable_acc [vars] {
			op.reset(new VarGetter<Bool>(*bool_acc));
rk's avatar
rk committed

// ----------------------------------------------------------------------------
// Match: $m:name
Adam Wardynski's avatar
Adam Wardynski committed
// Accessor for a variable of Match type: MATCH_VECTOR_PREFIX followed by a
// SYMBOL naming the variable (grave accents around the name are stripped).
// The variable is fetched from the given Variables, being created first if
// it is not there yet.
// Returns boost::shared_ptr<VariableAccessor<Match> >
match_vector_variable_acc
	[Variables& vars]
	returns [boost::shared_ptr<VariableAccessor<Match> > mvv_acc]
	: MATCH_VECTOR_PREFIX name_tok: SYMBOL {
			const std::string var_name = str_token_rem_grav(name_tok);

			// register the variable before asking for an accessor to it
			vars.get_put<Match>(var_name);

			mvv_acc.reset(
				new VariableAccessor<Match>(
					vars.create_accessor<Match>(var_name)));
	}
;

Adam Wardynski's avatar
Adam Wardynski committed
// VarGetter for a match vector variable, built from the accessor produced
// by match_vector_variable_acc.
// Returns boost::shared_ptr<VarGetter<Match> >
match_vector_variable
	[Variables& vars]
	returns [boost::shared_ptr<VarGetter<Match> > mvv]
{
	boost::shared_ptr<VariableAccessor<Match> > accessor;
}
	: accessor = match_vector_variable_acc [vars] {
		mvv.reset(new VarGetter<Match>(*accessor));
	}
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
rk's avatar
rk committed
// OPERATORS
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Symbol set (tagset) operators
// Returns boost::shared_ptr<Function<TSet> >
///////////////////////////////////////////////////////////////////////////////
symset_operator
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<TSet> > ret]
	: ret = symset_condition [scope]
	| {LA(1)==SYMBOL && LA(2)==LBRACKET}? (ret = symset_getsymbol [scope])
	| ret = symset_var_val [scope]
	| ret = symset_class   [scope]
	| ret = symset_range   [scope]
	| ret = symset_catflt  [scope]
	| ret = symset_agrflt  [scope]
	| ret = symset_union   [scope]
	| ret = symset_intersection  [scope]
	| LPAREN ret = symset_operator [scope] RPAREN
// ----------------------------------------------------------------------------
// One or more symset operators separated by commas.
// Returns a shared_ptr to a vector holding the parsed operators in the order
// in which they appeared.
symset_operator_comma_sep
	[ParsingScope& scope]
	returns
		[boost::shared_ptr<std::vector<boost::shared_ptr<Function<TSet> > > > ret_v]
{
	boost::shared_ptr<Function<TSet> > item;
	ret_v.reset(new std::vector<boost::shared_ptr<Function<TSet> > >());
}
	: item = symset_operator [scope] { ret_v->push_back(item); }
	  (
		COMMA item = symset_operator [scope] { ret_v->push_back(item); }
	  )*
;

// ----------------------------------------------------------------------------
// Turns a parsed Function<TSet> into an Operator<TSet>, pairing it with the
// variables of the parsing scope.
functional_operator_symset
	[ParsingScope& scope]
	returns [boost::shared_ptr<Operator<TSet> > op]
{
	boost::shared_ptr<Function<TSet> > fn;
}
	: fn = symset_operator [scope] {
		op.reset(new Operator<TSet>(fn, scope.variables()));
	}
;

// ----------------------------------------------------------------------------
ilor's avatar
ilor committed
// A wrapper for symset variable and symset value.
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<TSet> > op]
	: op = symset_variable [scope.variables()]
	| op = symset_value    [scope.tagset()]
// ----------------------------------------------------------------------------
// Condition of the symset value:
// 	if (Bool, TSet, TSet)
// 	? TSet ? Bool : {}
symset_condition
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<TSet> > op]
	boost::shared_ptr<Function<Bool> > test;
	boost::shared_ptr<Function<TSet> > p_true, p_false;
	: "if" LPAREN test  = bool_operator [scope] COMMA 
							p_true  = symset_operator  [scope] 
							(COMMA p_false = symset_operator [scope])? 
				op.reset(new Conditional<TSet>(test, p_true, p_false));
				op.reset(new Conditional<TSet>(test, p_true));
			(p_true = symset_operator [scope])
			(test = bool_operator [scope]) {
			op.reset(new Conditional<TSet>(test, p_true));
// ----------------------------------------------------------------------------
// GetSymbol operator may be cas, m1, f, sg...
// WARNING! This rule can throw ParserException! Be careful!
symset_getsymbol
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<TSet> > op]
{
	Corpus2::Tag tag;
	boost::shared_ptr<Wccl::Function<Position> > position;
}
	: t: SYMBOL LBRACKET position = position_operator [scope] RBRACKET {
				tag = scope.tagset().parse_symbol(str_token_rem_grav(t));
			}
			catch(Corpus2::TagParseError &e) {
				throw(ParserException(e.info()));
			}
				
			op.reset(new Wccl::GetSymbols(tag, position));
		}
;

// ----------------------------------------------------------------------------
// Class operator: class[position]
// Builds a GetWordClass function over the parsed position operator.
symset_class 
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<TSet> > ret]
{
	boost::shared_ptr<Function<Position> > where;
}
	: "class" LBRACKET where = position_operator [scope] RBRACKET { 
		ret.reset(new GetWordClass(where));
	}
;

Paweł Kędzia's avatar
Paweł Kędzia committed
// ----------------------------------------------------------------------------
// Range operator: range(class, begin, end) or range({...}, begin, end)
Paweł Kędzia's avatar
Paweł Kędzia committed
symset_range
	[ParsingScope& scope]
Paweł Kędzia's avatar
Paweł Kędzia committed
	returns [boost::shared_ptr<Function<TSet> > ret]
{
	boost::shared_ptr<TSet> tset;
Paweł Kędzia's avatar
Paweł Kędzia committed
	boost::shared_ptr<Function<Position> > p1, p2;
}
	: "range" LPAREN
			(tset = symset_literal [scope.tagset()] | tag_class: "class") COMMA
			p1  = position_operator [scope] COMMA 
			p2  = position_operator [scope] 
Paweł Kędzia's avatar
Paweł Kędzia committed
		RPAREN {
			if (tag_class) {
				ret.reset(new GetSymbolsInRange(Corpus2::Tag::all_pos_mask, p1, p2));
Paweł Kędzia's avatar
Paweł Kędzia committed
			}
			else {
		 		ret.reset(new GetSymbolsInRange(tset->get_value(), p1, p2));
// ----------------------------------------------------------------------------
// Catflt operator: catflt(position, selector, mask)
// Builds a CatFilter from a position operator and two symset operators.
symset_catflt
  [ParsingScope& scope]
  returns [boost::shared_ptr<Function<TSet> > ret]
{
  boost::shared_ptr<Function<Position> > pos;
  boost::shared_ptr<Function<TSet> > sel;
  boost::shared_ptr<Function<TSet> > flt_mask;
}
  : "catflt" LPAREN
      pos      = position_operator [scope] COMMA
      sel      = symset_operator   [scope] COMMA
      flt_mask = symset_operator   [scope]
    RPAREN {
      ret.reset(new CatFilter(pos, sel, flt_mask));
    }
;

Paweł Kędzia's avatar
Paweł Kędzia committed
// ----------------------------------------------------------------------------
// Agrflt operator
symset_agrflt
  [ParsingScope& scope]
Paweł Kędzia's avatar
Paweł Kędzia committed
  returns [boost::shared_ptr<Function<TSet> > ret]
{
  boost::shared_ptr<Function<TSet> > attr, mask;
  boost::shared_ptr<Function<Position> > lpos, rpos;
}
  : "agrflt" LPAREN 
			lpos = position_operator [scope] COMMA
			rpos = position_operator [scope] COMMA
			attr = symset_operator   [scope] COMMA
			mask = symset_operator   [scope] 
Paweł Kędzia's avatar
Paweł Kędzia committed
  	RPAREN {
  	  ret.reset(new AgrFilter(lpos, rpos, attr, mask, scope.tagset()));
// ----------------------------------------------------------------------------
// Union operator: union(symset, symset, ...)
// Builds a SetUnion<TSet> over the comma-separated symset operators.
symset_union
  [ParsingScope& scope]
  returns [boost::shared_ptr<Function<TSet> > ret]
{
  boost::shared_ptr< std::vector< boost::shared_ptr<Function<TSet> > > > operands;
}
  : "union" LPAREN operands = symset_operator_comma_sep [scope] RPAREN {
      ret.reset(new SetUnion<TSet>(operands));
    }
;

// ----------------------------------------------------------------------------
// Intersection operator: intersection(symset, symset, ...)
// Builds a SetIntersection<TSet> over the comma-separated symset operators.
symset_intersection
  [ParsingScope& scope]
  returns [boost::shared_ptr<Function<TSet> > ret]
{
  boost::shared_ptr< std::vector< boost::shared_ptr<Function<TSet> > > > operands;
}
  : "intersection" LPAREN operands = symset_operator_comma_sep [scope] RPAREN {
      ret.reset(new SetIntersection<TSet>(operands));
    }
;
///////////////////////////////////////////////////////////////////////////////
// Position operator
// Returns boost::shared_ptr<Function<Position> >
///////////////////////////////////////////////////////////////////////////////
position_operator
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Position> > ret]
	( ret = position_var_val     [scope.variables()]
	| ret = position_condition   [scope]
	| ret = position_first_token [scope]
	| ret = position_last_token  [scope]
	| LPAREN ret = position_operator [scope] RPAREN
	) 
	( // if there is SIGNED_INT after the position, it is actually a relative position
		i: SIGNED_INT {
			ret.reset(new RelativePosition(ret, token_ref_to_int(i)));
		}
	)?
// ----------------------------------------------------------------------------
// Turns a parsed Function<Position> into an Operator<Position>, pairing it
// with the variables of the parsing scope.
functional_operator_position
	[ParsingScope& scope]
	returns [boost::shared_ptr<Operator<Position> > op]
{
	boost::shared_ptr<Function<Position> > fn;
}
	: fn = position_operator [scope] {
		op.reset(new Operator<Position>(fn, scope.variables()));
	}
;

// ----------------------------------------------------------------------------
// Either a position value or a position variable, as a Function<Position>.
// The variable alternative is resolved against the given Variables.
position_var_val
	[Variables& vars]
	returns [boost::shared_ptr<Function<Position> > pos_fn]
	: pos_fn = position_value
	| pos_fn = position_variable [vars]
;

// ----------------------------------------------------------------------------
// Condition of the position value
// if (Bool, Position, Position)
// ? Position ? Bool : 0
position_condition
	[ParsingScope& scope]
	returns [boost::shared_ptr<Function<Position> > op]
{
	boost::shared_ptr<Function<Bool> > test;
	boost::shared_ptr<Function<Position> > p_true, p_false;
}
	: "if" LPAREN test  = bool_operator [scope] COMMA 
							p_true  = position_operator [scope] 
							(COMMA p_false = position_operator [scope])?