Skip to content
Snippets Groups Projects
grammar.g 34.2 KiB
Newer Older
rk's avatar
rk committed
header {
	#include <libwccl/parser/ParserException.h>
rk's avatar
rk committed

	#include <climits>
	#include <cstdio>
	#include <cstdlib>
	#include <antlr/Token.hpp>
	#include <boost/lexical_cast.hpp>

	// values/variables
	#include <libwccl/variables.h>
	#include <libwccl/values/bool.h>
	#include <libwccl/values/tset.h>
	#include <libwccl/values/strset.h>
	#include <libwccl/values/position.h>
	
	// sentence context
	#include <libwccl/sentencecontext.h>

	// operators
	#include <libwccl/ops/functions/bool/predicates/or.h>
	#include <libwccl/ops/functions/bool/predicates/nor.h>
	#include <libwccl/ops/functions/bool/predicates/and.h>
	#include <libwccl/ops/functions/bool/predicates/regex.h>
	#include <libwccl/ops/functions/bool/predicates/equals.h>

	#include <libwccl/ops/functions/bool/varsetter.h>

	#include <libwccl/ops/functions/strset/affix.h>
	#include <libwccl/ops/functions/strset/getorth.h>
	#include <libwccl/ops/functions/strset/toupper.h>
	#include <libwccl/ops/functions/strset/tolower.h>

	#include <libwccl/ops/functions/constant.h>
	#include <libwccl/ops/functions/vargetter.h>

	#include <libwccl/ops/functions/bool/predicates/intersects.h>
	#include <libwccl/ops/functions/bool/predicates/issubsetof.h>
	#include <libwccl/ops/functions/bool/predicates/isinside.h>
	#include <libwccl/ops/functions/bool/predicates/isoutside.h>
	#include <libwccl/ops/functions/position/relativeposition.h>

	#include <libwccl/ops/functions/conditional.h>
rk's avatar
rk committed
	// Unicode String
	#include <unicode/uniset.h>
	#include <unicode/unistr.h>
	#include <libwccl/parser/ANTLRParserResult.h>
rk's avatar
rk committed
}

options {
	language = "Cpp";
//	namespace = "Wccl";
//	genHashLines = true;
rk's avatar
rk committed
}

// ----------------------------------------------------------------------------
rk's avatar
rk committed
// ANTLR PARSER
// ----------------------------------------------------------------------------
rk's avatar
rk committed
class ANTLRParser extends Parser;
options {
rk's avatar
rk committed
	buildAST = false;
rk's avatar
rk committed
	defaultErrorHandler = false;
}
{
private:
	// 
rk's avatar
rk committed
	// Converts the text of a token into a UnicodeString: the text is read as
	// UTF-8 and the result of UnicodeString::unescape() on it is returned.
	const UnicodeString token_ref_to_ustring(antlr::RefToken& rstr) const {
		const std::string raw_text = rstr->getText();
		const UnicodeString decoded = UnicodeString::fromUTF8(raw_text);
		return decoded.unescape();
	}
	//
	// Returns the token text (converted with token_ref_to_ustring()) without
	// its first and last element -- presumably the quotes surrounding a
	// STRING token. Texts shorter than three elements yield an empty string.
	const UnicodeString str_token_ref_to_ustring(antlr::RefToken& rstr) const {
		const UnicodeString quoted = token_ref_to_ustring(rstr);
		UnicodeString inner;

		if (quoted.length() >= 3) {
			// Start after the first element and stop before the last one.
			quoted.extract(1, quoted.length() - 2, inner);
		}

		return inner;
	}
	//
rk's avatar
rk committed
	// Returns the text of a token as a std::string, exactly as reported by
	// the token's getText() (no decoding or unescaping is applied here).
	const std::string token_ref_to_std_string(antlr::RefToken& rstr) const {
		return rstr->getText();
	}
	//
	// Parses the text of an integer token as a base-10 int.
	// Unlike atoi(), the result is well defined for numbers that do not fit
	// in an int: such values are clamped to INT_MIN / INT_MAX. Text that does
	// not start with a number still yields 0.
	int token_ref_to_int(antlr::RefToken& rstr) const {
		const std::string text = rstr->getText();
		// strtol itself saturates at LONG_MIN / LONG_MAX on overflow.
		const long parsed = std::strtol(text.c_str(), NULL, 10);

		if (parsed > INT_MAX) {
			return INT_MAX;
		}
		if (parsed < INT_MIN) {
			return INT_MIN;
		}
		return static_cast<int>(parsed);
	}
// TODO
//  - base, orth

// TEMPORARY CHANGES -> 
//  -> temporarily commented out the invocation of the condit_* rules
//  -> temporarily commented out 2 rules from equal

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// "GLOBAL" RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Rules for parsing string operators in scope (variables). 
// Returns boost::shared_ptr<ANTLRParserResult<Wccl::StrSet> >
parse_string_operator
	returns [boost::shared_ptr<ANTLRParserResult<Wccl::StrSet> > res]
	res.reset(new ANTLRParserResult<Wccl::StrSet>());
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > op;
Paweł Kędzia's avatar
Paweł Kędzia committed
	: op = string_operators [tagset, *res->variables.get()] {
// ----------------------------------------------------------------------------
// Rules for parsing predicates in scope (variables). 
// Returns boost::shared_ptr<ANTLRParserResult<Wccl::Bool> >
parse_predicates 
	returns [boost::shared_ptr<ANTLRParserResult<Wccl::Bool> > res]
	res.reset(new ANTLRParserResult<Wccl::Bool>());
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > op;
Paweł Kędzia's avatar
Paweł Kędzia committed
	: op = logical_predicates [tagset, *res->variables.get()] {

// ----------------------------------------------------------------------------
// Rules for parsing tagset (symbol set) operators
// Returns boost::shared_ptr<ANTLRParserResult<Wccl::TSet> >
parse_sym_set_operator
	returns [boost::shared_ptr<ANTLRParserResult<Wccl::TSet> > res]
{
	res.reset(new ANTLRParserResult<Wccl::TSet>());
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > op;
}
Paweł Kędzia's avatar
Paweł Kędzia committed
	: op = sym_set_operators [tagset, *res->variables.get()] {
ilor's avatar
ilor committed
// ----------------------------------------------------------------------------
// Entry rule for parsing a position operator.
// Returns boost::shared_ptr<ANTLRParserResult<Wccl::Position> >; the parsed
// operator is stored in the result's `op` member and is parsed against the
// result's own `variables`.
parse_position_operator
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<ANTLRParserResult<Wccl::Position> > res]
{
	boost::shared_ptr<Wccl::Function<Wccl::Position> > parsed_op;
	res.reset(new ANTLRParserResult<Wccl::Position>());
}
	: parsed_op = position_operators [tagset, *res->variables.get()] {
		res->op = parsed_op;
	}
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VALUES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Single or multiple values in string set:
//   [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"] 
// Returns boost::shared_ptr<Wccl::StrSet> 
str_set_literal 
	returns [boost::shared_ptr<Wccl::StrSet> s_set]
{
	s_set.reset(new Wccl::StrSet());
}
	: LBRACKET
	  ( s1: STRING {
			s_set->insert(str_token_ref_to_ustring(s1)); 
		}
	    ( COMMA s2: STRING {
// Constant string set 
// Returns boost::shared_ptr<Wccl::Constant<Wccl::StrSet> >
	returns [boost::shared_ptr<Wccl::Constant<Wccl::StrSet> > val]
{
		val.reset(new Wccl::Constant<Wccl::StrSet>(*set.get()));
	}
;
// ----------------------------------------------------------------------------
// Element of sym set. This rule inserts the element into the set.
// Element may be: a or `a `
sym_set_elem
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, boost::shared_ptr<Wccl::TSet>& t_set]
	: s1: SYMBOL {
Paweł Kędzia's avatar
Paweł Kędzia committed
		try {
			t_set->insert_symbol(tagset, token_ref_to_std_string(s1));
		}
		catch(Corpus2::TagParseError &e) {
			throw(ParserException(e.info()));
		}
// sym set literal
// {} {sym_set_elem} {sym_set_elem, ..., sym_set_elem}
// Returns boost::shared_ptr<Wccl::TSet>
sym_set_literal
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<Wccl::TSet> t_set]
{
	t_set.reset(new Wccl::TSet());
}
	: LCURLY 
	  ( sym_set_elem[tagset, t_set] (COMMA sym_set_elem[tagset, t_set])* )?
	  RCURLY
// Constant symbol set
// Returns boost::shared_ptr<Wccl::Constant<Wccl::TSet> >
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<Wccl::Constant<Wccl::TSet> > val]
{
Paweł Kędzia's avatar
Paweł Kędzia committed
	: set = sym_set_literal [tagset] {
		val.reset(new Wccl::Constant<Wccl::TSet>(*set.get()));
	}
;
// ----------------------------------------------------------------------------
// boolean value:
// A bool literal is one of the keywords True or False.
// Returns boost::shared_ptr<Wccl::Bool>
bool_literal
	returns [boost::shared_ptr<Wccl::Bool> val]
	// The value is constructed directly; no intermediate temporary is needed.
	: "True"  { val.reset(new Wccl::Bool(true )); }
	| "False" { val.reset(new Wccl::Bool(false)); }
;
// Constant bool value
// Returns boost::shared_ptr<Wccl::Constant<Wccl::Bool> >
	returns [boost::shared_ptr<Wccl::Constant<Wccl::Bool> > val]
{
	boost::shared_ptr<Wccl::Bool> bool_lit;
}
	: bool_lit = bool_literal {
		val.reset(new Wccl::Constant<Wccl::Bool>(*bool_lit));
	}
// ----------------------------------------------------------------------------
// position value:
// Position literal may be (+|-)?(0-9)+ or begin or end or nowhere
// returns boost::shared_ptr<Wccl::Position>
position_literal
	returns [boost::shared_ptr<Wccl::Position> val]
	: i: UNSIGNED_INT {
		val.reset(new Wccl::Position(Wccl::Position(token_ref_to_int(i))));
		val.reset(new Wccl::Position(Wccl::Position(Wccl::Position::Begin)));
	}
		val.reset(new Wccl::Position(Wccl::Position(Wccl::Position::End)));
		val.reset(new Wccl::Position(Wccl::Position(Wccl::Position::Nowhere)));
	}
;
ilor's avatar
ilor committed
// Constant position value
// Returns boost::shared_ptr<Wccl::Constant<Wccl::Position> >
position_value
	returns [boost::shared_ptr<Wccl::Constant<Wccl::Position> > val]
{
	boost::shared_ptr<Wccl::Position> pos_lit;
}
	: pos_lit = position_literal {
		val.reset(new Wccl::Constant<Wccl::Position>(*pos_lit));
// ----------------------------------------------------------------------------
// An integer literal, given either as a SIGNED_INT or an UNSIGNED_INT token.
// Returns the token text converted with token_ref_to_int().
number
	returns [int ret]
{
	ret = 0;
}
	: signed_tok: SIGNED_INT {
		ret = token_ref_to_int(signed_tok);
	}
	| unsigned_tok: UNSIGNED_INT {
		ret = token_ref_to_int(unsigned_tok);
	}
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VARIABLES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Position: $name
// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > 
position_variable_acc
	returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > pos_acc]
		vars.get_put<Wccl::Position>(token_ref_to_std_string(n));
ilor's avatar
ilor committed
		Wccl::VariableAccessor<Wccl::Position> acc = 
			vars.create_accessor<Wccl::Position>(token_ref_to_std_string(n));
		pos_acc.reset(new Wccl::VariableAccessor<Wccl::Position>(acc));
// Position vargetter
// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::Position> >
position_variable
	[Wccl::Variables& vars] 
	returns [boost::shared_ptr<Wccl::VarGetter<Wccl::Position> > op]
{
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > pos_acc;
}
	: pos_acc = position_variable_acc [vars] {
		op.reset(new Wccl::VarGetter<Wccl::Position>(*pos_acc.get()));
// Relative position: relpos(<position> + N) or relpos(<position> - N)
// Builds a Wccl::RelativePosition from a position operator and an integer
// offset. The offset is N for "+" and -N for "-"; previously the sign token
// was matched but ignored, so both forms produced a positive offset.
// Returns boost::shared_ptr<Wccl::Function<Wccl::Position> >
relpos
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Position> > ret]
{
	boost::shared_ptr<Wccl::Function<Wccl::Position> > pos;
	// +1 for "+", -1 for "-"; applied to the unsigned offset below.
	int sign = 1;
}
	: "relpos" 
			LPAREN 
				pos = op_position [tagset, vars]
				( "+" | "-" { sign = -1; } )
				ofst: UNSIGNED_INT 
			RPAREN {
			ret.reset(new Wccl::RelativePosition(
					pos,
					sign * token_ref_to_int(ofst)
				)
			);
		}
;

// ----------------------------------------------------------------------------
// String set, $s:name
// This expression gets variable of the type StrSet from string-named variable 
// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> >
str_set_variable_acc
	returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > strset_acc]
		// get/put variable to variables
		vars.get_put<Wccl::StrSet>(token_ref_to_std_string(n));
ilor's avatar
ilor committed
		// makes accessor for value
		Wccl::VariableAccessor<Wccl::StrSet> acc = 
			vars.create_accessor<Wccl::StrSet>(token_ref_to_std_string(n));
		strset_acc.reset(new Wccl::VariableAccessor<Wccl::StrSet>(acc));
// Vargetter for StrSet variable
// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::StrSet> > 
str_set_variable 
	[Wccl::Variables& vars] 
	returns [boost::shared_ptr<Wccl::VarGetter<Wccl::StrSet> > op]
{
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > strset_acc;
}
	: strset_acc = str_set_variable_acc [vars] {
		op.reset(new Wccl::VarGetter<Wccl::StrSet>(*strset_acc.get()));
// ----------------------------------------------------------------------------
// Symbol set: $t:name
// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> >
sym_set_variable_acc 
	returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > symset_acc]
			vars.get_put<Wccl::TSet>(token_ref_to_std_string(n)); 	

ilor's avatar
ilor committed
			Wccl::VariableAccessor<Wccl::TSet> acc = 
				vars.create_accessor<Wccl::TSet>(token_ref_to_std_string(n));
			symset_acc.reset(new Wccl::VariableAccessor<Wccl::TSet>(acc));
// Vargetter for symbol set variable
// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::TSet> > 
sym_set_variable 
	[Wccl::Variables& vars] 
	returns [boost::shared_ptr<Wccl::VarGetter<Wccl::TSet> > op]
{
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > symset_acc;
}
	: symset_acc = sym_set_variable_acc [vars] {
			op.reset(new Wccl::VarGetter<Wccl::TSet>(*symset_acc.get()));
// ----------------------------------------------------------------------------
// Bool: $b:name
// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> >
boolean_variable_acc
	returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > bool_acc]
			vars.get_put<Wccl::Bool>(token_ref_to_std_string(n));

ilor's avatar
ilor committed
			Wccl::VariableAccessor<Wccl::Bool> acc = 
				vars.create_accessor<Wccl::Bool>(token_ref_to_std_string(n));
			bool_acc.reset(new Wccl::VariableAccessor<Wccl::Bool>(acc));
// Vargetter for bool variable
// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::Bool> >
boolean_variable
	[Wccl::Variables& vars] 
	returns [boost::shared_ptr<Wccl::VarGetter<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > bool_acc;
}
	: bool_acc = boolean_variable_acc [vars] {
			op.reset(new Wccl::VarGetter<Wccl::Bool>(*bool_acc.get()));
rk's avatar
rk committed

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
rk's avatar
rk committed
// OPERATORS
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
rk's avatar
rk committed
// Setvar operator: "setvar" LPAREN <body> RPAREN
// The body is one of four typed forms (position, bool, string set, symbol
// set); the operator built by the matched body rule is returned unchanged.
// Returns boost::shared_ptr<Wccl::Function<Wccl::Bool> >
// ----------------------------------------------------------------------------
setvar_op 
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > ret]
	: "setvar" LPAREN
	  (
		ret = setvar_body_pos  [tagset, vars]
		| ret = setvar_body_bool [tagset, vars]
		| ret = setvar_body_sset [tagset, vars]
		| ret = setvar_body_tset [tagset, vars]
	  )
	  RPAREN
;
// Implementations of setvar:
// ----------------------------------------------------------------------------
setvar_body_pos 
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Position> > ret_op;
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > ret_acc;
	:	ret_acc = position_variable_acc [vars]
		COMMA
		ret_op  = op_position [tagset, vars] {
			op.reset(new Wccl::VarSetter<Wccl::Position>(*ret_acc.get(), ret_op));
		}

// ----------------------------------------------------------------------------
setvar_body_bool 
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > ret_op;
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > ret_acc;
	:	ret_acc = boolean_variable_acc [vars]
		COMMA
		ret_op  = logical_predicates [tagset, vars] {
			op.reset(new Wccl::VarSetter<Wccl::Bool>(*ret_acc.get(), ret_op));
		}
// ----------------------------------------------------------------------------
setvar_body_sset
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret_op;
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > ret_acc;
	: ret_acc = str_set_variable_acc [vars]
	  COMMA ret_op  = string_operators [tagset, vars] {
		op.reset(new Wccl::VarSetter<Wccl::StrSet>(*ret_acc.get(), ret_op));
// ----------------------------------------------------------------------------
setvar_body_tset 
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > ret_op;
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > ret_acc;
	: ret_acc = sym_set_variable_acc [vars]
	  COMMA 
	  ret_op  = sym_set_operators [tagset, vars] {
		op.reset(new Wccl::VarSetter<Wccl::TSet>(*ret_acc.get(), ret_op));

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Symbol set (tagset) operators 
// Returns boost::shared_ptr<Wccl::Function<Wccl::TSet> >
// ----------------------------------------------------------------------------
sym_set_operators
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::TSet> > ret]
Paweł Kędzia's avatar
Paweł Kędzia committed
	: ret = op_sym_set [tagset, vars] 
	| ret = condit_sym [tagset, vars]
// Implementations of symbol set operators:
// ----------------------------------------------------------------------------
op_sym_set
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::TSet> > op]
	: op = sym_set_variable [vars]
Paweł Kędzia's avatar
Paweł Kędzia committed
	| op = sym_set_value [tagset]
// ----------------------------------------------------------------------------
// if (Bool, TSet, TSet)
// ? TSet ? Bool : {}
condit_sym
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::TSet> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > test;
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > p_true, p_false;
}
Paweł Kędzia's avatar
Paweł Kędzia committed
	: "if" LPAREN test  = logical_predicates [tagset, vars] COMMA 
							p_true  = sym_set_operators  [tagset, vars] 
							(COMMA p_false = sym_set_operators [tagset, vars])? 
			if (p_false) {
				op.reset(new Wccl::Conditional<Wccl::TSet>(test, p_true, p_false));
			}
			else {
				op.reset(new Wccl::Conditional<Wccl::TSet>(test, p_true));
			}
		}
			(p_true = sym_set_operators [tagset, vars])
			(test = logical_predicates [tagset, vars]) {
			op.reset(new Wccl::Conditional<Wccl::TSet>(test, p_true));
		}
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Position operators 
// Returns boost::shared_ptr<Wccl::Function<Wccl::Position> >
// ----------------------------------------------------------------------------
position_operators
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Position> > ret]
	: ret = op_position [tagset, vars] 
// Implementations of position operators:
// ----------------------------------------------------------------------------
op_position
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Position> > op]
	: op = position_variable [vars]
	| op = position_value
	| op = relpos [tagset, vars]
//	| op = condit_position [tagset, vars]
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// String operators 
// Dispatches to one of the string-set producing rules listed below and
// returns whatever operator the matched rule built.
// Returns boost::shared_ptr<Wccl::Function<Wccl::StrSet> >
// ----------------------------------------------------------------------------
string_operators 
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
	: ret = op_orth    [tagset, vars] 
	| ret = op_base    [tagset, vars]
	| ret = op_lower   [tagset, vars] 
	| ret = op_upper   [tagset, vars]
	| ret = op_affix   [tagset, vars] 
	| ret = op_str_set [tagset, vars] 
	| ret = condit_str [tagset, vars]
;
// Implementations of string operators:
// ----------------------------------------------------------------------------
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
	boost::shared_ptr<Wccl::Function<Wccl::Position> > pos;
	: "orth" LBRACKET pos = op_position [tagset, vars] RBRACKET { 
			ret.reset(new Wccl::GetOrth(pos));
// ----------------------------------------------------------------------------
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
	boost::shared_ptr<Wccl::VarGetter<Wccl::Position> > pos;
	: "base" LBRACKET pos = position_variable [vars] RBRACKET { 
		// ret = TODO
// ----------------------------------------------------------------------------
rk's avatar
rk committed
op_lower 
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
rk's avatar
rk committed
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > o_ret;
}
Paweł Kędzia's avatar
Paweł Kędzia committed
	: "lower" LPAREN o_ret = string_operators[tagset, vars] RPAREN {
rk's avatar
rk committed
		ret.reset(new Wccl::ToLower(o_ret));
	}
;
// ----------------------------------------------------------------------------
rk's avatar
rk committed
op_upper 
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
rk's avatar
rk committed
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
rk's avatar
rk committed
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > o_ret;
}
Paweł Kędzia's avatar
Paweł Kędzia committed
	: "upper" LPAREN o_ret = string_operators[tagset, vars] RPAREN {
rk's avatar
rk committed
		ret.reset(new Wccl::ToUpper(o_ret));
	}
;
// ----------------------------------------------------------------------------
rk's avatar
rk committed
op_affix 
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
rk's avatar
rk committed
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
	int offset = 0;
rk's avatar
rk committed
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > o_ret;
}
			o_ret = string_operators[tagset, vars] COMMA offset = number 
	RPAREN {
		ret.reset(new Wccl::Affix(o_ret, offset));
// ----------------------------------------------------------------------------
op_str_set
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > op]
	: op = str_set_variable [vars]
	| op = str_set_value 
// ----------------------------------------------------------------------------
// if (Bool, StrSet, StrSet)
// ? StrSet ? Bool : []
condit_str
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > test;
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > p_true, p_false;
}
Paweł Kędzia's avatar
Paweł Kędzia committed
	: "if" LPAREN test  = logical_predicates [tagset, vars] COMMA 
							p_true  = string_operators   [tagset, vars] 
							(COMMA p_false = string_operators [tagset, vars])? 
		if (p_false) {
			op.reset(new Wccl::Conditional<Wccl::StrSet>(test, p_true, p_false));
		}
		else {
			op.reset(new Wccl::Conditional<Wccl::StrSet>(test, p_true));
		}
			p_true = string_operators [tagset, vars]
			test = logical_predicates [tagset, vars] {
			op.reset(new Wccl::Conditional<Wccl::StrSet>(test, p_true));
		}

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Logical predicates 
// Returns boost::shared_ptr<Wccl::Function<Wccl::Bool> >
// ----------------------------------------------------------------------------
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > ret]
Paweł Kędzia's avatar
Paweł Kędzia committed
	: ret = lpred_and   [tagset, vars]  
	| ret = lpred_or    [tagset, vars]  
	| ret = lpred_nor   [tagset, vars]  
	| ret = lpred_bool  [tagset, vars]	
	| ret = lpred_in    [tagset, vars]
	| ret = lpred_inter [tagset, vars]
Paweł Kędzia's avatar
Paweł Kędzia committed
	| ret = lpred_eq    [tagset, vars]
	| ret = lpred_regex [tagset, vars]
	| ret = setvar_op   [tagset, vars]
	| ret = lpred_inout [tagset, vars]
	| ret = condit_bool [tagset, vars]
// ----------------------------------------------------------------------------
// comma-separated predicates
logical_predicates_comma_sep 
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<
			std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > > 
	> ret_v]
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > pred;
	ret_v.reset(
		new std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > >
	);
Paweł Kędzia's avatar
Paweł Kędzia committed
	: pred = logical_predicates [tagset, vars] { 
		ret_v->push_back(pred);
	} (
Paweł Kędzia's avatar
Paweł Kędzia committed
		COMMA pred = logical_predicates [tagset, vars] {
		ret_v->push_back(pred);
	})*
;
// ----------------------------------------------------------------------------
lpred_and 
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
	boost::shared_ptr<
		std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > > 
	> ret_v;
Paweł Kędzia's avatar
Paweł Kędzia committed
	: "and" LPAREN ret_v = logical_predicates_comma_sep [tagset, vars] RPAREN {
			op.reset(new Wccl::And(ret_v));
	}
// ----------------------------------------------------------------------------
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<
		std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > > 
	> ret_v;
}
Paweł Kędzia's avatar
Paweł Kędzia committed
	: "or" LPAREN ret_v = logical_predicates_comma_sep [tagset, vars] RPAREN {
			op.reset(new Wccl::Or(ret_v));
	}
;
// ----------------------------------------------------------------------------
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<
		std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > > 
	> ret_v;
}
	: "not" LPAREN ret_v = logical_predicates_comma_sep [tagset, vars] RPAREN {
			op.reset(new Wccl::Nor(ret_v));
	}
;
// ----------------------------------------------------------------------------
// A bool variable or a bool constant used as a predicate.
// `tagset` is accepted for uniformity with the other lpred_* rules; neither
// alternative passes it on.
lpred_bool
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
	: op = boolean_variable [vars]
	| op = boolean_value 
;
// ----------------------------------------------------------------------------
lpred_in
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > t1, t2;
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > s1, s2;
:
	"in" LPAREN
	(
		(sym_set_operators [tagset, vars]) =>
		(
			t1 = sym_set_operators [tagset, vars] COMMA 
			t2 = sym_set_operators [tagset, vars] {
				op.reset(new Wccl::IsSubsetOf<Wccl::TSet>(t1, t2));
			}
		)
		|
		(
			s1 = string_operators [tagset, vars] COMMA
			s2 = string_operators [tagset, vars] {
				op.reset(new Wccl::IsSubsetOf<Wccl::StrSet>(s1, s2));
			}
		)
	)
	RPAREN
	
// ----------------------------------------------------------------------------
lpred_inter
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > t1, t2;
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > s1, s2;
}
	:
	"inter" LPAREN
	(
		(sym_set_operators [tagset, vars]) =>
		(
			t1 = sym_set_operators [tagset, vars] COMMA  
			t2 = sym_set_operators [tagset, vars]  {
				op.reset(new Wccl::Intersects<Wccl::TSet>(t1, t2));
			}
		)
	|
		(
			s1 = string_operators  [tagset, vars] COMMA  
			s2 = string_operators  [tagset, vars]  {
				op.reset(new Wccl::Intersects<Wccl::StrSet>(s1, s2));
			}
		)
	)
	RPAREN
// ----------------------------------------------------------------------------
// equal(a, b): builds a Wccl::Equals over two operands of the same type.
// The alternatives cover, in the order written: Position operands, TSet
// operands (guarded by a syntactic predicate), StrSet operands (guarded by a
// syntactic predicate) and Bool operands.
// Returns boost::shared_ptr<Wccl::Function<Wccl::Bool> >
lpred_eq
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > t1, t2;
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > b1, b2;
	boost::shared_ptr<Wccl::Function<Wccl::StrSet>  > s1, s2;
	boost::shared_ptr<Wccl::Function<Wccl::Position> > p1, p2;
}
	: "equal" LPAREN
	(
		// Position == Position
		(
			p1 = position_operators [tagset, vars] COMMA 
			p2 = position_operators [tagset, vars] {
				op.reset(new Wccl::Equals<Wccl::Position>(p1, p2));
			}
		)
	|
		// TSet == TSet
		(sym_set_operators [tagset, vars]) =>
		(
			t1 = sym_set_operators [tagset, vars] COMMA  
			t2 = sym_set_operators [tagset, vars] {
				op.reset(new Wccl::Equals<Wccl::TSet>(t1, t2));
			}
		)
	|
		// StrSet == StrSet
		(string_operators [tagset, vars]) =>
		(
			s1 = string_operators  [tagset, vars] COMMA  
			s2 = string_operators  [tagset, vars] {
				op.reset(new Wccl::Equals<Wccl::StrSet>(s1, s2));
			}
		)
	|
		// Bool == Bool
		(
			b1 = logical_predicates [tagset, vars] COMMA
			b2 = logical_predicates [tagset, vars] {
				op.reset(new Wccl::Equals<Wccl::Bool>(b1, b2));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
lpred_regex
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > expr;
}
Paweł Kędzia's avatar
Paweł Kędzia committed
	: "regex" LPAREN expr = string_operators [tagset, vars] COMMA reg: STRING RPAREN {
		op.reset(new Wccl::Regex(expr, str_token_ref_to_ustring(reg)));
// ----------------------------------------------------------------------------
// inside(Position) | outside(Position)
// Parses the "inside"/"outside" predicates over a position expression and
// builds Wccl::IsInside or Wccl::IsOutside respectively.
lpred_inout
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Position> > ret_pos;
}
	: "inside"  LPAREN ret_pos = position_operators [tagset, vars] RPAREN {
		op.reset(new Wccl::IsInside(ret_pos));
	}
	| "outside" LPAREN ret_pos = position_operators [tagset, vars] RPAREN {
		op.reset(new Wccl::IsOutside(ret_pos));
	}
;

// ----------------------------------------------------------------------------
// if (Bool, Bool, Bool)
// ? Bool ? Bool : False
// Conditional over Bool values. Two surface forms are accepted:
//   if(test, p_true [, p_false])   - else-branch is optional
//   ? p_true ? test                - shorthand without an else-branch
// When no else-branch is given, the two-argument Wccl::Conditional
// constructor is used.
condit_bool
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > test, p_true, p_false;
}
	: "if" LPAREN test  = logical_predicates [tagset, vars] COMMA 
							p_true  = logical_predicates [tagset, vars] 
							(COMMA p_false = logical_predicates [tagset, vars])? 
	RPAREN {
		// p_false stays null when the optional third argument is absent.
		if (p_false) {
			op.reset(new Wccl::Conditional<Wccl::Bool>(test, p_true, p_false));
		}
		else {
			op.reset(new Wccl::Conditional<Wccl::Bool>(test, p_true));
		}
	}
	| Q_MARK 
			p_true = logical_predicates [tagset, vars]
		Q_MARK 
			test = logical_predicates [tagset, vars] {
			op.reset(new Wccl::Conditional<Wccl::Bool>(test, p_true));
		}
;

// ----------------------------------------------------------------------------
// if (Bool, Position, Position)
// ? Position ? Bool : 0
/* Not implemented yet
 */
/*
condit_position
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Position> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > test;
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > p_true, p_false;
}
	: "if" LPAREN test  = logical_predicates [tagset, vars] COMMA 
							p_true  = position_operators [tagset, vars] 
							(COMMA p_false = position_operators [tagset, vars])? 
	RPAREN {
		if (p_false) {
			op.reset(new Wccl::Conditional<Wccl::Position>(test, p_true, p_false));
		}
		else {
			op.reset(new Wccl::Conditional<Wccl::Position>(test, p_true));
		}
	}
	| Q_MARK 
			p_true = position_operators [tagset, vars]
		Q_MARK 
			test = logical_predicates [tagset, vars] {
			op.reset(new Wccl::Conditional<Wccl::Position>(test, p_true));
		}
;
*/

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ANTLR LEXER
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Lexer for the grammar. Every rule from here to the end of the file is a
// token definition of this class.
// NOTE(review): no lookahead depth (k) is visible in this options block;
// confirm whether one was set, since COMMENT starts with "//" and the
// multi-line comment rule presumably starts with "/*".
class ANTLRLexer extends Lexer;
options {
	exportVocab    = ANTLRExpr;      // name of the exported token vocabulary
	charVocabulary = '\3'..'\377';   // accepted input characters
	testLiterals   = false;          // off by default; SYMBOL enables it per rule
}

// String literal: text enclosed in a pair of double quotes or a pair of
// single quotes. The body is any run of characters other than the
// delimiter itself; no escape sequences are recognised.
STRING
options { paraphrase = "a string"; }
	:	'"'  (~'"')*  '"'
	|	'\'' (~'\'')* '\''
	;
// Integer with a mandatory leading sign: '-' or '+' followed by one or
// more decimal digits.
SIGNED_INT
options {
	paraphrase = "Signed integer";
}
	: ('-'|'+') ('0'..'9')+ 
;

// Integer without a sign: one or more decimal digits.
UNSIGNED_INT
options { paraphrase = "Unsigned integer"; }
	:	( '0'..'9' )+
	;


rk's avatar
rk committed
// NOTE(review): this rule is truncated. The options block is never closed
// and no alternative or terminating ';' follows; the two lines after the
// paraphrase are not grammar text. Restore the rule from version control.
// Presumably it matches a single quote character - confirm which one, and
// check for overlap with STRING, which also starts with a quote.
QUOT_MARK
options {
	paraphrase = "Quote";
rk's avatar
rk committed

// NOTE(review): this rule is truncated. The options block is never closed
// and no alternative or terminating ';' follows; the two lines after the
// paraphrase are not grammar text. Restore the rule from version control.
// Presumably it matches a single apostrophe-like character - confirm.
APOS_MARK
options {
	paraphrase = "Apostrophe";
rk's avatar
rk committed
// A single '?' character.
Q_MARK
options {
	paraphrase = "Question mark";
}
	: '?'
;

// A single '!' character.
E_MARK
options {
	paraphrase = "Exclamation mark";
}
	: '!'
;

// NOTE(review): this span is damaged. STR_PREFIX has an options block but
// no alternative or ';'. What follows is the tail of a second rule (its
// paraphrase says "Symset prefix") whose name, "options {" line, closing
// '}' and alternative are all missing. Restore both rules from version
// control; the literal prefixes cannot be determined from this file.
STR_PREFIX
options {
	paraphrase = "String prefix";
}
	// NOTE(review): fragment of a separate symset-prefix rule starts here.
	paraphrase = "Symset prefix";
;

// NOTE(review): this rule is truncated. The options block is complete, but
// the alternative and the terminating ';' are missing, and the two lines
// after '}' are not grammar text. Restore the rule from version control.
BOOL_PREFIX
options {
	paraphrase = "Bool prefix";
}
rk's avatar
rk committed

// NOTE(review): this rule is truncated. Only the rule name and the opening
// of its options block remain; the paraphrase, closing '}', alternative and
// ';' are missing, and the two lines after "options {" are not grammar
// text. Restore the rule from version control.
POS_PREFIX
options {
rk's avatar
rk committed
// A single '[' character.
LBRACKET 
options {
	paraphrase = "'['";
}
	: '[' 
;

// A single ']' character.
RBRACKET 
options {
	paraphrase = "']'";
}
	: ']' 
;

// A single '(' character.
LPAREN
options {
	paraphrase = "'('";
}
	: '(' 
;

// A single ')' character.
RPAREN 
options {
	paraphrase = "')'";
}
	: ')' 
;

// A single '{' character.
LCURLY 
options {
	paraphrase = "'{'";
}
	: '{' 
;

// A single '}' character.
RCURLY 
options {
	paraphrase = "'}'";
}
	: '}' 
;

// A single '@' character.
AT_MARK 
options {
	paraphrase = "'@'";
}
	: '@' 
;

// A single ',' character; separates arguments in operator calls.
COMMA
options { 
	paraphrase = "','";
}
	: ','
;

// Identifier: a letter or underscore followed by letters, digits or
// underscores, either bare or wrapped in a pair of backticks.
// testLiterals is enabled for this rule, so the matched text is checked
// against the grammar's string literals (keywords such as "equal", "if").
// The backtick form presumably lets a keyword be used as a plain symbol,
// since the backticks are part of the matched text - confirm.
SYMBOL
options { 
	paraphrase = "Symbol"; 
	testLiterals = true; 
}
	: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
	| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
;

// Whitespace: one space, tab, form feed or line break. Line breaks update
// the lexer's line counter via newline(); the token itself is skipped.
// NOTE(review): the rule's opening line was missing and has been restored
// as ": ( ' '" - confirm against version control.
WS
	: ( ' '
	  | '\t'
	  | '\f'
	  | ( "\r\n"
		| '\r'
		| '\n'
		)
		{ newline(); } 
	)
	{ $setType(antlr::Token::SKIP); } 
;


// Single-line comment: "//" followed by everything up to, but not
// including, the end of the line. The token is skipped.
COMMENT
options { paraphrase = "Single line comment"; }
	:	"//"
		( ~('\n'|'\r') )*
		{ $setType(antlr::Token::SKIP);  }
	;

// Multi-line comment: everything between "/*" and the next "*/". Line
// breaks inside the comment update the lexer's line counter; the token
// itself is skipped.
// NOTE(review): the opening ': "/*"' line was missing and has been restored
// to pair with the closing "*/" - confirm against version control.
ML_COMMENT
options {
	paraphrase = "Multi line comment";
}
	: "/*"
	(			// TODO: test it and add reference to the site it's taken from!
				/* This actually works OK despite the ambiguity that
				'\r' '\n' can be matched in one alternative or by matching
				'\r' in one iteration and '\n' in another.. But 
				this is really matched just by one rule per (...)* 
				loop iteration, so it's OK.
				This is exactly how they do it all over the web - just
				turn off the warning for this particular token.*/
		options { generateAmbigWarnings=false; }
      : { LA(2)!='/' }? '*'	// a '*' that does not begin the closing "*/"
      | '\r' '\n' { newline(); }
      | '\r' { newline(); }
      | '\n' { newline(); }
      | ~('*'|'\n'|'\r')
    )*
    "*/"
    {$setType(antlr::Token::SKIP);}
;

// A single '#' character.
HASH
options { 
	paraphrase = "'#'"; 
}
	: '#' 
;

//DSEPARATOR
//options { 
//	paraphrase = "':-'"; 
//}
//	: ":-" 
//;