Skip to content
Snippets Groups Projects
grammar.g 31.6 KiB
Newer Older
rk's avatar
rk committed
header {
	#include <libwccl/parser/ParserException.h>
rk's avatar
rk committed

	#include <cstdio>
	#include <antlr/Token.hpp>
	#include <boost/lexical_cast.hpp>

	// values/variables
	#include <libwccl/variables.h>
	#include <libwccl/values/bool.h>
	#include <libwccl/values/tset.h>
	#include <libwccl/values/strset.h>
	#include <libwccl/values/position.h>
	
	// sentence context
	#include <libwccl/sentencecontext.h>

	// operators
	#include <libwccl/ops/functions/bool/predicates/or.h>
	#include <libwccl/ops/functions/bool/predicates/nor.h>
	#include <libwccl/ops/functions/bool/predicates/and.h>
	#include <libwccl/ops/functions/bool/predicates/regex.h>
	#include <libwccl/ops/functions/bool/predicates/equals.h>

	#include <libwccl/ops/functions/bool/varsetter.h>

	#include <libwccl/ops/functions/strset/affix.h>
	#include <libwccl/ops/functions/strset/toupper.h>
	#include <libwccl/ops/functions/strset/tolower.h>

	#include <libwccl/ops/functions/constant.h>
	#include <libwccl/ops/functions/vargetter.h>

	#include <libwccl/ops/functions/bool/predicates/intersects.h>
	#include <libwccl/ops/functions/bool/predicates/issubsetof.h>
	#include <libwccl/ops/functions/bool/predicates/isinside.h>
	#include <libwccl/ops/functions/bool/predicates/isoutside.h>
	#include <libwccl/ops/functions/conditional.h>
rk's avatar
rk committed
	// Unicode String
	#include <unicode/uniset.h>
	#include <unicode/unistr.h>
	#include <libwccl/parser/ANTLRParserResult.h>
rk's avatar
rk committed
}

options {
	language = "Cpp";
rk's avatar
rk committed
}

// ----------------------------------------------------------------------------
rk's avatar
rk committed
// ANTLR PARSER
// ----------------------------------------------------------------------------
rk's avatar
rk committed
class ANTLRParser extends Parser;
options {
rk's avatar
rk committed
	buildAST = false;
rk's avatar
rk committed
	defaultErrorHandler = false;
}
{
private:
	// 
rk's avatar
rk committed
	// Decodes the token's text from UTF-8 into a UnicodeString and returns the
	// result of UnicodeString::unescape() applied to it.
	const UnicodeString token_ref_to_ustring(antlr::RefToken& rstr) const {
		antlr::Token* const token = static_cast<antlr::Token*>(rstr);
		const UnicodeString decoded = UnicodeString::fromUTF8(token->getText());
		return decoded.unescape();
	}
	//
	// Converts the token's text with token_ref_to_ustring() and returns it
	// without its first and last character (the quotes of a STRING token).
	// Text shorter than three characters yields an empty string.
	const UnicodeString str_token_ref_to_ustring(antlr::RefToken& rstr) const {
		const UnicodeString quoted = token_ref_to_ustring(rstr);
		UnicodeString inner;

		// Only text longer than the two delimiters has a payload to copy out
		if (quoted.length() >= 3) {
			quoted.extract(1, quoted.length() - 2, inner);
		}

		return inner;
	}
	//
rk's avatar
rk committed
	// Returns the token's text exactly as the token reports it.
	const std::string token_ref_to_std_string(antlr::RefToken& rstr) const {
		antlr::Token* const token = static_cast<antlr::Token*>(rstr);
		return token->getText();
	}
	//
	// Parses the token's text as a decimal integer.
	// atoi() was replaced because its behaviour is undefined when the value
	// does not fit in an int; text that is not a representable integer is now
	// reported as a ParserException instead.
	// The method is const, like the other token helpers of this class.
	int token_ref_to_int(antlr::RefToken& rstr) const {
		const std::string text = static_cast<antlr::Token*>(rstr)->getText();
		try {
			return boost::lexical_cast<int>(text);
		}
		catch (const boost::bad_lexical_cast&) {
			throw ParserException("Invalid integer literal: " + text);
		}
	}
// TODO
//  - base, orth

// TEMPORARY CHANGES -> 
//  -> temporarily commented out the invocation of the condit_* rules
//  -> temporarily commented out 2 rules involving equal

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// "GLOBAL" RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Rules for parsing string operators in scope (variables). 
// Returns boost::shared_ptr<Wccl::Function<Wccl::StrSet> >
parse_string_operator
	returns [boost::shared_ptr<ANTLRParserResult<Wccl::StrSet> > res]
	res.reset(new ANTLRParserResult<Wccl::StrSet>());
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > op;
Paweł Kędzia's avatar
Paweł Kędzia committed
	: op = string_operators [tagset, *res->variables.get()] {
// ----------------------------------------------------------------------------
// Rules for parsing predicates in scope (variables). 
// Returns boost::shared_ptr<Wccl::Function<Wccl::Bool> > 
parse_predicates 
	returns [boost::shared_ptr<ANTLRParserResult<Wccl::Bool> > res]
	res.reset(new ANTLRParserResult<Wccl::Bool>());
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > op;
Paweł Kędzia's avatar
Paweł Kędzia committed
	: op = logical_predicates [tagset, *res->variables.get()] {

// ----------------------------------------------------------------------------
// Rules for parsing tagset (symbol set) operators
// Returns boost::shared_ptr<Wccl::Function<Wccl::TSet> >
parse_sym_set_operator
	returns [boost::shared_ptr<ANTLRParserResult<Wccl::TSet> > res]
{
	res.reset(new ANTLRParserResult<Wccl::TSet>());
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > op;
}
Paweł Kędzia's avatar
Paweł Kędzia committed
	: op = sym_set_operators [tagset, *res->variables.get()] {
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VALUES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Single or multiple values in string set:
//   [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"] 
// Returns boost::shared_ptr<Wccl::StrSet> 
str_set_literal 
	returns [boost::shared_ptr<Wccl::StrSet> s_set]
{
	s_set.reset(new Wccl::StrSet());
}
	: LBRACKET
	  ( s1: STRING {
			s_set->insert(str_token_ref_to_ustring(s1)); 
		}
	    ( COMMA s2: STRING {
// Constrant string set 
// Returns boost::shared_ptr<Wccl::Constant<Wccl::StrSet> >
	returns [boost::shared_ptr<Wccl::Constant<Wccl::StrSet> > val]
{
		val.reset(new Wccl::Constant<Wccl::StrSet>(*set.get()));
	}
;
// ----------------------------------------------------------------------------
// Element of a sym set. This rule inserts the element into the set.
// Element may be: a or `a `
sym_set_elem
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, boost::shared_ptr<Wccl::TSet>& t_set]
	: s1: SYMBOL {
Paweł Kędzia's avatar
Paweł Kędzia committed
		try {
			t_set->insert_symbol(tagset, token_ref_to_std_string(s1));
		}
		catch(Corpus2::TagParseError &e) {
			throw(ParserException(e.info()));
		}
// sym set literal
// {} {sym_set_elem} {sym_set_elem, ..., sym_set_elem}
// Returns boost::shared_ptr<Wccl::TSet>
// Parses LCURLY, an optional COMMA-separated list of sym_set_elem, and RCURLY.
// Every element is inserted by sym_set_elem into the Wccl::TSet created in the
// init action, so an empty pair of braces yields an empty set.
sym_set_literal
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<Wccl::TSet> t_set]
{
	t_set.reset(new Wccl::TSet());
}
	: LCURLY 
	  ( sym_set_elem[tagset, t_set] (COMMA sym_set_elem[tagset, t_set])* )?
	  RCURLY
;
// Constant symbol set
// Returns boost::shared_ptr<Wccl::Constant<Wccl::TSet> >
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<Wccl::Constant<Wccl::TSet> > val]
{
Paweł Kędzia's avatar
Paweł Kędzia committed
	: set = sym_set_literal [tagset] {
		val.reset(new Wccl::Constant<Wccl::TSet>(*set.get()));
	}
;
// ----------------------------------------------------------------------------
// Boolean literal: the keyword True or the keyword False.
// Each keyword produces a newly allocated Wccl::Bool built from the
// corresponding C++ bool.
// Returns boost::shared_ptr<Wccl::Bool>
bool_literal
	returns [boost::shared_ptr<Wccl::Bool> val]
	: "True" {
		val.reset(new Wccl::Bool(true));
	}
	| "False" {
		val.reset(new Wccl::Bool(false));
	}
;
// Constant bool value
// Returns boost::shared_ptr<Wccl::Constant<Wccl::Bool> >
	returns [boost::shared_ptr<Wccl::Constant<Wccl::Bool> > val]
{
	boost::shared_ptr<Wccl::Bool> bool_lit;
}
	: bool_lit = bool_literal {
		val.reset(new Wccl::Constant<Wccl::Bool>(*bool_lit));
	}
// ----------------------------------------------------------------------------
// position value:
// Position literal may be (+|-)?(0-9)+ or begin or end or nowhere
// returns boost::shared_ptr<Wccl::Position>
position_literal
	returns [boost::shared_ptr<Wccl::Position> val]
		val.reset(new Wccl::Position(Wccl::Position(token_ref_to_int(i))));
		val.reset(new Wccl::Position(Wccl::Position(Wccl::Position::Begin)));
	}
		val.reset(new Wccl::Position(Wccl::Position(Wccl::Position::End)));
		val.reset(new Wccl::Position(Wccl::Position(Wccl::Position::Nowhere)));
	}
;
// Constant position value.
// Parses a position literal and wraps a copy of it in a Wccl::Constant.
// Returns boost::shared_ptr<Wccl::Constant<Wccl::Position> >
position_value
	returns [boost::shared_ptr<Wccl::Constant<Wccl::Position> > val]
{
	boost::shared_ptr<Wccl::Position> parsed_pos;
}
	: parsed_pos = position_literal {
		val.reset(new Wccl::Constant<Wccl::Position>(*parsed_pos.get()));
	}
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VARIABLES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Position: $name
// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > 
position_variable_acc
	returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > pos_acc]
		vars.get_put<Wccl::Position>(token_ref_to_std_string(n));
ilor's avatar
ilor committed
		Wccl::VariableAccessor<Wccl::Position> acc = 
			vars.create_accessor<Wccl::Position>(token_ref_to_std_string(n));
		pos_acc.reset(new Wccl::VariableAccessor<Wccl::Position>(acc));
// Position vargetter
// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::Position> >
position_variable
	[Wccl::Variables& vars] 
	returns [boost::shared_ptr<Wccl::VarGetter<Wccl::Position> > op]
{
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > pos_acc;
}
	: pos_acc = position_variable_acc [vars] {
		op.reset(new Wccl::VarGetter<Wccl::Position>(*pos_acc.get()));
// ----------------------------------------------------------------------------
// String set, $s:name
// This expression gets variable of the type StrSet from string-named variable 
// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> >
str_set_variable_acc
	returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > strset_acc]
		// get/put variable to variables
		vars.get_put<Wccl::StrSet>(token_ref_to_std_string(n));
ilor's avatar
ilor committed
		// makes accessor for value
		Wccl::VariableAccessor<Wccl::StrSet> acc = 
			vars.create_accessor<Wccl::StrSet>(token_ref_to_std_string(n));
		strset_acc.reset(new Wccl::VariableAccessor<Wccl::StrSet>(acc));
// Vargetter for StrSet variable
// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::StrSet> > 
str_set_variable 
	[Wccl::Variables& vars] 
	returns [boost::shared_ptr<Wccl::VarGetter<Wccl::StrSet> > op]
{
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > strset_acc;
}
	: strset_acc = str_set_variable_acc [vars] {
		op.reset(new Wccl::VarGetter<Wccl::StrSet>(*strset_acc.get()));
// ----------------------------------------------------------------------------
// Symbol set: $t:name
// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> >
sym_set_variable_acc 
	returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > symset_acc]
			vars.get_put<Wccl::TSet>(token_ref_to_std_string(n)); 	

ilor's avatar
ilor committed
			Wccl::VariableAccessor<Wccl::TSet> acc = 
				vars.create_accessor<Wccl::TSet>(token_ref_to_std_string(n));
			symset_acc.reset(new Wccl::VariableAccessor<Wccl::TSet>(acc));
// Vargetter for symbol set variable
// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::TSet> > 
sym_set_variable 
	[Wccl::Variables& vars] 
	returns [boost::shared_ptr<Wccl::VarGetter<Wccl::TSet> > op]
{
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > symset_acc;
}
	: symset_acc = sym_set_variable_acc [vars] {
			op.reset(new Wccl::VarGetter<Wccl::TSet>(*symset_acc.get()));
// ----------------------------------------------------------------------------
// Bool: $b:name
// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> >
boolean_variable_acc
	returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > bool_acc]
			vars.get_put<Wccl::Bool>(token_ref_to_std_string(n));

ilor's avatar
ilor committed
			Wccl::VariableAccessor<Wccl::Bool> acc = 
				vars.create_accessor<Wccl::Bool>(token_ref_to_std_string(n));
			bool_acc.reset(new Wccl::VariableAccessor<Wccl::Bool>(acc));
// Vargetter for bool variable
// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::Bool> >
boolean_variable
	[Wccl::Variables& vars] 
	returns [boost::shared_ptr<Wccl::VarGetter<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > bool_acc;
}
	: bool_acc = boolean_variable_acc [vars] {
			op.reset(new Wccl::VarGetter<Wccl::Bool>(*bool_acc.get()));
rk's avatar
rk committed

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
rk's avatar
rk committed
// OPERATORS
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
rk's avatar
rk committed
// Setvar operator
// Returns boost::shared_ptr<Wccl::Function<Wccl::Bool> >
// ----------------------------------------------------------------------------
// Matches the "setvar" keyword followed by a parenthesised body.
// There is one body alternative per variable type (position, bool, string
// set, symbol set); the operator built by the matching body is returned as is.
setvar_op
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > ret]
	: "setvar"
	  LPAREN
	  ( ret = setvar_body_pos  [tagset, vars]
	  | ret = setvar_body_bool [tagset, vars]
	  | ret = setvar_body_sset [tagset, vars]
	  | ret = setvar_body_tset [tagset, vars]
	  )
	  RPAREN
;
// Implementations of setvar:
// ----------------------------------------------------------------------------
// Body of setvar for position variables:
//   <position variable accessor> COMMA <position expression>
// Builds a Wccl::VarSetter<Wccl::Position> from the accessor and the parsed
// expression. The tagset argument is not used by this rule.
setvar_body_pos
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Position> > ret_op;
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > ret_acc;
}
	:	ret_acc = position_variable_acc [vars]
		COMMA
		ret_op  = op_position [vars] {
			op.reset(new Wccl::VarSetter<Wccl::Position>(*ret_acc.get(), ret_op));
		}
;

// ----------------------------------------------------------------------------
// Body of setvar for boolean variables:
//   <boolean variable accessor> COMMA <logical predicate>
// Builds a Wccl::VarSetter<Wccl::Bool> from the accessor and the parsed
// predicate.
setvar_body_bool
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > ret_op;
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > ret_acc;
}
	:	ret_acc = boolean_variable_acc [vars]
		COMMA
		ret_op  = logical_predicates [tagset, vars] {
			op.reset(new Wccl::VarSetter<Wccl::Bool>(*ret_acc.get(), ret_op));
		}
;
// ----------------------------------------------------------------------------
setvar_body_sset
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret_op;
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > ret_acc;
	: ret_acc = str_set_variable_acc [vars]
	  COMMA ret_op  = string_operators [tagset, vars] {
		op.reset(new Wccl::VarSetter<Wccl::StrSet>(*ret_acc.get(), ret_op));
// ----------------------------------------------------------------------------
setvar_body_tset 
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > ret_op;
	boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > ret_acc;
	: ret_acc = sym_set_variable_acc [vars]
	  COMMA 
	  ret_op  = sym_set_operators [tagset, vars] {
		op.reset(new Wccl::VarSetter<Wccl::TSet>(*ret_acc.get(), ret_op));

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Symbol set (tagset) operators 
// Returns boost::shared_ptr<Wccl::Function<Wccl::TSet> >
// ----------------------------------------------------------------------------
// Entry rule for symbol set expressions: either a plain symbol set operand
// (op_sym_set) or a conditional expression (condit_sym). The result of the
// matching alternative is passed through unchanged.
sym_set_operators
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::TSet> > ret]
	: ret = op_sym_set [tagset, vars] 
	| ret = condit_sym [tagset, vars]
;
// Implementations of symbol set operators:
// ----------------------------------------------------------------------------
// Symbol set operand: a symbol set variable or a symbol set constant.
op_sym_set
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::TSet> > op]
	: op = sym_set_variable [vars]
	| op = sym_set_value [tagset]
;
// ----------------------------------------------------------------------------
// if (Bool, TSet, TSet)
// ? TSet ? Bool : {}
condit_sym
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::TSet> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > test;
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > p_true, p_false;
}
Paweł Kędzia's avatar
Paweł Kędzia committed
	: "if" LPAREN test  = logical_predicates [tagset, vars] COMMA 
							p_true  = sym_set_operators  [tagset, vars] 
							(COMMA p_false = sym_set_operators [tagset, vars])? 
			if (p_false) {
				op.reset(new Wccl::Conditional<Wccl::TSet>(test, p_true, p_false));
			}
			else {
				op.reset(new Wccl::Conditional<Wccl::TSet>(test, p_true));
			}
		}
			(p_true = sym_set_operators [tagset, vars])
			(test = logical_predicates [tagset, vars]) {
			op.reset(new Wccl::Conditional<Wccl::TSet>(test, p_true));
		}
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Position operators 
// Returns boost::shared_ptr<Wccl::Function<Wccl::Position> >
// ----------------------------------------------------------------------------
// Entry rule for position expressions. It only forwards to op_position;
// the tagset argument is accepted but not passed on.
position_operators
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Position> > ret]
	: ret = op_position [vars]
;
// Implementations of position operators:
// ----------------------------------------------------------------------------
// Position operand: a position variable or a position constant.
op_position
	[Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Position> > op]
	: op = position_variable [vars]
	| op = position_value
;
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// String operators 
// Returns boost::shared_ptr<Wccl::Function<Wccl::StrSet> >
// ----------------------------------------------------------------------------
// Entry rule for string set expressions. Each alternative delegates to one
// concrete operator rule and passes its result through unchanged.
string_operators
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
	: ret = op_orth [tagset, vars]
	| ret = op_base [tagset, vars]
	| ret = op_lower [tagset, vars]
	| ret = op_upper [tagset, vars]
	| ret = op_affix [tagset, vars]
	| ret = op_str_set [tagset, vars]
	| ret = condit_str [tagset, vars]
;
// Implementations of string operators:
// ----------------------------------------------------------------------------
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
	boost::shared_ptr<Wccl::VarGetter<Wccl::Position> > pos;
	: "orth" LBRACKET pos = position_variable [vars] RBRACKET { 
		// ret = TODO
// ----------------------------------------------------------------------------
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
	boost::shared_ptr<Wccl::VarGetter<Wccl::Position> > pos;
	: "base" LBRACKET pos = position_variable [vars] RBRACKET { 
		// ret = TODO
// ----------------------------------------------------------------------------
rk's avatar
rk committed
// lower(<string expression>): wraps the parsed string expression in a
// Wccl::ToLower operator.
op_lower
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
{
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > o_ret;
}
	: "lower" LPAREN o_ret = string_operators[tagset, vars] RPAREN {
		ret.reset(new Wccl::ToLower(o_ret));
	}
;
// ----------------------------------------------------------------------------
rk's avatar
rk committed
// upper(<string expression>): wraps the parsed string expression in a
// Wccl::ToUpper operator.
op_upper
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
{
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > o_ret;
}
	: "upper" LPAREN o_ret = string_operators[tagset, vars] RPAREN {
		ret.reset(new Wccl::ToUpper(o_ret));
	}
;
// ----------------------------------------------------------------------------
rk's avatar
rk committed
op_affix 
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
rk's avatar
rk committed
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
rk's avatar
rk committed
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > o_ret;
}
	: "affix" LPAREN 
			o_ret = string_operators[tagset, vars] COMMA 
			offset: INT RPAREN {
rk's avatar
rk committed
		ret.reset(new Wccl::Affix(o_ret, token_ref_to_int(offset)));
// ----------------------------------------------------------------------------
// String set operand: a string set variable or a string set constant.
// The tagset argument is not used by this rule.
op_str_set
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > op]
	: op = str_set_variable [vars]
	| op = str_set_value 
;
// ----------------------------------------------------------------------------
// if (Bool, StrSet, StrSet)
// ? StrSet ? Bool : []
condit_str
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > test;
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > p_true, p_false;
}
Paweł Kędzia's avatar
Paweł Kędzia committed
	: "if" LPAREN test  = logical_predicates [tagset, vars] COMMA 
							p_true  = string_operators   [tagset, vars] 
							(COMMA p_false = string_operators [tagset, vars])? 
		if (p_false) {
			op.reset(new Wccl::Conditional<Wccl::StrSet>(test, p_true, p_false));
		}
		else {
			op.reset(new Wccl::Conditional<Wccl::StrSet>(test, p_true));
		}
			p_true = string_operators [tagset, vars]
			test = logical_predicates [tagset, vars] {
			op.reset(new Wccl::Conditional<Wccl::StrSet>(test, p_true));
		}

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Logical predicates 
// Returns boost::shared_ptr<Wccl::Function<Wccl::Bool> >
// ----------------------------------------------------------------------------
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > ret]
Paweł Kędzia's avatar
Paweł Kędzia committed
	: ret = lpred_and   [tagset, vars]  
	| ret = lpred_or    [tagset, vars]  
	| ret = lpred_nor   [tagset, vars]  
	| ret = lpred_bool  [tagset, vars]	
	| ret = lpred_in    [tagset, vars]
	| ret = lpred_inter [tagset, vars]
Paweł Kędzia's avatar
Paweł Kędzia committed
	| ret = lpred_eq    [tagset, vars]
	| ret = lpred_regex [tagset, vars]
	| ret = setvar_op   [tagset, vars]
	| ret = lpred_inout [tagset, vars]
	| ret = condit_bool [tagset, vars]
// ----------------------------------------------------------------------------
// comma-separated predicates
// One or more logical predicates separated by COMMA.
// The init action allocates the result vector; every parsed predicate is
// appended to it in the order in which it appears in the input.
logical_predicates_comma_sep 
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<
			std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > > 
	> ret_v]
{
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > pred;
	ret_v.reset(
		new std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > >
	);
}
	: pred = logical_predicates [tagset, vars] { 
		ret_v->push_back(pred);
	} (
		COMMA pred = logical_predicates [tagset, vars] {
		ret_v->push_back(pred);
	})*
;
// ----------------------------------------------------------------------------
// and(<predicate>, ...): builds a Wccl::And over the comma-separated list of
// predicates.
lpred_and 
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<
		std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > > 
	> ret_v;
}
	: "and" LPAREN ret_v = logical_predicates_comma_sep [tagset, vars] RPAREN {
			op.reset(new Wccl::And(ret_v));
	}
;
// ----------------------------------------------------------------------------
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<
		std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > > 
	> ret_v;
}
Paweł Kędzia's avatar
Paweł Kędzia committed
	: "or" LPAREN ret_v = logical_predicates_comma_sep [tagset, vars] RPAREN {
			op.reset(new Wccl::Or(ret_v));
	}
;
// ----------------------------------------------------------------------------
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<
		std::vector<boost::shared_ptr<Wccl::Function<Wccl::Bool> > > 
	> ret_v;
}
Paweł Kędzia's avatar
Paweł Kędzia committed
	: "nor" LPAREN ret_v = logical_predicates_comma_sep [tagset, vars] RPAREN {
			op.reset(new Wccl::Nor(ret_v));
	}
;
// ----------------------------------------------------------------------------
// Boolean operand: a boolean variable or a boolean constant.
// The tagset argument is not used by this rule.
lpred_bool
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
	: op = boolean_variable [vars]
	| op = boolean_value
;
// ----------------------------------------------------------------------------
// in(a, b): builds a Wccl::IsSubsetOf over two symbol set expressions or over
// two string set expressions.
lpred_in
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > t1, t2;
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > s1, s2;
}
:
	"in" LPAREN
	(
		// Syntactic predicate: the symbol set form is chosen when a symbol
		// set expression can be matched at this point
		(sym_set_operators [tagset, vars]) =>
		(
			t1 = sym_set_operators [tagset, vars] COMMA 
			t2 = sym_set_operators [tagset, vars] {
				op.reset(new Wccl::IsSubsetOf<Wccl::TSet>(t1, t2));
			}
		)
		|
		(
			s1 = string_operators [tagset, vars] COMMA
			s2 = string_operators [tagset, vars] {
				op.reset(new Wccl::IsSubsetOf<Wccl::StrSet>(s1, s2));
			}
		)
	)
	RPAREN
;
	
// ----------------------------------------------------------------------------
// inter(a, b): builds a Wccl::Intersects over two symbol set expressions or
// over two string set expressions.
lpred_inter
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > t1, t2;
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > s1, s2;
}
	:
	"inter" LPAREN
	(
		// Syntactic predicate: the symbol set form is chosen when a symbol
		// set expression can be matched at this point
		(sym_set_operators [tagset, vars]) =>
		(
			t1 = sym_set_operators [tagset, vars] COMMA  
			t2 = sym_set_operators [tagset, vars]  {
				op.reset(new Wccl::Intersects<Wccl::TSet>(t1, t2));
			}
		)
	|
		(
			s1 = string_operators  [tagset, vars] COMMA  
			s2 = string_operators  [tagset, vars]  {
				op.reset(new Wccl::Intersects<Wccl::StrSet>(s1, s2));
			}
		)
	)
	RPAREN
;
// ----------------------------------------------------------------------------
// equal(a, b): builds a Wccl::Equals over two operands of the same kind.
// The alternatives appear in the order position, symbol set, string set,
// bool; the symbol set and string set forms are guarded by syntactic
// predicates.
lpred_eq
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Position> > pos_lhs, pos_rhs;
	boost::shared_ptr<Wccl::Function<Wccl::TSet> > tset_lhs, tset_rhs;
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > sset_lhs, sset_rhs;
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > bool_lhs, bool_rhs;
}
	: "equal" LPAREN
	(
		// Position operands
		(
			pos_lhs = position_operators [tagset, vars]
			COMMA
			pos_rhs = position_operators [tagset, vars]
			{
				op.reset(new Wccl::Equals<Wccl::Position>(pos_lhs, pos_rhs));
			}
		)
	|
		// Symbol set operands
		(sym_set_operators [tagset, vars]) =>
		(
			tset_lhs = sym_set_operators [tagset, vars]
			COMMA
			tset_rhs = sym_set_operators [tagset, vars]
			{
				op.reset(new Wccl::Equals<Wccl::TSet>(tset_lhs, tset_rhs));
			}
		)
	|
		// String set operands
		(string_operators [tagset, vars]) =>
		(
			sset_lhs = string_operators [tagset, vars]
			COMMA
			sset_rhs = string_operators [tagset, vars]
			{
				op.reset(new Wccl::Equals<Wccl::StrSet>(sset_lhs, sset_rhs));
			}
		)
	|
		// Boolean operands
		(
			bool_lhs = logical_predicates [tagset, vars]
			COMMA
			bool_rhs = logical_predicates [tagset, vars]
			{
				op.reset(new Wccl::Equals<Wccl::Bool>(bool_lhs, bool_rhs));
			}
		)
	)
	RPAREN
;

// ----------------------------------------------------------------------------
lpred_regex
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > expr;
}
Paweł Kędzia's avatar
Paweł Kędzia committed
	: "regex" LPAREN expr = string_operators [tagset, vars] COMMA reg: STRING RPAREN {
		op.reset(new Wccl::Regex(expr, str_token_ref_to_ustring(reg)));
// ----------------------------------------------------------------------------
// inside(<position>) / outside(<position>): wraps the parsed position
// expression in Wccl::IsInside or Wccl::IsOutside respectively.
lpred_inout
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Position> > pos_expr;
}
	: "inside" LPAREN pos_expr = position_operators [tagset, vars] RPAREN {
		op.reset(new Wccl::IsInside(pos_expr));
	}
	| "outside" LPAREN pos_expr = position_operators [tagset, vars] RPAREN {
		op.reset(new Wccl::IsOutside(pos_expr));
	}
;

// ----------------------------------------------------------------------------
// if (Bool, Bool, Bool)
// ? Bool ? Bool : False
condit_bool
Paweł Kędzia's avatar
Paweł Kędzia committed
	[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
	returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
	boost::shared_ptr<Wccl::Function<Wccl::Bool> > test, p_true, p_false;
}
Paweł Kędzia's avatar
Paweł Kędzia committed
	: "if" LPAREN test  = logical_predicates [tagset, vars] COMMA 
							p_true  = logical_predicates [tagset, vars] 
							(COMMA p_false = logical_predicates [tagset, vars])? 
		if (p_false) {
			op.reset(new Wccl::Conditional<Wccl::Bool>(test, p_true, p_false));
		}
		else {
			op.reset(new Wccl::Conditional<Wccl::Bool>(test, p_true));
		}
			p_true = logical_predicates [tagset, vars]
			test = logical_predicates [tagset, vars] {
			op.reset(new Wccl::Conditional<Wccl::Bool>(test, p_true));
		}
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
rk's avatar
rk committed
// ANTLR LEXER
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
rk's avatar
rk committed
class ANTLRLexer extends Lexer;
options {
	exportVocab    = ANTLRExpr;
	charVocabulary = '\3'..'\377';
	testLiterals   = false;
rk's avatar
rk committed
}

// String literal delimited by a pair of double quotes or a pair of
// apostrophes. The body is any run of characters other than the delimiter
// itself, so this rule offers no way to embed the delimiter in the string.
STRING
options {
	paraphrase = "a string";
}
	: '"' (~'"')* '"'
	| '\'' (~'\'')* '\''
;
rk's avatar
rk committed
// Integer literal: an optional '-' or '+' sign followed by one or more
// decimal digits.
INT
options {
	paraphrase = "Integer";
}
	: ('-'|'+')? ('0'..'9')+ 
;	
rk's avatar
rk committed

rk's avatar
rk committed
QUOT_MARK
options {
	paraphrase = "Quota mark";
rk's avatar
rk committed

APOS_MARK
options {
	paraphrase = "Aposptrophe mark";
}
rk's avatar
rk committed
Q_MARK
options {
	paraphrase = "Query mark";
}
	: '?'
rk's avatar
rk committed

E_MARK
options {
	paraphrase = "Exclamanation mark";
}
	: '!'
rk's avatar
rk committed

STR_PREFIX
options {
	paraphrase = "String prefix";
}
;

TST_PREFIX
options {
	paraphrase = "Tag set (symbol) prefix";
}
;

BOOL_PREFIX
options {
	paraphrase = "Bool prefix";
}
rk's avatar
rk committed

POS_PREFIX
options {
rk's avatar
rk committed
LBRACKET 
options {
	paraphrase = "'['";
}
	: '[' 
rk's avatar
rk committed

RBRACKET 
options {
	paraphrase = "']'";
}
	: ']'