Newer
Older
//don't try to add all the headers inside our namespace
#include <libwccl/parser/ParserException.h>
#include <cstdio>
#include <antlr/Token.hpp>
#include <boost/lexical_cast.hpp>
// values/variables
#include <libwccl/variables.h>
#include <libwccl/values/bool.h>
#include <libwccl/values/tset.h>
#include <libwccl/values/strset.h>
#include <libwccl/values/position.h>
// sentence context
#include <libwccl/sentencecontext.h>
// operators
#include <libwccl/ops/functions/bool/predicates/or.h>
#include <libwccl/ops/functions/bool/predicates/nor.h>
#include <libwccl/ops/functions/bool/predicates/and.h>
#include <libwccl/ops/functions/bool/predicates/regex.h>
#include <libwccl/ops/functions/bool/predicates/equals.h>
#include <libwccl/ops/functions/bool/varsetter.h>
#include <libwccl/ops/functions/strset/affix.h>
#include <libwccl/ops/functions/strset/getorth.h>
#include <libwccl/ops/functions/strset/toupper.h>
#include <libwccl/ops/functions/strset/tolower.h>
#include <libwccl/ops/functions/constant.h>
#include <libwccl/ops/functions/vargetter.h>
#include <libwccl/ops/functions/bool/predicates/intersects.h>
#include <libwccl/ops/functions/bool/predicates/issubsetof.h>
#include <libwccl/ops/functions/bool/predicates/isinside.h>
#include <libwccl/ops/functions/bool/predicates/isoutside.h>
#include <libwccl/ops/functions/position/relativeposition.h>
#include <libwccl/ops/functions/conditional.h>
// Unicode String
#include <unicode/uniset.h>
#include <unicode/unistr.h>
#include <libwccl/parser/ANTLRParserResult.h>
ANTLR_BEGIN_NAMESPACE(Wccl)
Paweł Kędzia
committed
genHashLines = false;
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
Paweł Kędzia
committed
exportVocab = ANTLRExpr;
const UnicodeString token_ref_to_ustring(antlr::RefToken& rstr) const {
return UnicodeString::fromUTF8(((antlr::Token*)rstr)->getText()).unescape();
}
Paweł Kędzia
committed
const UnicodeString str_token_ref_to_ustring(antlr::RefToken& rstr) const {
UnicodeString ret_ustr, ustr = token_ref_to_ustring(rstr);
if (ustr.length() < 3) {
return "";
}
ustr.extract(1, ustr.length() - 2, ret_ustr);
return ret_ustr;
}
Paweł Kędzia
committed
//
const std::string str_token_rem_grav(antlr::RefToken& rstr) const {
size_t len = 0;
std::string ret = token_ref_to_std_string(rstr);
if ((len = ret.length()) < 2) {
return ret;
}
if (ret[0] == '`' && ret[len - 1] == '`') {
return ret.substr(1, len - 2);
}
return ret;
}
//
const std::string token_ref_to_std_string(antlr::RefToken& rstr) const {
return (((antlr::Token*)rstr)->getText());
//
int token_ref_to_int(antlr::RefToken& rstr) {
return atoi(((antlr::Token*)rstr)->getText().c_str());
Paweł Kędzia
committed
// TEMPORARY CHANGES ->
// -> tymczasowo zakomentowalem wywoalnie regul condit_*
// -> tymczasowo zakomentowalem 2 reguly z equal
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Rules for parsing string operators in scope (variables).
// Returns boost::shared_ptr<Function<StrSet> >
Paweł Kędzia
committed
[const Corpus2::Tagset &tagset]
returns [boost::shared_ptr<ANTLRParserResult<StrSet> > res]
res.reset(new ANTLRParserResult<StrSet>());
boost::shared_ptr<Function<StrSet> > op;
// ----------------------------------------------------------------------------
// Rules for parsing predicates in scope (variables).
// Returns boost::shared_ptr<Function<Bool> >
parse_predicates
Paweł Kędzia
committed
[const Corpus2::Tagset &tagset]
returns [boost::shared_ptr<ANTLRParserResult<Bool> > res]
res.reset(new ANTLRParserResult<Bool>());
boost::shared_ptr<Function<Bool> > op;
: op = logical_predicates [tagset, *res->variables.get()] {
// ----------------------------------------------------------------------------
// Rules for parsing tagset (symbol set) operators
// Returns boost::shared_ptr<Function<TSet> >
Paweł Kędzia
committed
[const Corpus2::Tagset &tagset]
returns [boost::shared_ptr<ANTLRParserResult<TSet> > res]
res.reset(new ANTLRParserResult<TSet>());
boost::shared_ptr<Function<TSet> > op;
// ----------------------------------------------------------------------------
// Rules for parsing position operators
// Returns boost::shared_ptr<Function<Position> >
parse_position_operator
[const Corpus2::Tagset &tagset]
returns [boost::shared_ptr<ANTLRParserResult<Position> > res]
res.reset(new ANTLRParserResult<Position>());
boost::shared_ptr<Function<Position> > op;
}
: op = position_operators [tagset, *res->variables.get()] {
res->op = op;
}
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VALUES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
Paweł Kędzia
committed
// Single or muliple values in string set:
// [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"]
// Returns boost::shared_ptr<StrSet>
Paweł Kędzia
committed
str_set_literal
returns [boost::shared_ptr<StrSet> s_set]
Paweł Kędzia
committed
{
s_set.reset(new StrSet());
Paweł Kędzia
committed
}
s_set->insert(token_ref_to_ustring(s0));
s_set->insert(token_ref_to_ustring(s1));
s_set->insert(token_ref_to_ustring(s2));
Paweł Kędzia
committed
}
Paweł Kędzia
committed
// Constrant string set
// Returns boost::shared_ptr<Constant<StrSet> >
returns [boost::shared_ptr<Constant<StrSet> > val]
boost::shared_ptr<StrSet> set;
Paweł Kędzia
committed
: set = str_set_literal {
val.reset(new Constant<StrSet>(*set.get()));
Paweł Kędzia
committed
// ----------------------------------------------------------------------------
Paweł Kędzia
committed
// Element of sym set. This rule, inset element into set.
// Element may be: a or `a `
sym_set_elem
[const Corpus2::Tagset& tagset, boost::shared_ptr<TSet>& t_set]
}
catch(Corpus2::TagParseError &e) {
throw(ParserException(e.info()));
}
Paweł Kędzia
committed
// sym set literal
// {} {sym_set_elem} {sym_set_elem, ..., sym_set_elem}
// Returns boost::shared_ptr<TSet>
Paweł Kędzia
committed
sym_set_literal
returns [boost::shared_ptr<TSet> t_set]
Paweł Kędzia
committed
{
t_set.reset(new TSet());
Paweł Kędzia
committed
}
( sym_set_elem[tagset, t_set] (COMMA sym_set_elem[tagset, t_set])* )?
RCURLY
Paweł Kędzia
committed
// Constant symbol set
// Returns boost::shared_ptr<Constant<TSet> >
returns [boost::shared_ptr<Constant<TSet> > val]
boost::shared_ptr<TSet> set;
val.reset(new Constant<TSet>(*set.get()));
Paweł Kędzia
committed
// ----------------------------------------------------------------------------
// boolean value:
Paweł Kędzia
committed
// Literal bool value may be True or False
// Returns boost::shared_ptr<Bool>
Paweł Kędzia
committed
bool_literal
returns [boost::shared_ptr<Bool> val]
: "True" { val.reset(new Bool(Bool(true ))); }
| "False" { val.reset(new Bool(Bool(false))); }
Paweł Kędzia
committed
;
// Constat bool Value
// Returns boost::shared_ptr<Constant<Bool> >
returns [boost::shared_ptr<Constant<Bool> > val]
Paweł Kędzia
committed
{
boost::shared_ptr<Bool> bool_lit;
Paweł Kędzia
committed
}
: bool_lit = bool_literal {
val.reset(new Constant<Bool>(*bool_lit));
Paweł Kędzia
committed
}
Paweł Kędzia
committed
// ----------------------------------------------------------------------------
// position value:
Paweł Kędzia
committed
// Position literal may be (+|-)?(0-9)+ or begin or end or nowhere
// returns boost::shared_ptr<Position>
Paweł Kędzia
committed
position_literal
returns [boost::shared_ptr<Position> val]
val.reset(new Position(Position(i)));
val.reset(new Position(Position(Position::Begin)));
Paweł Kędzia
committed
}
val.reset(new Position(Position(Position::End)));
val.reset(new Position(Position(Position::Nowhere)));
Paweł Kędzia
committed
}
;
// Returns boost::shared_ptr<Constant<Position> >
Paweł Kędzia
committed
position_value
returns [boost::shared_ptr<Constant<Position> > val]
Paweł Kędzia
committed
{
boost::shared_ptr<Position> pos_lit;
Paweł Kędzia
committed
}
: pos_lit = position_literal {
val.reset(new Constant<Position>(*pos_lit));
// ----------------------------------------------------------------------------
// Number may be unsigned or signed
number returns [int ret]
{
ret = 0;
}
: s: SIGNED_INT { ret = token_ref_to_int(s); }
| u: UNSIGNED_INT { ret = token_ref_to_int(u); }
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VARIABLES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
Paweł Kędzia
committed
// Position: $name
// Returns boost::shared_ptr<VariableAccessor<Position> >
[Variables& vars]
returns [boost::shared_ptr<VariableAccessor<Position> > pos_acc]
Paweł Kędzia
committed
: POS_PREFIX n: SYMBOL {
vars.get_put<Position>(str_token_rem_grav(n));
VariableAccessor<Position> acc =
vars.create_accessor<Position>(str_token_rem_grav(n));
pos_acc.reset(new VariableAccessor<Position>(acc));
Paweł Kędzia
committed
// Position vargetter
// Returs boost::shared_ptr<VarGetter<Position> >
[Variables& vars]
returns [boost::shared_ptr<VarGetter<Position> > op]
boost::shared_ptr<VariableAccessor<Position> > pos_acc;
}
: pos_acc = position_variable_acc [vars] {
op.reset(new VarGetter<Position>(*pos_acc.get()));
Paweł Kędzia
committed
// ----------------------------------------------------------------------------
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Position> > ret]
boost::shared_ptr<Function<Position> > pos;
: "relpos" LPAREN pos = op_position [tagset, vars] COMMA n = number RPAREN {
ret.reset(new RelativePosition(pos, n));
// ----------------------------------------------------------------------------
Paweł Kędzia
committed
// String set, $s:name
// This expression gets variable of the type StrSet from string-named variable
// Returns boost::shared_ptr<VariableAccessor<StrSet> >
[Variables& vars]
returns [boost::shared_ptr<VariableAccessor<StrSet> > strset_acc]
Paweł Kędzia
committed
: STR_PREFIX n: SYMBOL {
vars.get_put<StrSet>(str_token_rem_grav(n));
VariableAccessor<StrSet> acc =
vars.create_accessor<StrSet>(str_token_rem_grav(n));
strset_acc.reset(new VariableAccessor<StrSet>(acc));
Paweł Kędzia
committed
// Vargetter for StrSet variable
// Returns boost::shared_ptr<VarGetter<StrSet> >
[Variables& vars]
returns [boost::shared_ptr<VarGetter<StrSet> > op]
boost::shared_ptr<VariableAccessor<StrSet> > strset_acc;
}
: strset_acc = str_set_variable_acc [vars] {
op.reset(new VarGetter<StrSet>(*strset_acc.get()));
Paweł Kędzia
committed
// ----------------------------------------------------------------------------
Paweł Kędzia
committed
// Symbol set: $t:name
// Returns boost::shared_ptr<VariableAccessor<TSet> >
[Variables& vars]
returns [boost::shared_ptr<VariableAccessor<TSet> > symset_acc]
Paweł Kędzia
committed
: TST_PREFIX n: SYMBOL {
vars.get_put<TSet>(str_token_rem_grav(n));
VariableAccessor<TSet> acc =
vars.create_accessor<TSet>(str_token_rem_grav(n));
symset_acc.reset(new VariableAccessor<TSet>(acc));
Paweł Kędzia
committed
// Vargetter for symbol set variable
// Returns boost::shared_ptr<VarGetter<TSet> >
[Variables& vars]
returns [boost::shared_ptr<VarGetter<TSet> > op]
boost::shared_ptr<VariableAccessor<TSet> > symset_acc;
}
: symset_acc = sym_set_variable_acc [vars] {
op.reset(new VarGetter<TSet>(*symset_acc.get()));
Paweł Kędzia
committed
// ----------------------------------------------------------------------------
Paweł Kędzia
committed
// Bool: $b:name
// Returns boost::shared_ptr<VariableAccessor<Bool> >
[Variables& vars]
returns [boost::shared_ptr<VariableAccessor<Bool> > bool_acc]
Paweł Kędzia
committed
: BOOL_PREFIX n: SYMBOL {
vars.get_put<Bool>(str_token_rem_grav(n));
VariableAccessor<Bool> acc =
vars.create_accessor<Bool>(str_token_rem_grav(n));
bool_acc.reset(new VariableAccessor<Bool>(acc));
Paweł Kędzia
committed
// Vargetter for bool variable
// Returns boost::shared_ptr<VarGetter<Bool> >
[Variables& vars]
returns [boost::shared_ptr<VarGetter<Bool> > op]
boost::shared_ptr<VariableAccessor<Bool> > bool_acc;
}
: bool_acc = boolean_variable_acc [vars] {
op.reset(new VarGetter<Bool>(*bool_acc.get()));
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Returns boost::shared_ptr<Function<Bool> >
// ----------------------------------------------------------------------------
setvar_op
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > ret]
: "setvar" LPAREN
(
ret = setvar_body_pos [tagset, vars]
| ret = setvar_body_bool [tagset, vars]
| ret = setvar_body_sset [tagset, vars]
| ret = setvar_body_tset [tagset, vars]
)
RPAREN
;
// Implementations of setvar:
// ----------------------------------------------------------------------------
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
boost::shared_ptr<Function<Position> > ret_op;
boost::shared_ptr<VariableAccessor<Position> > ret_acc;
: ret_acc = position_variable_acc [vars]
COMMA
ret_op = op_position [tagset, vars] {
op.reset(new VarSetter<Position>(*ret_acc.get(), ret_op));
// ----------------------------------------------------------------------------
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
boost::shared_ptr<Function<Bool> > ret_op;
boost::shared_ptr<VariableAccessor<Bool> > ret_acc;
: ret_acc = boolean_variable_acc [vars]
COMMA
ret_op = logical_predicates [tagset, vars] {
op.reset(new VarSetter<Bool>(*ret_acc.get(), ret_op));
// ----------------------------------------------------------------------------
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
boost::shared_ptr<Function<StrSet> > ret_op;
boost::shared_ptr<VariableAccessor<StrSet> > ret_acc;
: ret_acc = str_set_variable_acc [vars]
COMMA ret_op = string_operators [tagset, vars] {
op.reset(new VarSetter<StrSet>(*ret_acc.get(), ret_op));
// ----------------------------------------------------------------------------
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
boost::shared_ptr<Function<TSet> > ret_op;
boost::shared_ptr<VariableAccessor<TSet> > ret_acc;
: ret_acc = sym_set_variable_acc [vars]
COMMA
ret_op = sym_set_operators [tagset, vars] {
op.reset(new VarSetter<TSet>(*ret_acc.get(), ret_op));
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Symbol set (tagset) operators
// Returns boost::shared_ptr<Function<TSet> >
// ----------------------------------------------------------------------------
sym_set_operators
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<TSet> > ret]
| ret = condit_sym [tagset, vars]
// Implementations of symbol set operators:
// ----------------------------------------------------------------------------
op_sym_set
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<TSet> > op]
: op = sym_set_variable [vars]
// ----------------------------------------------------------------------------
// if (Bool, TSet, TSet)
// ? TSet ? Bool : {}
condit_sym
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<TSet> > op]
boost::shared_ptr<Function<Bool> > test;
boost::shared_ptr<Function<TSet> > p_true, p_false;
: "if" LPAREN test = logical_predicates [tagset, vars] COMMA
p_true = sym_set_operators [tagset, vars]
(COMMA p_false = sym_set_operators [tagset, vars])?
RPAREN {
Paweł Kędzia
committed
if (p_false) {
op.reset(new Conditional<TSet>(test, p_true, p_false));
Paweł Kędzia
committed
}
else {
op.reset(new Conditional<TSet>(test, p_true));
Paweł Kędzia
committed
}
}
Paweł Kędzia
committed
| Q_MARK
(p_true = sym_set_operators [tagset, vars])
Paweł Kędzia
committed
Q_MARK
(test = logical_predicates [tagset, vars]) {
op.reset(new Conditional<TSet>(test, p_true));
Paweł Kędzia
committed
}
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Returns boost::shared_ptr<Function<Position> >
// ----------------------------------------------------------------------------
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Position> > ret]
: ret = op_position [tagset, vars]
// Implementations of symbol set operators:
// ----------------------------------------------------------------------------
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Position> > op]
: op = position_variable [vars]
| op = position_value
| op = relpos [tagset, vars]
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Stiring operators
// Returns boost::shared_ptr<Function<StrSet> >
// ----------------------------------------------------------------------------
string_operators
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<StrSet> > ret]
: ret = op_orth [tagset, vars]
| ret = op_base [tagset, vars]
| ret = op_lower [tagset, vars]
| ret = op_upper [tagset, vars]
| ret = op_affix [tagset, vars]
| ret = op_str_set [tagset, vars]
| ret = condit_str [tagset, vars]
// Implementations of string operators:
// ----------------------------------------------------------------------------
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<StrSet> > ret]
boost::shared_ptr<Function<Position> > pos;
: "orth" LBRACKET pos = op_position [tagset, vars] RBRACKET {
ret.reset(new GetOrth(pos));
// ----------------------------------------------------------------------------
[const Corpus2::Tagset& /*tagset*/, Variables& vars]
returns [boost::shared_ptr<Function<StrSet> > ret]
boost::shared_ptr<VarGetter<Position> > pos;
Paweł Kędzia
committed
: "base" LBRACKET pos = position_variable [vars] RBRACKET {
// ----------------------------------------------------------------------------
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<StrSet> > ret]
boost::shared_ptr<Function<StrSet> > o_ret;
: "lower" LPAREN o_ret = string_operators[tagset, vars] RPAREN {
ret.reset(new ToLower(o_ret));
}
;
// ----------------------------------------------------------------------------
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<StrSet> > ret]
boost::shared_ptr<Function<StrSet> > o_ret;
: "upper" LPAREN o_ret = string_operators[tagset, vars] RPAREN {
ret.reset(new ToUpper(o_ret));
}
;
// ----------------------------------------------------------------------------
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<StrSet> > ret]
boost::shared_ptr<Function<StrSet> > o_ret;
: "affix" LPAREN
o_ret = string_operators[tagset, vars] COMMA offset = number
RPAREN {
ret.reset(new Affix(o_ret, offset));
// ----------------------------------------------------------------------------
op_str_set
[const Corpus2::Tagset& /*tagset*/, Variables& vars]
returns [boost::shared_ptr<Function<StrSet> > op]
: op = str_set_variable [vars]
| op = str_set_value
// ----------------------------------------------------------------------------
// if (Bool, StrSet, StrSet)
// ? StrSet ? Bool : []
condit_str
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<StrSet> > op]
boost::shared_ptr<Function<Bool> > test;
boost::shared_ptr<Function<StrSet> > p_true, p_false;
: "if" LPAREN test = logical_predicates [tagset, vars] COMMA
p_true = string_operators [tagset, vars]
(COMMA p_false = string_operators [tagset, vars])?
RPAREN {
Paweł Kędzia
committed
if (p_false) {
op.reset(new Conditional<StrSet>(test, p_true, p_false));
Paweł Kędzia
committed
}
else {
op.reset(new Conditional<StrSet>(test, p_true));
Paweł Kędzia
committed
}
Paweł Kędzia
committed
| Q_MARK
p_true = string_operators [tagset, vars]
Paweł Kędzia
committed
Q_MARK
test = logical_predicates [tagset, vars] {
op.reset(new Conditional<StrSet>(test, p_true));
Paweł Kędzia
committed
}
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Logical predicates
// Returns boost::shared_ptr<Function<Bool> >
// ----------------------------------------------------------------------------
logical_predicates
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > ret]
: ret = lpred_and [tagset, vars]
| ret = lpred_or [tagset, vars]
| ret = lpred_nor [tagset, vars]
| ret = lpred_bool [tagset, vars]
| ret = lpred_in [tagset, vars]
| ret = lpred_inter [tagset, vars]
| ret = lpred_eq [tagset, vars]
| ret = lpred_regex [tagset, vars]
| ret = setvar_op [tagset, vars]
| ret = lpred_inout [tagset, vars]
| ret = condit_bool [tagset, vars]
// ----------------------------------------------------------------------------
// comma-separated predicates
logical_predicates_comma_sep
[const Corpus2::Tagset& tagset, Variables& vars]
std::vector<boost::shared_ptr<Function<Bool> > >
boost::shared_ptr<Function<Bool> > pred;
new std::vector<boost::shared_ptr<Function<Bool> > >
ret_v->push_back(pred);
} (
ret_v->push_back(pred);
})*
;
// ----------------------------------------------------------------------------
lpred_and
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
std::vector<boost::shared_ptr<Function<Bool> > >
: "and" LPAREN ret_v = logical_predicates_comma_sep [tagset, vars] RPAREN {
op.reset(new And(ret_v));
// ----------------------------------------------------------------------------
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
{
boost::shared_ptr<
std::vector<boost::shared_ptr<Function<Bool> > >
: "or" LPAREN ret_v = logical_predicates_comma_sep [tagset, vars] RPAREN {
op.reset(new Or(ret_v));
// ----------------------------------------------------------------------------
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
{
boost::shared_ptr<
std::vector<boost::shared_ptr<Function<Bool> > >
: "not" LPAREN ret_v = logical_predicates_comma_sep [tagset, vars] RPAREN {
op.reset(new Nor(ret_v));
// ----------------------------------------------------------------------------
lpred_bool
[const Corpus2::Tagset& /*tagset*/, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
: op = boolean_variable [vars]
| op = boolean_value
;
// ----------------------------------------------------------------------------
lpred_in
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
boost::shared_ptr<Function<TSet> > t1, t2;
boost::shared_ptr<Function<StrSet> > s1, s2;
:
"in" LPAREN
(
(sym_set_operators [tagset, vars]) =>
(
t1 = sym_set_operators [tagset, vars] COMMA
t2 = sym_set_operators [tagset, vars] {
op.reset(new IsSubsetOf<TSet>(t1, t2));
}
)
|
(
s1 = string_operators [tagset, vars] COMMA
s2 = string_operators [tagset, vars] {
op.reset(new IsSubsetOf<StrSet>(s1, s2));
// ----------------------------------------------------------------------------
lpred_inter
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
boost::shared_ptr<Function<TSet> > t1, t2;
boost::shared_ptr<Function<StrSet> > s1, s2;
Paweł Kędzia
committed
}
:
"inter" LPAREN
(
(sym_set_operators [tagset, vars]) =>
(
t1 = sym_set_operators [tagset, vars] COMMA
t2 = sym_set_operators [tagset, vars] {
op.reset(new Intersects<TSet>(t1, t2));
}
)
|
(
s1 = string_operators [tagset, vars] COMMA
s2 = string_operators [tagset, vars] {
op.reset(new Intersects<StrSet>(s1, s2));
// ----------------------------------------------------------------------------
lpred_eq
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
boost::shared_ptr<Function<TSet> > t1, t2;
boost::shared_ptr<Function<Bool> > b1, b2;
boost::shared_ptr<Function<StrSet> > s1, s2;
boost::shared_ptr<Function<Position> > p1, p2;
(
p1 = position_operators [tagset, vars] COMMA
p2 = position_operators [tagset, vars] {
op.reset(new Equals<Position>(p1, p2));
}
)
|
(sym_set_operators [tagset, vars]) =>
(
t1 = sym_set_operators [tagset, vars] COMMA
t2 = sym_set_operators [tagset, vars] {
op.reset(new Equals<TSet>(t1, t2));
}
)
|
(string_operators [tagset, vars]) =>
(
s1 = string_operators [tagset, vars] COMMA
s2 = string_operators [tagset, vars] {
op.reset(new Equals<StrSet>(s1, s2));
}
)
|
(
b1 = logical_predicates [tagset, vars] COMMA
b2 = logical_predicates [tagset, vars] {
op.reset(new Equals<Bool>(b1, b2));
;
// ----------------------------------------------------------------------------
lpred_regex
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
boost::shared_ptr<Function<StrSet> > expr;
: "regex" LPAREN expr = string_operators [tagset, vars] COMMA reg: STRING RPAREN {
op.reset(new Regex(expr, token_ref_to_ustring(reg)));
// ----------------------------------------------------------------------------
lpred_inout
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
boost::shared_ptr<Function<Position> > ret_pos;
: "inside" LPAREN ret_pos = position_operators [tagset, vars] RPAREN {
op.reset(new IsInside(ret_pos));
| "outside" LPAREN ret_pos = position_operators [tagset, vars] RPAREN {
op.reset(new IsOutside(ret_pos));
// ----------------------------------------------------------------------------
// if (Bool, Bool, Bool)
// ? Bool ? Bool : False
condit_bool
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
boost::shared_ptr<Function<Bool> > test, p_true, p_false;
: "if" LPAREN test = logical_predicates [tagset, vars] COMMA
p_true = logical_predicates [tagset, vars]
(COMMA p_false = logical_predicates [tagset, vars])?
RPAREN {
Paweł Kędzia
committed
if (p_false) {
op.reset(new Conditional<Bool>(test, p_true, p_false));
Paweł Kędzia
committed
}
else {
op.reset(new Conditional<Bool>(test, p_true));
Paweł Kędzia
committed
}
Paweł Kędzia
committed
| Q_MARK
p_true = logical_predicates [tagset, vars]
Paweł Kędzia
committed
Q_MARK
test = logical_predicates [tagset, vars] {
op.reset(new Conditional<Bool>(test, p_true));
Paweł Kędzia
committed
}
// ----------------------------------------------------------------------------
// if (Bool, Position, Position)
// ? Position ? Bool : 0
condit_position
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Position> > op]
boost::shared_ptr<Function<Bool> > test;
boost::shared_ptr<Function<Position> > p_true, p_false;
}
: "if" LPAREN test = logical_predicates [tagset, vars] COMMA
p_true = position_operators [tagset, vars]
(COMMA p_false = position_operators [tagset, vars])?
RPAREN {
if (p_false) {
op.reset(new Conditional<Position>(test, p_true, p_false));
}
else {
op.reset(new Conditional<Position>(test, p_true));
}
}
| Q_MARK
p_true = position_operators [tagset, vars]
Q_MARK
test = logical_predicates [tagset, vars] {
op.reset(new Conditional<Position>(test, p_true));
}
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
class ANTLRLexer extends Lexer;
options {
exportVocab = ANTLRExpr;
charVocabulary = '\3'..'\377';
}
STRING
options {
paraphrase = "a string";
}
UNSIGNED_INT
options {
paraphrase = "Unsigned integer";
}
: ('0'..'9')+
;
STR_PREFIX
options {
paraphrase = "String prefix";
}
Paweł Kędzia
committed
: "$s:"
;
TST_PREFIX
options {
Paweł Kędzia
committed
: "$t:"
;
BOOL_PREFIX
options {
paraphrase = "Bool prefix";
}
Paweł Kędzia
committed
: "$b:"
Paweł Kędzia
committed
paraphrase = "Position prefix";
Paweł Kędzia
committed
: '$'
RBRACKET
options {
paraphrase = "']'";
}
: ']'
LPAREN
options {
paraphrase = "'('";
}
: '('
RPAREN
options {
paraphrase = "')'";
}
: ')'
LCURLY
options {
paraphrase = "'{'";
}
: '{'
RCURLY
options {
paraphrase = "'}'";
}
: '}'
AT_MARK
options {
paraphrase = "'@'";
}
: '@'
: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
| '\t'
| '\f'
| ( "\r\n"
| '\r'
| '\n'
)
{ newline(); }
)
{ $setType(antlr::Token::SKIP); }
paraphrase = "Single line comment";
}
: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP); }
;
ML_COMMENT
options {
paraphrase = "Multi line comment";
( // TODO: test it and add reference to the site it's taken from!
/* This actually works OK despite the ambiguity that
'\r' '\n' can be matched in one alternative or by matching
'\r' in one iteration and '\n' in another.. But
this is really matched just by one rule per (...)*
loop iteration, so it's OK.
This is exactly how they do it all over the web - just
turn off the warning for this particular token.*/
options { generateAmbigWarnings=false; }
: { LA(2)!='/' }? '*'
| '\r' '\n' { newline(); }
| '\r' { newline(); }
| '\n' { newline(); }
| ~('*'|'\n'|'\r')
)*
"*/"
{$setType(antlr::Token::SKIP);}
//DSEPARATOR
//options {
// paraphrase = "':-'";
//}
// : ":-"
//;