Skip to content
Snippets Groups Projects
Commit def7cb39 authored by Adam Wardynski's avatar Adam Wardynski
Browse files

K=1 grammar with syntactic predicates.

parent 11566df9
Branches
No related merge requests found
......@@ -57,7 +57,7 @@ options {
// ----------------------------------------------------------------------------
class ANTLRParser extends Parser;
options {
k = 4;
k = 1;
buildAST = false;
exportVocab = ANTLRExpr;
defaultErrorHandler = false;
......@@ -115,9 +115,6 @@ parse_string_operator
: op = string_operators [tagset, *res->variables.get()] {
res->op = op;
}
| op = condit_str [tagset, *res->variables.get()] {
res->op = op;
}
;
// ----------------------------------------------------------------------------
......@@ -133,9 +130,6 @@ parse_predicates
: op = logical_predicates [tagset, *res->variables.get()] {
res->op = op;
}
| op = condit_bool [tagset, *res->variables.get()] {
res->op = op;
}
;
// ----------------------------------------------------------------------------
......@@ -151,9 +145,6 @@ parse_sym_set_operator
: op = sym_set_operators [tagset, *res->variables.get()] {
res->op = op;
}
| op = condit_sym [tagset, *res->variables.get()] {
res->op = op;
}
;
///////////////////////////////////////////////////////////////////////////////
......@@ -170,15 +161,16 @@ str_set_literal
{
s_set.reset(new Wccl::StrSet());
}
: LBRACKET RBRACKET
| LBRACKET s1: STRING {
s_set->insert(str_token_ref_to_ustring(s1));
}
(
COMMA s2: STRING {
: LBRACKET
( s1: STRING {
s_set->insert(str_token_ref_to_ustring(s1));
}
( COMMA s2: STRING {
s_set->insert(str_token_ref_to_ustring(s2));
}
)* RBRACKET
)*
)?
RBRACKET
;
// Constant string set
// Returns boost::shared_ptr<Wccl::Constant<Wccl::StrSet> >
......@@ -216,10 +208,9 @@ sym_set_literal
{
t_set.reset(new Wccl::TSet());
}
: LCURLY RCURLY
| LCURLY
sym_set_elem[tagset, t_set] (COMMA sym_set_elem[tagset, t_set]) *
RCURLY
: LCURLY
( sym_set_elem[tagset, t_set] (COMMA sym_set_elem[tagset, t_set])* )?
RCURLY
;
// Constant symbol set
// Returns boost::shared_ptr<Wccl::Constant<Wccl::TSet> >
......@@ -419,64 +410,71 @@ boolean_variable
setvar_op
[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > ret]
: ret = setvar_pos [tagset, vars]
| ret = setvar_bool [tagset, vars]
| ret = setvar_sset [tagset, vars]
| ret = setvar_tset [tagset, vars]
: "setvar" LPAREN
(
ret = setvar_body_pos [tagset, vars]
| ret = setvar_body_bool [tagset, vars]
| ret = setvar_body_sset [tagset, vars]
| ret = setvar_body_tset [tagset, vars]
)
RPAREN
;
// Implementations of setvar:
// ----------------------------------------------------------------------------
setvar_pos
setvar_body_pos
[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
boost::shared_ptr<Wccl::Function<Wccl::Position> > ret_op;
boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > ret_acc;
}
: "setvar" LPAREN ret_acc = position_variable_acc [vars] COMMA
ret_op = op_position [vars] RPAREN {
op.reset(new Wccl::VarSetter<Wccl::Position>(*ret_acc.get(), ret_op));
}
: ret_acc = position_variable_acc [vars]
COMMA
ret_op = op_position [vars] {
op.reset(new Wccl::VarSetter<Wccl::Position>(*ret_acc.get(), ret_op));
}
;
// ----------------------------------------------------------------------------
setvar_bool
setvar_body_bool
[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
boost::shared_ptr<Wccl::Function<Wccl::Bool> > ret_op;
boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > ret_acc;
}
: "setvar" LPAREN ret_acc = boolean_variable_acc [vars] COMMA
ret_op = logical_predicates [tagset, vars] RPAREN {
op.reset(new Wccl::VarSetter<Wccl::Bool>(*ret_acc.get(), ret_op));
}
: ret_acc = boolean_variable_acc [vars]
COMMA
ret_op = logical_predicates [tagset, vars] {
op.reset(new Wccl::VarSetter<Wccl::Bool>(*ret_acc.get(), ret_op));
}
;
// ----------------------------------------------------------------------------
setvar_sset
setvar_body_sset
[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret_op;
boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > ret_acc;
}
: "setvar" LPAREN ret_acc = str_set_variable_acc [vars] COMMA
ret_op = string_operators [tagset, vars] RPAREN {
: ret_acc = str_set_variable_acc [vars]
COMMA ret_op = string_operators [tagset, vars] {
op.reset(new Wccl::VarSetter<Wccl::StrSet>(*ret_acc.get(), ret_op));
}
;
// ----------------------------------------------------------------------------
setvar_tset
setvar_body_tset
[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
returns [boost::shared_ptr<Wccl::Function<Wccl::Bool> > op]
{
boost::shared_ptr<Wccl::Function<Wccl::TSet> > ret_op;
boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > ret_acc;
}
: "setvar" LPAREN ret_acc = sym_set_variable_acc [vars] COMMA
ret_op = sym_set_operators [tagset, vars] RPAREN {
: ret_acc = sym_set_variable_acc [vars]
COMMA
ret_op = sym_set_operators [tagset, vars] {
op.reset(new Wccl::VarSetter<Wccl::TSet>(*ret_acc.get(), ret_op));
}
;
......@@ -490,7 +488,7 @@ sym_set_operators
[const Corpus2::Tagset& tagset, Wccl::Variables& vars]
returns [boost::shared_ptr<Wccl::Function<Wccl::TSet> > ret]
: ret = op_sym_set [tagset, vars]
// | ret = condit_sym [vars]
| ret = condit_sym [tagset, vars]
;
// Implementations of symbol set operators:
// ----------------------------------------------------------------------------
......@@ -565,7 +563,7 @@ string_operators
| ret = op_upper [tagset, vars]
| ret = op_affix [tagset, vars]
| ret = op_str_set [tagset, vars]
// | ret = condit_str [vars]
| ret = condit_str [tagset, vars]
;
// Implementations of string operators:
// ----------------------------------------------------------------------------
......@@ -677,12 +675,12 @@ logical_predicates
| ret = lpred_nor [tagset, vars]
| ret = lpred_bool [tagset, vars]
| ret = lpred_in [tagset, vars]
| ret = lpred_inter [tagset, vars]
| ret = lpred_inter [tagset, vars]
| ret = lpred_eq [tagset, vars]
| ret = lpred_regex [tagset, vars]
| ret = setvar_op [tagset, vars]
| ret = lpred_inout [tagset, vars]
// | ret = condit_bool [vars]
| ret = condit_bool [tagset, vars]
;
// ----------------------------------------------------------------------------
// comma-separated predicates
......@@ -758,14 +756,26 @@ lpred_in
boost::shared_ptr<Wccl::Function<Wccl::TSet> > t1, t2;
boost::shared_ptr<Wccl::Function<Wccl::StrSet> > s1, s2;
}
: "in" LPAREN t1 = sym_set_operators [tagset, vars] COMMA
t2 = sym_set_operators [tagset, vars] RPAREN {
op.reset(new Wccl::IsSubsetOf<Wccl::TSet>(t1, t2));
}
| "in" LPAREN s1 = string_operators [tagset, vars] COMMA
s2 = string_operators [tagset, vars] RPAREN {
op.reset(new Wccl::IsSubsetOf<Wccl::StrSet>(s1, s2));
}
:
"in" LPAREN
(
(sym_set_operators [tagset, vars]) =>
(
t1 = sym_set_operators [tagset, vars] COMMA
t2 = sym_set_operators [tagset, vars] {
op.reset(new Wccl::IsSubsetOf<Wccl::TSet>(t1, t2));
}
)
|
(
s1 = string_operators [tagset, vars] COMMA
s2 = string_operators [tagset, vars] {
op.reset(new Wccl::IsSubsetOf<Wccl::StrSet>(s1, s2));
}
)
)
RPAREN
;
// ----------------------------------------------------------------------------
......@@ -776,14 +786,25 @@ lpred_inter
boost::shared_ptr<Wccl::Function<Wccl::TSet> > t1, t2;
boost::shared_ptr<Wccl::Function<Wccl::StrSet> > s1, s2;
}
: "inter" LPAREN s1 = string_operators [tagset, vars] COMMA
s2 = string_operators [tagset, vars] RPAREN {
op.reset(new Wccl::Intersects<Wccl::StrSet>(s1, s2));
}
| "inter" LPAREN t1 = sym_set_operators [tagset, vars] COMMA
t2 = sym_set_operators [tagset, vars] RPAREN {
op.reset(new Wccl::Intersects<Wccl::TSet>(t1, t2));
}
:
"inter" LPAREN
(
(sym_set_operators [tagset, vars]) =>
(
t1 = sym_set_operators [tagset, vars] COMMA
t2 = sym_set_operators [tagset, vars] {
op.reset(new Wccl::Intersects<Wccl::TSet>(t1, t2));
}
)
|
(
s1 = string_operators [tagset, vars] COMMA
s2 = string_operators [tagset, vars] {
op.reset(new Wccl::Intersects<Wccl::StrSet>(s1, s2));
}
)
)
RPAREN
;
// ----------------------------------------------------------------------------
......@@ -796,22 +817,39 @@ lpred_eq
boost::shared_ptr<Wccl::Function<Wccl::StrSet> > s1, s2;
boost::shared_ptr<Wccl::Function<Wccl::Position> > p1, p2;
}
: "equal" LPAREN s1 = string_operators [tagset, vars] COMMA
s2 = string_operators [tagset, vars] RPAREN {
op.reset(new Wccl::Equals<Wccl::StrSet>(s1, s2));
}
| "equal" LPAREN t1 = sym_set_operators [tagset, vars] COMMA
t2 = sym_set_operators [tagset, vars] RPAREN {
op.reset(new Wccl::Equals<Wccl::TSet>(t1, t2));
}
| "equal" LPAREN p1 = position_operators [tagset, vars] COMMA
p2 = position_operators [tagset, vars] RPAREN {
op.reset(new Wccl::Equals<Wccl::Position>(p1, p2));
}
| "equal" LPAREN b1 = logical_predicates [tagset, vars] COMMA
b2 = logical_predicates [tagset, vars] RPAREN {
op.reset(new Wccl::Equals<Wccl::Bool>(b1, b2));
}
: "equal" LPAREN
(
(
p1 = position_operators [tagset, vars] COMMA
p2 = position_operators [tagset, vars] {
op.reset(new Wccl::Equals<Wccl::Position>(p1, p2));
}
)
|
(sym_set_operators [tagset, vars]) =>
(
t1 = sym_set_operators [tagset, vars] COMMA
t2 = sym_set_operators [tagset, vars] {
op.reset(new Wccl::Equals<Wccl::TSet>(t1, t2));
}
)
|
(string_operators [tagset, vars]) =>
(
s1 = string_operators [tagset, vars] COMMA
s2 = string_operators [tagset, vars] {
op.reset(new Wccl::Equals<Wccl::StrSet>(s1, s2));
}
)
|
(
b1 = logical_predicates [tagset, vars] COMMA
b2 = logical_predicates [tagset, vars] {
op.reset(new Wccl::Equals<Wccl::Bool>(b1, b2));
}
)
)
RPAREN
;
// ----------------------------------------------------------------------------
......@@ -881,13 +919,13 @@ class ANTLRLexer extends Lexer;
options {
exportVocab = ANTLRExpr;
charVocabulary = '\3'..'\377';
testLiterals = false;
k = 3;
}
STRING
options {
paraphrase = "a string";
testLiterals = true;
}
: '"' (~'"')* '"'
| '\'' (~'\'')* '\''
......@@ -896,7 +934,6 @@ options {
INT
options {
paraphrase = "Integer";
testLiterals = true;
}
: ('-'|'+')? ('0'..'9')+
;
......@@ -904,7 +941,6 @@ options {
QUOT_MARK
options {
paraphrase = "Quota mark";
testLiterals = true;
}
: '\''
;
......@@ -912,7 +948,6 @@ options {
APOS_MARK
options {
paraphrase = "Aposptrophe mark";
testLiterals = true;
}
: '"'
;
......@@ -920,7 +955,6 @@ options {
Q_MARK
options {
paraphrase = "Query mark";
testLiterals = true;
}
: '?'
;
......@@ -928,7 +962,6 @@ options {
E_MARK
options {
paraphrase = "Exclamanation mark";
testLiterals = true;
}
: '!'
;
......@@ -936,7 +969,6 @@ options {
STR_PREFIX
options {
paraphrase = "String prefix";
testLiterals = true;
}
: "$s:"
;
......@@ -944,7 +976,6 @@ options {
TST_PREFIX
options {
paraphrase = "Tag set (symbol) prefix";
testLiterals = true;
}
: "$t:"
;
......@@ -952,7 +983,6 @@ options {
BOOL_PREFIX
options {
paraphrase = "Bool prefix";
testLiterals = true;
}
: "$b:"
;
......@@ -967,7 +997,6 @@ options {
LBRACKET
options {
paraphrase = "'['";
testLiterals = true;
}
: '['
;
......@@ -975,7 +1004,6 @@ options {
RBRACKET
options {
paraphrase = "']'";
testLiterals = true;
}
: ']'
;
......@@ -983,7 +1011,6 @@ options {
LPAREN
options {
paraphrase = "'('";
testLiterals = true;
}
: '('
;
......@@ -991,7 +1018,6 @@ options {
RPAREN
options {
paraphrase = "')'";
testLiterals = true;
}
: ')'
;
......@@ -999,7 +1025,6 @@ options {
LCURLY
options {
paraphrase = "'{'";
testLiterals = true;
}
: '{'
;
......@@ -1007,7 +1032,6 @@ options {
RCURLY
options {
paraphrase = "'}'";
testLiterals = true;
}
: '}'
;
......@@ -1015,15 +1039,13 @@ options {
AT_MARK
options {
paraphrase = "'@'";
testLiterals = true;
}
: '@'
;
COMMA
options {
paraphrase = "','";
testLiterals = true;
paraphrase = "','";
}
: ','
;
......@@ -1039,25 +1061,37 @@ options {
WS
: ( ' '
| '\t'
| '\r' '\n' { newline(); }
| '\n' { newline(); }
) { $setType(antlr::Token::SKIP); }
| '\t'
| '\f'
| ( "\r\n"
| '\r'
| '\n'
)
{ newline(); }
)
{ $setType(antlr::Token::SKIP); }
;
COMMENT
options {
paraphrase = "Comment";
testLiterals = true;
paraphrase = "Single line comment";
}
: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP); }
;
ML_COMMENT
options {
paraphrase = "Multi line comment";
}
: "//" (~'\n')* '\n'{ $setType(antlr::Token::SKIP); newline(); }
| "/*" (.)* "*/" { $setType(antlr::Token::SKIP); }
: "/*"
(.)*
"*/"
{ $setType(antlr::Token::SKIP); }
;
HASH
options {
paraphrase = "'#'";
testLiterals = true;
}
: '#'
;
......@@ -1065,7 +1099,6 @@ options {
DSEPARATOR
options {
paraphrase = "':-'";
testLiterals = true;
}
: ":-"
;
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment