Skip to content
Snippets Groups Projects
Commit c4d45c5b authored by Paweł Kędzia's avatar Paweł Kędzia
Browse files

Fixed grammar and added literal rules to return shared_ptr to <StrSet, TSet, Bool, Position>.

parent bc44d186
Branches
No related merge requests found
......@@ -43,7 +43,7 @@ boost::shared_ptr<ANTLRParserResult<Wccl::StrSet> > Parser::parseStringOperator(
ANTLRLexer lexer(istr);
ANTLRParser parser(lexer);
return parser.parse_string_operator();
return parser.parse_string_operator(this->tagset);
}
// ----------------------------------------------------------------------------
......@@ -73,7 +73,7 @@ boost::shared_ptr<ANTLRParserResult<Wccl::Bool> > Parser::parsePredicate(
ANTLRLexer lexer(istr);
ANTLRParser parser(lexer);
return parser.parse_predicates();
return parser.parse_predicates(this->tagset);
}
// ----------------------------------------------------------------------------
......@@ -104,5 +104,5 @@ boost::shared_ptr<ANTLRParserResult<Wccl::TSet> > Parser::parseSymSetOperator(
ANTLRLexer lexer(istr);
ANTLRParser parser(lexer);
return parser.parse_sym_set_operator();
return parser.parse_sym_set_operator(this->tagset);
}
......@@ -49,6 +49,7 @@ header {
options {
language = "Cpp";
genHashLines = false;
}
// ----------------------------------------------------------------------------
......@@ -57,8 +58,8 @@ options {
class ANTLRParser extends Parser;
options {
k = 4;
exportVocab = ANTLRExpr;
buildAST = false;
exportVocab = ANTLRExpr;
defaultErrorHandler = false;
}
{
......@@ -91,7 +92,10 @@ private:
// TODO
// - base, orth
// - equal do bool
// TEMPORARY CHANGES ->
// -> temporarily commented out the calls to the condit_* rules
// -> temporarily commented out 2 rules involving equal
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
......@@ -102,6 +106,7 @@ private:
// Rules for parsing string operators in scope (variables).
// Returns boost::shared_ptr<Wccl::Function<Wccl::StrSet> >
parse_string_operator
[const Corpus2::Tagset &tagset]
returns [boost::shared_ptr<ANTLRParserResult<Wccl::StrSet> > res]
{
res.reset(new ANTLRParserResult<Wccl::StrSet>());
......@@ -110,12 +115,16 @@ parse_string_operator
: op = string_operators [*res->variables.get()] {
res->op = op;
}
| op = condit_str [*res->variables.get()] {
res->op = op;
}
;
// ----------------------------------------------------------------------------
// Rules for parsing predicates in scope (variables).
// Returns boost::shared_ptr<Wccl::Function<Wccl::Bool> >
parse_predicates
[const Corpus2::Tagset &tagset]
returns [boost::shared_ptr<ANTLRParserResult<Wccl::Bool> > res]
{
res.reset(new ANTLRParserResult<Wccl::Bool>());
......@@ -124,12 +133,16 @@ parse_predicates
: op = logical_predicates [*res->variables.get()] {
res->op = op;
}
| op = condit_bool [*res->variables.get()] {
res->op = op;
}
;
// ----------------------------------------------------------------------------
// Rules for parsing tagset (symbol set) operators
// Returns boost::shared_ptr<Wccl::Function<Wccl::TSet> >
parse_sym_set_operator
[const Corpus2::Tagset &tagset]
returns [boost::shared_ptr<ANTLRParserResult<Wccl::TSet> > res]
{
res.reset(new ANTLRParserResult<Wccl::TSet>());
......@@ -138,6 +151,9 @@ parse_sym_set_operator
: op = sym_set_operators [*res->variables.get()] {
res->op = op;
}
| op = condit_sym [*res->variables.get()] {
res->op = op;
}
;
///////////////////////////////////////////////////////////////////////////////
......@@ -146,103 +162,120 @@ parse_sym_set_operator
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Single or multiple values in string set
str_set_value_in
[boost::shared_ptr<Wccl::StrSet>& s_set]
: v1: STRING {
s_set->insert(str_token_ref_to_ustring(v1));
// Single or multiple values in string set:
// [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"]
// Returns boost::shared_ptr<Wccl::StrSet>
str_set_literal
returns [boost::shared_ptr<Wccl::StrSet> s_set]
{
s_set.reset(new Wccl::StrSet());
}
: LBRACKET RBRACKET
| LBRACKET s1: STRING {
s_set->insert(str_token_ref_to_ustring(s1));
}
| v2: STRING COMMA str_set_value_in [s_set] {
s_set->insert(str_token_ref_to_ustring(v2));
}
(
COMMA s2: STRING {
s_set->insert(str_token_ref_to_ustring(s2));
}
)* RBRACKET
;
// string set, called as unnamed (temporary) StrSet:
// calls: [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"] or variable $A
// Constant string set
// Returns boost::shared_ptr<Wccl::Constant<Wccl::StrSet> >
str_set_value
returns [boost::shared_ptr<Wccl::Constant<Wccl::StrSet> > val]
{
boost::shared_ptr<Wccl::StrSet> set(new Wccl::StrSet);
boost::shared_ptr<Wccl::StrSet> set;
}
: LBRACKET RBRACKET {
val.reset(new Wccl::Constant<Wccl::StrSet>(*set.get()));
}
| LBRACKET str_set_value_in [set] RBRACKET {
: set = str_set_literal {
val.reset(new Wccl::Constant<Wccl::StrSet>(*set.get()));
}
;
// ----------------------------------------------------------------------------
// element of sym set
sym_set_elem
// Element of a sym set. This rule is meant to insert the element into the
// given set (the insert calls are currently commented out).
// An element may be written as: a or `a`
sym_set_elem
[boost::shared_ptr<Wccl::TSet> &t_set]
: s1: SYMBOL {
// t_set->insert(token_ref_to_ustring(s1));
}
| G_MARK s2: SYMBOL G_MARK {
// t_set->insert(token_ref_to_ustring(s2));
// t_set->insert(token_ref_to_ustring(s1));
}
| s3: SYMBOL COMMA sym_set_elem [t_set] {
// t_set->insert(token_ref_to_ustring(s3));
}
| G_MARK s4: SYMBOL G_MARK COMMA sym_set_elem [t_set] {
// t_set->insert(token_ref_to_ustring(s3));
| G_MARK s2: SYMBOL G_MARK {
// t_set->insert(token_ref_to_ustring(s2));
}
;
// sym set in
sym_set_value_in
[boost::shared_ptr<Wccl::TSet> &set]
: sym_set_elem [set]
// Symbol set literal:
// {} {sym_set_elem} {sym_set_elem, ..., sym_set_elem}
// Creates a fresh Wccl::TSet and passes it to every sym_set_elem so that each
// element rule can fill it in.
// Returns boost::shared_ptr<Wccl::TSet>
sym_set_literal
returns [boost::shared_ptr<Wccl::TSet> t_set]
{
	t_set.reset(new Wccl::TSet());
}
	: LCURLY RCURLY
	// A non-empty literal must consume its closing brace as well; otherwise
	// the RCURLY is left in the token stream after the last element.
	| LCURLY sym_set_elem[t_set] (COMMA sym_set_elem[t_set])* RCURLY
;
// sym set {} {a} {a, b}
// Constant symbol set
// Returns boost::shared_ptr<Wccl::Constant<Wccl::TSet> >
sym_set_value
returns [boost::shared_ptr<Wccl::Constant<Wccl::TSet> > val]
{
boost::shared_ptr<Wccl::TSet> set(new Wccl::TSet);
boost::shared_ptr<Wccl::TSet> set;
}
: LCURLY RCURLY {
val.reset(new Wccl::Constant<Wccl::TSet>(*set.get()));
}
| LCURLY sym_set_value_in [set] RCURLY {
: set = sym_set_literal {
val.reset(new Wccl::Constant<Wccl::TSet>(*set.get()));
}
;
// ----------------------------------------------------------------------------
// Boolean literal: one of the keywords True or False.
// Returns boost::shared_ptr<Wccl::Bool> holding the matching value.
bool_literal
returns [boost::shared_ptr<Wccl::Bool> val]
	: "True" {
		val.reset(new Wccl::Bool(Wccl::Bool(true)));
	}
	| "False" {
		val.reset(new Wccl::Bool(Wccl::Bool(false)));
	}
;
// Constant bool value
// Returns boost::shared_ptr<Wccl::Constant<Wccl::Bool> >
boolean_value
returns [boost::shared_ptr<Wccl::Constant<Wccl::Bool> > val]
: "True" { val.reset(new Wccl::Constant<Wccl::Bool>(Wccl::Bool(true ))); }
| "False" { val.reset(new Wccl::Constant<Wccl::Bool>(Wccl::Bool(false))); }
{
boost::shared_ptr<Wccl::Bool> bool_lit;
}
: bool_lit = bool_literal {
val.reset(new Wccl::Constant<Wccl::Bool>(*bool_lit));
}
;
// ----------------------------------------------------------------------------
// position value:
position_value
returns [boost::shared_ptr<Wccl::Constant<Wccl::Position> > val]
// Position literal may be (+|-)?(0-9)+ or begin or end or nowhere
// returns boost::shared_ptr<Wccl::Position>
position_literal
returns [boost::shared_ptr<Wccl::Position> val]
: i: INT {
val.reset(
new Wccl::Constant<Wccl::Position>(Wccl::Position(token_ref_to_int(i)))
);
val.reset(new Wccl::Position(Wccl::Position(token_ref_to_int(i))));
}
| "begin" {
val.reset(
new Wccl::Constant<Wccl::Position>(
Wccl::Position(Wccl::Position::Begin)
)
);
}
val.reset(new Wccl::Position(Wccl::Position(Wccl::Position::Begin)));
}
| "end" {
val.reset(
new Wccl::Constant<Wccl::Position>(
Wccl::Position(Wccl::Position::End)
)
);
val.reset(new Wccl::Position(Wccl::Position(Wccl::Position::End)));
}
| "nowhere" {
val.reset(
new Wccl::Constant<Wccl::Position>(
Wccl::Position(Wccl::Position::Nowhere)
)
);
val.reset(new Wccl::Position(Wccl::Position(Wccl::Position::Nowhere)));
}
;
// Constant position value: parses a position literal and wraps a copy of it
// in a Wccl::Constant.
// Returns boost::shared_ptr<Wccl::Constant<Wccl::Position> >
position_value
returns [boost::shared_ptr<Wccl::Constant<Wccl::Position> > val]
{
	boost::shared_ptr<Wccl::Position> pos_lit;
}
	: pos_lit = position_literal {
		// The constant has to be stored in the returned pointer: a bare
		// `new` expression would leak the object and leave val empty.
		val.reset(new Wccl::Constant<Wccl::Position>(*pos_lit));
	}
;
......@@ -251,14 +284,13 @@ position_value
// VARIABLES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Position: $name
// ----------------------------------------------------------------------------
// returns accessor
// Position: $name
// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> >
position_variable_acc
[Wccl::Variables& vars]
returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::Position> > pos_acc]
// : DOLLAR POS_PREFIX n: SYMBOL {
: DOLLAR n: SYMBOL {
: POS_PREFIX n: SYMBOL {
vars.get_put<Wccl::Position>(token_ref_to_std_string(n));
Wccl::VariableAccessor<Wccl::Position> acc =
......@@ -267,7 +299,8 @@ position_variable_acc
pos_acc.reset(new Wccl::VariableAccessor<Wccl::Position>(acc));
}
;
// returs vargetter
// Position vargetter
// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::Position> >
position_variable
[Wccl::Variables& vars]
returns [boost::shared_ptr<Wccl::VarGetter<Wccl::Position> > op]
......@@ -278,24 +311,15 @@ position_variable
op.reset(new Wccl::VarGetter<Wccl::Position>(*pos_acc.get()));
}
;
// ----------------------------------------------------------------------------
// Position reference: $(0-9)+name
// !! Cannot use for setvar(...,...) !!
position_ref_variable
[Wccl::Variables& vars]
: DOLLAR p_ref: INT n: SYMBOL {
// TODO
vars.get_put<Wccl::Position>(token_ref_to_std_string(n));
}
;
// ----------------------------------------------------------------------------
// String set, call examples: $name, $sName, $s_name, $s_Name etc.
// This expression gets variable of tyme StrSet from string-named variable
// returns accessor
// String set, $s:name
// This expression gets variable of the type StrSet from string-named variable
// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> >
str_set_variable_acc
[Wccl::Variables& vars]
returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::StrSet> > strset_acc]
: DOLLAR STR_PREFIX n: SYMBOL {
: STR_PREFIX n: SYMBOL {
// get/put variable to variables
vars.get_put<Wccl::StrSet>(token_ref_to_std_string(n));
......@@ -306,7 +330,8 @@ str_set_variable_acc
strset_acc.reset(new Wccl::VariableAccessor<Wccl::StrSet>(acc));
}
;
// vargetter
// Vargetter for StrSet variable
// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::StrSet> >
str_set_variable
[Wccl::Variables& vars]
returns [boost::shared_ptr<Wccl::VarGetter<Wccl::StrSet> > op]
......@@ -317,13 +342,14 @@ str_set_variable
op.reset(new Wccl::VarGetter<Wccl::StrSet>(*strset_acc.get()));
}
;
// ----------------------------------------------------------------------------
// Symbol set: $tName
// returns accessor
// Symbol set: $t:name
// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> >
sym_set_variable_acc
[Wccl::Variables& vars]
returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::TSet> > symset_acc]
: DOLLAR TST_PREFIX n: SYMBOL {
: TST_PREFIX n: SYMBOL {
vars.get_put<Wccl::TSet>(token_ref_to_std_string(n));
Wccl::VariableAccessor<Wccl::TSet> acc =
......@@ -332,7 +358,8 @@ sym_set_variable_acc
symset_acc.reset(new Wccl::VariableAccessor<Wccl::TSet>(acc));
}
;
// returns vargetter
// Vargetter for symbol set variable
// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::TSet> >
sym_set_variable
[Wccl::Variables& vars]
returns [boost::shared_ptr<Wccl::VarGetter<Wccl::TSet> > op]
......@@ -343,13 +370,14 @@ sym_set_variable
op.reset(new Wccl::VarGetter<Wccl::TSet>(*symset_acc.get()));
}
;
// ----------------------------------------------------------------------------
// Bool: $bName
// returns accessor
// Bool: $b:name
// Returns boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> >
boolean_variable_acc
[Wccl::Variables& vars]
returns [boost::shared_ptr<Wccl::VariableAccessor<Wccl::Bool> > bool_acc]
: DOLLAR BOOL_PREFIX n: SYMBOL {
: BOOL_PREFIX n: SYMBOL {
vars.get_put<Wccl::Bool>(token_ref_to_std_string(n));
Wccl::VariableAccessor<Wccl::Bool> acc =
......@@ -358,7 +386,8 @@ boolean_variable_acc
bool_acc.reset(new Wccl::VariableAccessor<Wccl::Bool>(acc));
}
;
// returns vargetter
// Vargetter for bool variable
// Returns boost::shared_ptr<Wccl::VarGetter<Wccl::Bool> >
boolean_variable
[Wccl::Variables& vars]
returns [boost::shared_ptr<Wccl::VarGetter<Wccl::Bool> > op]
......@@ -378,6 +407,7 @@ boolean_variable
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Setvar operator
// Returns boost::shared_ptr<Wccl::Function<Wccl::Bool> >
// ----------------------------------------------------------------------------
setvar_op
[Wccl::Variables& vars]
......@@ -453,7 +483,7 @@ sym_set_operators
[Wccl::Variables& vars]
returns [boost::shared_ptr<Wccl::Function<Wccl::TSet> > ret]
: ret = op_sym_set [vars]
| ret = condit_sym [vars]
// | ret = condit_sym [vars]
;
// Implementations of symbol set operators:
// ----------------------------------------------------------------------------
......@@ -485,10 +515,13 @@ condit_sym
op.reset(new Wccl::Conditional<Wccl::TSet>(test, p_true));
}
}
| Q_MARK p_true = sym_set_operators [vars]
Q_MARK test = logical_predicates [vars] {
op.reset(new Wccl::Conditional<Wccl::TSet>(test, p_true));
}
| Q_MARK
(p_true = sym_set_operators [vars] | p_true = condit_sym [vars])
Q_MARK
(test = logical_predicates [vars] | test = condit_bool [vars])
{
op.reset(new Wccl::Conditional<Wccl::TSet>(test, p_true));
}
;
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
......@@ -523,7 +556,7 @@ string_operators
| ret = op_upper [vars]
| ret = op_affix [vars]
| ret = op_str_set [vars]
| ret = condit_str [vars]
// | ret = condit_str [vars]
;
// Implementations of string operators:
// ----------------------------------------------------------------------------
......@@ -531,9 +564,9 @@ op_orth
[Wccl::Variables& vars]
returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
{
// TODO
boost::shared_ptr<Wccl::VarGetter<Wccl::Position> > pos;
}
: "orth" LBRACKET position_ref_variable [vars] RBRACKET {
: "orth" LBRACKET pos = position_variable [vars] RBRACKET {
// ret = TODO
}
;
......@@ -542,9 +575,9 @@ op_base
[Wccl::Variables& vars]
returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
{
// TODO
boost::shared_ptr<Wccl::VarGetter<Wccl::Position> > pos;
}
: "base" LBRACKET position_ref_variable [vars] RBRACKET {
: "base" LBRACKET pos = position_variable [vars] RBRACKET {
// ret = TODO
}
;
......@@ -609,10 +642,13 @@ condit_str
op.reset(new Wccl::Conditional<Wccl::StrSet>(test, p_true));
}
}
| Q_MARK p_true = string_operators [vars]
Q_MARK test = logical_predicates [vars] {
op.reset(new Wccl::Conditional<Wccl::StrSet>(test, p_true));
}
| Q_MARK
(p_true = string_operators [vars] | p_true = condit_str [vars])
Q_MARK
(test = logical_predicates [vars] | test = condit_bool [vars])
{
op.reset(new Wccl::Conditional<Wccl::StrSet>(test, p_true));
}
;
// ----------------------------------------------------------------------------
......@@ -633,7 +669,7 @@ logical_predicates
| ret = lpred_regex [vars]
| ret = setvar_op [vars]
| ret = lpred_inout [vars]
| ret = condit_bool [vars]
// | ret = condit_bool [vars]
;
// ----------------------------------------------------------------------------
// comma-separated predicates
......@@ -812,10 +848,13 @@ condit_bool
op.reset(new Wccl::Conditional<Wccl::Bool>(test, p_true));
}
}
| Q_MARK p_true = logical_predicates [vars]
Q_MARK test = logical_predicates [vars] {
op.reset(new Wccl::Conditional<Wccl::Bool>(test, p_true));
}
| Q_MARK
(p_true = logical_predicates [vars] | p_true = condit_bool [vars])
Q_MARK
(test = logical_predicates [vars] | test = condit_bool [vars])
{
op.reset(new Wccl::Conditional<Wccl::Bool>(test, p_true));
}
;
///////////////////////////////////////////////////////////////////////////////
......@@ -827,13 +866,14 @@ class ANTLRLexer extends Lexer;
options {
exportVocab = ANTLRExpr;
charVocabulary = '\3'..'\377';
testLiterals = false;
k = 2;
// testLiterals = false;
k = 4;
}
STRING
options {
paraphrase = "a string";
testLiterals = true;
}
: '"' (~'"')* '"'
| '\'' (~'\'')* '\''
......@@ -842,6 +882,7 @@ options {
INT
options {
paraphrase = "Integer";
testLiterals = true;
}
: ('-'|'+')? ('0'..'9')+
;
......@@ -849,6 +890,7 @@ options {
QUOT_MARK
options {
paraphrase = "Quota mark";
testLiterals = true;
}
: '\''
;
......@@ -856,6 +898,7 @@ options {
APOS_MARK
options {
paraphrase = "Aposptrophe mark";
testLiterals = true;
}
: '"'
;
......@@ -863,6 +906,7 @@ options {
Q_MARK
options {
paraphrase = "Query mark";
testLiterals = true;
}
: '?'
;
......@@ -870,6 +914,7 @@ options {
E_MARK
options {
paraphrase = "Exclamanation mark";
testLiterals = true;
}
: '!'
;
......@@ -877,6 +922,7 @@ options {
G_MARK
options {
paraphrase = "Gravis mark";
testLiterals = true;
}
: '`'
;
......@@ -884,36 +930,38 @@ options {
STR_PREFIX
options {
paraphrase = "String prefix";
testLiterals = true;
}
: "s:"
: "$s:"
;
TST_PREFIX
options {
paraphrase = "Tag set (symbol) prefix";
testLiterals = true;
}
: "t:"
: "$t:"
;
BOOL_PREFIX
options {
paraphrase = "Bool prefix";
testLiterals = true;
}
: "b:"
: "$b:"
;
/*
POS_PREFIX
options {
paraphrase = "Position prefix";
}
: "p:"
: '$'
;
*/
LBRACKET
options {
paraphrase = "'['";
testLiterals = true;
}
: '['
;
......@@ -921,6 +969,7 @@ options {
RBRACKET
options {
paraphrase = "']'";
testLiterals = true;
}
: ']'
;
......@@ -928,6 +977,7 @@ options {
LPAREN
options {
paraphrase = "'('";
testLiterals = true;
}
: '('
;
......@@ -935,6 +985,7 @@ options {
RPAREN
options {
paraphrase = "')'";
testLiterals = true;
}
: ')'
;
......@@ -942,6 +993,7 @@ options {
LCURLY
options {
paraphrase = "'{'";
testLiterals = true;
}
: '{'
;
......@@ -949,20 +1001,25 @@ options {
RCURLY
options {
paraphrase = "'}'";
testLiterals = true;
}
: '}'
;
/*
DOLLAR
options {
paraphrase = "'$'";
testLiterals = true;
}
: '$'
;
*/
AT_MARK
options {
paraphrase = "'@'";
testLiterals = true;
}
: '@'
;
......@@ -970,6 +1027,7 @@ options {
COMMA
options {
paraphrase = "','";
testLiterals = true;
}
: ','
;
......@@ -993,13 +1051,16 @@ WS
// Comments: both alternatives mark the token as SKIP, so it is not passed on
// to the parser.
//  - line comment:  "//" up to and including the terminating '\n'
//  - block comment: "/*" ... "*/"
// NOTE(review): the line-comment alternative requires a trailing '\n', so a
// "//" comment on the last line without a newline would not match - confirm.
// NOTE(review): newline() is called only for line comments; line breaks
// inside a block comment are not counted - confirm whether that is intended.
// NOTE(review): "(.)*" before "*/" may need options { greedy = false; } in
// ANTLR 2 to stop at the first "*/" - verify against the ANTLR documentation.
COMMENT
options {
paraphrase = "Comment";
testLiterals = true;
}
: "//" (~'\n')* '\n'{ $setType(antlr::Token::SKIP); newline(); }
| "/*" (.)* "*/" { $setType(antlr::Token::SKIP); }
;
HASH
options {
paraphrase = "'#'";
testLiterals = true;
}
: '#'
;
......@@ -1007,6 +1068,7 @@ options {
DSEPARATOR
options {
paraphrase = "':-'";
testLiterals = true;
}
: ":-"
;
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment