From 276222e623baefe97e7b73234a8990a913088e0d Mon Sep 17 00:00:00 2001
From: rk <rk@wojtek-desktop.(none)>
Date: Mon, 15 Nov 2010 15:13:48 +0100
Subject: [PATCH] Fixed grammar for string operators.

---
 libwccl/parser/Parser.cpp |   5 +-
 libwccl/parser/Parser.h   |   3 -
 libwccl/parser/grammar.g  | 251 ++++++++++++++++++++------------------
 wcclparser/strop_main.cpp |   4 +-
 wcclparser/val_main.cpp   |   2 +-
 5 files changed, 139 insertions(+), 126 deletions(-)

diff --git a/libwccl/parser/Parser.cpp b/libwccl/parser/Parser.cpp
index 5309a77..5ec704b 100644
--- a/libwccl/parser/Parser.cpp
+++ b/libwccl/parser/Parser.cpp
@@ -55,6 +55,7 @@ boost::shared_ptr<Wccl::Function<Wccl::StrSet> > Parser::parseStringOperator(
  * @arg str writed value(s)
  * @retrun boost::shared_ptr<Wccl::Value>
  */
+/*
 boost::shared_ptr<Wccl::Value> Parser::parseValue(const std::string& str) const
 {
 	std::stringstream ss (std::stringstream::in | std::stringstream::out);
@@ -62,12 +63,13 @@ boost::shared_ptr<Wccl::Value> Parser::parseValue(const std::string& str) const
 
 	return this->parseValue(ss);
 }
-
+*/
 /**
  * @desc Parse values. Runs parse_values rule in the parser grammar.
  * @arg istr input stream with writed values
  * @return boost::shared_ptr<Wccl::Value> to created value
  */
+/*
 boost::shared_ptr<Wccl::Value> Parser::parseValue(std::istream& istr) const
 {
 	ANTLRLexer lexer(istr);
@@ -75,3 +77,4 @@ boost::shared_ptr<Wccl::Value> Parser::parseValue(std::istream& istr) const
 
 	return parser.parse_values();
 }
+*/
diff --git a/libwccl/parser/Parser.h b/libwccl/parser/Parser.h
index a7ed2e3..a76b3a9 100644
--- a/libwccl/parser/Parser.h
+++ b/libwccl/parser/Parser.h
@@ -30,9 +30,6 @@ public:
 		parseStringOperator(const std::string&) const;
 	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > 
 		parseStringOperator(std::istream&     ) const;
-	// methods for parsing values
-	boost::shared_ptr<Wccl::Value> parseValue(const std::string&) const;
-	boost::shared_ptr<Wccl::Value> parseValue(std::istream&     ) const;
 
 private:
 	const Corpus2::Tagset &tagset;
diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g
index 936afe9..03bb9bb 100644
--- a/libwccl/parser/grammar.g
+++ b/libwccl/parser/grammar.g
@@ -24,6 +24,10 @@ header {
 	#include <libwccl/ops/constant.h>
 	#include <libwccl/ops/functions.h>
 	#include <libwccl/ops/logicalpredicate.h>
+
+	// Unicode String
+	#include <unicode/uniset.h>
+	#include <unicode/unistr.h>
 }
 
 options {
@@ -43,12 +47,16 @@ options {
 {
 private:
 	// 
-	const std::string token_ref_to_std_string(antlr::RefToken& rstr) { 
-		return (((antlr::Token*)rstr)->getText()); 
+	const UnicodeString token_ref_to_ustring(antlr::RefToken& rstr) const { // UTF-8 token text, escapes resolved by unescape()
+		return UnicodeString::fromUTF8(rstr->getText()).unescape();
+	}
+	//
+	const std::string token_ref_to_std_string(antlr::RefToken& rstr) const { 
+		return (((antlr::Token*)rstr)->getText());
 	}
 	//
 	int token_ref_to_int(antlr::RefToken& rstr) { 
-		return atoi(this->token_ref_to_std_string(rstr).c_str());
+		return atoi(((antlr::Token*)rstr)->getText().c_str());
 	}
 
 	// hepls function for processing
@@ -96,9 +104,8 @@ parse_string_operator
 	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > res]
 {
 	Wccl::Variables vars;
-	boost::shared_ptr<Wccl::StrSet> mret;
 }
-	: mret = string_operators [vars, res]
+	: res = string_operators [vars]
 ;
 // ----------------------------------------------------------------------------
 // Rules for parsing predicates in scope (variables). 
@@ -114,6 +121,7 @@ parse_predicates
 // ----------------------------------------------------------------------------
 // Rules for parsing values in scope (variables). 
 // Returns boost::shared_ptr<Wccl::Value>
+/*
 parse_values 
 	returns [boost::shared_ptr<Wccl::Value> ret]
 {
@@ -121,20 +129,23 @@ parse_values
 }
 	: ret = values [vars]
 ;
+*/
 
 ///////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////
 // All values:
 // Values can be use for setvar(...,..)
 // ----------------------------------------------------------------------------
+/*
 values 
 	[Wccl::Variables& vars] 
-	returns [boost::shared_ptr<Wccl::Value> res]
+	returns [boost::shared_ptr<Wccl::Constant<Wccl::Value> > res]
 	: res = position [vars]
 	| res = str_set  [vars]
 	| res = sym_set  [vars]
 	| res = boolean  [vars]
 ;
+*/
 // ----------------------------------------------------------------------------
 // Values reference => values + position_ref 
 // !! Cannot use for setvar(...,...) !!
@@ -153,14 +164,19 @@ values_ref [std::string& name]:
 // TODO regula do lexera?
 position 
 	[Wccl::Variables& vars] 
-	returns [boost::shared_ptr<Wccl::Position> val]
-	: DOLLAR "0" n: SYMBOL  { 
-			val = vars.get_put<Wccl::Position>(token_ref_to_std_string(n)); 	
+	returns [boost::shared_ptr<Wccl::Constant<Wccl::Position> > op]
+{
+	boost::shared_ptr<Wccl::Position> val;
+}
+	: DOLLAR "0" n: SYMBOL { 
+		val = vars.get_put<Wccl::Position>(token_ref_to_std_string(n)); 	
+		op.reset(new Wccl::Constant<Wccl::Position>(*val.get()));
 	}
 ;
 // ----------------------------------------------------------------------------
 // Position reference: $(0-9)+name
 // !! Cannot use for setvar(...,...) !!
+/*
 position_ref 
 	[Wccl::Variables& vars]
 	returns [boost::shared_ptr<Wccl::PositionRef> val]
@@ -173,35 +189,50 @@ position_ref
 		);
 	}
 ;
+*/
 // ----------------------------------------------------------------------------
 // String set, call examples: $name, $Name, $_name, $_Name etc.
 // This expression gets variable of tyme StrSet from string-named variable 
-// Returns variable<StrSet> from Set-variables
+// Returns Wccl::Function<Wccl::StrSet> from Set-variables
 str_set 
 	[Wccl::Variables& vars] 
-	returns [boost::shared_ptr<Wccl::StrSet> val]
+	returns [boost::shared_ptr<Wccl::Constant<Wccl::StrSet> > op]
+{
+	boost::shared_ptr<Wccl::StrSet> val;
+}
 	:	DOLLAR n: SYMBOL {
-			val = vars.get_put<Wccl::StrSet>(token_ref_to_std_string(n)); 	
+		val = vars.get_put<Wccl::StrSet>(token_ref_to_std_string(n));
+		op.reset(new Wccl::Constant<Wccl::StrSet>(*val.get()));
 	}
 ;
 // ----------------------------------------------------------------------------
 // Symbol set: $$name
 sym_set 
 	[Wccl::Variables& vars] 
-	returns [boost::shared_ptr<Wccl::TSet> val]
+	returns [boost::shared_ptr<Wccl::Constant<Wccl::TSet> > op]
+{
+	boost::shared_ptr<Wccl::TSet> val;
+}
 	: DOLLAR DOLLAR n: SYMBOL { 
 			val = vars.get_put<Wccl::TSet>(token_ref_to_std_string(n)); 	
+			op.reset(new Wccl::Constant<Wccl::TSet>(*val.get()));
 	}
 ;
 // ----------------------------------------------------------------------------
 // Bool: $?name
+/*
 boolean 
 	[Wccl::Variables& vars] 
-	returns [boost::shared_ptr<Wccl::Bool> val]
+	returns [boost::shared_ptr<Wccl::Constant<Wccl::Bool> > op]
+{
+	boost::shared_ptr<Wccl::Bool> val;
+}
 	: DOLLAR Q_MARK n: SYMBOL { 
 			val = vars.get_put<Wccl::Bool>(token_ref_to_std_string(n)); 	
+			op.reset(new Wccl::Constant<Wccl::Bool>(*val.get()));
 	}
 ;
+*/
 // Boolean $!name
 /*
 boolean_ref [std::string& name]:
@@ -281,28 +312,27 @@ setvar_tset [std::string& value]
 // ----------------------------------------------------------------------------
 // Single or muliple values in string set
 str_set_v_in 
-	[Wccl::Variables& vars] 
-	returns [boost::shared_ptr<Wccl::StrSet> var]
+	[boost::shared_ptr<Wccl::StrSet>& s_set]
+  // One or more comma-separated STRINGs, each inserted into s_set; left-factored so one token of lookahead suffices.
+  : v1: STRING ( COMMA str_set_v_in [s_set] )? {
+    s_set->insert(token_ref_to_ustring(v1));
+  }
+;
+// string set, called as unnamed (temporary) StrSet: 
+// 	calls: [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"] or variable $A
+str_set_v 
+	[Wccl::Variables& vars]
+	returns [boost::shared_ptr<Wccl::Constant<Wccl::StrSet> > val]
 {
-	var.reset(new Wccl::StrSet);
+	boost::shared_ptr<Wccl::StrSet> set(new Wccl::StrSet);
 }
-	: v1: STRING { 
-		var->insert(token_ref_to_std_string(v1).c_str()); 
-	}
-	| v2: STRING COMMA var = str_set_v_in [vars] {
-		var->insert(token_ref_to_std_string(v2).c_str()); 
-	}
-	;
-// string set called as unnamed (temporary) StrSet: 
-// 	calls: [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"]
-// Actually, doing nothing with vars. 
-str_set_v 
-	[Wccl::Variables& vars] 
-	returns [boost::shared_ptr<Wccl::StrSet> val]
 	: LBRACKET RBRACKET { 
-			val.reset(new Wccl::StrSet); // initialize as unnamed empty variable
+		val.reset(new Wccl::Constant<Wccl::StrSet>(*set.get()));
+	}
+	| LBRACKET str_set_v_in [set] RBRACKET {
+		val.reset(new Wccl::Constant<Wccl::StrSet>(*set.get()));
 	}
-	| LBRACKET val = str_set_v_in [vars] RBRACKET
+	| val = str_set [vars]
 	;
 // ----------------------------------------------------------------------------
 // element of sym set 
@@ -337,6 +367,7 @@ sym_set_v [std::string& value]
 */
 // ----------------------------------------------------------------------------
 // boolean:
+/*
 boolean_v 
 	[Wccl::Variables& vars] 
 	returns [boost::shared_ptr<Wccl::Bool> val]
@@ -344,6 +375,7 @@ boolean_v
 	| "False" { val.reset(new Wccl::Bool(false)); }
 	| val = boolean [vars]
 ;
+*/
 // ----------------------------------------------------------------------------
 // position value:
 /*
@@ -477,60 +509,20 @@ et_any [std::string& v]
 ///////////////////////////////////////////////////////////////////////////////
 // ----------------------------------------------------------------------------
 string_operators 
-	[Wccl::Variables& vars, boost::shared_ptr<Wccl::Function<Wccl::StrSet> >& op] 
-	returns [boost::shared_ptr<Wccl::StrSet> ret] 
-	: ret = op_orth [vars] {
-		/*
-		op.reset(
-			new Wccl::Orth(
-				boost::shared_ptr<Wccl::Function<Wccl::StrSet> >(
-					new Wccl::Constant<Wccl::StrSet>(*ret.get())
-				)
-			)
-		);
-		*/ 
-	}
-	| ret = op_base [vars] {
-		/*
-		op.reset(
-			new Wccl::Base(
-				boost::shared_ptr<Wccl::Function<Wccl::StrSet> >(
-					new Wccl::Constant<Wccl::StrSet>(*ret.get())
-				)
-			)
-		);
-		*/ 
-	}
-	| ret = op_lower [vars] {
-		op.reset(
-			new Wccl::ToLower(
-				boost::shared_ptr<Wccl::Function<Wccl::StrSet> >(
-					new Wccl::Constant<Wccl::StrSet>(*ret.get())
-				)
-			)
-		); 
-	}
-	| ret = op_upper [vars]  {
-		op.reset(
-			new Wccl::ToUpper(
-				boost::shared_ptr<Wccl::Function<Wccl::StrSet> >(
-					new Wccl::Constant<Wccl::StrSet>(*ret.get())
-				)
-			)
-		); 
-	}
-	| ret = op_affix [vars]  {
-		op.reset(
-			new Wccl::ToUpper(
-				boost::shared_ptr<Wccl::Function<Wccl::StrSet> >(
-					new Wccl::Constant<Wccl::StrSet>(*ret.get())
-				)
-			)
-		); 
-	}
+	[Wccl::Variables& vars]
+	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
+	// Not offered for now, because the op_orth and op_base rules are commented out:
+	//	: ret = op_orth [vars]
+	//	| ret = op_base [vars]
+	// Each active alternative hands back the operator object built by its sub-rule.
+	: ret = op_lower  [vars] 
+	| ret = op_upper  [vars]
+	| ret = op_affix  [vars] 
+	| ret = str_set_v [vars]
+;
 // Implementations of string operators:
 // ----------------------------------------------------------------------------
+/*
 op_orth [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret]
 {
 	boost::shared_ptr<Wccl::PositionRef> tmpPosRef;
@@ -538,8 +530,10 @@ op_orth [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret]
 	: "orth" LBRACKET tmpPosRef = position_ref [vars] RBRACKET { 
 		// TODO
 	}
-	;
+;
+*/
 // ----------------------------------------------------------------------------
+/*
 op_base [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret]
 {
 	boost::shared_ptr<Wccl::PositionRef> tmpPosRef;
@@ -547,53 +541,42 @@ op_base [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret]
 	: "base" LBRACKET tmpPosRef = position_ref [vars] RBRACKET { 
 		// TODO
 	}
-	;
+;
+*/
 // ----------------------------------------------------------------------------
-op_lower [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret]
+// returns boost::shared_ptr<Wccl::Function<Wccl::StrSet> >
+op_lower 
+	[Wccl::Variables& vars] returns 
+	[boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
 {
-	boost::shared_ptr<Wccl::StrSet> ret_str_set;
-	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > tmp_op;
-}
-	: "lower" LPAREN (
-			ret_str_set = str_set   [vars] | 
-			ret_str_set = str_set_v [vars]
-		) RPAREN {
-		Wccl::ToLower to_lower(get_str_set_expr(ret_str_set));
-		ret = to_lower.apply(get_tmp_context());
+	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > o_ret;
+}
+	: "lower" LPAREN o_ret = string_operators[vars] RPAREN {
+		ret.reset(new Wccl::ToLower(o_ret));
 	}
-	| "lower" LPAREN ret = string_operators[vars, tmp_op] RPAREN
 ;
 // ----------------------------------------------------------------------------
-op_upper [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret]
+op_upper 
+	[Wccl::Variables& vars] 
+	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
 {
-	boost::shared_ptr<Wccl::StrSet> ret_str_set;
-	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > tmp_op;
-}
-	:	"upper" LPAREN (
-			ret_str_set = str_set   [vars] | 
-			ret_str_set = str_set_v [vars]
-		) RPAREN {
-		Wccl::ToUpper to_upper(get_str_set_expr(ret_str_set));
-		ret = to_upper.apply(get_tmp_context());
+	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > o_ret;
+}
+	: "upper" LPAREN o_ret = string_operators[vars] RPAREN {
+		ret.reset(new Wccl::ToUpper(o_ret));
 	}
-	| "upper" LPAREN ret = string_operators[vars, tmp_op] RPAREN
 ;
 // ----------------------------------------------------------------------------
-op_affix [Wccl::Variables& vars] returns [boost::shared_ptr<Wccl::StrSet> ret]
+op_affix 
+	[Wccl::Variables& vars] 
+	returns [boost::shared_ptr<Wccl::Function<Wccl::StrSet> > ret]
 {
-	boost::shared_ptr<Wccl::StrSet> ret_str_set;
-	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > tmp_op;
-}
-	:	"affix" LPAREN (
-			ret_str_set = str_set   [vars] | 
-			ret_str_set = str_set_v [vars] | 
-			ret_str_set = string_operators[vars, tmp_op]
-		) COMMA p_af: INT RPAREN {
-		Wccl::Affix affix(get_str_set_expr(ret_str_set), token_ref_to_int(p_af));
-		ret = affix.apply(get_tmp_context());
+	boost::shared_ptr<Wccl::Function<Wccl::StrSet> > o_ret;
+}
+	: "affix" LPAREN o_ret = string_operators[vars] COMMA offset: INT RPAREN {
+		ret.reset(new Wccl::Affix(o_ret, token_ref_to_int(offset)));
 	}
 ;
-
 ///////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////
 // Predicates returns boost::shared_ptr<Wccl::Function<Wccl::Bool> >
@@ -658,6 +641,21 @@ options {
 	|    '\'' (~'\'')* '\''
 	;
 
+// STRING_APOS
+// options {
+// 	paraphrase = "a string without apostrophe";
+// }
+// 	:	(~'"')* 
+// ;
+
+// STRING_QUOT
+// options {
+// 	paraphrase = "a string without quotation";
+// }
+// 	:	(~'\'')* 
+//	;
+
+
 INT
 options {
 	paraphrase = "Integer";
@@ -665,6 +663,21 @@ options {
 	: ('-'|'+')?('0'..'9')+
 	;	
 
+QUOT_MARK
+options {
+	paraphrase = "Single quotation mark";
+}
+	: '\'' // a lone single-quote character
+	;
+
+APOS_MARK
+options {
+	paraphrase = "Double quotation mark";
+}
+	: '"' // NOTE(review): despite the rule name, this matches the double-quote character, not an apostrophe
+	;
+
+
 Q_MARK
 options {
 	paraphrase = "Query mark";
diff --git a/wcclparser/strop_main.cpp b/wcclparser/strop_main.cpp
index c8381c5..4f4d485 100644
--- a/wcclparser/strop_main.cpp
+++ b/wcclparser/strop_main.cpp
@@ -42,11 +42,11 @@ int main()
 						std::cerr << "Parsed expression: " << retStr->to_raw_string() << std::endl;
 					}
 					else {
-						std::cerr << "Problem while parsing -- haven't StrSet object in boost::shared_ptr!" << std::endl;
+						std::cerr << "Problem while parsing -- haven't got StrSet object in boost::shared_ptr!" << std::endl;
 					}
 				}
 				else {
-					std::cerr << "Problem while parsing -- haven't Function<Wccl::StrSet> object in boost::shared_ptr!" << std::endl;
+					std::cerr << "Problem while parsing -- haven't got Function<Wccl::StrSet> object in boost::shared_ptr!" << std::endl;
 				}
 			}
 			catch (antlr::MismatchedTokenException &e) {
diff --git a/wcclparser/val_main.cpp b/wcclparser/val_main.cpp
index 2148656..8a379c5 100644
--- a/wcclparser/val_main.cpp
+++ b/wcclparser/val_main.cpp
@@ -31,7 +31,7 @@ int main()
 		}
     else {
 			try {
-	      valRet = parser.parseValue(str_in);
+	      // valRet = parser.parseValue(str_in);
 
 				/*
 				if (retOp.get()) {
-- 
GitLab