From 24f23044df96a743f2daaadb9fff22c21691afe1 Mon Sep 17 00:00:00 2001
From: Adam Wardynski <award@.(win7-laptop)>
Date: Wed, 4 May 2011 15:10:34 +0200
Subject: [PATCH] Fix issues around parsing scope and sequence syntax. Each
 subsequent rule was reusing the same variables object, which was wrong.
 Sequence parsing had to be changed so that operators are separated by
 semicolons instead of each ending with one, and sequences were not actually
 being given the file's lexicons in the first place.

---
 libwccl/parser/grammar.g      | 218 ++++++++++++++++++++--------------
 libwccl/parser/parsingscope.h |  28 +++--
 libwccl/wcclfile.h            |  18 ++-
 3 files changed, 167 insertions(+), 97 deletions(-)

diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g
index 685f9bc..101fcf3 100644
--- a/libwccl/parser/grammar.g
+++ b/libwccl/parser/grammar.g
@@ -183,13 +183,11 @@ parse_strset_operator
 	[const Corpus2::Tagset &tagset]
 	returns [boost::shared_ptr<Operator<StrSet> > res]
 {
-	ParsingScope scope(tagset);
-	boost::shared_ptr<Function<StrSet> > body;
+	Lexicons empty_lex;
+	ParsingScope scope(tagset, empty_lex);
 }
-	: body = strset_operator [scope] {
-			res.reset(new Operator<StrSet>(body, scope.variables()));
-	}
-	(EOF | SEMI)
+	: res = functional_operator_strset [scope]
+	EOF
 ;
 
 // ----------------------------------------------------------------------------
@@ -199,13 +197,11 @@ parse_bool_operator
 	[const Corpus2::Tagset &tagset]
 	returns [boost::shared_ptr<Operator<Bool> > res]
 {
-	ParsingScope scope(tagset);
-	boost::shared_ptr<Function<Bool> > body;
+	Lexicons empty_lex;
+	ParsingScope scope(tagset, empty_lex);
 }
-	: body = bool_operator [scope] {
-			res.reset(new Operator<Bool>(body, scope.variables()));
-	}
-	(EOF | SEMI)
+	: res = functional_operator_bool [scope]
+	EOF
 ;
 
 // ----------------------------------------------------------------------------
@@ -215,13 +211,11 @@ parse_symset_operator
 	[const Corpus2::Tagset &tagset]
 	returns [boost::shared_ptr<Operator<TSet> > res]
 {
-	ParsingScope scope(tagset);
-	boost::shared_ptr<Function<TSet> > body;
+	Lexicons empty_lex;
+	ParsingScope scope(tagset, empty_lex);
 }
-	: body = symset_operator [scope] {
-			res.reset(new Operator<TSet>(body, scope.variables()));
-	}
-	(EOF | SEMI)
+	: res = functional_operator_symset [scope]
+	EOF
 ;
 
 // ----------------------------------------------------------------------------
@@ -231,13 +225,11 @@ parse_position_operator
 	[const Corpus2::Tagset &tagset]
 	returns [boost::shared_ptr<Operator<Position> > res]
 {	
-	ParsingScope scope(tagset);
-	boost::shared_ptr<Function<Position> > body;
+	Lexicons empty_lex;
+	ParsingScope scope(tagset, empty_lex);
 }
-	: body = position_operator [scope] {
-			res.reset(new Operator<Position>(body, scope.variables()));
-	}
-	(EOF | SEMI)
+	: res = functional_operator_position [scope]
+	EOF
 ;
 
 // ----------------------------------------------------------------------------
@@ -247,13 +239,11 @@ parse_match_operator
 	[const Corpus2::Tagset &tagset]
 	returns [boost::shared_ptr<Operator<Match> > res]
 {	
-	ParsingScope scope(tagset);
-	boost::shared_ptr<Function<Match> > body;
+	Lexicons empty_lex;
+	ParsingScope scope(tagset, empty_lex);
 }
-	: body = match_operator [scope] {
-			res.reset(new Operator<Match>(body, scope.variables()));
-	}
-	(EOF | SEMI)
+	: res = functional_operator_match [scope]
+	EOF
 ;
 
 // ----------------------------------------------------------------------------
@@ -264,7 +254,8 @@ parse_single_tag_rule
 	[const Corpus2::Tagset &tagset]
 	returns [boost::shared_ptr<TagRule> rle]
 {	
-	ParsingScope scope(tagset);
+	Lexicons empty_lex;
+	ParsingScope scope(tagset, empty_lex);
 }
 	: rle = tag_rule [scope]
 ;
@@ -275,7 +266,8 @@ parse_tag_rule_sequence
 	[const Corpus2::Tagset& tagset]
 	returns [boost::shared_ptr<TagRuleSequence> rule_seq]
 {
-	ParsingScope scope(tagset);
+	Lexicons empty_lex;
+	ParsingScope scope(tagset, empty_lex);
 }
 	: rule_seq = tag_rules[scope]
 ;
@@ -288,7 +280,8 @@ parse_match_rule
 	[const Corpus2::Tagset& tagset]
 	returns [boost::shared_ptr<MatchRule> ret_match]
 {
-	ParsingScope scope(tagset);
+	Lexicons empty_lex;
+	ParsingScope scope(tagset, empty_lex);
 	scope.variables().get_put<Match>("_M");
 }
 	: ret_match = match_rule_operator[scope] 
@@ -745,6 +738,19 @@ symset_operator
 	| LPAREN ret = symset_operator [scope] RPAREN
 ;
 
+// ----------------------------------------------------------------------------
+// Wrapper from Function<TSet> to Operator<TSet>
+functional_operator_symset
+	[ParsingScope& scope]
+	returns [boost::shared_ptr<Operator<TSet> > op]
+{
+	boost::shared_ptr<Function<TSet> > body;
+}
+	: body = symset_operator [scope] {
+		op.reset(new Operator<TSet>(body, scope.variables()));
+	}
+;
+
 // ----------------------------------------------------------------------------
 // A wrapper for symset variable and symset value.
 symset_var_val
@@ -901,6 +907,19 @@ position_operator
 	)?
 ;
 
+// ----------------------------------------------------------------------------
+// Wrapper from Function<Position> to Operator<Position>
+functional_operator_position
+	[ParsingScope& scope]
+	returns [boost::shared_ptr<Operator<Position> > op]
+{
+	boost::shared_ptr<Function<Position> > body;
+}
+	: body = position_operator [scope] {
+		op.reset(new Operator<Position>(body, scope.variables()));
+	}
+;
+
 // ----------------------------------------------------------------------------
 // Wrapper for position variable and position value
 position_var_val
@@ -984,6 +1003,18 @@ strset_operator [ParsingScope& scope]
 	| LPAREN ret = strset_operator [scope] RPAREN
 ;
 
+// ----------------------------------------------------------------------------
+// Wrapper from Function<StrSet> to Operator<StrSet>
+functional_operator_strset
+	[ParsingScope& scope]
+	returns [boost::shared_ptr<Operator<StrSet> > op]
+{
+	boost::shared_ptr<Function<StrSet> > body;
+}
+	: body = strset_operator [scope] {
+		op.reset(new Operator<StrSet>(body, scope.variables()));
+	}
+;
 // ----------------------------------------------------------------------------
 // Orth operator.
 strset_orth 
@@ -1144,6 +1175,19 @@ bool_operator
 	| LPAREN ret = bool_operator [scope] RPAREN
 ;
 
+// ----------------------------------------------------------------------------
+// Wrapper from Function<Bool> to Operator<Bool>
+functional_operator_bool
+	[ParsingScope& scope]
+	returns [boost::shared_ptr<Operator<Bool> > op]
+{
+	boost::shared_ptr<Function<Bool> > body;
+}
+	: body = bool_operator [scope] {
+		op.reset(new Operator<Bool>(body, scope.variables()));
+	}
+;
+
 // ----------------------------------------------------------------------------
 // comma-separated predicates (bool operators)
 bool_operator_comma_sep
@@ -1823,6 +1867,19 @@ match_operator
 	)*
 ;
 
+// ----------------------------------------------------------------------------
+// Wrapper from Function<Match> to Operator<Match>
+functional_operator_match
+	[ParsingScope& scope]
+	returns [boost::shared_ptr<Operator<Match> > op]
+{
+	boost::shared_ptr<Function<Match> > body;
+}
+	: body = match_operator [scope] {
+		op.reset(new Operator<Match>(body, scope.variables()));
+	}
+;
+
 ///////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////
 // WCCL FILE PARSING RULES
@@ -1852,121 +1909,132 @@ any_operator_section
 	boost::shared_ptr<OpSequence<Position> > pos_seq;
 	boost::shared_ptr<OpSequence<Match> > m_seq;
 }
-	: untyped_seq = untyped_operator_sequence [wccl_file.tagset()] {
+	: untyped_seq = untyped_operator_sequence [wccl_file] {
 			wccl_file.add_untyped_section(untyped_seq);
 		}
-	| bool_seq = bool_operator_sequence [wccl_file.tagset()] {
+	| bool_seq = bool_operator_sequence [wccl_file] {
 			wccl_file.add_section(bool_seq);
 		}
-	| symset_seq = symset_operator_sequence [wccl_file.tagset()] {
+	| symset_seq = symset_operator_sequence [wccl_file] {
 			wccl_file.add_section(symset_seq);
 		}
-	| strset_seq = strset_operator_sequence [wccl_file.tagset()] {
+	| strset_seq = strset_operator_sequence [wccl_file] {
 			wccl_file.add_section(strset_seq);
 		}
-	| pos_seq = position_operator_sequence [wccl_file.tagset()] {
+	| pos_seq = position_operator_sequence [wccl_file] {
 			wccl_file.add_section(pos_seq);
 		}
-	| m_seq = match_operator_sequence [wccl_file.tagset()] {
+	| m_seq = match_operator_sequence [wccl_file] {
 			wccl_file.add_section(m_seq);
 		}
 ;
 
 bool_operator_sequence
-	[const Corpus2::Tagset& tagset]
+	[const WcclFile& wccl_file]
 	returns [boost::shared_ptr<OpSequence<Bool> > seq]
 {
+	ParsingScope scope(wccl_file);
 	boost::shared_ptr<Operator<Bool> > op;
 }
 	: BOOL_SECTION_PREFIX name: STRING {
 			seq.reset(new OpSequence<Bool>(token_ref_to_std_string(name)));
 		}
 		LPAREN
-			(op = parse_bool_operator [tagset] { seq->append(op); })+
+			op = functional_operator_bool [scope] { seq->append(op); }
+			(SEMI op = functional_operator_bool [scope.new_scope()] { seq->append(op); })*
 		RPAREN
 ;
 
 symset_operator_sequence
-	[const Corpus2::Tagset& tagset]
+	[const WcclFile& wccl_file]
 	returns [boost::shared_ptr<OpSequence<TSet> > seq]
 {
+	ParsingScope scope(wccl_file);
 	boost::shared_ptr<Operator<TSet> > op;
 }
 	: TST_SECTION_PREFIX name: STRING {
 			seq.reset(new OpSequence<TSet>(token_ref_to_std_string(name)));
 		}
 		LPAREN
-			(op = parse_symset_operator [tagset] { seq->append(op); })+
+			op = functional_operator_symset [scope] { seq->append(op); }
+			(SEMI op = functional_operator_symset [scope.new_scope()] { seq->append(op); })*
 		RPAREN
 ;
 
 strset_operator_sequence
-	[const Corpus2::Tagset& tagset]
+	[const WcclFile& wccl_file]
 	returns [boost::shared_ptr<OpSequence<StrSet> > seq]
 {
+	ParsingScope scope(wccl_file);
 	boost::shared_ptr<Operator<StrSet> > op;
 }
 	: STR_SECTION_PREFIX name: STRING {
 			seq.reset(new OpSequence<StrSet>(token_ref_to_std_string(name)));
 		}
 		LPAREN
-			(op = parse_strset_operator [tagset] { seq->append(op); })+
+			op = functional_operator_strset [scope] { seq->append(op); }
+			(SEMI op = functional_operator_strset [scope.new_scope()] { seq->append(op); })*
 		RPAREN
 ;
 
 position_operator_sequence
-	[const Corpus2::Tagset& tagset]
+	[const WcclFile& wccl_file]
 	returns [boost::shared_ptr<OpSequence<Position> > seq]
 {
+	ParsingScope scope(wccl_file);
 	boost::shared_ptr<Operator<Position> > op;
 }
 	: POS_SECTION_PREFIX name: STRING {
 			seq.reset(new OpSequence<Position>(token_ref_to_std_string(name)));
 		}
 		LPAREN
-			(op = parse_position_operator [tagset] { seq->append(op); })+
+			op = functional_operator_position [scope] { seq->append(op); }
+			(SEMI op = functional_operator_position [scope.new_scope()] { seq->append(op); })*
 		RPAREN
 ;
 
 untyped_operator_sequence
-	[const Corpus2::Tagset& tagset]
+	[const WcclFile& wccl_file]
 	returns [boost::shared_ptr<UntypedOpSequence> seq]
 {
+	ParsingScope scope(wccl_file);
 	boost::shared_ptr<FunctionalOperator> op;
 }
 	: AT_MARK name: STRING { 
 			seq.reset(new UntypedOpSequence(token_ref_to_std_string(name)));
 		}
 		LPAREN
-			(op = any_operator [tagset] { seq->append(op); })+
+			op = functional_operator_any [scope] { seq->append(op); }
+			(SEMI op = functional_operator_any [scope.new_scope()] { seq->append(op); })*
 		RPAREN
 ;
 
 match_operator_sequence
-	[const Corpus2::Tagset& tagset]
+	[const WcclFile& wccl_file]
 	returns [boost::shared_ptr<OpSequence<Match> > seq]
 {
+	ParsingScope scope(wccl_file);
 	boost::shared_ptr<Operator<Match> > op;
 }
 	: MATCH_SECTION_PREFIX name: STRING {
 			seq.reset(new OpSequence<Match>(token_ref_to_std_string(name)));
 		}
 		LPAREN
-			(op = parse_match_operator [tagset] { seq->append(op); })+
+			op = functional_operator_match [scope] { seq->append(op); }
+			(SEMI op = functional_operator_match [scope.new_scope()] { seq->append(op); })*
 		RPAREN
 ;
 
-any_operator 
-	[const Corpus2::Tagset& tagset]
+functional_operator_any 
+	[ParsingScope& scope]
 	returns [boost::shared_ptr<FunctionalOperator> op]
 {
-	static ParsingScope _s(tagset); // just a bogus scope for predicates
 }
-	: (position_operator [_s]) => op = parse_position_operator [tagset]
-	| (symset_operator [_s]) => op = parse_symset_operator [tagset]
-	| (strset_operator [_s]) => op = parse_strset_operator [tagset]
-	| (match_operator [_s]) => op = parse_match_operator [tagset]
-	| op = parse_bool_operator [tagset]
+	: (position_operator [scope]) => op = functional_operator_position [scope]
+	| (symset_operator [scope]) => op = functional_operator_symset [scope]
+	| (strset_operator [scope]) => op = functional_operator_strset [scope]
+	| (match_operator [scope]) => op = functional_operator_match [scope]
+	| op = functional_operator_bool [scope]
 ;
 
 // ----------------------------------------------------------------------------
@@ -2031,45 +2099,21 @@ tag_rule
 					new TagRule(token_ref_to_std_string(name), scope.variables(), actions));
 			}
 		}
-/*
-	: "rule" LPAREN name: STRING COMMA
-	(
-		(bool_operator[scope]) =>
-		(
-			condition = bool_operator [scope] COMMA
-			actions = action_sequence [scope] {
-				// rule(NAME, COND, ACTIONS)
-				rle.reset(
-					new TagRule(token_ref_to_std_string(name), scope.variables(), actions, condition));
-			}
-		)
-	|
-		(
-			actions = action_sequence [scope] {
-				// rule(NAME, ACTIONS)
-				rle.reset(new TagRule(token_ref_to_std_string(name), scope.variables(), actions));
-			}
-		)
-	)
-	RPAREN
-*/
 ;
 
 // Rule sequence
 tag_rule_sequence
-	[ParsingScope& scope]
+	[const ParsingScope& scope]
 	returns [boost::shared_ptr<TagRuleSequence> rule_seq]
 {
-	// FIXME czy tutaj przypadkiem nie powinno byc shared_ptr?
 	boost::shared_ptr<TagRule> rle;
-
 	rule_seq.reset(new TagRuleSequence());
 }
-	: rle = tag_rule [scope] {
+	: rle = tag_rule [scope.new_scope()] {
 		rule_seq->push_back(*rle);
 	}
 	(
-		SEMI rle = tag_rule [scope] {
+		SEMI rle = tag_rule [scope.new_scope()] {
 			rule_seq->push_back(*rle);
 		}
 	)*
@@ -2078,7 +2122,7 @@ tag_rule_sequence
 // Temporary name. 
 // This is wrapper for tag_rule_sequence in rules section in the wccl file
 tag_rules
-	[ParsingScope& scope]
+	[const ParsingScope& scope]
 	returns [boost::shared_ptr<TagRuleSequence> rule_seq]
 	: "tag_rules" LPAREN rule_seq = tag_rule_sequence [scope] RPAREN {
 		//
diff --git a/libwccl/parser/parsingscope.h b/libwccl/parser/parsingscope.h
index 9a85ba4..acbf6ae 100644
--- a/libwccl/parser/parsingscope.h
+++ b/libwccl/parser/parsingscope.h
@@ -3,20 +3,32 @@
 
 #include <libwccl/variables.h>
 #include <libwccl/lexicon/lexicons.h>
+#include <libwccl/wcclfile.h>
 
 namespace Wccl {
 
 class ParsingScope
 {
 public:
-	ParsingScope(const Corpus2::Tagset& tagset)
+	ParsingScope(const Corpus2::Tagset& tagset, const Lexicons& lexicons)
 		: tagset_(tagset),
 		  variables_(),
-		  lexicons_(new Lexicons())
+		  lexicons_(lexicons)
 	{
 	}
 
-	const Corpus2::Tagset& tagset() {
+	explicit ParsingScope(const WcclFile& file)
+		: tagset_(file.tagset()),
+		  variables_(),
+		  lexicons_(file.get_lexicons())
+	{
+	}
+
+	ParsingScope new_scope() const {
+		return ParsingScope(tagset_, lexicons_);
+	}
+
+	const Corpus2::Tagset& tagset() const {
 		return tagset_;
 	}
 
@@ -24,18 +36,18 @@ public:
 		return variables_;
 	}
 
-	boost::shared_ptr<Lexicons> lexicons_ptr() {
-		return lexicons_;
+	const Variables& variables() const {
+		return variables_;
 	}
 
-	Lexicons& lexicons() {
-		return *lexicons_;
+	const Lexicons& lexicons() const {
+		return lexicons_;
 	}
 
 private:
 	const Corpus2::Tagset& tagset_;
 	Variables variables_;
-	boost::shared_ptr<Lexicons> lexicons_;
+	const Lexicons& lexicons_;
 };
 
 } /* end ns Wccl */
diff --git a/libwccl/wcclfile.h b/libwccl/wcclfile.h
index 5dc4754..03293aa 100644
--- a/libwccl/wcclfile.h
+++ b/libwccl/wcclfile.h
@@ -21,7 +21,7 @@ class WcclFile
 	  WcclFileOpSections<OpSequence<Match> >
 {
 public:
-	WcclFile(const Corpus2::Tagset tagset);
+	explicit WcclFile(const Corpus2::Tagset& tagset);
 
 	const std::vector<boost::shared_ptr<UntypedOpSequence> >& untyped_sections();
 	template<class T>
@@ -77,6 +77,8 @@ public:
 	bool has_lexicon(const std::string& name) const;
 	boost::shared_ptr<const Lexicon> get_lexicon_ptr(const std::string& name) const;
 	const Lexicon& get_lexicon(const std::string& name) const;
+	boost::shared_ptr<const Lexicons> get_lexicons_ptr() const;
+	const Lexicons& get_lexicons() const;
 
 	void add_untyped_section(const boost::shared_ptr<UntypedOpSequence>& section);
 	void add_untyped_section(const boost::shared_ptr<const UntypedOpSequence>& section);
@@ -116,7 +118,7 @@ private:
 namespace Wccl {
 
 inline
-WcclFile::WcclFile(const Corpus2::Tagset tagset)
+WcclFile::WcclFile(const Corpus2::Tagset& tagset)
   : tagset_(tagset)
 {
 }
@@ -308,6 +310,18 @@ const Lexicon& WcclFile::get_lexicon(const std::string &name) const
 	return lexicons_->get(name);
 }
 
+inline
+boost::shared_ptr<const Lexicons> WcclFile::get_lexicons_ptr() const
+{
+	return lexicons_;
+}
+
+inline
+const Lexicons& WcclFile::get_lexicons() const
+{
+	return *lexicons_;
+}
+
 inline
 void WcclFile::add_untyped_section(const boost::shared_ptr<UntypedOpSequence>& section)
 {
-- 
GitLab