From f741ad7b90cd01c71224b14ced6cc72056c43621 Mon Sep 17 00:00:00 2001
From: Adam Wardynski <award@.(win7-laptop)>
Date: Fri, 29 Apr 2011 09:29:36 +0200
Subject: [PATCH] Parsing WcclFile.

---
 libwccl/parser/grammar.g | 222 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 218 insertions(+), 4 deletions(-)

diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g
index 932d3e1..39b47dc 100644
--- a/libwccl/parser/grammar.g
+++ b/libwccl/parser/grammar.g
@@ -95,6 +95,9 @@ header {
 	#include <libwccl/ops/match/actions/unmarkmatch.h>
 	#include <libwccl/ops/functions/match/submatch.h>
 
+	// Wccl whole file syntax
+	#include <libwccl/wcclfile.h>
+	
 	// Unicode String
 	#include <unicode/uniset.h>
 	#include <unicode/unistr.h>
@@ -183,7 +186,7 @@ parse_strset_operator
 	: body = strset_operator [scope] {
 			res.reset(new Operator<StrSet>(body, scope.variables()));
 	}
-	EOF
+	(EOF | SEMI)
 ;
 
 // ----------------------------------------------------------------------------
@@ -199,7 +202,7 @@ parse_bool_operator
 	: body = bool_operator [scope] {
 			res.reset(new Operator<Bool>(body, scope.variables()));
 	}
-	EOF
+	(EOF | SEMI)
 ;
 
 // ----------------------------------------------------------------------------
@@ -215,7 +218,7 @@ parse_symset_operator
 	: body = symset_operator [scope] {
 			res.reset(new Operator<TSet>(body, scope.variables()));
 	}
-	EOF
+	(EOF | SEMI)
 ;
 
 // ----------------------------------------------------------------------------
@@ -231,7 +234,23 @@ parse_position_operator
 	: body = position_operator [scope] {
 			res.reset(new Operator<Position>(body, scope.variables()));
 	}
-	EOF
+	(EOF | SEMI)
+;
+
+// ----------------------------------------------------------------------------
+// Rule for parsing match operator with scope.
+// Returns boost::shared_ptr<Operator<Match> >
+parse_match_operator
+	[const Corpus2::Tagset &tagset]
+	returns [boost::shared_ptr<Operator<Match> > res]
+{	
+	ParsingScope scope(tagset);
+	boost::shared_ptr<Function<Match> > body;
+}
+	: body = match_operator [scope] {
+			res.reset(new Operator<Match>(body, scope.variables())); // pairs the parsed body with the variables of its scope
+	}
+	(EOF | SEMI) // the operator ends at end of input or at a semicolon
 ;
 
 // ----------------------------------------------------------------------------
@@ -272,6 +291,25 @@ parse_match_rule
 	: ret_match = match_rule_operator[scope] 
 ;
 
+// ----------------------------------------------------------------------------
+// ----------------------------------------------------------------------------
+// Rule for parsing whole wccl files: operator sections and at most one tag rule sequence.
+parse_wccl_file
+	[const Corpus2::Tagset& tagset]
+	returns [boost::shared_ptr<WcclFile> wccl_file]
+{
+	wccl_file = boost::make_shared<WcclFile>(tagset); // result object is created up front and filled in as sections are parsed
+	boost::shared_ptr<TagRuleSequence> rule_seq;
+}
+	: (any_operator_section [*wccl_file] )* // zero or more operator sections come first
+	  (
+		rule_seq = parse_rule_sequence [tagset] { wccl_file->set_tag_rules(rule_seq); }
+		(any_operator_section [*wccl_file] )* // further operator sections may follow the tag rules
+	  )? // the tag rule sequence, with whatever follows it, is optional
+	EOF
+;
+
+
 ///////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////
 // VALUES
@@ -1764,6 +1802,139 @@ match_operator
 	)*
 ;
 
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+// WCCL FILE PARSING RULES
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+
+any_operator_section // Parses one operator section of any kind and adds it to wccl_file.
+	[WcclFile& wccl_file] // file being built; its tagset is handed on to the section rules
+{
+	boost::shared_ptr<UntypedOpSequence> untyped_seq;
+	boost::shared_ptr<OpSequence<Bool> > bool_seq;
+	boost::shared_ptr<OpSequence<TSet> > symset_seq;
+	boost::shared_ptr<OpSequence<StrSet> > strset_seq;
+	boost::shared_ptr<OpSequence<Position> > pos_seq;
+	boost::shared_ptr<OpSequence<Match> > m_seq;
+}
+	: untyped_seq = untyped_operator_sequence [wccl_file.tagset()] {
+			wccl_file.add_untyped_section(untyped_seq); // untyped sections have their own add method
+		}
+	| bool_seq = bool_operator_sequence [wccl_file.tagset()] {
+			wccl_file.add_section(bool_seq);
+		}
+	| symset_seq = symset_operator_sequence [wccl_file.tagset()] {
+			wccl_file.add_section(symset_seq);
+		}
+	| strset_seq = strset_operator_sequence [wccl_file.tagset()] {
+			wccl_file.add_section(strset_seq);
+		}
+	| pos_seq = position_operator_sequence [wccl_file.tagset()] {
+			wccl_file.add_section(pos_seq);
+		}
+	| m_seq = match_operator_sequence [wccl_file.tagset()] {
+			wccl_file.add_section(m_seq);
+		}
+;
+
+bool_operator_sequence // Parses a named section of bool operators: prefix token, name string, parenthesised operators.
+	[const Corpus2::Tagset& tagset]
+	returns [boost::shared_ptr<OpSequence<Bool> > seq]
+{
+	boost::shared_ptr<Operator<Bool> > op;
+}
+	: BOOL_SECTION_PREFIX name: STRING {
+			seq.reset(new OpSequence<Bool>(token_ref_to_std_string(name))); // sequence is named after the STRING token
+		}
+		LPAREN
+			(op = parse_bool_operator [tagset] { seq->append(op); })+ // at least one operator is required
+		RPAREN
+;
+
+symset_operator_sequence // Parses a named section of symbol set operators: prefix token, name string, parenthesised operators.
+	[const Corpus2::Tagset& tagset]
+	returns [boost::shared_ptr<OpSequence<TSet> > seq]
+{
+	boost::shared_ptr<Operator<TSet> > op;
+}
+	: TST_SECTION_PREFIX name: STRING {
+			seq.reset(new OpSequence<TSet>(token_ref_to_std_string(name))); // sequence is named after the STRING token
+		}
+		LPAREN
+			(op = parse_symset_operator [tagset] { seq->append(op); })+ // at least one operator is required
+		RPAREN
+;
+
+strset_operator_sequence // Parses a named section of string set operators: prefix token, name string, parenthesised operators.
+	[const Corpus2::Tagset& tagset]
+	returns [boost::shared_ptr<OpSequence<StrSet> > seq]
+{
+	boost::shared_ptr<Operator<StrSet> > op;
+}
+	: STR_SECTION_PREFIX name: STRING {
+			seq.reset(new OpSequence<StrSet>(token_ref_to_std_string(name))); // sequence is named after the STRING token
+		}
+		LPAREN
+			(op = parse_strset_operator [tagset] { seq->append(op); })+ // at least one operator is required
+		RPAREN
+;
+
+position_operator_sequence // Parses a named section of position operators: prefix token, name string, parenthesised operators.
+	[const Corpus2::Tagset& tagset]
+	returns [boost::shared_ptr<OpSequence<Position> > seq]
+{
+	boost::shared_ptr<Operator<Position> > op;
+}
+	: POS_SECTION_PREFIX name: STRING {
+			seq.reset(new OpSequence<Position>(token_ref_to_std_string(name))); // sequence is named after the STRING token
+		}
+		LPAREN
+			(op = parse_position_operator [tagset] { seq->append(op); })+ // at least one operator is required
+		RPAREN
+;
+
+untyped_operator_sequence // Parses a named section whose operators may be of any type: AT_MARK, name string, parenthesised operators.
+	[const Corpus2::Tagset& tagset]
+	returns [boost::shared_ptr<UntypedOpSequence> seq]
+{
+	boost::shared_ptr<FunctionalOperator> op;
+}
+	: AT_MARK name: STRING { 
+			seq.reset(new UntypedOpSequence(token_ref_to_std_string(name))); // sequence is named after the STRING token
+		}
+		LPAREN
+			(op = any_operator [tagset] { seq->append(op); })+ // at least one operator is required; any_operator decides its type
+		RPAREN
+;
+
+match_operator_sequence // Parses a named section of match operators: prefix token, name string, parenthesised operators.
+	[const Corpus2::Tagset& tagset]
+	returns [boost::shared_ptr<OpSequence<Match> > seq]
+{
+	boost::shared_ptr<Operator<Match> > op;
+}
+	: MATCH_SECTION_PREFIX name: STRING {
+			seq.reset(new OpSequence<Match>(token_ref_to_std_string(name))); // sequence is named after the STRING token
+		}
+		LPAREN
+			(op = parse_match_operator [tagset] { seq->append(op); })+ // at least one operator is required
+		RPAREN
+;
+
+any_operator // Parses one operator of any type; syntactic predicates pick the typed rule to use.
+	[const Corpus2::Tagset& tagset]
+	returns [boost::shared_ptr<FunctionalOperator> op]
+{
+	ParsingScope _s(tagset); // throwaway scope for the predicates only; built per call so it is never bound to a tagset from an earlier parse
+}
+	: (position_operator [_s]) => op = parse_position_operator [tagset]
+	| (symset_operator [_s]) => op = parse_symset_operator [tagset]
+	| (strset_operator [_s]) => op = parse_strset_operator [tagset]
+	| (match_operator [_s]) => op = parse_match_operator [tagset]
+	| op = parse_bool_operator [tagset] // fallback when none of the predicates above matched
+;
+
 // ----------------------------------------------------------------------------
 // ----------------------------------------------------------------------------
 // Tagging actions and rules:
@@ -2417,6 +2588,41 @@ options {
 	: "$m:"
 ;
 
+STR_SECTION_PREFIX // Literal "@s:"; strset_operator_sequence starts with this token.
+options {
+	paraphrase = "Wccl file string operators section prefix";
+}
+	: "@s:"
+;
+
+POS_SECTION_PREFIX // Literal "@p:"; position_operator_sequence starts with this token.
+options {
+	paraphrase = "Wccl file position operators section prefix";
+}
+	: "@p:"
+;
+
+BOOL_SECTION_PREFIX // Literal "@b:"; bool_operator_sequence starts with this token.
+options {
+	paraphrase = "Wccl file bool operators section prefix";
+}
+	: "@b:"
+;
+
+TST_SECTION_PREFIX // Literal "@t:"; symset_operator_sequence starts with this token.
+options {
+	paraphrase = "Wccl file symset operators section prefix";
+}
+	: "@t:"
+;
+
+MATCH_SECTION_PREFIX // Literal "@m:"; match_operator_sequence starts with this token.
+options {
+	paraphrase = "Wccl file match operators section prefix";
+}
+	: "@m:"
+;
+
 LBRACKET 
 options {
 	paraphrase = "'['";
@@ -2466,6 +2672,7 @@ options {
 	: '@' 
 ;
 
+
 COMMA
 options { 
 	paraphrase = "','";
@@ -2487,6 +2694,13 @@ options {
 	: ':'
 ;
 
+SEMI // Semicolon; accepted as an operator terminator next to EOF by the parse_*_operator rules.
+options {
+	paraphrase = "';'";
+}
+	: ';'
+;
+
 SYMBOL
 options { 
 	paraphrase = "Symbol"; 
-- 
GitLab