From f741ad7b90cd01c71224b14ced6cc72056c43621 Mon Sep 17 00:00:00 2001
From: Adam Wardynski <award@.(win7-laptop)>
Date: Fri, 29 Apr 2011 09:29:36 +0200
Subject: [PATCH] Parsing WcclFile.

---
 libwccl/parser/grammar.g | 248 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 244 insertions(+), 4 deletions(-)

diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g
index 932d3e1..39b47dc 100644
--- a/libwccl/parser/grammar.g
+++ b/libwccl/parser/grammar.g
@@ -95,6 +95,9 @@ header {
 	#include <libwccl/ops/match/actions/unmarkmatch.h>
 	#include <libwccl/ops/functions/match/submatch.h>
 
+	// Wccl whole file syntax
+	#include <libwccl/wcclfile.h>
+
 	// Unicode String
 	#include <unicode/uniset.h>
 	#include <unicode/unistr.h>
@@ -183,7 +186,7 @@ parse_strset_operator
 	: body = strset_operator [scope] {
 		res.reset(new Operator<StrSet>(body, scope.variables()));
 	}
-	EOF
+	(EOF | SEMI)
 ;
 
 // ----------------------------------------------------------------------------
@@ -199,7 +202,7 @@ parse_bool_operator
 	: body = bool_operator [scope] {
 		res.reset(new Operator<Bool>(body, scope.variables()));
 	}
-	EOF
+	(EOF | SEMI)
 ;
 
 // ----------------------------------------------------------------------------
@@ -215,7 +218,7 @@ parse_symset_operator
 	: body = symset_operator [scope] {
 		res.reset(new Operator<TSet>(body, scope.variables()));
 	}
-	EOF
+	(EOF | SEMI)
 ;
 
 // ----------------------------------------------------------------------------
@@ -231,7 +234,23 @@ parse_position_operator
 	: body = position_operator [scope] {
 		res.reset(new Operator<Position>(body, scope.variables()));
 	}
-	EOF
+	(EOF | SEMI)
+;
+
+// ----------------------------------------------------------------------------
+// Rule for parsing match operator with scope.
+// Returns boost::shared_ptr<Operator<Match> >
+parse_match_operator
+	[const Corpus2::Tagset &tagset]
+	returns [boost::shared_ptr<Operator<Match> > res]
+{
+	ParsingScope scope(tagset);
+	boost::shared_ptr<Function<Match> > body;
+}
+	: body = match_operator [scope] {
+		res.reset(new Operator<Match>(body, scope.variables()));
+	}
+	(EOF | SEMI)
 ;
 
 // ----------------------------------------------------------------------------
@@ -272,6 +291,28 @@ parse_match_rule
 	: ret_match = match_rule_operator[scope]
 ;
 
+// ----------------------------------------------------------------------------
+// ----------------------------------------------------------------------------
+// Rule for parsing wccl files.
+// A file is any number of operator sections, optionally followed by one tag
+// rule sequence and further operator sections, and must end with EOF.
+// Returns boost::shared_ptr<WcclFile>
+parse_wccl_file
+	[const Corpus2::Tagset& tagset]
+	returns [boost::shared_ptr<WcclFile> wccl_file]
+{
+	wccl_file = boost::make_shared<WcclFile>(tagset);
+	boost::shared_ptr<TagRuleSequence> rule_seq;
+}
+	: (any_operator_section [*wccl_file] )*
+	(
+		rule_seq = parse_rule_sequence [tagset] { wccl_file->set_tag_rules(rule_seq); }
+		(any_operator_section [*wccl_file] )*
+	)?
+	EOF
+;
+
+
 ///////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////
 // VALUES
@@ -1764,6 +1805,162 @@ match_operator
 	)*
 ;
 
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+// WCCL FILE PARSING RULES
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+
+// Parses one operator section of any kind (untyped or one of the typed
+// sections) and adds the resulting sequence to the given WcclFile.
+any_operator_section
+	[WcclFile& wccl_file]
+{
+	boost::shared_ptr<UntypedOpSequence> untyped_seq;
+	boost::shared_ptr<OpSequence<Bool> > bool_seq;
+	boost::shared_ptr<OpSequence<TSet> > symset_seq;
+	boost::shared_ptr<OpSequence<StrSet> > strset_seq;
+	boost::shared_ptr<OpSequence<Position> > pos_seq;
+	boost::shared_ptr<OpSequence<Match> > m_seq;
+}
+	: untyped_seq = untyped_operator_sequence [wccl_file.tagset()] {
+		wccl_file.add_untyped_section(untyped_seq);
+	}
+	| bool_seq = bool_operator_sequence [wccl_file.tagset()] {
+		wccl_file.add_section(bool_seq);
+	}
+	| symset_seq = symset_operator_sequence [wccl_file.tagset()] {
+		wccl_file.add_section(symset_seq);
+	}
+	| strset_seq = strset_operator_sequence [wccl_file.tagset()] {
+		wccl_file.add_section(strset_seq);
+	}
+	| pos_seq = position_operator_sequence [wccl_file.tagset()] {
+		wccl_file.add_section(pos_seq);
+	}
+	| m_seq = match_operator_sequence [wccl_file.tagset()] {
+		wccl_file.add_section(m_seq);
+	}
+;
+
+// Section of Bool operators: "@b:" prefix and a STRING name, followed by
+// one or more operators enclosed in LPAREN ... RPAREN.
+// Returns boost::shared_ptr<OpSequence<Bool> >
+bool_operator_sequence
+	[const Corpus2::Tagset& tagset]
+	returns [boost::shared_ptr<OpSequence<Bool> > seq]
+{
+	boost::shared_ptr<Operator<Bool> > op;
+}
+	: BOOL_SECTION_PREFIX name: STRING {
+		seq.reset(new OpSequence<Bool>(token_ref_to_std_string(name)));
+	}
+	LPAREN
+	(op = parse_bool_operator [tagset] { seq->append(op); })+
+	RPAREN
+;
+
+// Section of symset (TSet) operators: "@t:" prefix and a STRING name,
+// followed by one or more operators enclosed in LPAREN ... RPAREN.
+// Returns boost::shared_ptr<OpSequence<TSet> >
+symset_operator_sequence
+	[const Corpus2::Tagset& tagset]
+	returns [boost::shared_ptr<OpSequence<TSet> > seq]
+{
+	boost::shared_ptr<Operator<TSet> > op;
+}
+	: TST_SECTION_PREFIX name: STRING {
+		seq.reset(new OpSequence<TSet>(token_ref_to_std_string(name)));
+	}
+	LPAREN
+	(op = parse_symset_operator [tagset] { seq->append(op); })+
+	RPAREN
+;
+
+// Section of string set (StrSet) operators: "@s:" prefix and a STRING name,
+// followed by one or more operators enclosed in LPAREN ... RPAREN.
+// Returns boost::shared_ptr<OpSequence<StrSet> >
+strset_operator_sequence
+	[const Corpus2::Tagset& tagset]
+	returns [boost::shared_ptr<OpSequence<StrSet> > seq]
+{
+	boost::shared_ptr<Operator<StrSet> > op;
+}
+	: STR_SECTION_PREFIX name: STRING {
+		seq.reset(new OpSequence<StrSet>(token_ref_to_std_string(name)));
+	}
+	LPAREN
+	(op = parse_strset_operator [tagset] { seq->append(op); })+
+	RPAREN
+;
+
+// Section of Position operators: "@p:" prefix and a STRING name, followed
+// by one or more operators enclosed in LPAREN ... RPAREN.
+// Returns boost::shared_ptr<OpSequence<Position> >
+position_operator_sequence
+	[const Corpus2::Tagset& tagset]
+	returns [boost::shared_ptr<OpSequence<Position> > seq]
+{
+	boost::shared_ptr<Operator<Position> > op;
+}
+	: POS_SECTION_PREFIX name: STRING {
+		seq.reset(new OpSequence<Position>(token_ref_to_std_string(name)));
+	}
+	LPAREN
+	(op = parse_position_operator [tagset] { seq->append(op); })+
+	RPAREN
+;
+
+// Section of operators of mixed types: AT_MARK prefix and a STRING name,
+// followed by one or more operators (see any_operator) in LPAREN ... RPAREN.
+// Returns boost::shared_ptr<UntypedOpSequence>
+untyped_operator_sequence
+	[const Corpus2::Tagset& tagset]
+	returns [boost::shared_ptr<UntypedOpSequence> seq]
+{
+	boost::shared_ptr<FunctionalOperator> op;
+}
+	: AT_MARK name: STRING {
+		seq.reset(new UntypedOpSequence(token_ref_to_std_string(name)));
+	}
+	LPAREN
+	(op = any_operator [tagset] { seq->append(op); })+
+	RPAREN
+;
+
+// Section of Match operators: "@m:" prefix and a STRING name, followed by
+// one or more operators enclosed in LPAREN ... RPAREN.
+// Returns boost::shared_ptr<OpSequence<Match> >
+match_operator_sequence
+	[const Corpus2::Tagset& tagset]
+	returns [boost::shared_ptr<OpSequence<Match> > seq]
+{
+	boost::shared_ptr<Operator<Match> > op;
+}
+	: MATCH_SECTION_PREFIX name: STRING {
+		seq.reset(new OpSequence<Match>(token_ref_to_std_string(name)));
+	}
+	LPAREN
+	(op = parse_match_operator [tagset] { seq->append(op); })+
+	RPAREN
+;
+
+// Parses a single operator of any type. Syntactic predicates try position,
+// symset, strset and match operators in that order; bool is the fallback.
+// Returns boost::shared_ptr<FunctionalOperator>
+any_operator
+	[const Corpus2::Tagset& tagset]
+	returns [boost::shared_ptr<FunctionalOperator> op]
+{
+	ParsingScope _s(tagset); // bogus scope for predicates only; built per call so it always uses the current tagset
+}
+	: (position_operator [_s]) => op = parse_position_operator [tagset]
+	| (symset_operator [_s]) => op = parse_symset_operator [tagset]
+	| (strset_operator [_s]) => op = parse_strset_operator [tagset]
+	| (match_operator [_s]) => op = parse_match_operator [tagset]
+	| op = parse_bool_operator [tagset]
+;
+
 // ----------------------------------------------------------------------------
 // ----------------------------------------------------------------------------
 // Tagging actions and rules:
@@ -2417,6 +2614,41 @@ options {
 	: "$m:"
 ;
 
+STR_SECTION_PREFIX
+options {
+	paraphrase = "Wccl file string operators section prefix";
+}
+	: "@s:"
+;
+
+POS_SECTION_PREFIX
+options {
+	paraphrase = "Wccl file position operators section prefix";
+}
+	: "@p:"
+;
+
+BOOL_SECTION_PREFIX
+options {
+	paraphrase = "Wccl file bool operators section prefix";
+}
+	: "@b:"
+;
+
+TST_SECTION_PREFIX
+options {
+	paraphrase = "Wccl file symset operators section prefix";
+}
+	: "@t:"
+;
+
+MATCH_SECTION_PREFIX
+options {
+	paraphrase = "Wccl file match operators section prefix";
+}
+	: "@m:"
+;
+
 LBRACKET
 options {
 	paraphrase = "'['";
@@ -2466,6 +2698,7 @@ options {
 	: '@'
 ;
 
+
 COMMA
 options {
 	paraphrase = "','";
@@ -2487,6 +2720,13 @@ options {
 	: ':'
 ;
 
+SEMI
+options {
+	paraphrase = ";";
+}
+	: ';'
+;
+
 SYMBOL
 options {
 	paraphrase = "Symbol";
-- 
GitLab