From 24f23044df96a743f2daaadb9fff22c21691afe1 Mon Sep 17 00:00:00 2001 From: Adam Wardynski <award@.(win7-laptop)> Date: Wed, 4 May 2011 15:10:34 +0200 Subject: [PATCH] Fix issues around parsing scope, and sequence syntax. Each subsequent rule was reusing variables object, which wasn't good. Parsing of sequences had to be changed not to end with semi, and it wasn't actually passing on lexicons in the first place. --- libwccl/parser/grammar.g | 218 ++++++++++++++++++++-------------- libwccl/parser/parsingscope.h | 28 +++-- libwccl/wcclfile.h | 18 ++- 3 files changed, 167 insertions(+), 97 deletions(-) diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index 685f9bc..101fcf3 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -183,13 +183,11 @@ parse_strset_operator [const Corpus2::Tagset &tagset] returns [boost::shared_ptr<Operator<StrSet> > res] { - ParsingScope scope(tagset); - boost::shared_ptr<Function<StrSet> > body; + Lexicons empty_lex; + ParsingScope scope(tagset, empty_lex); } - : body = strset_operator [scope] { - res.reset(new Operator<StrSet>(body, scope.variables())); - } - (EOF | SEMI) + : res = functional_operator_strset [scope] + EOF ; // ---------------------------------------------------------------------------- @@ -199,13 +197,11 @@ parse_bool_operator [const Corpus2::Tagset &tagset] returns [boost::shared_ptr<Operator<Bool> > res] { - ParsingScope scope(tagset); - boost::shared_ptr<Function<Bool> > body; + Lexicons empty_lex; + ParsingScope scope(tagset, empty_lex); } - : body = bool_operator [scope] { - res.reset(new Operator<Bool>(body, scope.variables())); - } - (EOF | SEMI) + : res = functional_operator_bool [scope] + EOF ; // ---------------------------------------------------------------------------- @@ -215,13 +211,11 @@ parse_symset_operator [const Corpus2::Tagset &tagset] returns [boost::shared_ptr<Operator<TSet> > res] { - ParsingScope scope(tagset); - boost::shared_ptr<Function<TSet> > body; + Lexicons empty_lex; + ParsingScope scope(tagset, empty_lex); } - : body = symset_operator [scope] { - res.reset(new Operator<TSet>(body, scope.variables())); - } - (EOF | SEMI) + : res = functional_operator_symset [scope] + EOF ; // ---------------------------------------------------------------------------- @@ -231,13 +225,11 @@ parse_position_operator [const Corpus2::Tagset &tagset] returns [boost::shared_ptr<Operator<Position> > res] { - ParsingScope scope(tagset); - boost::shared_ptr<Function<Position> > body; + Lexicons empty_lex; + ParsingScope scope(tagset, empty_lex); } - : body = position_operator [scope] { - res.reset(new Operator<Position>(body, scope.variables())); - } - (EOF | SEMI) + : res = functional_operator_position [scope] + EOF ; // ---------------------------------------------------------------------------- @@ -247,13 +239,11 @@ parse_match_operator [const Corpus2::Tagset &tagset] returns [boost::shared_ptr<Operator<Match> > res] { - ParsingScope scope(tagset); - boost::shared_ptr<Function<Match> > body; + Lexicons empty_lex; + ParsingScope scope(tagset, empty_lex); } - : body = match_operator [scope] { - res.reset(new Operator<Match>(body, scope.variables())); - } - (EOF | SEMI) + : res = functional_operator_match [scope] + EOF ; // ---------------------------------------------------------------------------- @@ -264,7 +254,8 @@ parse_single_tag_rule [const Corpus2::Tagset &tagset] returns [boost::shared_ptr<TagRule> rle] { - ParsingScope scope(tagset); + Lexicons empty_lex; + ParsingScope scope(tagset, empty_lex); } : rle = tag_rule [scope] ; @@ -275,7 +266,8 @@ parse_tag_rule_sequence [const Corpus2::Tagset& tagset] returns [boost::shared_ptr<TagRuleSequence> rule_seq] { - ParsingScope scope(tagset); + Lexicons empty_lex; + ParsingScope scope(tagset, empty_lex); } : rule_seq = tag_rules[scope] ; @@ -288,7 +280,8 @@ parse_match_rule [const Corpus2::Tagset& tagset] returns [boost::shared_ptr<MatchRule> ret_match] { - ParsingScope scope(tagset); + Lexicons empty_lex; + ParsingScope scope(tagset, empty_lex); scope.variables().get_put<Match>("_M"); } : ret_match = match_rule_operator[scope] @@ -745,6 +738,19 @@ symset_operator | LPAREN ret = symset_operator [scope] RPAREN ; +// ---------------------------------------------------------------------------- +// Wrapper from Function<TSet> to Operator<TSet> +functional_operator_symset + [ParsingScope& scope] + returns [boost::shared_ptr<Operator<TSet> > op] +{ + boost::shared_ptr<Function<TSet> > body; +} + : body = symset_operator [scope] { + op.reset(new Operator<TSet>(body, scope.variables())); + } +; + // ---------------------------------------------------------------------------- // A wrapper for symset variable and symset value. symset_var_val @@ -901,6 +907,19 @@ position_operator )? ; +// ---------------------------------------------------------------------------- +// Wrapper from Function<Position> to Operator<Position> +functional_operator_position + [ParsingScope& scope] + returns [boost::shared_ptr<Operator<Position> > op] +{ + boost::shared_ptr<Function<Position> > body; +} + : body = position_operator [scope] { + op.reset(new Operator<Position>(body, scope.variables())); + } +; + // ---------------------------------------------------------------------------- // Wrapper for position variable and position value position_var_val @@ -984,6 +1003,18 @@ strset_operator [ParsingScope& scope] | LPAREN ret = strset_operator [scope] RPAREN ; +// ---------------------------------------------------------------------------- +// Wrapper from Function<StrSet> to Operator<StrSet> +functional_operator_strset + [ParsingScope& scope] + returns [boost::shared_ptr<Operator<StrSet> > op] +{ + boost::shared_ptr<Function<StrSet> > body; +} + : body = strset_operator [scope] { + op.reset(new Operator<StrSet>(body, scope.variables())); + } +; // ---------------------------------------------------------------------------- // Orth operator. strset_orth @@ -1144,6 +1175,19 @@ bool_operator | LPAREN ret = bool_operator [scope] RPAREN ; +// ---------------------------------------------------------------------------- +// wrapper from Function<Bool> to Operator<Bool> +functional_operator_bool + [ParsingScope& scope] + returns [boost::shared_ptr<Operator<Bool> > op] +{ + boost::shared_ptr<Function<Bool> > body; +} + : body = bool_operator [scope] { + op.reset(new Operator<Bool>(body, scope.variables())); + } +; + // ---------------------------------------------------------------------------- // comma-separated predicates (bool operators) bool_operator_comma_sep @@ -1823,6 +1867,19 @@ match_operator )* ; +// ---------------------------------------------------------------------------- +// Wrapper from Function<Match> to Operator<Match> +functional_operator_match + [ParsingScope& scope] + returns [boost::shared_ptr<Operator<Match> > op] +{ + boost::shared_ptr<Function<Match> > body; +} + : body = match_operator [scope] { + op.reset(new Operator<Match>(body, scope.variables())); + } +; + /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // WCCL FILE PARSING RULES @@ -1852,121 +1909,132 @@ any_operator_section boost::shared_ptr<OpSequence<Position> > pos_seq; boost::shared_ptr<OpSequence<Match> > m_seq; } - : untyped_seq = untyped_operator_sequence [wccl_file.tagset()] { + : untyped_seq = untyped_operator_sequence [wccl_file] { wccl_file.add_untyped_section(untyped_seq); } - | bool_seq = bool_operator_sequence [wccl_file.tagset()] { + | bool_seq = bool_operator_sequence [wccl_file] { wccl_file.add_section(bool_seq); } - | symset_seq = symset_operator_sequence [wccl_file.tagset()] { + | symset_seq = symset_operator_sequence [wccl_file] { wccl_file.add_section(symset_seq); } - | strset_seq = strset_operator_sequence [wccl_file.tagset()] { + | strset_seq = strset_operator_sequence [wccl_file] { wccl_file.add_section(strset_seq); } - | pos_seq = position_operator_sequence [wccl_file.tagset()] { + | pos_seq = position_operator_sequence [wccl_file] { wccl_file.add_section(pos_seq); } - | m_seq = match_operator_sequence [wccl_file.tagset()] { + | m_seq = match_operator_sequence [wccl_file] { wccl_file.add_section(m_seq); } ; bool_operator_sequence - [const Corpus2::Tagset& tagset] + [const WcclFile& wccl_file] returns [boost::shared_ptr<OpSequence<Bool> > seq] { + ParsingScope scope(wccl_file); boost::shared_ptr<Operator<Bool> > op; } : BOOL_SECTION_PREFIX name: STRING { seq.reset(new OpSequence<Bool>(token_ref_to_std_string(name))); } LPAREN - (op = parse_bool_operator [tagset] { seq->append(op); })+ + op = functional_operator_bool [scope] { seq->append(op); } + (SEMI op = functional_operator_bool [scope.new_scope()] { seq->append(op); })* RPAREN ; symset_operator_sequence - [const Corpus2::Tagset& tagset] + [const WcclFile& wccl_file] returns [boost::shared_ptr<OpSequence<TSet> > seq] { + ParsingScope scope(wccl_file); boost::shared_ptr<Operator<TSet> > op; } : TST_SECTION_PREFIX name: STRING { seq.reset(new OpSequence<TSet>(token_ref_to_std_string(name))); } LPAREN - (op = parse_symset_operator [tagset] { seq->append(op); })+ + op = functional_operator_symset [scope] { seq->append(op); } + (SEMI op = functional_operator_symset [scope.new_scope()] { seq->append(op); })* RPAREN ; strset_operator_sequence - [const Corpus2::Tagset& tagset] + [const WcclFile& wccl_file] returns [boost::shared_ptr<OpSequence<StrSet> > seq] { + ParsingScope scope(wccl_file); boost::shared_ptr<Operator<StrSet> > op; } : STR_SECTION_PREFIX name: STRING { seq.reset(new OpSequence<StrSet>(token_ref_to_std_string(name))); } LPAREN - (op = parse_strset_operator [tagset] { seq->append(op); })+ + op = functional_operator_strset [scope] { seq->append(op); } + (SEMI op = functional_operator_strset [scope.new_scope()] { seq->append(op); })* RPAREN ; position_operator_sequence - [const Corpus2::Tagset& tagset] + [const WcclFile& wccl_file] returns [boost::shared_ptr<OpSequence<Position> > seq] { + ParsingScope scope(wccl_file); boost::shared_ptr<Operator<Position> > op; } : POS_SECTION_PREFIX name: STRING { seq.reset(new OpSequence<Position>(token_ref_to_std_string(name))); } LPAREN - (op = parse_position_operator [tagset] { seq->append(op); })+ + op = functional_operator_position [scope] { seq->append(op); } + (SEMI op = functional_operator_position [scope.new_scope()] { seq->append(op); })* RPAREN ; untyped_operator_sequence - [const Corpus2::Tagset& tagset] + [const WcclFile& wccl_file] returns [boost::shared_ptr<UntypedOpSequence> seq] { + ParsingScope scope(wccl_file); boost::shared_ptr<FunctionalOperator> op; } : AT_MARK name: STRING { seq.reset(new UntypedOpSequence(token_ref_to_std_string(name))); } LPAREN - (op = any_operator [tagset] { seq->append(op); })+ + op = functional_operator_any [scope] { seq->append(op); } + (SEMI op = functional_operator_any [scope.new_scope()] { seq->append(op); })* RPAREN ; match_operator_sequence - [const Corpus2::Tagset& tagset] + [const WcclFile& wccl_file] returns [boost::shared_ptr<OpSequence<Match> > seq] { + ParsingScope scope(wccl_file); boost::shared_ptr<Operator<Match> > op; } : MATCH_SECTION_PREFIX name: STRING { seq.reset(new OpSequence<Match>(token_ref_to_std_string(name))); } LPAREN - (op = parse_match_operator [tagset] { seq->append(op); })+ + op = functional_operator_match [scope] { seq->append(op); } + (SEMI op = functional_operator_match [scope.new_scope()] { seq->append(op); })* RPAREN ; -any_operator - [const Corpus2::Tagset& tagset] +functional_operator_any + [ParsingScope& scope] returns [boost::shared_ptr<FunctionalOperator> op] { - static ParsingScope _s(tagset); // just a bogus scope for predicates } - : (position_operator [_s]) => op = parse_position_operator [tagset] - | (symset_operator [_s]) => op = parse_symset_operator [tagset] - | (strset_operator [_s]) => op = parse_strset_operator [tagset] - | (match_operator [_s]) => op = parse_match_operator [tagset] - | op = parse_bool_operator [tagset] + : (position_operator [scope]) => op = functional_operator_position [scope] + | (symset_operator [scope]) => op = functional_operator_symset [scope] + | (strset_operator [scope]) => op = functional_operator_strset [scope] + | (match_operator [scope]) => op = functional_operator_match [scope] + | op = functional_operator_bool [scope] ; // ---------------------------------------------------------------------------- @@ -2031,45 +2099,21 @@ tag_rule new TagRule(token_ref_to_std_string(name), scope.variables(), actions)); } } -/* - : "rule" LPAREN name: STRING COMMA - ( - (bool_operator[scope]) => - ( - condition = bool_operator [scope] COMMA - actions = action_sequence [scope] { - // rule(NAME, COND, ACTIONS) - rle.reset( - new TagRule(token_ref_to_std_string(name), scope.variables(), actions, condition)); - } - ) - | - ( - actions = action_sequence [scope] { - // rule(NAME, ACTIONS) - rle.reset(new TagRule(token_ref_to_std_string(name), scope.variables(), actions)); - } - ) - ) - RPAREN -*/ ; // Rule sequence tag_rule_sequence - [ParsingScope& scope] + [const ParsingScope& scope] returns [boost::shared_ptr<TagRuleSequence> rule_seq] { - // FIXME czy tutaj przypadkiem nie powinno byc shared_ptr? boost::shared_ptr<TagRule> rle; - rule_seq.reset(new TagRuleSequence()); } - : rle = tag_rule [scope] { + : rle = tag_rule [scope.new_scope()] { rule_seq->push_back(*rle); } ( - SEMI rle = tag_rule [scope] { + SEMI rle = tag_rule [scope.new_scope()] { rule_seq->push_back(*rle); } )* @@ -2078,7 +2122,7 @@ tag_rule_sequence // Temporary name. // This is wrapper for tag_rule_sequence in rules section in the wccl file tag_rules - [ParsingScope& scope] + [const ParsingScope& scope] returns [boost::shared_ptr<TagRuleSequence> rule_seq] : "tag_rules" LPAREN rule_seq = tag_rule_sequence [scope] RPAREN { // diff --git a/libwccl/parser/parsingscope.h b/libwccl/parser/parsingscope.h index 9a85ba4..acbf6ae 100644 --- a/libwccl/parser/parsingscope.h +++ b/libwccl/parser/parsingscope.h @@ -3,20 +3,32 @@ #include <libwccl/variables.h> #include <libwccl/lexicon/lexicons.h> +#include <libwccl/wcclfile.h> namespace Wccl { class ParsingScope { public: - ParsingScope(const Corpus2::Tagset& tagset) + ParsingScope(const Corpus2::Tagset& tagset, const Lexicons& lexicons) : tagset_(tagset), variables_(), - lexicons_(new Lexicons()) + lexicons_(lexicons) { } - const Corpus2::Tagset& tagset() { + explicit ParsingScope(const WcclFile& file) + : tagset_(file.tagset()), + variables_(), + lexicons_(file.get_lexicons()) + { + } + + ParsingScope new_scope() const { + return ParsingScope(tagset_, lexicons_); + } + + const Corpus2::Tagset& tagset() const { return tagset_; } @@ -24,18 +36,18 @@ public: return variables_; } - boost::shared_ptr<Lexicons> lexicons_ptr() { - return lexicons_; + const Variables& variables() const { + return variables_; } - Lexicons& lexicons() { - return *lexicons_; + const Lexicons& lexicons() const { + return lexicons_; } private: const Corpus2::Tagset& tagset_; Variables variables_; - boost::shared_ptr<Lexicons> lexicons_; + const Lexicons& lexicons_; }; } /* end ns Wccl */ diff --git a/libwccl/wcclfile.h b/libwccl/wcclfile.h index 5dc4754..03293aa 100644 --- a/libwccl/wcclfile.h +++ b/libwccl/wcclfile.h @@ -21,7 +21,7 @@ class WcclFile WcclFileOpSections<OpSequence<Match> > { public: - WcclFile(const Corpus2::Tagset tagset); + explicit WcclFile(const Corpus2::Tagset& tagset); const std::vector<boost::shared_ptr<UntypedOpSequence> >& untyped_sections(); template<class T> @@ -77,6 +77,8 @@ public: bool has_lexicon(const std::string& name) const; boost::shared_ptr<const Lexicon> get_lexicon_ptr(const std::string& name) const; const Lexicon& get_lexicon(const std::string& name) const; + boost::shared_ptr<const Lexicons> get_lexicons_ptr() const; + const Lexicons& get_lexicons() const; void add_untyped_section(const boost::shared_ptr<UntypedOpSequence>& section); void add_untyped_section(const boost::shared_ptr<const UntypedOpSequence>& section); @@ -116,7 +118,7 @@ private: namespace Wccl { inline -WcclFile::WcclFile(const Corpus2::Tagset tagset) +WcclFile::WcclFile(const Corpus2::Tagset& tagset) : tagset_(tagset) { } @@ -308,6 +310,18 @@ const Lexicon& WcclFile::get_lexicon(const std::string &name) const return lexicons_->get(name); } +inline +boost::shared_ptr<const Lexicons> WcclFile::get_lexicons_ptr() const +{ + return lexicons_; +} + +inline +const Lexicons& WcclFile::get_lexicons() const +{ + return *lexicons_; +} + inline void WcclFile::add_untyped_section(const boost::shared_ptr<UntypedOpSequence>& section) { -- GitLab