// Patch commentary. Text ahead of the first "diff --git" header is not part of
// any hunk; everything from that header onwards is the original patch, unchanged.
//
// Purpose: ParsingScope stops owning a Lexicons object and instead refers to one
// supplied by its creator, and the operator sections of a WCCL file are parsed
// against the WcclFile they belong to rather than against a bare tagset.
//
// libwccl/parser/grammar.g
//   * parse_{strset,bool,symset,position,match}_operator build their scope from
//     the tagset plus a local, empty Lexicons, delegate to the matching
//     functional_operator_* rule and then require EOF (the former "(EOF | SEMI)"
//     alternative is gone).
//   * parse_single_tag_rule, parse_tag_rule_sequence and parse_match_rule use the
//     same two-argument scope construction.
//   * New rules functional_operator_{symset,position,strset,bool,match} parse a
//     Function<T> body and wrap it in an Operator<T> built with scope.variables().
//   * any_operator_section passes wccl_file (not wccl_file.tagset()) to the
//     *_operator_sequence rules. Each of those creates ParsingScope(wccl_file),
//     parses one operator with it, then zero or more "SEMI operator" repetitions,
//     each of which is handed scope.new_scope().
//   * any_operator becomes functional_operator_any, takes a ParsingScope&, and
//     uses that scope in its syntactic predicates instead of a static bogus one.
//   * tag_rule loses a commented-out alternative; tag_rule_sequence and tag_rules
//     take a const ParsingScope& and hand scope.new_scope() to every tag_rule.
//
// libwccl/parser/parsingscope.h
//   * The constructor takes (tagset, lexicons); an explicit constructor from a
//     WcclFile reads file.tagset() and file.get_lexicons().
//   * new_scope() returns, by value, a ParsingScope with the same tagset and
//     lexicons and a default-constructed Variables.
//   * lexicons_ changes from boost::shared_ptr<Lexicons> to const Lexicons&;
//     lexicons_ptr() is removed; tagset(), variables() and lexicons() get const
//     accessors.
//
// libwccl/wcclfile.h
//   * The WcclFile constructor becomes explicit and takes the tagset by const
//     reference; get_lexicons_ptr() and get_lexicons() are added.
//
// NOTE(review): new_scope() returns a temporary, while functional_operator_* are
//   declared with [ParsingScope& scope]. If rule arguments end up as ordinary C++
//   function parameters (as is usual for ANTLR 2 C++ output), binding that
//   temporary to a non-const lvalue reference is ill-formed in standard C++ --
//   confirm the generated parser builds on every supported compiler, or bind the
//   new scope to a named local first. tag_rule's parameter declaration is
//   outside these hunks; the same question applies there.
// NOTE(review): ParsingScope now keeps a reference to Lexicons. In the parse_*
//   rules the referent is the local empty_lex, so nothing created during the
//   parse may retain that reference after the rule returns -- TODO confirm.
// NOTE(review): parse_*_operator no longer accept SEMI as a terminator;
//   presumably intended now that SEMI separates operators inside a section --
//   verify against existing callers and tests.
// NOTE(review): parsingscope.h now includes wcclfile.h; whether wcclfile.h
//   (directly or indirectly) includes parser headers is not visible here --
//   check for an include cycle.
// NOTE(review): WcclFile now receives the tagset by reference; how tagset_ is
//   declared is not shown in these hunks -- if it is a reference member, the
//   caller must keep the tagset alive for the lifetime of the WcclFile.
//
diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index 685f9bc6f927a3758508c54515acb1dc0387b52d..101fcf313fd60216f63bae1fe1d267f0afb52bde 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -183,13 +183,11 @@ parse_strset_operator [const Corpus2::Tagset &tagset] returns [boost::shared_ptr<Operator<StrSet> > res] { - ParsingScope scope(tagset); - boost::shared_ptr<Function<StrSet> > body; + Lexicons empty_lex; + ParsingScope scope(tagset, empty_lex); } - : body = strset_operator [scope] { - res.reset(new Operator<StrSet>(body, scope.variables())); - } - (EOF | SEMI) + : res = functional_operator_strset [scope] + EOF ; // ---------------------------------------------------------------------------- @@ -199,13 +197,11 @@ parse_bool_operator [const Corpus2::Tagset &tagset] returns [boost::shared_ptr<Operator<Bool> > res] { - ParsingScope scope(tagset); - boost::shared_ptr<Function<Bool> > body; + Lexicons empty_lex; + ParsingScope scope(tagset, empty_lex); } - : body = bool_operator [scope] { - res.reset(new Operator<Bool>(body, scope.variables())); - } - (EOF | SEMI) + : res = functional_operator_bool [scope] + EOF ; // ---------------------------------------------------------------------------- @@ -215,13 +211,11 @@ parse_symset_operator [const Corpus2::Tagset &tagset] returns [boost::shared_ptr<Operator<TSet> > res] { - ParsingScope scope(tagset); - boost::shared_ptr<Function<TSet> > body; + Lexicons empty_lex; + ParsingScope scope(tagset, empty_lex); } - : body = symset_operator [scope] { - res.reset(new Operator<TSet>(body, scope.variables())); - } - (EOF | SEMI) + : res = functional_operator_symset [scope] + EOF ; // ---------------------------------------------------------------------------- @@ -231,13 +225,11 @@ parse_position_operator [const Corpus2::Tagset &tagset] returns [boost::shared_ptr<Operator<Position> > res] { - ParsingScope scope(tagset); - boost::shared_ptr<Function<Position> > body; + Lexicons empty_lex; + 
ParsingScope scope(tagset, empty_lex); } - : body = position_operator [scope] { - res.reset(new Operator<Position>(body, scope.variables())); - } - (EOF | SEMI) + : res = functional_operator_position [scope] + EOF ; // ---------------------------------------------------------------------------- @@ -247,13 +239,11 @@ parse_match_operator [const Corpus2::Tagset &tagset] returns [boost::shared_ptr<Operator<Match> > res] { - ParsingScope scope(tagset); - boost::shared_ptr<Function<Match> > body; + Lexicons empty_lex; + ParsingScope scope(tagset, empty_lex); } - : body = match_operator [scope] { - res.reset(new Operator<Match>(body, scope.variables())); - } - (EOF | SEMI) + : res = functional_operator_match [scope] + EOF ; // ---------------------------------------------------------------------------- @@ -264,7 +254,8 @@ parse_single_tag_rule [const Corpus2::Tagset &tagset] returns [boost::shared_ptr<TagRule> rle] { - ParsingScope scope(tagset); + Lexicons empty_lex; + ParsingScope scope(tagset, empty_lex); } : rle = tag_rule [scope] ; @@ -275,7 +266,8 @@ parse_tag_rule_sequence [const Corpus2::Tagset& tagset] returns [boost::shared_ptr<TagRuleSequence> rule_seq] { - ParsingScope scope(tagset); + Lexicons empty_lex; + ParsingScope scope(tagset, empty_lex); } : rule_seq = tag_rules[scope] ; @@ -288,7 +280,8 @@ parse_match_rule [const Corpus2::Tagset& tagset] returns [boost::shared_ptr<MatchRule> ret_match] { - ParsingScope scope(tagset); + Lexicons empty_lex; + ParsingScope scope(tagset, empty_lex); scope.variables().get_put<Match>("_M"); } : ret_match = match_rule_operator[scope] @@ -745,6 +738,19 @@ symset_operator | LPAREN ret = symset_operator [scope] RPAREN ; +// ---------------------------------------------------------------------------- +// Wrapper from Function<TSet> to Operator<TSet> +functional_operator_symset + [ParsingScope& scope] + returns [boost::shared_ptr<Operator<TSet> > op] +{ + boost::shared_ptr<Function<TSet> > body; +} + : body = symset_operator 
[scope] { + op.reset(new Operator<TSet>(body, scope.variables())); + } +; + // ---------------------------------------------------------------------------- // A wrapper for symset variable and symset value. symset_var_val @@ -901,6 +907,19 @@ position_operator )? ; +// ---------------------------------------------------------------------------- +// Wrapper from Function<Position> to Operator<Position> +functional_operator_position + [ParsingScope& scope] + returns [boost::shared_ptr<Operator<Position> > op] +{ + boost::shared_ptr<Function<Position> > body; +} + : body = position_operator [scope] { + op.reset(new Operator<Position>(body, scope.variables())); + } +; + // ---------------------------------------------------------------------------- // Wrapper for position variable and position value position_var_val @@ -984,6 +1003,18 @@ strset_operator [ParsingScope& scope] | LPAREN ret = strset_operator [scope] RPAREN ; +// ---------------------------------------------------------------------------- +// Wrapper from Function<StrSet> to Operator<StrSet> +functional_operator_strset + [ParsingScope& scope] + returns [boost::shared_ptr<Operator<StrSet> > op] +{ + boost::shared_ptr<Function<StrSet> > body; +} + : body = strset_operator [scope] { + op.reset(new Operator<StrSet>(body, scope.variables())); + } +; // ---------------------------------------------------------------------------- // Orth operator. 
strset_orth @@ -1144,6 +1175,19 @@ bool_operator | LPAREN ret = bool_operator [scope] RPAREN ; +// ---------------------------------------------------------------------------- +// wrapper from Function<Bool> to Operator<Bool> +functional_operator_bool + [ParsingScope& scope] + returns [boost::shared_ptr<Operator<Bool> > op] +{ + boost::shared_ptr<Function<Bool> > body; +} + : body = bool_operator [scope] { + op.reset(new Operator<Bool>(body, scope.variables())); + } +; + // ---------------------------------------------------------------------------- // comma-separated predicates (bool operators) bool_operator_comma_sep @@ -1823,6 +1867,19 @@ match_operator )* ; +// ---------------------------------------------------------------------------- +// Wrapper from Function<Match> to Operator<Match> +functional_operator_match + [ParsingScope& scope] + returns [boost::shared_ptr<Operator<Match> > op] +{ + boost::shared_ptr<Function<Match> > body; +} + : body = match_operator [scope] { + op.reset(new Operator<Match>(body, scope.variables())); + } +; + /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // WCCL FILE PARSING RULES @@ -1852,121 +1909,132 @@ any_operator_section boost::shared_ptr<OpSequence<Position> > pos_seq; boost::shared_ptr<OpSequence<Match> > m_seq; } - : untyped_seq = untyped_operator_sequence [wccl_file.tagset()] { + : untyped_seq = untyped_operator_sequence [wccl_file] { wccl_file.add_untyped_section(untyped_seq); } - | bool_seq = bool_operator_sequence [wccl_file.tagset()] { + | bool_seq = bool_operator_sequence [wccl_file] { wccl_file.add_section(bool_seq); } - | symset_seq = symset_operator_sequence [wccl_file.tagset()] { + | symset_seq = symset_operator_sequence [wccl_file] { wccl_file.add_section(symset_seq); } - | strset_seq = strset_operator_sequence [wccl_file.tagset()] { + | strset_seq = strset_operator_sequence [wccl_file] { 
wccl_file.add_section(strset_seq); } - | pos_seq = position_operator_sequence [wccl_file.tagset()] { + | pos_seq = position_operator_sequence [wccl_file] { wccl_file.add_section(pos_seq); } - | m_seq = match_operator_sequence [wccl_file.tagset()] { + | m_seq = match_operator_sequence [wccl_file] { wccl_file.add_section(m_seq); } ; bool_operator_sequence - [const Corpus2::Tagset& tagset] + [const WcclFile& wccl_file] returns [boost::shared_ptr<OpSequence<Bool> > seq] { + ParsingScope scope(wccl_file); boost::shared_ptr<Operator<Bool> > op; } : BOOL_SECTION_PREFIX name: STRING { seq.reset(new OpSequence<Bool>(token_ref_to_std_string(name))); } LPAREN - (op = parse_bool_operator [tagset] { seq->append(op); })+ + op = functional_operator_bool [scope] { seq->append(op); } + (SEMI op = functional_operator_bool [scope.new_scope()] { seq->append(op); })* RPAREN ; symset_operator_sequence - [const Corpus2::Tagset& tagset] + [const WcclFile& wccl_file] returns [boost::shared_ptr<OpSequence<TSet> > seq] { + ParsingScope scope(wccl_file); boost::shared_ptr<Operator<TSet> > op; } : TST_SECTION_PREFIX name: STRING { seq.reset(new OpSequence<TSet>(token_ref_to_std_string(name))); } LPAREN - (op = parse_symset_operator [tagset] { seq->append(op); })+ + op = functional_operator_symset [scope] { seq->append(op); } + (SEMI op = functional_operator_symset [scope.new_scope()] { seq->append(op); })* RPAREN ; strset_operator_sequence - [const Corpus2::Tagset& tagset] + [const WcclFile& wccl_file] returns [boost::shared_ptr<OpSequence<StrSet> > seq] { + ParsingScope scope(wccl_file); boost::shared_ptr<Operator<StrSet> > op; } : STR_SECTION_PREFIX name: STRING { seq.reset(new OpSequence<StrSet>(token_ref_to_std_string(name))); } LPAREN - (op = parse_strset_operator [tagset] { seq->append(op); })+ + op = functional_operator_strset [scope] { seq->append(op); } + (SEMI op = functional_operator_strset [scope.new_scope()] { seq->append(op); })* RPAREN ; position_operator_sequence - [const 
Corpus2::Tagset& tagset] + [const WcclFile& wccl_file] returns [boost::shared_ptr<OpSequence<Position> > seq] { + ParsingScope scope(wccl_file); boost::shared_ptr<Operator<Position> > op; } : POS_SECTION_PREFIX name: STRING { seq.reset(new OpSequence<Position>(token_ref_to_std_string(name))); } LPAREN - (op = parse_position_operator [tagset] { seq->append(op); })+ + op = functional_operator_position [scope] { seq->append(op); } + (SEMI op = functional_operator_position [scope.new_scope()] { seq->append(op); })* RPAREN ; untyped_operator_sequence - [const Corpus2::Tagset& tagset] + [const WcclFile& wccl_file] returns [boost::shared_ptr<UntypedOpSequence> seq] { + ParsingScope scope(wccl_file); boost::shared_ptr<FunctionalOperator> op; } : AT_MARK name: STRING { seq.reset(new UntypedOpSequence(token_ref_to_std_string(name))); } LPAREN - (op = any_operator [tagset] { seq->append(op); })+ + op = functional_operator_any [scope] { seq->append(op); } + (SEMI op = functional_operator_any [scope.new_scope()] { seq->append(op); })* RPAREN ; match_operator_sequence - [const Corpus2::Tagset& tagset] + [const WcclFile& wccl_file] returns [boost::shared_ptr<OpSequence<Match> > seq] { + ParsingScope scope(wccl_file); boost::shared_ptr<Operator<Match> > op; } : MATCH_SECTION_PREFIX name: STRING { seq.reset(new OpSequence<Match>(token_ref_to_std_string(name))); } LPAREN - (op = parse_match_operator [tagset] { seq->append(op); })+ + op = functional_operator_match [scope] { seq->append(op); } + (SEMI op = functional_operator_match [scope.new_scope()] { seq->append(op); })* RPAREN ; -any_operator - [const Corpus2::Tagset& tagset] +functional_operator_any + [ParsingScope& scope] returns [boost::shared_ptr<FunctionalOperator> op] { - static ParsingScope _s(tagset); // just a bogus scope for predicates } - : (position_operator [_s]) => op = parse_position_operator [tagset] - | (symset_operator [_s]) => op = parse_symset_operator [tagset] - | (strset_operator [_s]) => op = 
parse_strset_operator [tagset] - | (match_operator [_s]) => op = parse_match_operator [tagset] - | op = parse_bool_operator [tagset] + : (position_operator [scope]) => op = functional_operator_position [scope] + | (symset_operator [scope]) => op = functional_operator_symset [scope] + | (strset_operator [scope]) => op = functional_operator_strset [scope] + | (match_operator [scope]) => op = functional_operator_match [scope] + | op = functional_operator_bool [scope] ; // ---------------------------------------------------------------------------- @@ -2031,45 +2099,21 @@ tag_rule new TagRule(token_ref_to_std_string(name), scope.variables(), actions)); } } -/* - : "rule" LPAREN name: STRING COMMA - ( - (bool_operator[scope]) => - ( - condition = bool_operator [scope] COMMA - actions = action_sequence [scope] { - // rule(NAME, COND, ACTIONS) - rle.reset( - new TagRule(token_ref_to_std_string(name), scope.variables(), actions, condition)); - } - ) - | - ( - actions = action_sequence [scope] { - // rule(NAME, ACTIONS) - rle.reset(new TagRule(token_ref_to_std_string(name), scope.variables(), actions)); - } - ) - ) - RPAREN -*/ ; // Rule sequence tag_rule_sequence - [ParsingScope& scope] + [const ParsingScope& scope] returns [boost::shared_ptr<TagRuleSequence> rule_seq] { - // FIXME czy tutaj przypadkiem nie powinno byc shared_ptr? boost::shared_ptr<TagRule> rle; - rule_seq.reset(new TagRuleSequence()); } - : rle = tag_rule [scope] { + : rle = tag_rule [scope.new_scope()] { rule_seq->push_back(*rle); } ( - SEMI rle = tag_rule [scope] { + SEMI rle = tag_rule [scope.new_scope()] { rule_seq->push_back(*rle); } )* @@ -2078,7 +2122,7 @@ tag_rule_sequence // Temporary name. 
// This is wrapper for tag_rule_sequence in rules section in the wccl file tag_rules - [ParsingScope& scope] + [const ParsingScope& scope] returns [boost::shared_ptr<TagRuleSequence> rule_seq] : "tag_rules" LPAREN rule_seq = tag_rule_sequence [scope] RPAREN { // diff --git a/libwccl/parser/parsingscope.h b/libwccl/parser/parsingscope.h index 9a85ba461c8ae2c5ab3d66b579dfd8d44316863a..acbf6aea44bdd9795383358d8ba12e516c3f8e7d 100644 --- a/libwccl/parser/parsingscope.h +++ b/libwccl/parser/parsingscope.h @@ -3,20 +3,32 @@ #include <libwccl/variables.h> #include <libwccl/lexicon/lexicons.h> +#include <libwccl/wcclfile.h> namespace Wccl { class ParsingScope { public: - ParsingScope(const Corpus2::Tagset& tagset) + ParsingScope(const Corpus2::Tagset& tagset, const Lexicons& lexicons) : tagset_(tagset), variables_(), - lexicons_(new Lexicons()) + lexicons_(lexicons) { } - const Corpus2::Tagset& tagset() { + explicit ParsingScope(const WcclFile& file) + : tagset_(file.tagset()), + variables_(), + lexicons_(file.get_lexicons()) + { + } + + ParsingScope new_scope() const { + return ParsingScope(tagset_, lexicons_); + } + + const Corpus2::Tagset& tagset() const { return tagset_; } @@ -24,18 +36,18 @@ public: return variables_; } - boost::shared_ptr<Lexicons> lexicons_ptr() { - return lexicons_; + const Variables& variables() const { + return variables_; } - Lexicons& lexicons() { - return *lexicons_; + const Lexicons& lexicons() const { + return lexicons_; } private: const Corpus2::Tagset& tagset_; Variables variables_; - boost::shared_ptr<Lexicons> lexicons_; + const Lexicons& lexicons_; }; } /* end ns Wccl */ diff --git a/libwccl/wcclfile.h b/libwccl/wcclfile.h index 5dc47546d67f04bae57fa657d3c169d21712ae36..03293aaafae971f0b4c365fec0e03429404f5f59 100644 --- a/libwccl/wcclfile.h +++ b/libwccl/wcclfile.h @@ -21,7 +21,7 @@ class WcclFile WcclFileOpSections<OpSequence<Match> > { public: - WcclFile(const Corpus2::Tagset tagset); + explicit WcclFile(const Corpus2::Tagset& 
tagset); const std::vector<boost::shared_ptr<UntypedOpSequence> >& untyped_sections(); template<class T> @@ -77,6 +77,8 @@ public: bool has_lexicon(const std::string& name) const; boost::shared_ptr<const Lexicon> get_lexicon_ptr(const std::string& name) const; const Lexicon& get_lexicon(const std::string& name) const; + boost::shared_ptr<const Lexicons> get_lexicons_ptr() const; + const Lexicons& get_lexicons() const; void add_untyped_section(const boost::shared_ptr<UntypedOpSequence>& section); void add_untyped_section(const boost::shared_ptr<const UntypedOpSequence>& section); @@ -116,7 +118,7 @@ private: namespace Wccl { inline -WcclFile::WcclFile(const Corpus2::Tagset tagset) +WcclFile::WcclFile(const Corpus2::Tagset& tagset) : tagset_(tagset) { } @@ -308,6 +310,18 @@ const Lexicon& WcclFile::get_lexicon(const std::string &name) const return lexicons_->get(name); } +inline +boost::shared_ptr<const Lexicons> WcclFile::get_lexicons_ptr() const +{ + return lexicons_; +} + +inline +const Lexicons& WcclFile::get_lexicons() const +{ + return *lexicons_; +} + inline void WcclFile::add_untyped_section(const boost::shared_ptr<UntypedOpSequence>& section) {