diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt index 2e4b2bc29aedb37db29bb5b85cdfe8ddd9d1a24a..120220144a2ef6bcb7e3f31231a6abc568a3a3db 100644 --- a/libwccl/CMakeLists.txt +++ b/libwccl/CMakeLists.txt @@ -77,6 +77,7 @@ SET(libwccl_STAT_SRC ops/match/conditions/repeatedmatch.cpp ops/match/conditions/tokencondition.cpp ops/matchrule.cpp + ops/matchrulesequence.cpp ops/opsequence.cpp ops/tagaction.cpp ops/tagactions/delete.cpp diff --git a/libwccl/ops/matchrulesequence.cpp b/libwccl/ops/matchrulesequence.cpp new file mode 100644 index 0000000000000000000000000000000000000000..eec6e42296fddd9770d6e810a4f1e68ee4e2089c --- /dev/null +++ b/libwccl/ops/matchrulesequence.cpp @@ -0,0 +1,59 @@ +#include <libwccl/ops/matchrulesequence.h> +#include <libpwrutils/foreach.h> +#include <sstream> + +namespace Wccl { + +/** + * Applies every contained MatchRule, in stored order, to the given sentence. + * @throws InvalidArgument if the sentence pointer is null or the sentence is empty. + */ +void MatchRuleSequence::apply_all(const boost::shared_ptr<Corpus2::AnnotatedSentence>& sentence) +{ + if(!sentence || sentence->empty()) { + throw InvalidArgument( + "sentence", + "Received an empty sentence."); + } + foreach (MatchRule& rule, *this) { + rule.apply(sentence); + } +} + +/** + * Builds the textual match_rules( ... ) form of the sequence, rendering each rule + * with the given tagset and separating consecutive rules with a semicolon and a newline. + */ +std::string MatchRuleSequence::to_string(const Corpus2::Tagset& tagset) const +{ + std::ostringstream os; + os << "match_rules(\n"; + for (size_t i = 0; i < size(); ++i) { + if (i != 0) { + os << ";\n"; + } + os << at(i).to_string(tagset); + } + os << ")"; + return os.str(); +} + +/** + * Streams the same match_rules( ... ) layout as to_string(), but writes each rule + * through its stream insertion operator instead of rendering it with a tagset. + */ +std::ostream& MatchRuleSequence::write_to(std::ostream &os) const +{ + os << "match_rules(\n"; + for (size_t i = 0; i < size(); ++i) { + if (i != 0) { + os << ";\n"; + } + os << at(i); + } + os << ")"; + return os; +} + +} /* end ns Wccl */ + diff --git a/libwccl/ops/matchrulesequence.h b/libwccl/ops/matchrulesequence.h new file mode 100644 index 0000000000000000000000000000000000000000..db908a16ba1758bbb8fe75bcafccc563adfe5c0f --- /dev/null +++ b/libwccl/ops/matchrulesequence.h @@ -0,0 +1,82 @@ +#ifndef LIBWCCL_OPS_MATCHRULESEQUENCE_H +#define LIBWCCL_OPS_MATCHRULESEQUENCE_H + +#include <libwccl/ops/matchrule.h> + 
+namespace Wccl { + +/** + * Represents a sequence of parsed WCCL Match rules. It's a convenience wrapper around + * vector of MatchRule objects, that allows automatic execution of all contained MatchRules + * one by one. + * @note The class methods are not thread-safe + */ +class MatchRuleSequence : public std::vector<MatchRule>, public Expression +{ +public: + MatchRuleSequence(const std::vector<MatchRule>& rules); + + MatchRuleSequence(); + + /** + * Applies all contained MatchRules to the given sentence, one after another, + * in the order in which they are stored. + * This operator returns nothing; it simply forwards to apply_all(), so the + * two calls are interchangeable. + * @param sentence Sentence to execute on. + * @throws InvalidArgument if the sentence pointer is null or the sentence + * is empty (raised by apply_all()). + * @see apply_all() - equivalent named method; the \link operator()() operator() \endlink allows + * more convenient functional notation, however if you only have a pointer + * you might prefer the apply_all() method as shown below. The choice is yours. + * \code + * ruleseq(sentence); + * // versus + * ruleseq.apply_all(sentence); + * // or if you have a pointer... + * (*ruleseq_ptr)(sentence); + * // versus + * ruleseq_ptr->apply_all(sentence); + * \endcode + */ + void operator()(const boost::shared_ptr<Corpus2::AnnotatedSentence>& sentence); + + /** + * Applies all contained MatchRules to the given sentence, one after another, + * in the order in which they are stored. Nothing is returned. + * @param sentence Sentence to execute on. + * @throws InvalidArgument if the sentence pointer is null or the sentence + * is empty. + * @see \link operator()() operator() \endlink - an equivalent of this method that + * allows functional notation, treating MatchRuleSequence directly as a + * function object. 
+ */ + void apply_all(const boost::shared_ptr<Corpus2::AnnotatedSentence>& sentence); + + std::string to_string(const Corpus2::Tagset& tagset) const; +protected: + std::ostream& write_to(std::ostream& os) const; +}; + + + +// +//--- implementation details --- +// +inline +MatchRuleSequence::MatchRuleSequence(const std::vector<MatchRule>& rules) + : std::vector<MatchRule>(rules) { +} + +inline +MatchRuleSequence::MatchRuleSequence() + : std::vector<MatchRule>() { +} + +inline +void MatchRuleSequence::operator()(const boost::shared_ptr<Corpus2::AnnotatedSentence>& sentence) { + apply_all(sentence); +} + +} /* end ns Wccl */ +#endif // LIBWCCL_OPS_MATCHRULESEQUENCE_H diff --git a/libwccl/ops/tagrulesequence.h b/libwccl/ops/tagrulesequence.h index 4a1ab672876b3b6224e793246e7da5b4fe02729b..34d5534ebac76610f97b20195e969fb39e19f7c7 100644 --- a/libwccl/ops/tagrulesequence.h +++ b/libwccl/ops/tagrulesequence.h @@ -6,15 +6,15 @@ namespace Wccl { /** - * Represents a sequence of parsed WCCL Rules. It's a conveniency wrapper around - * vector of Rule objects, that allows automatic execution of all contained Rules + * Represents a sequence of parsed WCCL tag rules. It's a convenience wrapper around + * vector of TagRule objects, that allows automatic execution of all contained TagRules * for all positions of a Sentence. 
* @note The class methods are not thread-safe */ class TagRuleSequence : public std::vector<TagRule>, public Expression { public: - TagRuleSequence(std::vector<TagRule> rules); + TagRuleSequence(const std::vector<TagRule>& rules); TagRuleSequence(); @@ -77,7 +77,7 @@ protected: //--- implementation details --- // inline -TagRuleSequence::TagRuleSequence(std::vector<TagRule> rules) +TagRuleSequence::TagRuleSequence(const std::vector<TagRule>& rules) : std::vector<TagRule>(rules) { } diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index eb9a9c74499399cfc9406a617a3d832bd15d0ecc..c6958c7d7ddb83c66f44e0a022d9d0952211f628 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -1915,6 +1915,7 @@ match_rules_section [WcclFile& wccl_file] { ParsingScope scope(wccl_file); boost::shared_ptr<MatchRule> match_rule; + boost::shared_ptr<MatchRuleSequence> rule_seq = boost::make_shared<MatchRuleSequence>(); /* sequence filled while the section is parsed */ } : "match_rules" { if (wccl_file.has_match_rules()) { @@ -1923,16 +1924,16 @@ match_rules_section [WcclFile& wccl_file] } LPAREN match_rule = match_rule_operator [scope] { - wccl_file.add_match_rule(match_rule); + rule_seq->push_back(*match_rule); /* stores a copy of the parsed rule in the sequence */ scope.reset_variables(); } ( SEMI match_rule = match_rule_operator [scope] { - wccl_file.add_match_rule(match_rule); + rule_seq->push_back(*match_rule); /* stores a copy of the parsed rule in the sequence */ scope.reset_variables(); } - )* - RPAREN + )* + RPAREN { wccl_file.set_match_rules(rule_seq); } /* the sequence is handed to the file only after the closing parenthesis */ ; any_operator_section diff --git a/libwccl/wcclfile.cpp b/libwccl/wcclfile.cpp index 4c6edb65264966196e26d4ae926559974b3efff3..0bf8f8f6c09154c533433a60f2923b59ec0fd522 100644 --- a/libwccl/wcclfile.cpp +++ b/libwccl/wcclfile.cpp @@ -36,6 +36,14 @@ boost::shared_ptr<const TagRuleSequence> WcclFile::get_tag_rules_ptr() const return tag_rules_; } +boost::shared_ptr<const MatchRuleSequence> WcclFile::get_match_rules_ptr() const /* returns the match rules section; throws WcclError when none has been set */ +{ + if (!has_match_rules()) { + throw WcclError("There are no match rules."); + } + return match_rules_; +} + std::ostream& 
WcclFile::write_to(std::ostream& os) const { if (has_lexicons()) { @@ -51,14 +59,7 @@ std::ostream& WcclFile::write_to(std::ostream& os) const os << tag_rules_->to_string(tagset_) << '\n'; } if (has_match_rules()) { - os << "match_rules(\n"; - for (size_t i = 0; i < match_rules_.size(); ++i) { - if (i != 0) { - os << ";\n"; - } - os << match_rules_[i]->to_string(tagset_); - } - os << "\n)\n"; + os << match_rules_->to_string(tagset_) << '\n'; /* NOTE(review): MatchRuleSequence::to_string() closes with a bare ")" while the removed code put a newline before it - confirm the layout change is intended */ } return os; } diff --git a/libwccl/wcclfile.h b/libwccl/wcclfile.h index 7d241973e17a6dc4f6a588725c9dd975d511c996..a3c4e4ca54e02a71b75c48fb788f6ef4a42b9bb2 100644 --- a/libwccl/wcclfile.h +++ b/libwccl/wcclfile.h @@ -8,13 +8,19 @@ #include <libwccl/values/tset.h> #include <libwccl/wcclfileopsections.h> #include <libwccl/ops/tagrulesequence.h> -#include <libwccl/ops/matchrule.h> +#include <libwccl/ops/matchrulesequence.h> #include <libwccl/lexicon/lexicons.h> #include <libwccl/exception.h> #include <libpwrutils/pathsearch.h> namespace Wccl { +/** + * Represents the contents of a parsed WCCL file: + * imported lexicons, any typed and untyped named + * operator sections, up to one tag rules section, + * and up to one match rules section. 
+ */ class WcclFile : WcclFileOpSections<UntypedOpSequence>, WcclFileOpSections<OpSequence<StrSet> >, @@ -26,6 +32,9 @@ class WcclFile public: WcclFile(const Corpus2::Tagset& tagset, const std::string& search_path); + // + // Untyped and typed operator sections: @X:"section name" ( op1; op2 ) + // const std::vector<boost::shared_ptr<UntypedOpSequence> >& untyped_sections(); template<class T> const typename std::vector<boost::shared_ptr<OpSequence<T> > >& sections(); @@ -76,14 +85,6 @@ public: FunctionalOpSequence::name_op_v_t gen_all_op_pairs(); FunctionalOpSequence::name_op_v_c_t gen_all_op_pairs() const; - void import_lexicon(const boost::shared_ptr<Lexicon>& lexicon); - bool has_lexicon(const std::string& name) const; - bool has_lexicons() const; - boost::shared_ptr<const Lexicon> get_lexicon_ptr(const std::string& name) const; - const Lexicon& get_lexicon(const std::string& name) const; - boost::shared_ptr<const Lexicons> get_lexicons_ptr() const; - const Lexicons& get_lexicons() const; - void add_untyped_section(const boost::shared_ptr<UntypedOpSequence>& section); void add_untyped_section(const boost::shared_ptr<const UntypedOpSequence>& section); void add_untyped_section(const UntypedOpSequence& section); @@ -94,6 +95,20 @@ public: template<class T> void add_section(const OpSequence<T>& section); + // + // Lexicons, import sections: import("path", "name") + // + void import_lexicon(const boost::shared_ptr<Lexicon>& lexicon); + bool has_lexicon(const std::string& name) const; + bool has_lexicons() const; + boost::shared_ptr<const Lexicon> get_lexicon_ptr(const std::string& name) const; + const Lexicon& get_lexicon(const std::string& name) const; + boost::shared_ptr<const Lexicons> get_lexicons_ptr() const; + const Lexicons& get_lexicons() const; + + // + // Tag rules section: tag_rules ( rule1; rule2 ) + // bool has_tag_rules() const; void set_tag_rules(const boost::shared_ptr<TagRuleSequence>& tag_rules); @@ -102,10 +117,19 @@ public: 
boost::shared_ptr<TagRuleSequence> get_tag_rules_ptr(); boost::shared_ptr<const TagRuleSequence> get_tag_rules_ptr() const; + // + // Match rules section: match_rules ( apply1; apply2 ) + // bool has_match_rules() const; - void add_match_rule(const boost::shared_ptr<MatchRule>& match_rule); - const std::vector<boost::shared_ptr<MatchRule> >& get_match_rules(); + void set_match_rules(const boost::shared_ptr<MatchRuleSequence>& match_rules); /* throws WcclError if a match rules section is already set */ + const MatchRuleSequence& get_match_rules() const; /* throws WcclError if there is no match rules section */ + boost::shared_ptr<MatchRuleSequence> get_match_rules_ptr(); /* NOTE(review): no definition of this non-const overload is visible in this patch - confirm one exists */ + boost::shared_ptr<const MatchRuleSequence> get_match_rules_ptr() const; /* throws WcclError if there is no match rules section */ + + // + // Miscellaneous + // friend std::ostream& operator<<(std::ostream& ostream, const WcclFile& wccl_file); std::string to_string() const; @@ -117,7 +141,7 @@ private: std::ostream& write_to(std::ostream& ostream) const; std::vector<boost::shared_ptr<FunctionalOpSequence> > all_sections_; boost::shared_ptr<TagRuleSequence> tag_rules_; - std::vector<boost::shared_ptr<MatchRule> > match_rules_; + boost::shared_ptr<MatchRuleSequence> match_rules_; boost::shared_ptr<Lexicons> lexicons_; const Corpus2::Tagset& tagset_; PwrNlp::PathSearcher<Wccl::FileNotFound> path_; @@ -410,19 +434,22 @@ void WcclFile::set_tag_rules(const boost::shared_ptr<TagRuleSequence>& tag_rules inline bool WcclFile::has_match_rules() const { - return !match_rules_.empty(); + return static_cast<bool>(match_rules_); /* explicit cast: a plain return fails where shared_ptr's bool conversion is explicit */ } inline -void WcclFile::add_match_rule(const boost::shared_ptr<MatchRule>& match_rule) +const MatchRuleSequence& WcclFile::get_match_rules() const { - match_rules_.push_back(match_rule); + return *get_match_rules_ptr(); } inline -const std::vector<boost::shared_ptr<MatchRule> >& WcclFile::get_match_rules() +void WcclFile::set_match_rules(const boost::shared_ptr<MatchRuleSequence>& match_rules) { - return match_rules_; + if (has_match_rules()) { + throw WcclError("Match rules already added."); + } + match_rules_ = match_rules; } inline