diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt index 2e4b2bc29aedb37db29bb5b85cdfe8ddd9d1a24a..120220144a2ef6bcb7e3f31231a6abc568a3a3db 100644 --- a/libwccl/CMakeLists.txt +++ b/libwccl/CMakeLists.txt @@ -77,6 +77,7 @@ SET(libwccl_STAT_SRC ops/match/conditions/repeatedmatch.cpp ops/match/conditions/tokencondition.cpp ops/matchrule.cpp + ops/matchrulesequence.cpp ops/opsequence.cpp ops/tagaction.cpp ops/tagactions/delete.cpp diff --git a/libwccl/ops/matchrulesequence.cpp b/libwccl/ops/matchrulesequence.cpp new file mode 100644 index 0000000000000000000000000000000000000000..eec6e42296fddd9770d6e810a4f1e68ee4e2089c --- /dev/null +++ b/libwccl/ops/matchrulesequence.cpp @@ -0,0 +1,46 @@ +#include <libwccl/ops/matchrulesequence.h> +#include <libpwrutils/foreach.h> + +namespace Wccl { + +void MatchRuleSequence::apply_all(const boost::shared_ptr<Corpus2::AnnotatedSentence>& sentence) /* Applies every contained rule to the sentence, in sequence order; throws InvalidArgument when the pointer is null or the sentence is empty. */ +{ + if(!sentence || sentence->empty()) { + throw InvalidArgument( + "sentence", + "Received an empty sentence."); + } + foreach (MatchRule& rule, *this) { + rule.apply(sentence); + } +} + +std::string MatchRuleSequence::to_string(const Corpus2::Tagset& tagset) const /* Builds the text form: the match_rules( opener and a newline, each rule rendered via to_string(tagset) with a semicolon and newline between rules, then a closing parenthesis. */ +{ + std::ostringstream os; + os << "match_rules(\n"; + for (size_t i = 0; i < size(); ++i) { + if (i != 0) { + os << ";\n"; + } + os << at(i).to_string(tagset); + } + os << ")"; + return os.str(); +} + +std::ostream& MatchRuleSequence::write_to(std::ostream &os) const /* Writes the same layout as to_string(), but streams each rule with operator<< rather than to_string(tagset); returns os. */ +{ + os << "match_rules(\n"; + for (size_t i = 0; i < size(); ++i) { + if (i != 0) { + os << ";\n"; + } + os << at(i); + } + os << ")"; + return os; +} + +} /* end ns Wccl */ + diff --git a/libwccl/ops/matchrulesequence.h b/libwccl/ops/matchrulesequence.h new file mode 100644 index 0000000000000000000000000000000000000000..db908a16ba1758bbb8fe75bcafccc563adfe5c0f --- /dev/null +++ b/libwccl/ops/matchrulesequence.h @@ -0,0 +1,82 @@ +#ifndef LIBWCCL_OPS_MATCHRULESEQUENCE_H +#define LIBWCCL_OPS_MATCHRULESEQUENCE_H + +#include <libwccl/ops/matchrule.h> + 
+namespace Wccl { + +/** + * Represents a sequence of parsed WCCL Match rules. It's a convenience wrapper around + * a vector of MatchRule objects that allows automatic execution of all contained MatchRules + * one by one. + * @note The class methods are not thread-safe + */ +class MatchRuleSequence : public std::vector<MatchRule>, public Expression +{ +public: + MatchRuleSequence(const std::vector<MatchRule>& rules); /* Creates a sequence holding a copy of the given rules. */ + + MatchRuleSequence(); /* Creates an empty sequence. */ + + /** + * Applies all contained MatchRules to the given sentence, one after + * another, in sequence order. + * This operator returns nothing; it simply forwards to apply_all(), + * so anything apply_all() throws is thrown from here as well. + * @param sentence Sentence to execute on. + * @throws InvalidArgument (from apply_all()) if sentence is a null pointer or is empty. + * @see apply_all() - equivalent method; the \link operator()() operator() \endlink allows + * more convenient functional notation, however if you only have a pointer + * you might prefer the apply_all() method as shown below. The choice is yours. + * \code + * // function-object notation: + * ruleseq(sentence); + * // versus + * ruleseq.apply_all(sentence); + * // or if you have a pointer... + * (*ruleseq_ptr)(sentence); + * // versus + * ruleseq_ptr->apply_all(sentence); + * \endcode + */ + void operator()(const boost::shared_ptr<Corpus2::AnnotatedSentence>& sentence); + + /** + * Applies all contained MatchRules to the given sentence, one after + * another, in sequence order. + * Returns nothing; each rule is run through MatchRule::apply() + * on the very same sentence object. + * @param sentence Sentence to execute on. + * @throws InvalidArgument if sentence is a null pointer or is empty. + * @see \link operator()() operator() \endlink - an equivalent of this method that + * allows functional notation, treating MatchRuleSequence directly as a function object 
+ */ + void apply_all(const boost::shared_ptr<Corpus2::AnnotatedSentence>& sentence); + + std::string to_string(const Corpus2::Tagset& tagset) const; /* Text form of the whole sequence; rules are rendered with the given tagset. */ +protected: + std::ostream& write_to(std::ostream& os) const; /* Streams the text form of the sequence to os and returns os. */ +}; + + + +// +//--- implementation details --- +// +inline +MatchRuleSequence::MatchRuleSequence(const std::vector<MatchRule>& rules) + : std::vector<MatchRule>(rules) { +} + +inline +MatchRuleSequence::MatchRuleSequence() + : std::vector<MatchRule>() { +} + +inline +void MatchRuleSequence::operator()(const boost::shared_ptr<Corpus2::AnnotatedSentence>& sentence) { + apply_all(sentence); +} + +} /* end ns Wccl */ +#endif // LIBWCCL_OPS_MATCHRULESEQUENCE_H diff --git a/libwccl/ops/tagrulesequence.h b/libwccl/ops/tagrulesequence.h index 4a1ab672876b3b6224e793246e7da5b4fe02729b..34d5534ebac76610f97b20195e969fb39e19f7c7 100644 --- a/libwccl/ops/tagrulesequence.h +++ b/libwccl/ops/tagrulesequence.h @@ -6,15 +6,15 @@ namespace Wccl { /** - * Represents a sequence of parsed WCCL Rules. It's a conveniency wrapper around - * vector of Rule objects, that allows automatic execution of all contained Rules + * Represents a sequence of parsed WCCL tag rules. It's a convenience wrapper around + * vector of TagRule objects, that allows automatic execution of all contained TagRules * for all positions of a Sentence. 
* @note The class methods are not thread-safe */ class TagRuleSequence : public std::vector<TagRule>, public Expression { public: - TagRuleSequence(std::vector<TagRule> rules); + TagRuleSequence(const std::vector<TagRule>& rules); /* takes the rules by const reference; the base vector is copy-constructed from them */ TagRuleSequence(); @@ -77,7 +77,7 @@ protected: //--- implementation details --- // inline -TagRuleSequence::TagRuleSequence(std::vector<TagRule> rules) +TagRuleSequence::TagRuleSequence(const std::vector<TagRule>& rules) : std::vector<TagRule>(rules) { } diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index eb9a9c74499399cfc9406a617a3d832bd15d0ecc..c6958c7d7ddb83c66f44e0a022d9d0952211f628 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -1915,6 +1915,7 @@ match_rules_section [WcclFile& wccl_file] { ParsingScope scope(wccl_file); boost::shared_ptr<MatchRule> match_rule; + boost::shared_ptr<MatchRuleSequence> rule_seq = boost::make_shared<MatchRuleSequence>(); /* collects the parsed rules; passed to wccl_file.set_match_rules() after RPAREN */ } : "match_rules" { if (wccl_file.has_match_rules()) { @@ -1923,16 +1924,16 @@ match_rules_section [WcclFile& wccl_file] } LPAREN match_rule = match_rule_operator [scope] { - wccl_file.add_match_rule(match_rule); + rule_seq->push_back(*match_rule); /* stores a copy of the parsed rule */ scope.reset_variables(); } ( SEMI match_rule = match_rule_operator [scope] { - wccl_file.add_match_rule(match_rule); + rule_seq->push_back(*match_rule); /* stores a copy of the parsed rule */ scope.reset_variables(); } - )* - RPAREN + )* + RPAREN { wccl_file.set_match_rules(rule_seq); } ; any_operator_section diff --git a/libwccl/wcclfile.cpp b/libwccl/wcclfile.cpp index 4c6edb65264966196e26d4ae926559974b3efff3..0bf8f8f6c09154c533433a60f2923b59ec0fd522 100644 --- a/libwccl/wcclfile.cpp +++ b/libwccl/wcclfile.cpp @@ -36,6 +36,14 @@ boost::shared_ptr<const TagRuleSequence> WcclFile::get_tag_rules_ptr() const return tag_rules_; } +boost::shared_ptr<const MatchRuleSequence> WcclFile::get_match_rules_ptr() const /* Returns the stored sequence as a pointer-to-const; throws WcclError when has_match_rules() is false. */ +{ + if (!has_match_rules()) { + throw WcclError("There are no match rules."); + } + return match_rules_; +} + std::ostream& 
WcclFile::write_to(std::ostream& os) const { if (has_lexicons()) { @@ -51,14 +59,7 @@ std::ostream& WcclFile::write_to(std::ostream& os) const os << tag_rules_->to_string(tagset_) << '\n'; } if (has_match_rules()) { - os << "match_rules(\n"; - for (size_t i = 0; i < match_rules_.size(); ++i) { - if (i != 0) { - os << ";\n"; - } - os << match_rules_[i]->to_string(tagset_); - } - os << "\n)\n"; + os << match_rules_->to_string(tagset_) << '\n'; /* the sequence now renders the whole match_rules section itself */ } return os; } diff --git a/libwccl/wcclfile.h b/libwccl/wcclfile.h index f14816bffd96e41b253cfab225c54637d21d6bf5..a3c4e4ca54e02a71b75c48fb788f6ef4a42b9bb2 100644 --- a/libwccl/wcclfile.h +++ b/libwccl/wcclfile.h @@ -8,7 +8,7 @@ #include <libwccl/values/tset.h> #include <libwccl/wcclfileopsections.h> #include <libwccl/ops/tagrulesequence.h> -#include <libwccl/ops/matchrule.h> +#include <libwccl/ops/matchrulesequence.h> #include <libwccl/lexicon/lexicons.h> #include <libwccl/exception.h> #include <libpwrutils/pathsearch.h> @@ -121,8 +121,11 @@ public: // Match rules section: match_rules ( apply1; apply2 ) // bool has_match_rules() const; - void add_match_rule(const boost::shared_ptr<MatchRule>& match_rule); - const std::vector<boost::shared_ptr<MatchRule> >& get_match_rules(); + + void set_match_rules(const boost::shared_ptr<MatchRuleSequence>& match_rules); /* stores the sequence; throws WcclError if has_match_rules() is already true */ + const MatchRuleSequence& get_match_rules() const; /* dereferences the result of get_match_rules_ptr() */ + boost::shared_ptr<MatchRuleSequence> get_match_rules_ptr(); /* NOTE(review): no definition of this non-const overload is visible in this patch - confirm one exists */ + boost::shared_ptr<const MatchRuleSequence> get_match_rules_ptr() const; /* throws WcclError when has_match_rules() is false */ // // Miscelaneous @@ -138,7 +141,7 @@ private: std::ostream& write_to(std::ostream& ostream) const; std::vector<boost::shared_ptr<FunctionalOpSequence> > all_sections_; boost::shared_ptr<TagRuleSequence> tag_rules_; - std::vector<boost::shared_ptr<MatchRule> > match_rules_; + boost::shared_ptr<MatchRuleSequence> match_rules_; /* has_match_rules() reports whether this pointer is non-null */ boost::shared_ptr<Lexicons> lexicons_; const Corpus2::Tagset& tagset_; PwrNlp::PathSearcher<Wccl::FileNotFound> path_; @@ -431,19 +434,22 @@ void 
WcclFile::set_tag_rules(const boost::shared_ptr<TagRuleSequence>& tag_rules inline bool WcclFile::has_match_rules() const { - return !match_rules_.empty(); + return static_cast<bool>(match_rules_); /* true when a sequence has been set; explicit cast because the shared_ptr bool conversion may be declared explicit */ } inline -void WcclFile::add_match_rule(const boost::shared_ptr<MatchRule>& match_rule) +const MatchRuleSequence& WcclFile::get_match_rules() const /* Returns the stored sequence by reference; goes through the const get_match_rules_ptr(), which throws WcclError when there are no match rules. */ { - match_rules_.push_back(match_rule); + return *get_match_rules_ptr(); } inline -const std::vector<boost::shared_ptr<MatchRule> >& WcclFile::get_match_rules() +void WcclFile::set_match_rules(const boost::shared_ptr<MatchRuleSequence>& match_rules) /* Stores the given sequence as the match rules of this file; throws WcclError if match rules are already present. */ { - return match_rules_; + if (has_match_rules()) { + throw WcclError("Match rules already added."); + } + match_rules_ = match_rules; } inline