diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 7fa31d3812b9056e8b3081c9ed489f3f26c6ad70..f9dc761d8eea65f8d29fdb02e99d379bc5ffad41 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -60,6 +60,7 @@ SET(libwccl_STAT_SRC
 	ops/functions/tset/getsymbols.cpp
 	ops/functions/tset/getsymbolsinrange.cpp
 	ops/rule.cpp
+	ops/rulesequence.cpp
 	parser/grammar.g
 	parser/Parser.cpp
 	parser/ParserException.cpp
diff --git a/libwccl/ops/rulesequence.cpp b/libwccl/ops/rulesequence.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4e754db67eda9b6a9b23c3c0094e9c5098d59ea7
--- /dev/null
+++ b/libwccl/ops/rulesequence.cpp
@@ -0,0 +1,60 @@
+#include <libwccl/ops/rulesequence.h>
+#include <libpwrutils/foreach.h>
+
+
+namespace Wccl {
+
+/**
+ * Makes a single pass over the sentence, executing every contained Rule
+ * at each position visited by a SentenceContext created for the sentence.
+ * @param sentence Sentence to process; must be neither null nor empty.
+ * @returns Bool set to true if at least one Rule execution reported a change,
+ * false otherwise.
+ * @throws InvalidArgument if sentence is null or empty.
+ */
+Bool RuleSequence::execute_once(const boost::shared_ptr<Corpus2::Sentence>& sentence)
+{
+	if (!sentence || sentence->empty()) {
+		throw InvalidArgument("sentence", "Received an empty sentence.");
+	}
+	Bool any_change(false);
+	SentenceContext context(sentence);
+	for (; context.is_current_inside(); context.advance()) {
+		// Every Rule of the sequence is tried at the current position.
+		foreach (Rule& current_rule, *this) {
+			if (current_rule.execute(context).get_value()) {
+				any_change.set_value(true);
+			}
+		}
+	}
+	return any_change;
+}
+
+/**
+ * Repeats execute_once() on the sentence until a pass reports no change
+ * or max_iter passes have been made, whichever comes first.
+ * @param sentence Sentence to process; it is only checked by execute_once(),
+ * so it is never inspected when max_iter is 0.
+ * @param max_iter Upper bound on the number of passes; must not be negative.
+ * @returns Number of passes performed, including the final pass that
+ * reported no change; equals max_iter when the bound was reached.
+ * @throws InvalidArgument if max_iter is negative.
+ */
+int RuleSequence::execute_until_done(const boost::shared_ptr<Corpus2::Sentence>& sentence, int max_iter)
+{
+	if (max_iter < 0) {
+		throw InvalidArgument(
+			"max_iter",
+			"Supplied a negative value for maximum number of iterations.");
+	}
+	for (int passes_done = 0; passes_done < max_iter; ++passes_done) {
+		if (!execute_once(sentence).get_value()) {
+			// This pass changed nothing: count it and stop.
+			return passes_done + 1;
+		}
+	}
+	// Either every allowed pass reported changes, or max_iter was 0.
+	return max_iter;
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/rulesequence.h b/libwccl/ops/rulesequence.h
new file mode 100644
index 0000000000000000000000000000000000000000..f8e5d2fb89d53848a725b2487e774f6bb6177e4d
--- /dev/null
+++ b/libwccl/ops/rulesequence.h
@@ -0,0 +1,92 @@
+#ifndef LIBWCCL_OPS_RULESEQUENCE_H
+#define LIBWCCL_OPS_RULESEQUENCE_H
+
+#include <libwccl/ops/rule.h>
+
+namespace Wccl {
+
+/**
+ * Represents a sequence of parsed WCCL Rules. It's a convenience wrapper around
+ * vector of Rule objects, that allows automatic execution of all contained Rules
+ * for all positions of a Sentence.
+ * @note The class methods are not thread-safe
+ */
+class RuleSequence : public std::vector<Rule>
+{
+public:
+	RuleSequence(std::vector<Rule> rules); ///< Creates a sequence initialized with a copy of the given Rules.
+
+	RuleSequence(); ///< Creates an empty sequence.
+
+	/**
+	 * Executes all contained Rules sequentially, once for each position
+	 * starting from 0 to given sentence's end.
+	 * @returns True if any of the Rules made a change on any of the sentence's positions, False otherwise.
+	 * @throws InvalidArgument if the sentence pointer is null or the sentence is empty.
+	 * @param sentence Sentence to execute on.
+	 * @see execute_once() - equivalent method; the \link operator()() operator() \endlink allows
+	 * more convenient functional notation, however if you only have a pointer
+	 * you might prefer the execute_once() method as shown below. The choice is yours.
+	 * @see execute_until_done() - executes all Rules repeatedly, until there are no changes.
+	 * \code
+	 * Bool res;
+	 * res = ruleseq(sentence);
+	 * // versus
+	 * res = ruleseq.execute_once(sentence);
+	 * // or if you have a pointer...
+	 * res = (*ruleseq_ptr)(sentence);
+	 * // versus
+	 * res = ruleseq_ptr->execute_once(sentence);
+	 * \endcode
+	 */
+	Bool operator()(const boost::shared_ptr<Corpus2::Sentence>& sentence);
+
+	/**
+	 * Executes all contained Rules sequentially, once for each position
+	 * starting from 0 to given sentence's end.
+	 * @returns True if any of the Rules made a change on any of the sentence's positions, False otherwise.
+	 * @throws InvalidArgument if the sentence pointer is null or the sentence is empty.
+	 * @param sentence Sentence to execute on.
+	 * @see \link operator()() operator() \endlink - an equivalent of this method that
+	 * allows functional notation, treating RuleSequence directly as a function object
+	 * @see execute_until_done() - executes all Rules repeatedly, until there are no changes.
+	 */
+	Bool execute_once(const boost::shared_ptr<Corpus2::Sentence>& sentence);
+
+	/**
+	 * Executes all contained Rules sequentially, for each position
+	 * starting from 0 to given sentence's end. Repeats until there are no changes,
+	 * or specified number of iterations have been executed.
+	 * @returns Number of iterations executed: up to and including the first one that made
+	 * no changes, or max_iter if changes were still reported when the limit was reached.
+	 * @throws InvalidArgument if max_iter is negative, or (when max_iter > 0) if the sentence is null or empty.
+	 * @param sentence Sentence to execute on.
+	 * @param max_iter Maximum number of iterations (mostly to safeguard from an infinite loop)
+	 * @see execute_once(), \link operator()() operator() \endlink - execute Rules but only once
+	 */
+	int execute_until_done(const boost::shared_ptr<Corpus2::Sentence>& sentence, int max_iter = 1000);
+};
+
+
+
+//
+//--- implementation details ---
+//
+inline
+RuleSequence::RuleSequence(std::vector<Rule> rules)
+	: std::vector<Rule>(rules) {
+}
+
+inline
+RuleSequence::RuleSequence()
+	: std::vector<Rule>() {
+}
+
+inline
+Bool RuleSequence::operator()(const boost::shared_ptr<Corpus2::Sentence>& sentence) {
+	return execute_once(sentence);
+}
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_RULESEQUENCE_H