From c7f5fdc27a6eaef4541c7327bd0f281c5bacccd0 Mon Sep 17 00:00:00 2001
From: Adam Wardynski <award@.(win7-laptop)>
Date: Mon, 4 Apr 2011 14:27:43 +0200
Subject: [PATCH] longest - condition that takes longest possible variant.

---
 libwccl/CMakeLists.txt                   |  1 +
 libwccl/ops/match/conditions/longest.cpp | 73 ++++++++++++++++++++++++
 libwccl/ops/match/conditions/longest.h   | 57 ++++++++++++++++++
 3 files changed, 131 insertions(+)
 create mode 100644 libwccl/ops/match/conditions/longest.cpp
 create mode 100644 libwccl/ops/match/conditions/longest.h

diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index d57040a..4e2017b 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -55,6 +55,7 @@ SET(libwccl_STAT_SRC
 	ops/functions/tset/getsymbols.cpp
 	ops/functions/tset/getsymbolsinrange.cpp
 	ops/match/conditions/conjconditions.cpp
+	ops/match/conditions/longest.cpp
 	ops/match/conditions/oneof.cpp
 	ops/match/conditions/optionalmatch.cpp
 	ops/match/conditions/repeatedmatch.cpp
diff --git a/libwccl/ops/match/conditions/longest.cpp b/libwccl/ops/match/conditions/longest.cpp
new file mode 100644
index 0000000..ce8b9aa
--- /dev/null
+++ b/libwccl/ops/match/conditions/longest.cpp
@@ -0,0 +1,73 @@
+#include <libwccl/ops/match/conditions/longest.h>
+#include <libwccl/values/matchvector.h>
+#include <sstream>
+#include <libpwrutils/foreach.h>
+
+namespace Wccl {
+
+// Stores the shared list of variants; it must be non-null and non-empty.
+Longest::Longest(const boost::shared_ptr<std::vector<ConjConditions> >& variants)
+	: _variants(variants)
+{
+	BOOST_ASSERT(_variants);
+	BOOST_ASSERT(!_variants->empty());
+}
+
+MatchResult Longest::apply(const ActionExecContext& context) const
+{
+	int orig_pos = context.sentence_context().get_position();
+	int longest_pos = orig_pos;
+	bool found = false;
+	MatchResult longest;
+
+	foreach(const ConjConditions& variant, *_variants) {
+		MatchResult res = variant.apply(context);
+		int cur_pos = context.sentence_context().get_position();
+		// The first successful variant is always accepted, even when it did
+		// not advance the position; otherwise a variant that matched without
+		// consuming tokens would make the whole condition report failure.
+		// Later variants replace it only when they end strictly further.
+		if (res.matched() && (!found || longest_pos < cur_pos)) {
+			found = true;
+			longest_pos = cur_pos;
+			longest = res;
+		}
+		// Rewind so that each variant starts from the same position.
+		context.sentence_context().set_position(orig_pos);
+	}
+
+	// longest_pos is still orig_pos when no variant matched.
+	context.sentence_context().set_position(longest_pos);
+	return longest;
+}
+
+// Output has the form name(variant<v0>, variant<v1>, ...), where each <vN>
+// is the given variant's own to_string(tagset) result.
+std::string Longest::to_string(const Corpus2::Tagset& tagset) const
+{
+	std::ostringstream ostream;
+	ostream << name() << "(";
+	for (size_t i = 0; i < _variants->size(); ++i) {
+		if (i != 0) {
+			ostream << ", ";
+		}
+		ostream << "variant" << _variants->at(i).to_string(tagset);
+	}
+	ostream << ")";
+	return ostream.str();
+}
+
+// Same layout as to_string(), but each variant is streamed directly.
+std::ostream& Longest::write_to(std::ostream& ostream) const
+{
+	ostream << name() << "(";
+	for (size_t i = 0; i < _variants->size(); ++i) {
+		if (i != 0) {
+			ostream << ", ";
+		}
+		ostream << "variant" << _variants->at(i);
+	}
+	return ostream << ")";
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/match/conditions/longest.h b/libwccl/ops/match/conditions/longest.h
new file mode 100644
index 0000000..2aed55b
--- /dev/null
+++ b/libwccl/ops/match/conditions/longest.h
@@ -0,0 +1,57 @@
+#ifndef LIBWCCL_OPS_MATCH_CONDITIONS_LONGEST_H
+#define LIBWCCL_OPS_MATCH_CONDITIONS_LONGEST_H
+
+#include <libwccl/ops/match/conditions/conjconditions.h>
+
+namespace Wccl {
+
+/**
+ * Class for "longest" condition of match
+ */
+class Longest : public MatchCondition
+{
+public:
+	/**
+	 * @param variants Alternative match variants to try; asserted to be
+	 * a non-null pointer to a non-empty vector.
+	 */
+	Longest(const boost::shared_ptr<std::vector<ConjConditions> >& variants);
+
+	/**
+	 * @returns Name of the condition.
+	 */
+	std::string name() const {
+		return "longest";
+	}
+	/**
+	 * Applies the condition to the given execution context.
+	 * Inner match variants are executed one by one, each starting from
+	 * the current sentence position, to check which one is the longest.
+	 * If any match variants were found to be "true", the one that ended
+	 * furthest is returned with "true"; on a tie the earliest such
+	 * variant wins.
+	 * If there was no "true" match variant, "false" is returned instead.
+	 * If a match is found, the current sentence position is moved to
+	 * where the chosen variant left it, otherwise it stays unchanged.
+	 */
+	MatchResult apply(const ActionExecContext& context) const;
+
+	/**
+	 * @returns String representation of the MatchCondition
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+protected:
+	/**
+	 * Writes string representation of the MatchCondition to
+	 * an output stream.
+	 * @returns Stream written to.
+	 * @note May be incomplete and/or contain internal info.
+	 */
+	std::ostream& write_to(std::ostream& ostream) const;
+private:
+	const boost::shared_ptr<std::vector<ConjConditions> > _variants;
+};
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_MATCH_CONDITIONS_LONGEST_H
-- 
GitLab