From 261c5b03ea34460359b6b4742dc3db4f2262af85 Mon Sep 17 00:00:00 2001
From: Adam Wardynski <award@.(win7-laptop)>
Date: Thu, 21 Apr 2011 19:02:47 +0200
Subject: [PATCH] text() match condition, matching given text.

---
 libwccl/CMakeLists.txt                     |  1 +
 libwccl/ops/match/conditions/matchtext.cpp | 55 +++++++++++++++++++++++
 libwccl/ops/match/conditions/matchtext.h   | 57 ++++++++++++++++++++++++
 3 files changed, 113 insertions(+)
 create mode 100644 libwccl/ops/match/conditions/matchtext.cpp
 create mode 100644 libwccl/ops/match/conditions/matchtext.h

diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 92bfe50..eff2894 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -65,6 +65,7 @@ SET(libwccl_STAT_SRC
 	ops/match/conditions/conjconditions.cpp
 	ops/match/conditions/isannotatedas.cpp
 	ops/match/conditions/longest.cpp
+	ops/match/conditions/matchtext.cpp
 	ops/match/conditions/oneof.cpp
 	ops/match/conditions/optionalmatch.cpp
 	ops/match/conditions/repeatedmatch.cpp
diff --git a/libwccl/ops/match/conditions/matchtext.cpp b/libwccl/ops/match/conditions/matchtext.cpp
new file mode 100644
index 0000000..60e7c37
--- /dev/null
+++ b/libwccl/ops/match/conditions/matchtext.cpp
@@ -0,0 +1,55 @@
+#include <libwccl/ops/match/conditions/matchtext.h>
+#include <sstream>
+#include <libpwrutils/util.h>
+
+
+namespace Wccl {
+
+MatchResult MatchText::apply(const ActionExecContext& context) const
+{
+	SentenceContext& sc = context.sentence_context();
+
+	int orig_iter = sc.get_position();
+
+	// NOTE(review): assumes the current position points at a valid token,
+	// since sc.at(orig_iter) is dereferenced unchecked - confirm with callers.
+	// Build a sentence fragment from consecutive token orths; a single space
+	// is inserted before each token whose wa() is not Whitespace::None.
+	UnicodeString sent_frag(sc.at(orig_iter)->orth());
+	int iter_pos = orig_iter + 1;
+	// Stop as soon as the fragment is at least as long as the wanted text:
+	// consuming further tokens past that point would break an exact match.
+	while(sent_frag.length() < text_.length() && iter_pos < sc.size()) {
+		if (sc.at(iter_pos)->wa() != PwrNlp::Whitespace::None) {
+			sent_frag += " ";
+		}
+		sent_frag += sc.at(iter_pos)->orth();
+		++iter_pos;
+	}
+
+	if (sent_frag != text_) {
+		return MatchResult();
+	}
+
+	boost::shared_ptr<MatchVector> v(new MatchVector());
+	for(int i = orig_iter; i < iter_pos; ++i) {
+		v->append(boost::shared_ptr<TokenMatch>(new TokenMatch(i)));
+	}
+	// increase current sentence position to the point after the matched tokens.
+	sc.set_position(orig_iter + v->size());
+	return MatchResult(v);
+}
+
+std::string MatchText::to_string(const Corpus2::Tagset&) const
+{
+	std::ostringstream os;
+	os << name() << "(\"" << PwrNlp::to_utf8(text_) << "\")";
+	return os.str();
+}
+
+std::ostream& MatchText::write_to(std::ostream& os) const
+{
+	return os << name() << "(\"" << PwrNlp::to_utf8(text_) << "\")";
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/match/conditions/matchtext.h b/libwccl/ops/match/conditions/matchtext.h
new file mode 100644
index 0000000..f565540
--- /dev/null
+++ b/libwccl/ops/match/conditions/matchtext.h
@@ -0,0 +1,57 @@
+#ifndef LIBWCCL_OPS_MATCH_CONDITIONS_MATCHTEXT_H
+#define LIBWCCL_OPS_MATCH_CONDITIONS_MATCHTEXT_H
+
+#include <libwccl/ops/match/matchcondition.h>
+#include <libwccl/ops/function.h>
+#include <unicode/unistr.h>
+
+namespace Wccl {
+/**
+ * text() match condition - matches the given text against the orths of
+ * consecutive tokens, starting at the current sentence position.
+ */
+class MatchText : public MatchCondition
+{
+public:
+	/**
+	 * @param text Text to match; must not be empty (checked with BOOST_ASSERT).
+	 */
+	MatchText(const UnicodeString& text)
+		: text_(text) {
+		BOOST_ASSERT(!text_.isEmpty());
+	}
+	/**
+	 * @returns Name of the Condition.
+	 */
+	std::string name() const {
+		return "text";
+	}
+	/**
+	 * Applies the condition to the given execution context.
+	 * If a match is found, the current sentence Position is increased
+	 * by the amount of matched tokens.
+	 * @returns Match over the matched tokens, or a default-constructed
+	 * MatchResult if the text was not matched.
+	 */
+	MatchResult apply(const ActionExecContext& context) const;
+
+	/**
+	 * @returns String representation of the Condition
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+protected:
+	/**
+	 * Writes the string representation of the Condition to
+	 * an output stream.
+	 * @returns Stream written to.
+	 * @note May be incomplete and/or contain internal info.
+	 */
+	std::ostream& write_to(std::ostream& ostream) const;
+private:
+	const UnicodeString text_;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_MATCH_CONDITIONS_MATCHTEXT_H
-- 
GitLab