From 261c5b03ea34460359b6b4742dc3db4f2262af85 Mon Sep 17 00:00:00 2001
From: Adam Wardynski <award@.(win7-laptop)>
Date: Thu, 21 Apr 2011 19:02:47 +0200
Subject: [PATCH] text() match condition, matching given text.

---
 libwccl/CMakeLists.txt                     |  1 +
 libwccl/ops/match/conditions/matchtext.cpp | 49 +++++++++++++++++++++
 libwccl/ops/match/conditions/matchtext.h   | 51 ++++++++++++++++++++++
 3 files changed, 101 insertions(+)
 create mode 100644 libwccl/ops/match/conditions/matchtext.cpp
 create mode 100644 libwccl/ops/match/conditions/matchtext.h

diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 92bfe50..eff2894 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -65,6 +65,7 @@ SET(libwccl_STAT_SRC
 	ops/match/conditions/conjconditions.cpp
 	ops/match/conditions/isannotatedas.cpp
 	ops/match/conditions/longest.cpp
+	ops/match/conditions/matchtext.cpp
 	ops/match/conditions/oneof.cpp
 	ops/match/conditions/optionalmatch.cpp
 	ops/match/conditions/repeatedmatch.cpp
diff --git a/libwccl/ops/match/conditions/matchtext.cpp b/libwccl/ops/match/conditions/matchtext.cpp
new file mode 100644
index 0000000..60e7c37
--- /dev/null
+++ b/libwccl/ops/match/conditions/matchtext.cpp
@@ -0,0 +1,49 @@
+#include <libwccl/ops/match/conditions/matchtext.h>
+#include <sstream>
+#include <libpwrutils/util.h>
+
+
+namespace Wccl {
+
+MatchResult MatchText::apply(const ActionExecContext& context) const
+{
+	SentenceContext& sc = context.sentence_context();
+	// Compare text_ with the orths of consecutive tokens, starting at the current position.
+	int orig_iter = sc.get_position();
+	// NOTE(review): sc.at(orig_iter) is dereferenced unchecked - confirm the position is always valid here.
+	UnicodeString sent_frag(sc.at(orig_iter)->orth());
+	int iter_pos = orig_iter + 1;
+	while(sent_frag.length() < text_.length() && iter_pos < sc.size()) {
+		if (sc.at(iter_pos)->wa() != PwrNlp::Whitespace::None) {
+			sent_frag += " ";
+		}
+		sent_frag += sc.at(iter_pos)->orth();
+		++iter_pos;
+	}
+	// Strict '<' above: stop once the fragment is long enough, so an exact match is not overrun by one more token.
+	if (sent_frag != text_) {
+		return MatchResult();
+	}
+	// One TokenMatch per glued token, i.e. for every index in [orig_iter, iter_pos).
+	boost::shared_ptr<MatchVector> v(new MatchVector());
+	for(int i = orig_iter; i < iter_pos; ++i) {
+		v->append(boost::shared_ptr<TokenMatch>(new TokenMatch(i)));
+	}
+	// increase current sentence position to the point after the matched tokens.
+	sc.set_position(orig_iter + v->size());
+	return MatchResult(v);
+}
+
+std::string MatchText::to_string(const Corpus2::Tagset&) const
+{
+	// The tagset is not used: the result is name("text"), with the text encoded as UTF-8.
+	const std::string quoted_text = "\"" + std::string(PwrNlp::to_utf8(text_)) + "\"";
+	return name() + "(" + quoted_text + ")";
+}
+
+std::ostream& MatchText::write_to(std::ostream& os) const
+{
+	return os << name() << "(\"" << PwrNlp::to_utf8(text_) << "\")"; // writes name("text"), text as UTF-8; returns os
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/match/conditions/matchtext.h b/libwccl/ops/match/conditions/matchtext.h
new file mode 100644
index 0000000..f565540
--- /dev/null
+++ b/libwccl/ops/match/conditions/matchtext.h
@@ -0,0 +1,51 @@
+#ifndef LIBWCCL_OPS_MATCH_CONDITIONS_MATCHTEXT_H
+#define LIBWCCL_OPS_MATCH_CONDITIONS_MATCHTEXT_H
+
+#include <libwccl/ops/match/matchcondition.h>
+#include <libwccl/ops/function.h>
+#include <unicode/unistr.h>
+
+namespace Wccl {
+/**
+ * text() match condition - matches the given text against the orths of consecutive tokens
+ */
+class MatchText : public MatchCondition
+{
+public:
+	/** @param text Text to match; checked with BOOST_ASSERT to be non-empty. */
+	MatchText(const UnicodeString& text)
+		: text_(text) {
+			BOOST_ASSERT(!text_.isEmpty());
+	}
+	/** @returns Name of the Condition, "text". */
+	std::string name() const {
+		return "text";
+	}
+	/**
+	 * Applies the condition to the given execution context.
+	 * If a match is found, the current sentence Position is increased
+	 * by the amount of matched tokens.
+	 */
+	MatchResult apply(const ActionExecContext& context) const;
+
+	/**
+	 * @returns String representation of the Condition.
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+protected:
+	/**
+	 * Writes the string representation of the Condition to
+	 * an output stream.
+	 * @returns Stream written to.
+	 * @note May be incomplete and/or contain internal info.
+	 */
+	std::ostream& write_to(std::ostream& ostream) const;
+private:
+	/** Text this condition matches; set once in the constructor. */
+	const UnicodeString text_;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_MATCH_CONDITIONS_MATCHTEXT_H
-- 
GitLab