From ec4caf8d6d63444f90a277d388d0f92b48891473 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adam=20Wardy=C5=84ski?= <no@email>
Date: Wed, 10 Nov 2010 21:02:10 +0100
Subject: [PATCH] Adding "upper" operator as ToUpper class

---
 libwccl/CMakeLists.txt    |  1 +
 libwccl/ops/toupper.cpp   | 26 ++++++++++++++++++
 libwccl/ops/toupper.h     | 56 +++++++++++++++++++++++++++++++++++++++
 tests/strsetfunctions.cpp | 53 ++++++++++++++++++++++++++++++++++++
 4 files changed, 136 insertions(+)
 create mode 100644 libwccl/ops/toupper.cpp
 create mode 100644 libwccl/ops/toupper.h

diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index b142613..c5fb97c 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -22,6 +22,7 @@ SET(libwccl_STAT_SRC
 	ops/or.cpp
 	ops/predicate.cpp
 	ops/tolower.cpp
+	ops/toupper.cpp
 	parser/Parser.cpp
 	parser/ParserException.cpp
 	sentencecontext.cpp
diff --git a/libwccl/ops/toupper.cpp b/libwccl/ops/toupper.cpp
new file mode 100644
index 0000000..1e94d31
--- /dev/null
+++ b/libwccl/ops/toupper.cpp
@@ -0,0 +1,26 @@
+#include <libwccl/ops/toupper.h>
+#include <libwccl/ops/formatters.h>
+
+namespace Wccl {
+
+std::string ToUpper::to_string(const Corpus2::Tagset& tagset) const
+{
+	return UnaryFunctionFormatter::to_string(tagset, *this, *strset_expr_);
+}
+
+std::string ToUpper::to_raw_string() const {
+	return UnaryFunctionFormatter::to_raw_string(*this, *strset_expr_);
+}
+
+ToUpper::BaseRetValPtr ToUpper::apply_internal(const SentenceContext& context) const {
+	const boost::shared_ptr<StrSet >& set = strset_expr_->apply(context);
+	boost::shared_ptr<StrSet > u_set = boost::make_shared<StrSet>();
+	//TODO: should toupper be a method of StrSet as well?
+	foreach(const UnicodeString& s, set->contents()) {
+		//TODO: what about locale? is default ok? should the context hold it?
+		u_set->insert(UnicodeString(s).toUpper());
+	}
+	return u_set;
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/toupper.h b/libwccl/ops/toupper.h
new file mode 100644
index 0000000..6491d92
--- /dev/null
+++ b/libwccl/ops/toupper.h
@@ -0,0 +1,56 @@
+#ifndef LIBWCCL_OPS_TOUPPER_H
+#define LIBWCCL_OPS_TOUPPER_H
+
+#include <boost/shared_ptr.hpp>
+#include <libwccl/values/strset.h>
+#include <libwccl/ops/functions.h>
+
+namespace Wccl {
+
+/**
+ * Operator that takes a set of strings and returns a new
+ * set with corresponding values in upper case form
+ */
+class ToUpper : public Function<StrSet> {
+public:
+	typedef boost::shared_ptr<Function<StrSet> > StrSetFunctionPtr;
+	
+	ToUpper(const StrSetFunctionPtr& strset_expr)
+		: strset_expr_(strset_expr)
+	{
+		BOOST_ASSERT(strset_expr_);
+	}
+
+	/**
+	 * String representation of the operator in form of:
+	 * "upper(strset_expr_string)"
+	 */
+	virtual std::string to_string(const Corpus2::Tagset& tagset) const;
+
+	/**
+	 * String representation of the operator in form of:
+	 * "upper(strset_expr_raw_string)"
+	 * This version does not require a tagset, but may be incomplete
+	 * and/or contain internal info.
+	 */
+	virtual std::string to_raw_string() const;
+
+	virtual const std::string raw_operator_name() const {
+		return "upper";
+	}
+
+protected:
+	const StrSetFunctionPtr strset_expr_;
+
+	typedef FunctionBase::BaseRetValPtr BaseRetValPtr;
+
+	/**
+	 * Get a string set from the argument expression and return a copy of the set
+	 * with all strings in upper case form
+	 */
+	virtual BaseRetValPtr apply_internal(const SentenceContext& context) const;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_TOUPPER_H
diff --git a/tests/strsetfunctions.cpp b/tests/strsetfunctions.cpp
index 4822967..cd64290 100644
--- a/tests/strsetfunctions.cpp
+++ b/tests/strsetfunctions.cpp
@@ -7,6 +7,7 @@
 #include <libwccl/values/strset.h>
 #include <libwccl/sentencecontext.h>
 #include <libwccl/ops/tolower.h>
+#include <libwccl/ops/toupper.h>
 #include <libwccl/ops/constant.h>
 
 using namespace Wccl;
@@ -77,6 +78,28 @@ BOOST_FIXTURE_TEST_CASE(lower_locale, StrSetFix)
 	BOOST_CHECK(lowerset.equals(*to_lower.apply(sc)));
 }
 
+BOOST_FIXTURE_TEST_CASE(upper_locale, StrSetFix)
+{
+	//NOTE: this test is not guaranteed to pass under every locale,
+	//because ToUpper uses the default locale at the moment.
+
+	//The strings below are given as explicitly escaped UTF-8 sequences so that
+	//changing the encoding of this source file cannot affect the test.
+
+	StrSet upperset;
+	upperset.insert(UnicodeString::fromUTF8(
+		"ZA\xC5\xBB\xC3\x93\xC5\x81\xC4\x86G\xC4\x98\xC5\x9AL\xC4\x84JA\xC5\xB9\xC5\x83"
+		"ZA\xC5\xBB\xC3\x93\xC5\x81\xC4\x86G\xC4\x98\xC5\x9AL\xC4\x84JA\xC5\xB9\xC5\x83"));
+	StrSet lowerset;
+	lowerset.insert(UnicodeString::fromUTF8(
+		"za\xC5\xBC\xC3\xB3\xC5\x82\xC4\x87g\xC4\x99\xC5\x9Bl\xC4\x85ja\xC5\xBA\xC5\x84"
+		"ZA\xC5\xBB\xC3\x93\xC5\x81\xC4\x86g\xC4\x98\xC5\x9AL\xC4\x84JA\xC5\xB9\xC5\x83"));
+
+	ToUpper to_upper(boost::shared_ptr<Function<StrSet> >(
+		new Constant<StrSet>(lowerset)));
+
+	BOOST_CHECK(upperset.equals(*to_upper.apply(sc)));
+}
 
 //------ to_string test cases -------
 
@@ -90,4 +113,34 @@ BOOST_FIXTURE_TEST_CASE(lower_to_string, StrSetFix)
 	BOOST_CHECK_EQUAL(expected, to_lower.to_string(tagset));
 }
 
+BOOST_AUTO_TEST_CASE(lower_to_raw_string)
+{
+	StrSet one_elem_set;
+	one_elem_set.insert("YayaAy");
+	ToLower to_lower(boost::shared_ptr<Function<StrSet> >(
+			new Constant<StrSet>(one_elem_set)));
+	std::string expected = "lower([\"YayaAy\"])";
+	BOOST_CHECK_EQUAL(expected, to_lower.to_raw_string());
+}
+
+BOOST_FIXTURE_TEST_CASE(upper_to_string, StrSetFix)
+{
+	StrSet one_elem_set;
+	one_elem_set.insert("YayaAy");
+	ToUpper to_upper(boost::shared_ptr<Function<StrSet> >(
+			new Constant<StrSet>(one_elem_set)));
+	std::string expected = "upper([\"YayaAy\"])";
+	BOOST_CHECK_EQUAL(expected, to_upper.to_string(tagset));
+}
+
+BOOST_AUTO_TEST_CASE(upper_to_raw_string)
+{
+	StrSet one_elem_set;
+	one_elem_set.insert("YayaAy");
+	ToUpper to_upper(boost::shared_ptr<Function<StrSet> >(
+			new Constant<StrSet>(one_elem_set)));
+	std::string expected = "upper([\"YayaAy\"])";
+	BOOST_CHECK_EQUAL(expected, to_upper.to_raw_string());
+}
+
 BOOST_AUTO_TEST_SUITE_END()
-- 
GitLab