From 60c8f519ae6f20fd16550c772bb0952e5b760de3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adam=20Wardy=C5=84ski?= <no@email>
Date: Wed, 10 Nov 2010 20:41:49 +0100
Subject: [PATCH] Adding "lower" operator

---
 libwccl/CMakeLists.txt    |  1 +
 libwccl/ops/tolower.cpp   | 26 +++++++++++
 libwccl/ops/tolower.h     | 56 +++++++++++++++++++++++
 tests/CMakeLists.txt      |  1 +
 tests/strsetfunctions.cpp | 93 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 177 insertions(+)
 create mode 100644 libwccl/ops/tolower.cpp
 create mode 100644 libwccl/ops/tolower.h
 create mode 100644 tests/strsetfunctions.cpp

diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index b3bb20e..b142613 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -21,6 +21,7 @@ SET(libwccl_STAT_SRC
     ops/nor.cpp
 	ops/or.cpp
 	ops/predicate.cpp
+	ops/tolower.cpp
 	parser/Parser.cpp
 	parser/ParserException.cpp
 	sentencecontext.cpp
diff --git a/libwccl/ops/tolower.cpp b/libwccl/ops/tolower.cpp
new file mode 100644
index 0000000..d553cc7
--- /dev/null
+++ b/libwccl/ops/tolower.cpp
@@ -0,0 +1,26 @@
+#include <libwccl/ops/tolower.h>
+#include <libwccl/ops/formatters.h>
+
+namespace Wccl {
+
+std::string ToLower::to_string(const Corpus2::Tagset& tagset) const
+{
+	return UnaryFunctionFormatter::to_string(tagset, *this, *strset_expr_);
+}
+
+std::string ToLower::to_raw_string() const {
+	return UnaryFunctionFormatter::to_raw_string(*this, *strset_expr_);
+}
+
+ToLower::BaseRetValPtr ToLower::apply_internal(const SentenceContext& context) const {
+	const boost::shared_ptr<StrSet >& set = strset_expr_->apply(context);
+	boost::shared_ptr<StrSet > l_set = boost::make_shared<StrSet>();
+	//TODO: should tolower be a method of StrSet as well?
+	foreach(const UnicodeString& s, set->contents()) {
+		//TODO: what about locale? is default ok? should the context hold it?
+		l_set->insert(UnicodeString(s).toLower());
+	}
+	return l_set;
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/tolower.h b/libwccl/ops/tolower.h
new file mode 100644
index 0000000..7aeb4c0
--- /dev/null
+++ b/libwccl/ops/tolower.h
@@ -0,0 +1,56 @@
+#ifndef LIBWCCL_OPS_TOLOWER_H
+#define LIBWCCL_OPS_TOLOWER_H
+
+#include <boost/shared_ptr.hpp>
+#include <libwccl/values/strset.h>
+#include <libwccl/ops/functions.h>
+
+namespace Wccl {
+
+/**
+ * Operator that takes a set of strings and returns a new
+ * set with corresponding values in lower case form
+ */
+class ToLower : public Function<StrSet> {
+public:
+	typedef boost::shared_ptr<Function<StrSet> > StrSetFunctionPtr;
+	
+	ToLower(const StrSetFunctionPtr& strset_expr)
+		: strset_expr_(strset_expr)
+	{
+		BOOST_ASSERT(strset_expr_);
+	}
+
+	/**
+	 * String representation of the operator in form of:
+	 * "lower(strset_expr_string)"
+	 */
+	virtual std::string to_string(const Corpus2::Tagset& tagset) const;
+
+	/**
+	 * Raw string representation of the operator in form of:
+	 * "lower(strset_expr_raw_string)"
+	 * This version does not require tagset, but may be incomplete
+	 * and/or contain internal info.
+	 */
+	virtual std::string to_raw_string() const;
+
+	virtual const std::string raw_operator_name() const {
+		return "lower";
+	}
+
+protected:
+	const StrSetFunctionPtr strset_expr_;
+
+	typedef FunctionBase::BaseRetValPtr BaseRetValPtr;
+
+	/**
+	 * Get a string set from the argument expression and return a copy of
+	 * the set with all strings converted to lower case form
+	 */
+	virtual BaseRetValPtr apply_internal(const SentenceContext& context) const;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_TOLOWER_H
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index abc70b5..a7eeb50 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -11,6 +11,7 @@ add_executable(tests
 	logicalpredicates.cpp
 	main.cpp
 	position.cpp
+	strsetfunctions.cpp
 	values.cpp
 	varaccess.cpp
 	variables.cpp
diff --git a/tests/strsetfunctions.cpp b/tests/strsetfunctions.cpp
new file mode 100644
index 0000000..4822967
--- /dev/null
+++ b/tests/strsetfunctions.cpp
@@ -0,0 +1,93 @@
+#include <boost/test/unit_test.hpp>
+#include <boost/bind.hpp>
+#include <boost/shared_ptr.hpp>
+#include <libcorpus2/sentence.h>
+
+
+#include <libwccl/values/strset.h>
+#include <libwccl/sentencecontext.h>
+#include <libwccl/ops/tolower.h>
+#include <libwccl/ops/constant.h>
+
+using namespace Wccl;
+
+BOOST_AUTO_TEST_SUITE(strset_functions)
+
+struct StrSetFix
+{
+	StrSetFix()
+		: sc(boost::make_shared<Corpus2::Sentence>()),
+		  tagset(),
+		  strset(),
+		  strset_expr()
+	{
+		strset.insert("alllower");
+		strset.insert("Firstcapital");
+		strset.insert("PascalCase");
+		strset.insert("camelCase");
+		strset.insert("some1325numbers");
+		strset.insert("ALLUPPER");
+		strset.insert("kIdSpEeChLoL");
+
+		strset_expr.reset(new Constant<StrSet>(strset));
+	}
+	SentenceContext sc;
+	Corpus2::Tagset tagset;
+
+	StrSet strset;
+	boost::shared_ptr<Function<StrSet> > strset_expr;
+};
+
+BOOST_FIXTURE_TEST_CASE(lower, StrSetFix)
+{
+	StrSet lowerset;
+	lowerset.insert("alllower");
+	lowerset.insert("firstcapital");
+	lowerset.insert("pascalcase");
+	lowerset.insert("camelcase");
+	lowerset.insert("some1325numbers");
+	lowerset.insert("allupper");
+	lowerset.insert("kidspeechlol");
+
+	ToLower to_lower(strset_expr);
+
+	BOOST_CHECK(lowerset.equals(*to_lower.apply(sc)));
+}
+
+BOOST_FIXTURE_TEST_CASE(lower_locale, StrSetFix)
+{
+	//I'm not sure if I can guarantee this test will pass
+	//on all locales? - ToLower uses default locale at the moment
+
+	//I wanted to make sure switching around encoding of source file
+	//won't affect the test, so I explicitly provide escaped UTF8 sequence
+
+	StrSet upperset;
+	upperset.insert(UnicodeString::fromUTF8(
+		"za\xC5\xBB\xC3\x93\xC5\x81\xC4\x86g\xC4\x98\xC5\x9AL\xC4\x84ja\xC5\xB9\xC5\x83"
+		"zA\xC5\xBC\xC3\xB3\xC5\x82\xC4\x87g\xC4\x99\xC5\x9Bl\xC4\x85ja\xC5\xBA\xC5\x84"));
+	StrSet lowerset;
+	lowerset.insert(UnicodeString::fromUTF8(
+		"za\xC5\xBC\xC3\xB3\xC5\x82\xC4\x87g\xC4\x99\xC5\x9Bl\xC4\x85ja\xC5\xBA\xC5\x84"
+		"za\xC5\xBC\xC3\xB3\xC5\x82\xC4\x87g\xC4\x99\xC5\x9Bl\xC4\x85ja\xC5\xBA\xC5\x84"));
+
+	ToLower to_lower(boost::shared_ptr<Function<StrSet> >(
+		new Constant<StrSet>(upperset)));
+
+	BOOST_CHECK(lowerset.equals(*to_lower.apply(sc)));
+}
+
+
+//------ to_string test cases -------
+
+BOOST_FIXTURE_TEST_CASE(lower_to_string, StrSetFix)
+{
+	StrSet one_elem_set;
+	one_elem_set.insert("YayaAy");
+	ToLower to_lower(boost::shared_ptr<Function<StrSet> >(
+			new Constant<StrSet>(one_elem_set)));
+	std::string expected = "lower([\"YayaAy\"])";
+	BOOST_CHECK_EQUAL(expected, to_lower.to_string(tagset));
+}
+
+BOOST_AUTO_TEST_SUITE_END()
-- 
GitLab