From ec4caf8d6d63444f90a277d388d0f92b48891473 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Wardy=C5=84ski?= <no@email> Date: Wed, 10 Nov 2010 21:02:10 +0100 Subject: [PATCH] Adding "upper" operator as ToUpper class --- libwccl/CMakeLists.txt | 1 + libwccl/ops/toupper.cpp | 26 ++++++++++++++++++ libwccl/ops/toupper.h | 56 +++++++++++++++++++++++++++++++++++++++ tests/strsetfunctions.cpp | 53 ++++++++++++++++++++++++++++++++++++ 4 files changed, 136 insertions(+) create mode 100644 libwccl/ops/toupper.cpp create mode 100644 libwccl/ops/toupper.h diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt index b142613..c5fb97c 100644 --- a/libwccl/CMakeLists.txt +++ b/libwccl/CMakeLists.txt @@ -22,6 +22,7 @@ SET(libwccl_STAT_SRC ops/or.cpp ops/predicate.cpp ops/tolower.cpp + ops/toupper.cpp parser/Parser.cpp parser/ParserException.cpp sentencecontext.cpp diff --git a/libwccl/ops/toupper.cpp b/libwccl/ops/toupper.cpp new file mode 100644 index 0000000..1e94d31 --- /dev/null +++ b/libwccl/ops/toupper.cpp @@ -0,0 +1,26 @@ +#include <libwccl/ops/toupper.h> +#include <libwccl/ops/formatters.h> + +namespace Wccl { + +std::string ToUpper::to_string(const Corpus2::Tagset& tagset) const +{ + return UnaryFunctionFormatter::to_string(tagset, *this, *strset_expr_); +} + +std::string ToUpper::to_raw_string() const { + return UnaryFunctionFormatter::to_raw_string(*this, *strset_expr_); +} + +ToUpper::BaseRetValPtr ToUpper::apply_internal(const SentenceContext& context) const { + const boost::shared_ptr<StrSet >& set = strset_expr_->apply(context); + boost::shared_ptr<StrSet > u_set = boost::make_shared<StrSet>(); + //TODO: should tolower be a method of StrSet as well? + foreach(const UnicodeString& s, set->contents()) { + //TODO: what about locale? is default ok? should the context hold it? 
+ u_set->insert(UnicodeString(s).toUpper()); + } + return u_set; +} + +} /* end ns Wccl */ diff --git a/libwccl/ops/toupper.h b/libwccl/ops/toupper.h new file mode 100644 index 0000000..6491d92 --- /dev/null +++ b/libwccl/ops/toupper.h @@ -0,0 +1,56 @@ +#ifndef LIBWCCL_OPS_TOUPPER_H +#define LIBWCCL_OPS_TOUPPER_H + +#include <boost/shared_ptr.hpp> +#include <libwccl/values/strset.h> +#include <libwccl/ops/functions.h> + +namespace Wccl { + +/** + * Operator that takes a set of strings and returns a new + * set with corresponding values in upper case form + */ +class ToUpper : public Function<StrSet> { +public: + typedef boost::shared_ptr<Function<StrSet> > StrSetFunctionPtr; + + ToUpper(const StrSetFunctionPtr& strset_expr) + : strset_expr_(strset_expr) + { + BOOST_ASSERT(strset_expr_); + } + + /** + * String representation of the operator in form of: + * "upper(strset_expr_string)" + */ + virtual std::string to_string(const Corpus2::Tagset& tagset) const; + + /** + * String representation of the operator in form of: + * "upper(strset_expr_raw_string)" + * This version does not require tagset, but may be incomplete + * and/or contain internal info. 
+ */ + virtual std::string to_raw_string() const; + + virtual const std::string raw_operator_name() const { + return "upper"; + } + +protected: + const StrSetFunctionPtr strset_expr_; + + typedef FunctionBase::BaseRetValPtr BaseRetValPtr; + + /** + * Get a string set from the argument expression and return copy of the set + * with all strings in upper case form + */ + virtual BaseRetValPtr apply_internal(const SentenceContext& context) const; +}; + +} /* end ns Wccl */ + +#endif // LIBWCCL_OPS_TOUPPER_H diff --git a/tests/strsetfunctions.cpp b/tests/strsetfunctions.cpp index 4822967..cd64290 100644 --- a/tests/strsetfunctions.cpp +++ b/tests/strsetfunctions.cpp @@ -7,6 +7,7 @@ #include <libwccl/values/strset.h> #include <libwccl/sentencecontext.h> #include <libwccl/ops/tolower.h> +#include <libwccl/ops/toupper.h> #include <libwccl/ops/constant.h> using namespace Wccl; @@ -77,6 +78,28 @@ BOOST_FIXTURE_TEST_CASE(lower_locale, StrSetFix) BOOST_CHECK(lowerset.equals(*to_lower.apply(sc))); } +BOOST_FIXTURE_TEST_CASE(upper_locale, StrSetFix) +{ + //I'm not sure if I can guarantee this test will pass + //on all locales? 
- ToUpper uses default locale at the moment + + //I wanted to make sure switching around encoding of source file + //won't affect the test, so I explicitly provide escaped UTF8 sequence + + StrSet upperset; + upperset.insert(UnicodeString::fromUTF8( + "ZA\xC5\xBB\xC3\x93\xC5\x81\xC4\x86G\xC4\x98\xC5\x9AL\xC4\x84JA\xC5\xB9\xC5\x83" + "ZA\xC5\xBB\xC3\x93\xC5\x81\xC4\x86G\xC4\x98\xC5\x9AL\xC4\x84JA\xC5\xB9\xC5\x83")); + StrSet lowerset; + lowerset.insert(UnicodeString::fromUTF8( + "za\xC5\xBC\xC3\xB3\xC5\x82\xC4\x87g\xC4\x99\xC5\x9Bl\xC4\x85ja\xC5\xBA\xC5\x84" + "ZA\xC5\xBB\xC3\x93\xC5\x81\xC4\x86g\xC4\x98\xC5\x9AL\xC4\x84JA\xC5\xB9\xC5\x83")); + + ToUpper to_upper(boost::shared_ptr<Function<StrSet> >( + new Constant<StrSet>(lowerset))); + + BOOST_CHECK(upperset.equals(*to_upper.apply(sc))); +} //------ to_string test cases ------- @@ -90,4 +113,34 @@ BOOST_FIXTURE_TEST_CASE(lower_to_string, StrSetFix) BOOST_CHECK_EQUAL(expected, to_lower.to_string(tagset)); } +BOOST_AUTO_TEST_CASE(lower_to_raw_string) +{ + StrSet one_elem_set; + one_elem_set.insert("YayaAy"); + ToLower to_lower(boost::shared_ptr<Function<StrSet> >( + new Constant<StrSet>(one_elem_set))); + std::string expected = "lower([\"YayaAy\"])"; + BOOST_CHECK_EQUAL(expected, to_lower.to_raw_string()); +} + +BOOST_FIXTURE_TEST_CASE(upper_to_string, StrSetFix) +{ + StrSet one_elem_set; + one_elem_set.insert("YayaAy"); + ToUpper to_upper(boost::shared_ptr<Function<StrSet> >( + new Constant<StrSet>(one_elem_set))); + std::string expected = "upper([\"YayaAy\"])"; + BOOST_CHECK_EQUAL(expected, to_upper.to_string(tagset)); +} + +BOOST_AUTO_TEST_CASE(upper_to_raw_string) +{ + StrSet one_elem_set; + one_elem_set.insert("YayaAy"); + ToUpper to_upper(boost::shared_ptr<Function<StrSet> >( + new Constant<StrSet>(one_elem_set))); + std::string expected = "upper([\"YayaAy\"])"; + BOOST_CHECK_EQUAL(expected, to_upper.to_raw_string()); +} + BOOST_AUTO_TEST_SUITE_END() -- GitLab