Skip to content
Snippets Groups Projects
Commit ec4caf8d authored by Adam Wardyński's avatar Adam Wardyński
Browse files

Adding "upper" operator as ToUpper class

parent 1fcd181e
Branches
No related merge requests found
......@@ -22,6 +22,7 @@ SET(libwccl_STAT_SRC
ops/or.cpp
ops/predicate.cpp
ops/tolower.cpp
ops/toupper.cpp
parser/Parser.cpp
parser/ParserException.cpp
sentencecontext.cpp
......
#include <libwccl/ops/toupper.h>
#include <libwccl/ops/formatters.h>
namespace Wccl {
/**
 * Textual form of the operator; formatting is delegated to
 * UnaryFunctionFormatter together with the wrapped argument expression.
 */
std::string ToUpper::to_string(const Corpus2::Tagset& tagset) const
{
	Function<StrSet>& argument = *strset_expr_;
	return UnaryFunctionFormatter::to_string(tagset, *this, argument);
}
std::string ToUpper::to_raw_string() const {
return UnaryFunctionFormatter::to_raw_string(*this, *strset_expr_);
}
/**
 * Evaluate the argument expression in the given context and build a fresh
 * StrSet holding an upper-cased copy of every string it produced.
 */
ToUpper::BaseRetValPtr ToUpper::apply_internal(const SentenceContext& context) const
{
	const boost::shared_ptr<StrSet >& source = strset_expr_->apply(context);
	boost::shared_ptr<StrSet > result = boost::make_shared<StrSet>();
	//TODO: should upper-casing be a method of StrSet as well?
	foreach (const UnicodeString& original, source->contents()) {
		// Work on a copy so the strings of the evaluated argument stay untouched.
		UnicodeString upper_cased(original);
		//TODO: what about locale? is default ok? should the context hold it?
		upper_cased.toUpper();
		result->insert(upper_cased);
	}
	return result;
}
} /* end ns Wccl */
#ifndef LIBWCCL_OPS_TOUPPER_H
#define LIBWCCL_OPS_TOUPPER_H
#include <boost/shared_ptr.hpp>
#include <libwccl/values/strset.h>
#include <libwccl/ops/functions.h>
namespace Wccl {
/**
* Operator that takes a set of strings and returns a new
* set with corresponding values in upper case form
*/
class ToUpper : public Function<StrSet> {
public:
/** Shared pointer to a function that yields a string set. */
typedef boost::shared_ptr<Function<StrSet> > StrSetFunctionPtr;
/**
* Create the operator over the given argument expression.
* The pointer must not be null; this is checked with BOOST_ASSERT.
*/
ToUpper(const StrSetFunctionPtr& strset_expr)
: strset_expr_(strset_expr)
{
BOOST_ASSERT(strset_expr_);
}
/**
* String representation of the operator in form of:
* "upper(strset_expr_string)"
*/
virtual std::string to_string(const Corpus2::Tagset& tagset) const;
/**
* String representation of the operator in form of:
* "upper(strset_expr_raw_string)"
* This version does not require tagset, but may be incomplete
* and/or contain internal info.
*/
virtual std::string to_raw_string() const;
/**
* Name of the operator as used in its string representations: "upper"
*/
virtual const std::string raw_operator_name() const {
return "upper";
}
protected:
/** Argument expression whose resulting strings are converted to upper case. */
const StrSetFunctionPtr strset_expr_;
typedef FunctionBase::BaseRetValPtr BaseRetValPtr;
/**
* Get a string set from the argument expression and return copy of the set
* with all strings in upper case form
*/
virtual BaseRetValPtr apply_internal(const SentenceContext& context) const;
};
} /* end ns Wccl */
#endif // LIBWCCL_OPS_TOUPPER_H
......@@ -7,6 +7,7 @@
#include <libwccl/values/strset.h>
#include <libwccl/sentencecontext.h>
#include <libwccl/ops/tolower.h>
#include <libwccl/ops/toupper.h>
#include <libwccl/ops/constant.h>
using namespace Wccl;
......@@ -77,6 +78,28 @@ BOOST_FIXTURE_TEST_CASE(lower_locale, StrSetFix)
BOOST_CHECK(lowerset.equals(*to_lower.apply(sc)));
}
BOOST_FIXTURE_TEST_CASE(upper_locale, StrSetFix)
{
	// This check may not hold under every locale, because ToUpper
	// currently relies on the default one.
	// The inputs are spelled as escaped UTF-8 byte sequences so that the
	// encoding of this source file cannot influence the outcome.
	const UnicodeString upper_text = UnicodeString::fromUTF8(
		"ZA\xC5\xBB\xC3\x93\xC5\x81\xC4\x86G\xC4\x98\xC5\x9AL\xC4\x84JA\xC5\xB9\xC5\x83"
		"ZA\xC5\xBB\xC3\x93\xC5\x81\xC4\x86G\xC4\x98\xC5\x9AL\xC4\x84JA\xC5\xB9\xC5\x83");
	StrSet upperset;
	upperset.insert(upper_text);

	// Mixed-case input: a lower-case half followed by a mostly upper-case half.
	const UnicodeString mixed_text = UnicodeString::fromUTF8(
		"za\xC5\xBC\xC3\xB3\xC5\x82\xC4\x87g\xC4\x99\xC5\x9Bl\xC4\x85ja\xC5\xBA\xC5\x84"
		"ZA\xC5\xBB\xC3\x93\xC5\x81\xC4\x86g\xC4\x98\xC5\x9AL\xC4\x84JA\xC5\xB9\xC5\x83");
	StrSet mixed_case_set;
	mixed_case_set.insert(mixed_text);

	boost::shared_ptr<Function<StrSet> > argument(
		new Constant<StrSet>(mixed_case_set));
	ToUpper to_upper(argument);

	BOOST_CHECK(upperset.equals(*to_upper.apply(sc)));
}
//------ to_string test cases -------
......@@ -90,4 +113,34 @@ BOOST_FIXTURE_TEST_CASE(lower_to_string, StrSetFix)
BOOST_CHECK_EQUAL(expected, to_lower.to_string(tagset));
}
// The raw string form of "lower" wraps the raw form of its argument.
BOOST_AUTO_TEST_CASE(lower_to_raw_string)
{
	StrSet single_value;
	single_value.insert("YayaAy");

	boost::shared_ptr<Function<StrSet> > argument(
		new Constant<StrSet>(single_value));
	ToLower to_lower(argument);

	const std::string expected("lower([\"YayaAy\"])");
	BOOST_CHECK_EQUAL(expected, to_lower.to_raw_string());
}
// The tagset-aware string form of "upper" wraps the string form of its argument.
BOOST_FIXTURE_TEST_CASE(upper_to_string, StrSetFix)
{
	StrSet single_value;
	single_value.insert("YayaAy");

	boost::shared_ptr<Function<StrSet> > argument(
		new Constant<StrSet>(single_value));
	ToUpper to_upper(argument);

	const std::string expected("upper([\"YayaAy\"])");
	BOOST_CHECK_EQUAL(expected, to_upper.to_string(tagset));
}
// The raw string form of "upper" wraps the raw form of its argument.
BOOST_AUTO_TEST_CASE(upper_to_raw_string)
{
	StrSet single_value;
	single_value.insert("YayaAy");

	boost::shared_ptr<Function<StrSet> > argument(
		new Constant<StrSet>(single_value));
	ToUpper to_upper(argument);

	const std::string expected("upper([\"YayaAy\"])");
	BOOST_CHECK_EQUAL(expected, to_upper.to_raw_string());
}
BOOST_AUTO_TEST_SUITE_END()
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment