Skip to content
Snippets Groups Projects
Commit 60c8f519 authored by Adam Wardyński
Browse files

Adding "lower" operator

parent b581f8cc
Branches
No related tags found
No related merge requests found
......@@ -21,6 +21,7 @@ SET(libwccl_STAT_SRC
ops/nor.cpp
ops/or.cpp
ops/predicate.cpp
ops/tolower.cpp
parser/Parser.cpp
parser/ParserException.cpp
sentencecontext.cpp
......
#include <libwccl/ops/tolower.h>
#include <libwccl/ops/formatters.h>
namespace Wccl {
/**
 * Tagset-aware textual form of this operator. The actual formatting is
 * delegated to UnaryFunctionFormatter, which is handed the tagset, this
 * operator and the argument expression.
 */
std::string ToLower::to_string(const Corpus2::Tagset& tagset) const
{
	std::string formatted =
		UnaryFunctionFormatter::to_string(tagset, *this, *strset_expr_);
	return formatted;
}
/**
 * Textual form of this operator that needs no tagset. The actual formatting
 * is delegated to UnaryFunctionFormatter, which is handed this operator and
 * the argument expression.
 */
std::string ToLower::to_raw_string() const
{
	std::string formatted =
		UnaryFunctionFormatter::to_raw_string(*this, *strset_expr_);
	return formatted;
}
/**
 * Evaluates the argument expression in the given context and builds a new
 * string set holding a lower-cased copy of every string it produced.
 * The set returned by the argument expression is not modified.
 */
ToLower::BaseRetValPtr ToLower::apply_internal(const SentenceContext& context) const
{
	const boost::shared_ptr<StrSet >& source = strset_expr_->apply(context);
	boost::shared_ptr<StrSet > lowered = boost::make_shared<StrSet>();
	//TODO: should tolower be a method of StrSet as well?
	foreach(const UnicodeString& original, source->contents()) {
		// toLower() works in place, so operate on a private copy
		UnicodeString lower_copy(original);
		//TODO: what about locale? is default ok? should the context hold it?
		lower_copy.toLower();
		lowered->insert(lower_copy);
	}
	return lowered;
}
} /* end ns Wccl */
#ifndef LIBWCCL_OPS_TOLOWER_H
#define LIBWCCL_OPS_TOLOWER_H
#include <boost/shared_ptr.hpp>
#include <libwccl/values/strset.h>
#include <libwccl/ops/functions.h>
namespace Wccl {
/**
* Operator that takes a set of strings and returns a new
* set with corresponding values in lower case form
*/
class ToLower : public Function<StrSet> {
public:
/** Shared pointer to an expression that evaluates to a string set. */
typedef boost::shared_ptr<Function<StrSet> > StrSetFunctionPtr;
/**
* Create the operator for the given argument expression.
* The pointer is stored as-is and asserted (BOOST_ASSERT) to be non-null.
*/
ToLower(const StrSetFunctionPtr& strset_expr)
: strset_expr_(strset_expr)
{
BOOST_ASSERT(strset_expr_);
}
/**
* String representation of the operator in form of:
* "lower(strset_expr_string)"
*/
virtual std::string to_string(const Corpus2::Tagset& tagset) const;
/**
* String representation of the operator that does not require a tagset,
* presumably in form of:
* "lower(strset_expr_raw_string)"
* This version may be incomplete and/or contain internal info.
*/
virtual std::string to_raw_string() const;
/** Name of the operator as used in its string representation: "lower". */
virtual const std::string raw_operator_name() const {
return "lower";
}
protected:
/** Argument expression whose resulting strings get lower-cased. */
const StrSetFunctionPtr strset_expr_;
/** Shorthand for the return pointer type declared in FunctionBase. */
typedef FunctionBase::BaseRetValPtr BaseRetValPtr;
/**
* Get a string set from the argument expression and return copy of the set
* with all strings in lower case form
*/
virtual BaseRetValPtr apply_internal(const SentenceContext& context) const;
};
} /* end ns Wccl */
#endif // LIBWCCL_OPS_TOLOWER_H
......@@ -11,6 +11,7 @@ add_executable(tests
logicalpredicates.cpp
main.cpp
position.cpp
strsetfunctions.cpp
values.cpp
varaccess.cpp
variables.cpp
......
#include <boost/test/unit_test.hpp>
#include <boost/bind.hpp>
#include <boost/shared_ptr.hpp>
#include <libcorpus2/sentence.h>
#include <libwccl/values/strset.h>
#include <libwccl/sentencecontext.h>
#include <libwccl/ops/tolower.h>
#include <libwccl/ops/constant.h>
using namespace Wccl;
BOOST_AUTO_TEST_SUITE(strset_functions)
/**
 * Shared fixture for the string-set function tests: a sentence context built
 * on an empty sentence, a default-constructed tagset, a string set filled
 * with words in assorted letter cases, and a constant expression that
 * yields that set.
 */
struct StrSetFix
{
	StrSetFix()
		: sc(boost::make_shared<Corpus2::Sentence>()),
		  tagset(),
		  strset(),
		  strset_expr()
	{
		const char* const mixed_case_words[] = {
			"alllower",
			"Firstcapital",
			"PascalCase",
			"camelCase",
			"some1325numbers",
			"ALLUPPER",
			"kIdSpEeChLoL"
		};
		const char* const* const words_end = mixed_case_words
			+ sizeof(mixed_case_words) / sizeof(mixed_case_words[0]);
		for (const char* const* word = mixed_case_words; word != words_end; ++word) {
			strset.insert(*word);
		}
		strset_expr.reset(new Constant<StrSet>(strset));
	}

	SentenceContext sc;
	Corpus2::Tagset tagset;
	StrSet strset;
	boost::shared_ptr<Function<StrSet> > strset_expr;
};
// Applying ToLower to the fixture's mixed-case set must yield exactly the
// lower-case forms of all its words.
BOOST_FIXTURE_TEST_CASE(lower, StrSetFix)
{
	const char* const expected_words[] = {
		"alllower",
		"firstcapital",
		"pascalcase",
		"camelcase",
		"some1325numbers",
		"allupper",
		"kidspeechlol"
	};
	const char* const* const words_end = expected_words
		+ sizeof(expected_words) / sizeof(expected_words[0]);

	StrSet lowerset;
	for (const char* const* word = expected_words; word != words_end; ++word) {
		lowerset.insert(*word);
	}

	ToLower to_lower(strset_expr);
	BOOST_CHECK(lowerset.equals(*to_lower.apply(sc)));
}
// Lower-casing of non-ASCII (Polish) letters.
BOOST_FIXTURE_TEST_CASE(lower_locale, StrSetFix)
{
	// It is not certain this test passes under every locale, because
	// ToLower currently relies on the default locale.
	// The input is given as escaped UTF-8 byte sequences so that changing
	// the encoding of this source file cannot affect the test.
	const char* const mixed_case_utf8 =
		"za\xC5\xBB\xC3\x93\xC5\x81\xC4\x86g\xC4\x98\xC5\x9AL\xC4\x84ja\xC5\xB9\xC5\x83"
		"zA\xC5\xBC\xC3\xB3\xC5\x82\xC4\x87g\xC4\x99\xC5\x9Bl\xC4\x85ja\xC5\xBA\xC5\x84";
	const char* const lower_case_utf8 =
		"za\xC5\xBC\xC3\xB3\xC5\x82\xC4\x87g\xC4\x99\xC5\x9Bl\xC4\x85ja\xC5\xBA\xC5\x84"
		"za\xC5\xBC\xC3\xB3\xC5\x82\xC4\x87g\xC4\x99\xC5\x9Bl\xC4\x85ja\xC5\xBA\xC5\x84";

	StrSet upperset;
	upperset.insert(UnicodeString::fromUTF8(mixed_case_utf8));
	StrSet lowerset;
	lowerset.insert(UnicodeString::fromUTF8(lower_case_utf8));

	boost::shared_ptr<Function<StrSet> > upper_expr(new Constant<StrSet>(upperset));
	ToLower to_lower(upper_expr);
	BOOST_CHECK(lowerset.equals(*to_lower.apply(sc)));
}
//------ to_string test cases -------
// to_string() must print the operator name followed by the unmodified
// argument expression in parentheses.
BOOST_FIXTURE_TEST_CASE(lower_to_string, StrSetFix)
{
	StrSet single_word;
	single_word.insert("YayaAy");
	boost::shared_ptr<Function<StrSet> > single_word_expr(
		new Constant<StrSet>(single_word));
	ToLower to_lower(single_word_expr);

	std::string expected("lower([\"YayaAy\"])");
	BOOST_CHECK_EQUAL(expected, to_lower.to_string(tagset));
}
BOOST_AUTO_TEST_SUITE_END()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment