Skip to content
Snippets Groups Projects
Commit 60c8f519 authored by Adam Wardyński
Browse files

Adding "lower" operator

parent b581f8cc
Branches
No related merge requests found
......@@ -21,6 +21,7 @@ SET(libwccl_STAT_SRC
ops/nor.cpp
ops/or.cpp
ops/predicate.cpp
ops/tolower.cpp
parser/Parser.cpp
parser/ParserException.cpp
sentencecontext.cpp
......
#include <libwccl/ops/tolower.h>
#include <libwccl/ops/formatters.h>
namespace Wccl {
/**
 * Tagset-aware string form of this operator. Formatting is delegated to
 * UnaryFunctionFormatter::to_string, which receives the tagset, this
 * operator and the argument expression.
 */
std::string ToLower::to_string(const Corpus2::Tagset& tagset) const
{
return UnaryFunctionFormatter::to_string(tagset, *this, *strset_expr_);
}
/**
 * String form of this operator that needs no tagset. Formatting is
 * delegated to UnaryFunctionFormatter::to_raw_string, which receives this
 * operator and the argument expression.
 */
std::string ToLower::to_raw_string() const {
return UnaryFunctionFormatter::to_raw_string(*this, *strset_expr_);
}
/**
 * Evaluate the argument expression in the given context and build a new
 * string set that holds a lower-cased copy of every string it yielded.
 * The set returned by the argument expression is only read, never modified.
 */
ToLower::BaseRetValPtr ToLower::apply_internal(const SentenceContext& context) const {
    const boost::shared_ptr<StrSet >& source = strset_expr_->apply(context);
    boost::shared_ptr<StrSet > lowered = boost::make_shared<StrSet>();
    //TODO: should tolower be a method of StrSet as well?
    foreach(const UnicodeString& original, source->contents()) {
        // Work on a private copy: toLower() converts the string in place.
        UnicodeString lower_cased(original);
        //TODO: what about locale? is default ok? should the context hold it?
        lower_cased.toLower();
        lowered->insert(lower_cased);
    }
    return lowered;
}
} /* end ns Wccl */
#ifndef LIBWCCL_OPS_TOLOWER_H
#define LIBWCCL_OPS_TOLOWER_H
#include <boost/shared_ptr.hpp>
#include <libwccl/values/strset.h>
#include <libwccl/ops/functions.h>
namespace Wccl {
/**
* Operator that takes a set of strings and returns a new
* set with corresponding values in lower case form.
*/
class ToLower : public Function<StrSet> {
public:
/** Shared pointer to a function that yields a string set. */
typedef boost::shared_ptr<Function<StrSet> > StrSetFunctionPtr;
/**
* Create the operator over the given argument expression.
* The stored pointer is checked with BOOST_ASSERT, so a null
* expression is treated as a programming error.
*/
ToLower(const StrSetFunctionPtr& strset_expr)
: strset_expr_(strset_expr)
{
BOOST_ASSERT(strset_expr_);
}
/**
* String representation of the operator in form of:
* "lower(strset_expr_string)"
*/
virtual std::string to_string(const Corpus2::Tagset& tagset) const;
/**
* String representation of the operator that does not require a tagset.
* Presumably of the form "lower(strset_expr_raw_string)" - TODO confirm
* against UnaryFunctionFormatter.
* This version may be incomplete and/or contain internal info.
*/
virtual std::string to_raw_string() const;
/**
* Raw name of this operator: "lower".
*/
virtual const std::string raw_operator_name() const {
return "lower";
}
protected:
/** Argument expression whose resulting strings are lower-cased. */
const StrSetFunctionPtr strset_expr_;
typedef FunctionBase::BaseRetValPtr BaseRetValPtr;
/**
* Get a string set from the argument expression and return a copy of the set
* with all strings in lower case form.
*/
virtual BaseRetValPtr apply_internal(const SentenceContext& context) const;
};
} /* end ns Wccl */
#endif // LIBWCCL_OPS_TOLOWER_H
......@@ -11,6 +11,7 @@ add_executable(tests
logicalpredicates.cpp
main.cpp
position.cpp
strsetfunctions.cpp
values.cpp
varaccess.cpp
variables.cpp
......
#include <boost/test/unit_test.hpp>
#include <boost/bind.hpp>
#include <boost/shared_ptr.hpp>
#include <libcorpus2/sentence.h>
#include <libwccl/values/strset.h>
#include <libwccl/sentencecontext.h>
#include <libwccl/ops/tolower.h>
#include <libwccl/ops/constant.h>
using namespace Wccl;
BOOST_AUTO_TEST_SUITE(strset_functions)
/**
 * Fixture shared by the string set function tests. It provides a sentence
 * context over an empty sentence, a default-constructed tagset, a string set
 * filled with words in assorted letter cases, and a constant expression
 * that returns that set.
 */
struct StrSetFix
{
    StrSetFix()
        : sc(boost::make_shared<Corpus2::Sentence>()),
          tagset(),
          strset(),
          strset_expr()
    {
        // Sample words covering different capitalisation patterns.
        static const char* const sample_words[] = {
            "alllower",
            "Firstcapital",
            "PascalCase",
            "camelCase",
            "some1325numbers",
            "ALLUPPER",
            "kIdSpEeChLoL"
        };
        const char* const* const words_end =
            sample_words + sizeof(sample_words) / sizeof(sample_words[0]);
        for (const char* const* word = sample_words; word != words_end; ++word) {
            strset.insert(*word);
        }
        strset_expr.reset(new Constant<StrSet>(strset));
    }

    SentenceContext sc;
    Corpus2::Tagset tagset;
    StrSet strset;
    boost::shared_ptr<Function<StrSet> > strset_expr;
};
// Applying ToLower to the fixture set must give the same words in lower case.
BOOST_FIXTURE_TEST_CASE(lower, StrSetFix)
{
    // Lower-case counterparts of the words inserted by the fixture.
    static const char* const expected_words[] = {
        "alllower",
        "firstcapital",
        "pascalcase",
        "camelcase",
        "some1325numbers",
        "allupper",
        "kidspeechlol"
    };
    const char* const* const expected_end =
        expected_words + sizeof(expected_words) / sizeof(expected_words[0]);

    StrSet lowerset;
    for (const char* const* word = expected_words; word != expected_end; ++word) {
        lowerset.insert(*word);
    }

    ToLower to_lower(strset_expr);
    BOOST_CHECK(lowerset.equals(*to_lower.apply(sc)));
}
// Lower-casing of non-ASCII (Polish) letters.
BOOST_FIXTURE_TEST_CASE(lower_locale, StrSetFix)
{
    // NOTE: ToLower currently relies on the default locale, so it is not
    // certain that this test passes under every locale.
    // The strings are given as escaped UTF-8 byte sequences so that the
    // encoding of this source file cannot influence the test.
    StrSet lowerset;
    lowerset.insert(UnicodeString::fromUTF8(
        "za\xC5\xBC\xC3\xB3\xC5\x82\xC4\x87g\xC4\x99\xC5\x9Bl\xC4\x85ja\xC5\xBA\xC5\x84"
        "za\xC5\xBC\xC3\xB3\xC5\x82\xC4\x87g\xC4\x99\xC5\x9Bl\xC4\x85ja\xC5\xBA\xC5\x84"));

    StrSet upperset;
    upperset.insert(UnicodeString::fromUTF8(
        "za\xC5\xBB\xC3\x93\xC5\x81\xC4\x86g\xC4\x98\xC5\x9AL\xC4\x84ja\xC5\xB9\xC5\x83"
        "zA\xC5\xBC\xC3\xB3\xC5\x82\xC4\x87g\xC4\x99\xC5\x9Bl\xC4\x85ja\xC5\xBA\xC5\x84"));

    // Constant expression that yields the mixed-case input set.
    boost::shared_ptr<Function<StrSet> > upper_expr(
        new Constant<StrSet>(upperset));
    ToLower to_lower(upper_expr);
    BOOST_CHECK(lowerset.equals(*to_lower.apply(sc)));
}
//------ to_string test cases -------
// to_string must wrap the argument's string form in "lower(...)" and leave
// the argument itself unchanged.
BOOST_FIXTURE_TEST_CASE(lower_to_string, StrSetFix)
{
    const std::string expected("lower([\"YayaAy\"])");

    StrSet one_elem_set;
    one_elem_set.insert("YayaAy");
    // Constant expression that yields the single-element set.
    boost::shared_ptr<Function<StrSet> > one_elem_expr(
        new Constant<StrSet>(one_elem_set));

    ToLower to_lower(one_elem_expr);
    BOOST_CHECK_EQUAL(expected, to_lower.to_string(tagset));
}
BOOST_AUTO_TEST_SUITE_END()
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment