Skip to content
Snippets Groups Projects
Commit f6a79879 authored by Adam Wardyński
Browse files

Affix operator (returning prefixes or suffixes of given length)

parent 201e00b0
Branches
No related merge requests found
......@@ -16,9 +16,10 @@ set(LIBS ${LIBS} ${Boost_LIBRARIES})
SET(libwccl_STAT_SRC
exception.cpp
ops/and.cpp
ops/affix.cpp
ops/formatters.cpp
ops/logicalpredicate.cpp
ops/nor.cpp
ops/nor.cpp
ops/or.cpp
ops/predicate.cpp
ops/tolower.cpp
......
#include <libwccl/ops/affix.h>
#include <sstream>
#include <boost/foreach.hpp>
#define foreach BOOST_FOREACH
namespace Wccl {
/**
 * Builds the textual form of the operator: the tagset-aware operator name,
 * followed in parentheses by the argument expression's string form and the
 * affix length, separated by ", ".
 */
std::string Affix::to_string(const Corpus2::Tagset& tagset) const
{
    std::ostringstream out;
    out << operator_name(tagset);
    out << "(" << strset_expr_->to_string(tagset);
    out << ", " << affix_length_ << ")";
    return out.str();
}
/**
 * Builds the tagset-independent textual form of the operator: the raw
 * operator name, followed in parentheses by the argument expression's raw
 * string form and the affix length, separated by ", ".
 */
std::string Affix::to_raw_string() const
{
    std::ostringstream out;
    out << raw_operator_name();
    out << "(" << strset_expr_->to_raw_string();
    out << ", " << affix_length_ << ")";
    return out.str();
}
/**
 * Evaluates the argument expression and returns a set holding an affix of
 * every string in the resulting set.
 *
 * - affix_length_ == 0: the argument's result is returned as-is.
 * - affix_length_  > 0: each string is cut down to its first affix_length_
 *   code points (prefix).
 * - affix_length_  < 0: each string is cut down to its last -affix_length_
 *   code points (suffix).
 *
 * Strings shorter than the requested length are kept whole.
 *
 * The cut position is located with UnicodeString::moveIndex32, which steps
 * by code points, so a surrogate pair is never split in half. For strings
 * without supplementary characters the result is the same as cutting by
 * UTF-16 code units.
 *
 * @param context sentence context passed on to the argument expression
 * @return the argument's own result for length 0, otherwise a newly
 *         allocated set of affixes
 */
Affix::BaseRetValPtr Affix::apply_internal(const SentenceContext& context) const
{
    if(affix_length_ == 0) {
        return strset_expr_->apply(context);
    }
    const boost::shared_ptr<StrSet>& set = strset_expr_->apply(context);
    boost::shared_ptr<StrSet> a_set = boost::shared_ptr<StrSet>(new StrSet());
    foreach(const UnicodeString& s, set->contents()) {
        UnicodeString affix(s);
        if(affix_length_ < 0) {
            const int len = s.length();
            // A suffix cannot be longer than the string itself; clamping
            // also keeps the (negative) step safely negatable.
            const int step = (affix_length_ < -len) ? -len : affix_length_;
            // Walk back from the end by whole code points to find where
            // the suffix starts, then drop everything before that point.
            affix.remove(0, s.moveIndex32(len, step));
        } else {
            // Walk forward from the start by whole code points to find
            // where the prefix ends; a no-op if the string is shorter.
            affix.truncate(s.moveIndex32(0, affix_length_));
        }
        a_set->insert(affix);
    }
    return a_set;
}
} /* end ns Wccl */
#ifndef LIBWCCL_OPS_AFFIX_H
#define LIBWCCL_OPS_AFFIX_H
#include <boost/shared_ptr.hpp>
#include <libwccl/values/strset.h>
#include <libwccl/ops/functions.h>
namespace Wccl {
/**
 * Operator that takes a set of strings and returns a new
 * set with corresponding values that are prefixes or
 * suffixes of given length.
 *
 * The sign of the length selects the kind of affix: a positive
 * length yields prefixes, a negative length yields suffixes of
 * the corresponding absolute length, and zero yields the
 * argument's result unchanged.
 */
class Affix : public Function<StrSet> {
public:
typedef boost::shared_ptr<Function<StrSet> > StrSetFunctionPtr;
/**
 * @param strset_expr expression providing the set of strings to take
 *        affixes of; asserted to be non-null
 * @param affix_length requested affix length: positive for prefixes,
 *        negative for suffixes, zero to leave the strings unchanged
 */
Affix(const StrSetFunctionPtr& strset_expr, int affix_length)
: strset_expr_(strset_expr),
affix_length_(affix_length)
{
BOOST_ASSERT(strset_expr_);
}
/**
 * String representation of the operator in form of:
 * "affix(strset_expr_string, affix_length)"
 * where the leading name is whatever operator_name(tagset) returns.
 */
virtual std::string to_string(const Corpus2::Tagset& tagset) const;
/**
 * String representation of the operator in form of:
 * "affix(strset_expr_raw_string, affix_length)"
 * This version does not require tagset, but may be incomplete
 * and/or contain internal info.
 */
virtual std::string to_raw_string() const;
/**
 * Name of the operator used in the raw string representation.
 */
virtual const std::string raw_operator_name() const {
return "affix";
}
protected:
// Expression that is evaluated to obtain the input set of strings.
const StrSetFunctionPtr strset_expr_;
// Requested affix length; the sign selects prefix (> 0) or suffix (< 0).
const int affix_length_;
typedef FunctionBase::BaseRetValPtr BaseRetValPtr;
/**
 * Get a string set from the argument expression and return a new set
 * with all strings converted into prefixes or suffixes of given length.
 * For a length of zero the argument's result is returned directly.
 */
virtual BaseRetValPtr apply_internal(const SentenceContext& context) const;
};
} /* end ns Wccl */
#endif // LIBWCCL_OPS_AFFIX_H
......@@ -8,6 +8,7 @@
#include <libwccl/sentencecontext.h>
#include <libwccl/ops/tolower.h>
#include <libwccl/ops/toupper.h>
#include <libwccl/ops/affix.h>
#include <libwccl/ops/constant.h>
using namespace Wccl;
......@@ -29,6 +30,7 @@ struct StrSetFix
strset.insert("some1325numbers");
strset.insert("ALLUPPER");
strset.insert("kIdSpEeChLoL");
strset.insert("short");
strset_expr.reset(new Constant<StrSet>(strset));
}
......@@ -49,12 +51,70 @@ BOOST_FIXTURE_TEST_CASE(lower, StrSetFix)
lowerset.insert("some1325numbers");
lowerset.insert("allupper");
lowerset.insert("kidspeechlol");
lowerset.insert("short");
ToLower to_lower(strset_expr);
BOOST_CHECK(lowerset.equals(*to_lower.apply(sc)));
}
/**
 * ToUpper applied to the fixture expression is expected to produce the
 * upper-cased strings listed below.
 */
BOOST_FIXTURE_TEST_CASE(upper, StrSetFix)
{
    const char* const expected[] = {
        "ALLLOWER",
        "FIRSTCAPITAL",
        "PASCALCASE",
        "CAMELCASE",
        "SOME1325NUMBERS",
        "ALLUPPER",
        "KIDSPEECHLOL",
        "SHORT"
    };
    const char* const* const expected_end =
        expected + sizeof(expected) / sizeof(expected[0]);

    StrSet upperset;
    for (const char* const* it = expected; it != expected_end; ++it) {
        upperset.insert(*it);
    }

    ToUpper to_upper(strset_expr);
    BOOST_CHECK(upperset.equals(*to_upper.apply(sc)));
}
/**
 * Affix with length 7 is expected to produce the 7-character prefixes
 * listed below; "short" has fewer than 7 characters and is expected whole.
 */
BOOST_FIXTURE_TEST_CASE(prefix, StrSetFix)
{
    const char* const expected[] = {
        "alllowe",
        "Firstca",
        "PascalC",
        "camelCa",
        "some132",
        "ALLUPPE",
        "kIdSpEe",
        "short"
    };
    const char* const* const expected_end =
        expected + sizeof(expected) / sizeof(expected[0]);

    StrSet prefixset;
    for (const char* const* it = expected; it != expected_end; ++it) {
        prefixset.insert(*it);
    }

    Affix prefix(strset_expr, 7);
    BOOST_CHECK(prefixset.equals(*prefix.apply(sc)));
}
/**
 * Affix with length -7 is expected to produce the 7-character suffixes
 * listed below; "short" has fewer than 7 characters and is expected whole.
 */
BOOST_FIXTURE_TEST_CASE(suffix, StrSetFix)
{
    const char* const expected[] = {
        "lllower",
        "capital",
        "calCase",
        "melCase",
        "numbers",
        "LLUPPER",
        "EeChLoL",
        "short"
    };
    const char* const* const expected_end =
        expected + sizeof(expected) / sizeof(expected[0]);

    StrSet suffixset;
    for (const char* const* it = expected; it != expected_end; ++it) {
        suffixset.insert(*it);
    }

    Affix suffix(strset_expr, -7);
    BOOST_CHECK(suffixset.equals(*suffix.apply(sc)));
}
/**
 * An affix length of zero selects neither prefixes nor suffixes: the
 * result is expected to equal the fixture's original string set.
 */
BOOST_FIXTURE_TEST_CASE(affix_0, StrSetFix)
{
Affix affix_0(strset_expr, 0);
BOOST_CHECK(strset.equals(*affix_0.apply(sc)));
}
BOOST_FIXTURE_TEST_CASE(lower_locale, StrSetFix)
{
//I'm not sure if I can guarantee this test will pass
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment