Skip to content
Snippets Groups Projects
regex.cpp 2.6 KiB
Newer Older
#include <libwccl/ops/functions/bool/predicates/regex.h>
#include <libpwrutils/foreach.h>
Adam Wardynski's avatar
Adam Wardynski committed
#include <sstream>
#include <libpwrutils/util.h>

namespace Wccl {

/**
 * Builds a regex parse error from the offending pattern and the ICU
 * diagnostics produced while trying to compile it.
 *
 * @param pattern      text of the regular expression that was being compiled
 * @param status_code  ICU status code; stored as its symbolic name
 *                     (as returned by u_errorName)
 * @param parse_error  ICU parse error record, kept so that info() can report
 *                     the line, offset and context
 */
RegexParseError::RegexParseError(
	const UnicodeString& pattern,
	const UErrorCode& status_code,
	const UParseError& parse_error)
	: WcclError("Could not parse regular expression."),
	  pattern(pattern),
	  status(u_errorName(status_code)),
	  upe(parse_error)
{
}

std::string RegexParseError::info() const
{
	std::stringstream ss;
ilor's avatar
ilor committed
	ss << "Could not parse regular expression at line " << upe.line
		<< " offset " << upe.offset << ". Status: " << status
		<< ". Error: " << PwrNlp::to_utf8(UnicodeString(upe.postContext))
	    << ". Expression was: " << PwrNlp::to_utf8(pattern);
Adam Wardynski's avatar
Adam Wardynski committed
	return ss.str();
}

/** Out-of-line, non-throwing destructor; there is nothing to release explicitly. */
RegexParseError::~RegexParseError() throw() {}

Adam Wardynski's avatar
Adam Wardynski committed
/**
 * Compiles an ICU regular expression and hands ownership to a shared pointer.
 *
 * @param pat_str  text of the regular expression
 * @return the compiled pattern
 * @throws RegexParseError when ICU reports a failure status; the exception
 *         carries the pattern, the status code and the ICU parse error record
 */
boost::shared_ptr<const RegexPattern> compile_regex(const UnicodeString &pat_str)
{
	// Initialise directly instead of memset(): no dependency on <cstring>,
	// and the status starts at the value ICU expects on entry.
	UParseError error = UParseError();
	UErrorCode status = U_ZERO_ERROR;
	boost::shared_ptr<const RegexPattern> pattern(RegexPattern::compile(pat_str, error, status));
	// ICU warning codes are non-zero but still mean success, so test with
	// U_FAILURE rather than comparing against U_ZERO_ERROR.
	if(U_FAILURE(status))
	{
		throw RegexParseError(pat_str, status, error);
	}
	return pattern;
}

/**
 * Creates the regex predicate over a string-set expression.
 *
 * The pattern text is kept for printing and is compiled right away via
 * compile_regex(), which throws RegexParseError when compilation fails.
 *
 * @param strset_expr  expression yielding the strings to test
 * @param patstr       text of the regular expression
 */
Regex::Regex(const Regex::StrSetFunctionPtr &strset_expr, const UnicodeString &patstr)
	: strset_expr_(strset_expr)
	, patstr_(patstr)
	, pattern_(compile_regex(patstr))
{
	// Both the argument expression and the compiled pattern are dereferenced
	// later without further checks, so verify them here.
	BOOST_ASSERT(strset_expr_);
	BOOST_ASSERT(pattern_);
}

/**
 * Renders the operator as text using the given tagset, in the form
 * NAME(STRSET_EXPR, "PATTERN"), with the pattern converted to UTF-8.
 *
 * @param tagset  tagset passed on to name() and to the argument's to_string()
 * @return the textual representation
 */
std::string Regex::to_string(const Corpus2::Tagset& tagset) const
{
	std::stringstream ss;
	ss << name(tagset) << "(" << strset_expr_->to_string(tagset)
		<< ", \"" << PwrNlp::to_utf8(patstr_) << "\")"; //TODO: utf escaping?
	return ss.str();
}

std::string Regex::to_raw_string() const {
	std::stringstream ss;
	ss << raw_name() << "(" << strset_expr_->to_raw_string()
Adam Wardynski's avatar
Adam Wardynski committed
		<< ", \"" << PwrNlp::to_utf8(patstr_) << "\")"; //TODO: utf escaping?
	return ss.str();
}

/**
 * Evaluates the predicate: every string produced by the argument expression
 * has to match the compiled pattern.
 *
 * @param context  execution context, passed to the argument expression and
 *                 used to build the result
 * @return False when the string set is empty, when any string fails to
 *         match, or when ICU reports a failure while creating or running
 *         the matcher; True otherwise
 */
Regex::BaseRetValPtr Regex::apply_internal(const FunExecContext& context) const {
	const boost::shared_ptr<const StrSet>& set = strset_expr_->apply(context);
	if(set->empty()) {
		return Predicate::False(context);
	}
	foreach(const UnicodeString& s, set->contents()) {
		UErrorCode status = U_ZERO_ERROR;
		boost::scoped_ptr<RegexMatcher> matcher(pattern_->matcher(s, status));
		// ICU warnings are non-zero yet successful, hence U_FAILURE.
		if(U_FAILURE(status)) {
			// Not expected to happen: trips in debug builds, while builds
			// with assertions disabled fall back to False.
			BOOST_ASSERT(U_SUCCESS(status));
			return Predicate::False(context);
		}
		bool matched = matcher->matches(status);
		if(U_FAILURE(status)) {
			BOOST_ASSERT(U_SUCCESS(status));
			return Predicate::False(context);
		}
		if(!matched) {
			return Predicate::False(context);
		}
	}
	return Predicate::True(context);
}

} /* end ns Wccl */