Newer
Older
#include <boost/foreach.hpp>
#define foreach BOOST_FOREACH
#include <sstream>
#include <libpwrutils/util.h>
namespace Wccl {
// Builds the exception describing a failed regular-expression compilation.
//
//   pattern     - the pattern text that was being compiled
//   status_code - ICU status code; stored as its name via u_errorName()
//   parse_error - ICU parse-error record; its line, offset, preContext and
//                 postContext fields are copied into this object
//
// All textual fields are stored as UTF-8 strings (PwrNlp::to_utf8).
RegexParseError::RegexParseError(
        const UnicodeString& pattern,
        const UErrorCode& status_code,
        const UParseError& parse_error)
    : WcclError("Could not parse regular expression.")
    , status(u_errorName(status_code))
    , pattern_line(parse_error.line)
    , offset(parse_error.offset)
    // Text taken from parse_error.preContext.
    , pre_context(PwrNlp::to_utf8(UnicodeString(parse_error.preContext)))
    // Text taken from parse_error.postContext; info() reports it as "Error".
    , error(PwrNlp::to_utf8(UnicodeString(parse_error.postContext)))
    , expression(PwrNlp::to_utf8(UnicodeString(pattern)))
{
    // Nothing else to do: every member is set in the initializer list.
}
std::string RegexParseError::info() const
{
std::stringstream ss;
ss << "Could not parse regular expression at line " << pattern_line
<< " offset " << offset << ". Status: " << status
<< ". Error: " << error << ". Expression was: " << expression;
return ss.str();
}
// Empty destructor, declared with an empty exception specification (throw()).
RegexParseError::~RegexParseError() throw() {}
// Compiles pat_str into an ICU RegexPattern.
//
// Returns a shared pointer that owns the compiled pattern.
// Throws RegexParseError, carrying the ICU status code and the parse-error
// record filled in by ICU, when ICU reports a failure for the pattern.
boost::shared_ptr<const RegexPattern> compile_regex(const UnicodeString &pat_str)
{
    // Value-initialization zeroes the plain-data parse-error record, so no
    // memset (and no <cstring>) is needed.
    UParseError parse_error = UParseError();
    // ICU expects the status to hold a success value on entry.
    UErrorCode status = U_ZERO_ERROR;

    // The shared_ptr takes ownership at once, so the pattern object is
    // released on every path, including the throw below.
    boost::shared_ptr<const RegexPattern> pattern(
            RegexPattern::compile(pat_str, parse_error, status));

    // U_FAILURE is true only for real errors. ICU warning codes are not
    // failures and must not discard a successfully compiled pattern.
    if (U_FAILURE(status)) {
        throw RegexParseError(pat_str, status, parse_error);
    }
    return pattern;
}
// Creates a regex predicate from a string-set expression and a pattern.
//
//   strset_expr - expression whose string set is tested; must be non-null
//   patstr      - regular-expression text; kept verbatim for printing and
//                 compiled once here via compile_regex(), which throws
//                 RegexParseError on an invalid pattern
Regex::Regex(const Regex::StrSetFunctionPtr &strset_expr, const UnicodeString &patstr)
    : strset_expr_(strset_expr)
    , patstr_(patstr)
    , pattern_(compile_regex(patstr))
{
    // Both the operand expression and the compiled pattern must be present.
    BOOST_ASSERT(strset_expr_);
    BOOST_ASSERT(pattern_);
}
// Renders this operator as text in the form
//   <name>(<string-set expression>, "<pattern>")
// using tagset to print the name and the nested expression.
std::string Regex::to_string(const Corpus2::Tagset& tagset) const
{
    std::ostringstream out;
    out << name(tagset) << "(";
    out << strset_expr_->to_string(tagset);
    // TODO: utf escaping? The pattern is written exactly as to_utf8 returns it.
    out << ", \"" << PwrNlp::to_utf8(patstr_) << "\")";
    return out.str();
}
// Renders this operator as text without a tagset, in the form
//   <raw name>(<raw string-set expression>, "<pattern>")
std::string Regex::to_raw_string() const
{
    std::ostringstream out;
    out << raw_name() << "(";
    out << strset_expr_->to_raw_string();
    // TODO: utf escaping? The pattern is written exactly as to_utf8 returns it.
    out << ", \"" << PwrNlp::to_utf8(patstr_) << "\")";
    return out.str();
}
Regex::BaseRetValPtr Regex::apply_internal(const FunExecContext& context) const {
const boost::shared_ptr<const StrSet>& set = strset_expr_->apply(context);
return Predicate::False(context);
}
foreach(const UnicodeString& s, set->contents()) {
UErrorCode status = U_ZERO_ERROR;
boost::scoped_ptr<RegexMatcher> matcher(pattern_->matcher(s, status));
if(status != U_ZERO_ERROR) {
BOOST_ASSERT(status == U_ZERO_ERROR);
return Predicate::False(context);
}
bool matched = matcher->matches(status);
if(status != U_ZERO_ERROR) {
BOOST_ASSERT(status == U_ZERO_ERROR);
return Predicate::False(context);
return Predicate::False(context);
return Predicate::True(context);