Skip to content
Snippets Groups Projects
Select Git revision
  • a994d25f90d2c04879f80d6f9df44878a6ee2428
  • master default protected
  • vertical_relations
  • lu_without_semantic_frames
  • hierarchy
  • additional-unification-filters
  • v0.1.1
  • v0.1.0
  • v0.0.9
  • v0.0.8
  • v0.0.7
  • v0.0.6
  • v0.0.5
  • v0.0.4
  • v0.0.3
  • v0.0.2
  • v0.0.1
17 results

wsgi.py

Blame
  • regex.cpp 2.55 KiB
    #include <libwccl/ops/functions/bool/predicates/regex.h>
    #include <libpwrutils/foreach.h>
    
    #include <libpwrutils/util.h>
    
    namespace Wccl {
    
    /**
     * Creates the error raised when a regular expression fails to compile.
     *
     * @param pattern     the expression text that could not be parsed (stored
     *                    so that info() can report it)
     * @param status_code ICU status code; only its symbolic name, obtained
     *                    with u_errorName(), is stored
     * @param parse_error ICU parse-error details (line, offset, context),
     *                    stored as-is
     */
    RegexParseError::RegexParseError(
    	const UnicodeString& pattern,
    	const UErrorCode& status_code,
    	const UParseError& parse_error)
    	: WcclError("Could not parse regular expression."),
    	  pattern(pattern),
    	  status(u_errorName(status_code)),
    	  upe(parse_error)
    {
    }
    
    std::string RegexParseError::info() const
    {
    	std::stringstream ss;
    	ss << "Could not parse regular expression at line " << upe.line
    		<< " offset " << upe.offset << ". Status: " << status
    		<< ". Error: " << PwrNlp::to_utf8(UnicodeString(upe.postContext))
    	    << ". Expression was: " << PwrNlp::to_utf8(pattern);
    	return ss.str();
    }
    
    /**
     * Empty destructor; nothing is released explicitly here.
     * NOTE(review): the throw() specification presumably mirrors the base
     * exception class' destructor - confirm against WcclError.
     */
    RegexParseError::~RegexParseError() throw()
    {
    }
    
    /**
     * Compiles a pattern string into an ICU RegexPattern.
     *
     * @param pat_str the regular expression text
     * @return shared pointer owning the compiled pattern
     * @throws RegexParseError when ICU reports a failure status; the exception
     *         carries the pattern text, the status code and the parse-error
     *         details filled in by ICU
     *
     * Success is tested with U_FAILURE() rather than by comparing against
     * U_ZERO_ERROR, because ICU warning codes are distinct from U_ZERO_ERROR
     * yet still denote a successful call.
     */
    boost::shared_ptr<const RegexPattern> compile_regex(const UnicodeString &pat_str)
    {
    	UParseError error;
    	// Zero the parse-error record so its fields are well defined even if
    	// ICU leaves some of them untouched.
    	memset(&error, 0, sizeof(error));
    	// ICU functions require the status argument to start out as success.
    	UErrorCode status = U_ZERO_ERROR;
    	// The smart pointer takes ownership immediately, so whatever compile()
    	// returned is released if we throw below.
    	boost::shared_ptr<const RegexPattern> pattern(RegexPattern::compile(pat_str, error, status));
    	if(U_FAILURE(status))
    	{
    		throw RegexParseError(pat_str, status, error);
    	}
    	return pattern;
    }
    
    /**
     * Builds the regex predicate from a string-set expression and a pattern.
     *
     * The pattern text is kept (it is used when printing the operator) and is
     * compiled immediately through compile_regex(), so an invalid pattern
     * surfaces as a RegexParseError thrown from this constructor.
     *
     * @param strset_expr expression yielding the set of strings to test
     * @param patstr      regular expression text
     */
    Regex::Regex(const Regex::StrSetFunctionPtr &strset_expr, const UnicodeString &patstr)
    	: strset_expr_(strset_expr),
    	  patstr_(patstr),
    	  pattern_(compile_regex(patstr))
    {
    	// Both the operand expression and the compiled pattern must be non-null.
    	BOOST_ASSERT(strset_expr_);
    	BOOST_ASSERT(pattern_);
    }
    
    /**
     * Renders this operator as text in the form
     * name(string-set-expression, "pattern"), with the pattern converted
     * to UTF-8.
     *
     * @param tagset tagset passed on to name() and to the operand's to_string()
     * @return the textual representation
     */
    std::string Regex::to_string(const Corpus2::Tagset& tagset) const
    {
    	std::stringstream out;
    	out << name(tagset) << "(";
    	out << strset_expr_->to_string(tagset);
    	//TODO: utf escaping?
    	out << ", \"" << PwrNlp::to_utf8(patstr_) << "\")";
    	return out.str();
    }
    
    /**
     * Writes this operator to a stream in the form
     * raw_name(string-set-expression, "pattern"), with the pattern converted
     * to UTF-8.
     *
     * @param os destination stream
     * @return the same stream, to allow chaining
     */
    std::ostream& Regex::write_to(std::ostream& os) const
    {
    	os << raw_name() << "(";
    	os << *strset_expr_;
    	//TODO: utf escaping?
    	os << ", \"" << PwrNlp::to_utf8(patstr_) << "\")";
    	return os;
    }
    
    /**
     * Evaluates the predicate: True when the string set produced by the
     * operand expression is non-empty and every string in it is matched by
     * the pattern (via RegexMatcher::matches), False otherwise.
     *
     * An empty set yields False. If ICU reports a failure while creating the
     * matcher or while matching, the predicate asserts in debug builds and
     * yields False in release builds.
     *
     * ICU status is tested with U_FAILURE()/U_SUCCESS() instead of comparing
     * against U_ZERO_ERROR, so that warning codes (which denote success) do
     * not turn a valid match into False.
     *
     * @param context execution context handed to the operand expression and
     *                used to build the Boolean result
     * @return Predicate::True(context) or Predicate::False(context)
     */
    Regex::BaseRetValPtr Regex::apply_internal(const FunExecContext& context) const
    {
    	const boost::shared_ptr<const StrSet>& set = strset_expr_->apply(context);
    	if(set->empty()) {
    		return Predicate::False(context);
    	}
    	foreach(const UnicodeString& s, set->contents()) {
    		UErrorCode status = U_ZERO_ERROR;
    		boost::scoped_ptr<RegexMatcher> matcher(pattern_->matcher(s, status));
    		// Guard against a null matcher as well, since it is dereferenced below.
    		if(U_FAILURE(status) || !matcher) {
    			BOOST_ASSERT(U_SUCCESS(status) && matcher);
    			return Predicate::False(context);
    		}
    		bool matched = matcher->matches(status);
    		if(U_FAILURE(status)) {
    			BOOST_ASSERT(U_SUCCESS(status));
    			return Predicate::False(context);
    		}
    		// One non-matching string is enough to decide the result.
    		if(!matched) {
    			return Predicate::False(context);
    		}
    	}
    	return Predicate::True(context);
    }
    
    } /* end ns Wccl */