Skip to content
Snippets Groups Projects
Commit c4ead5bd authored by Adam Wardynski's avatar Adam Wardynski
Browse files

Getting submatch from a match via "->" operator.

Also adds "M" to the grammar as a shorthand for the $m:_M variable getter (VarGetter) operator in a match context.
parent 72b7a918
Branches
No related merge requests found
......@@ -45,6 +45,7 @@ SET(libwccl_STAT_SRC
ops/functions/bool/predicates/regex.cpp
ops/functions/bool/predicates/strongagreement.cpp
ops/functions/bool/predicates/weakagreement.cpp
ops/functions/match/submatch.cpp
ops/functions/position/firsttoken.cpp
ops/functions/position/lasttoken.cpp
ops/functions/position/relativeposition.cpp
......
#include <libwccl/ops/functions/match/submatch.h>
#include <libwccl/ops/functions/vargetter.h>
namespace Wccl {
/**
 * Evaluates the operand expression and hands back the submatch stored
 * at index_ in the resulting Match's data.
 */
Submatch::BaseRetValPtr Submatch::apply_internal(
	const FunExecContext &context) const
{
	// Evaluate the operand first; everything else works on its value.
	const RetValPtr& operand = match_expr_->apply(context);
	// Bound as const on purpose, so the const submatch() accessor is the one used.
	const MatchData& data = operand->get_value();
	return RetValPtr(data.submatch(index_));
}
/**
 * Builds "<operand> <name> <index>". When the operand is a Match variable
 * getter whose representation equals that of variable "_M", it is printed
 * as the shorthand "M".
 */
std::string Submatch::to_string(const Corpus2::Tagset &tagset) const
{
	// The operand text is needed whichever way it ends up being printed.
	const std::string operand = match_expr_->to_string(tagset);

	std::ostringstream out;
	if (boost::dynamic_pointer_cast<VarGetter<Match> >(match_expr_)
			&& operand == Match::var_repr("_M")) {
		out << "M";
	} else {
		out << operand;
	}
	out << " " << name(tagset) << " " << index_;
	return out.str();
}
/**
 * Streams "<operand> <raw name> <index>". A Match variable getter whose raw
 * representation equals that of variable "_M" is written as the shorthand
 * "M"; any other operand is written as-is.
 */
std::ostream& Submatch::write_to(std::ostream& ostream) const
{
	if (boost::dynamic_pointer_cast<VarGetter<Match> >(match_expr_)) {
		// Variable getter: check whether it is the one that has a shorthand.
		const std::string raw = match_expr_->to_raw_string();
		if (raw == Match::var_repr("_M")) {
			ostream << "M";
		} else {
			ostream << raw;
		}
	} else {
		// Any other expression is streamed through its own output operator.
		ostream << *match_expr_;
	}
	return ostream << " " << raw_name() << " " << index_;
}
} /* end ns Wccl */
#ifndef LIBWCCL_OPS_FUNCTIONS_MATCH_SUBMATCH_H
#define LIBWCCL_OPS_FUNCTIONS_MATCH_SUBMATCH_H
#include <libwccl/ops/function.h>
#include <libwccl/values/match.h>
namespace Wccl {
/**
 * Submatch access operator ("->"): evaluates a Match expression and
 * yields the submatch found at a fixed index. Indices are counted
 * from 1, and the operand has to be a Match that holds a MatchVector.
 */
class Submatch : public Function<Match>
{
public:
	typedef boost::shared_ptr<Function<Match> > MatchFunctionPtr;

	/**
	 * @param match_expr expression yielding the Match to index into
	 *        (asserted to be non-null)
	 * @param index position of the wanted submatch, counted from 1
	 * @throws InvalidArgument if index is 0
	 */
	Submatch(const MatchFunctionPtr& match_expr, size_t index)
		: match_expr_(match_expr),
		  index_(index)
	{
		BOOST_ASSERT(match_expr_);
		// size_t is unsigned, so the only value below 1 is 0.
		if (index_ == 0) {
			throw InvalidArgument("index", "Submatch indices start from 1.");
		}
	}

	/**
	 * @returns Textual form of the whole expression, rendered with
	 * the given tagset
	 */
	std::string to_string(const Corpus2::Tagset& tagset) const;

	/**
	 * @returns Operator symbol used as the function's name
	 */
	std::string raw_name() const {
		return "->";
	}

protected:
	/** Expression that yields the Match being indexed. */
	const MatchFunctionPtr match_expr_;
	/** Index of the requested submatch, counted from 1. */
	const size_t index_;

	/**
	 * Writes the raw textual form of the expression to a stream.
	 * @returns Stream written to.
	 */
	std::ostream& write_to(std::ostream& ostream) const;

	/**
	 * Evaluates the argument expression and picks the submatch at the
	 * stored index from the resulting Match. This only succeeds when
	 * that Match holds a MatchVector.
	 * @throws WcclError if the Match does not hold a MatchVector, or
	 * if the index falls outside the MatchVector.
	 * @returns Match stored in the underlying MatchVector at the index
	 * (counted from 1, not from 0).
	 */
	BaseRetValPtr apply_internal(const FunExecContext& context) const;
};
} /* end ns Wccl */
#endif // LIBWCCL_OPS_FUNCTIONS_MATCH_SUBMATCH_H
......@@ -82,6 +82,7 @@ header {
#include <libwccl/ops/match/conditions/tokencondition.h>
#include <libwccl/ops/match/actions/markmatch.h>
#include <libwccl/ops/match/actions/unmarkmatch.h>
#include <libwccl/ops/functions/match/submatch.h>
// Unicode String
#include <unicode/uniset.h>
......@@ -2050,6 +2051,19 @@ match_fit
{
//
}
:
( ret = match_var_val [tagset, vars]
| "M" { ret.reset(new VarGetter<Match>(vars.create_accessor<Match>("_M"))); }
| LPAREN ret = match_fit [tagset, vars] RPAREN
)
( // if there's an arrow after the match, we have a submatch reference
ARROW i: UNSIGNED_INT { ret.reset(new Submatch(ret, token_ref_to_int(i))); }
)?
;
// Match operand taken either from match_vector_variable (which receives
// the variables collection) or from match_value_const.
// Returns the parsed operand as a Function<Match>.
// The tagset parameter is not used by either alternative shown here.
match_var_val
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Match> > ret]
: ret = match_vector_variable [vars]
| ret = match_value_const
;
......@@ -2211,6 +2225,13 @@ options {
: ','
;
// Token for the literal "->", used by the grammar to introduce
// a submatch reference after a match expression.
ARROW
options {
paraphrase = "->";
}
: "->"
;
SYMBOL
options {
paraphrase = "Symbol";
......
......@@ -2,10 +2,12 @@
#define LIBWCCL_VALUES_MATCHDATA_H
#include <libwccl/values/position.h>
#include <libwccl/exception.h>
#include <libcorpus2/ann/annotatedsentence.h>
namespace Wccl {
class Match;
/**
* Base abstract class for data held by a Match Value
* - VectorMatch, TokenMatch or AnnotationMatch.
......@@ -32,6 +34,20 @@ public:
*/
virtual Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const = 0;
/**
 * Const accessor for the submatch at a given index (counted from 1).
 * This default implementation does not support submatches and always
 * throws; the error message names MatchVector as the only kind of
 * match data that does.
 * @throws WcclError always
 */
virtual const boost::shared_ptr<const Match>& submatch(size_t /* idx */) const {
	throw WcclError("Getting a submatch is possible only for a MatchVector.");
}
/**
 * Non-const accessor for the submatch at a given index (counted from 1).
 * This default implementation does not support submatches and always
 * throws; the error message names MatchVector as the only kind of
 * match data that does.
 * @throws WcclError always
 */
virtual const boost::shared_ptr<Match>& submatch(size_t /* idx */) {
	throw WcclError("Getting a submatch is possible only for a MatchVector.");
}
/**
 * Returns the result of clone_internal() wrapped in a shared pointer.
 * NOTE(review): presumably clone_internal() yields a newly allocated
 * copy whose ownership passes to the returned pointer - confirm.
 */
boost::shared_ptr<MatchData> clone() const {
	boost::shared_ptr<MatchData> copy(clone_internal());
	return copy;
}
......
......@@ -89,10 +89,18 @@ void MatchVector::append(const boost::shared_ptr<MatchData> &m)
matches_.push_back(boost::shared_ptr<Match>(new Match(m)));
}
/**
 * Bounds-checked, const access to a submatch.
 * @param idx index of the wanted submatch, counted from 1
 * @throws WcclError if idx is 0 or greater than the number of submatches
 * @returns the submatch stored at position idx (element idx - 1 of the
 * underlying container)
 *
 * NOTE(review): matches_ appears to hold shared_ptr<Match>, while this
 * function returns a reference to shared_ptr<const Match>. If so, the
 * return statement binds the reference to a converted temporary.
 * Returning by value would avoid that, but it changes the signature
 * shared with MatchData - confirm and fix there.
 */
const boost::shared_ptr<const Match>& MatchVector::submatch(size_t idx) const {
	// Valid indices are 1..matches_.size(). Index 0 must be rejected
	// explicitly, otherwise idx - 1 would wrap around.
	if (idx == 0 || idx > matches_.size()) {
		throw Wccl::WcclError("Match vector index out of range");
	}
	return matches_[idx - 1];
}
const boost::shared_ptr<Match>& MatchVector::submatch(size_t idx)
{
if (idx < matches_.size()) {
return matches_[idx];
if (idx + 1 < matches_.size() || idx == 0) {
return matches_[idx - 1];
} else {
throw Wccl::WcclError("Match vector index out of range");
}
......
......@@ -47,17 +47,17 @@ public:
}
/**
* Submatch accesor with bounds checking, throws if out of bounds
* Submatch accessor with bounds checking, throws if out of bounds.
* @note Indexing is assumed from 1.
*/
const boost::shared_ptr<Match>& submatch(size_t idx);
/**
* Submatch indexing operator. Per C++ container tradition, no bounds
* checking is done.
* Submatch accessor with bounds checking, throws if out of bounds.
* Const version.
* @note Indexing is assumed from 1.
*/
const boost::shared_ptr<Match>& operator[](size_t idx) const {
return matches_[idx];
}
const boost::shared_ptr<const Match>& submatch(size_t idx) const;
void clear() {
matches_.clear();
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment