Skip to content
Snippets Groups Projects
Commit 1cf10615 authored by ilor's avatar ilor
Browse files

Add the annsub operator

parent 3f4500b3
Branches
No related tags found
No related merge requests found
......@@ -35,6 +35,7 @@ SET(libwccl_STAT_SRC
ops/functions/bool/iterations/rightlook.cpp
ops/functions/bool/predicate.cpp
ops/functions/bool/predicates/and.cpp
ops/functions/bool/predicates/annsub.cpp
ops/functions/bool/predicates/debug.cpp
ops/functions/bool/predicates/isinside.cpp
ops/functions/bool/predicates/isoutside.cpp
......
#include <libwccl/ops/functions/bool/predicates/annsub.h>
#include <libwccl/values/match.h>
#include <libcorpus2/ann/annotatedsentence.h>
namespace Wccl {
AnnSub::BaseRetValPtr AnnSub::apply_internal(const FunExecContext& context) const
{
boost::shared_ptr<Corpus2::AnnotatedSentence> as
= boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(
context.sentence_context().get_sentence_ptr());
if (!as) {
throw InvalidArgument("context", "Operator needs an annotated sentence.");
}
boost::shared_ptr<const Match> check_from = check_from_->apply(context);
boost::shared_ptr<const Match> check_to =
(check_from_ == check_to_) ? check_from : check_to_->apply(context);
int abs_left = check_from->first_token(as).get_value();
if (abs_left < 0) {
throw WcclError("Received starting match that points outside sentence.");
}
int abs_right = check_to->last_token(as).get_value();
if (abs_right >= context.sentence_context().size()) {
throw WcclError("Received ending match that points outside sentence.");
}
if (abs_left > abs_right) {
throw WcclError("Received starting match points after the received ending match.");
}
if (!as->has_channel(chan_name_)) {
as->create_channel(chan_name_);
}
Corpus2::AnnotationChannel& channel = as->get_channel(chan_name_);
int segment_idx = channel.get_segment_at(abs_left);
if (segment_idx == 0) {
return Predicate::False(context);
} else {
for (int i = abs_left + 1; i <= abs_right; ++i) {
if (segment_idx != channel.get_segment_at(i)) {
return Predicate::False(context);
}
}
}
return Predicate::True(context);
}
std::string AnnSub::to_string(const Corpus2::Tagset& tagset) const
{
std::ostringstream ostream;
ostream << raw_name() << "(" << check_from_->to_string(tagset);
if (check_from_ != check_to_) {
ostream << ", " << check_to_->to_string(tagset);
}
ostream << ", \"" << chan_name_ << "\")";
return ostream.str();
}
std::ostream& AnnSub::write_to(std::ostream& ostream) const
{
ostream << raw_name() << "(" << *check_from_;
if (check_from_ != check_to_) {
ostream << ", " << *check_to_;
}
ostream << ", \"" << chan_name_ << "\")";
return ostream;
}
} /* end ns Wccl */
#ifndef LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_ANNSUB_H
#define LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_ANNSUB_H
#include <libwccl/ops/functions/bool/predicate.h>
namespace Wccl {
/**
* An annotation-checking match condition
*/
class AnnSub : public Predicate
{
public:
AnnSub(
const boost::shared_ptr<Function<Match> >& check_from,
const boost::shared_ptr<Function<Match> >& check_to,
const std::string& annotation_name)
: check_from_(check_from),
check_to_(check_to),
chan_name_(annotation_name)
{
BOOST_ASSERT(check_from);
BOOST_ASSERT(check_to);
}
AnnSub(
const boost::shared_ptr<Function<Match> >& check_from_to,
const std::string& annotation_name)
: check_from_(check_from_to),
check_to_(check_from_to),
chan_name_(annotation_name)
{
BOOST_ASSERT(check_from_);
BOOST_ASSERT(check_to_);
}
/**
* @returns Name of the function
*/
std::string raw_name() const {
return "annsub";
}
/**
* @returns String representation of the predicate
*/
std::string to_string(const Corpus2::Tagset& tagset) const;
protected:
/**
* Outputs the string value of the returned value
* @returns True
*/
BaseRetValPtr apply_internal(const FunExecContext& context) const;
/**
* Writes string representation of the AnnSub to
* an output stream.
* @returns Stream written to.
* @note May be incomplete and/or containt internal info.
*/
std::ostream& write_to(std::ostream& ostream) const;
private:
const boost::shared_ptr< const Function<Match> > check_from_;
const boost::shared_ptr< const Function<Match> > check_to_;
const std::string chan_name_;
};
} /* end ns Wccl */
#endif // LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_ANNSUB_H
......@@ -39,6 +39,7 @@ header {
#include <libwccl/ops/functions/bool/predicates/weakagreement.h>
#include <libwccl/ops/functions/bool/predicates/pointagreement.h>
#include <libwccl/ops/functions/bool/predicates/strongagreement.h>
#include <libwccl/ops/functions/bool/predicates/annsub.h>
#include <libwccl/ops/functions/strset/affix.h>
#include <libwccl/ops/functions/strset/getorth.h>
......@@ -1030,6 +1031,8 @@ bool_operator
| ret = bool_agreement [tagset, vars]
//
| ret = bool_phrase [tagset, vars]
//
| ret = bool_annsub [tagset, vars]
// debug operators
| ret = debug_print_operator [tagset, vars]
//
......@@ -1272,6 +1275,30 @@ inter_operator
RPAREN
;
// ----------------------------------------------------------------------------
// Annotation-sub operator.
bool_annsub
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Bool> > op]
{
boost::shared_ptr< Function<Match> > match_from;
boost::shared_ptr< Function<Match> > match_to;
std::string chan_name;
}
: "annsub" LPAREN
match_from = match_fit [tagset, vars] COMMA
(match_to = match_fit [tagset, vars] COMMA)?
name : STRING
RPAREN
{
if (match_to) {
op.reset(new AnnSub(match_from, match_to, chan_name));
} else {
op.reset(new AnnSub(match_from, chan_name));
}
}
;
// ----------------------------------------------------------------------------
// Debug printing:
debug_print_operator
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment