diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 59dc656c1c4ed5308e23c0b551bd9c5a3041d6cf..92bfe506d4a7dd9ebd1467a471c1c31432cbbb64 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -35,6 +35,7 @@ SET(libwccl_STAT_SRC
 	ops/functions/bool/iterations/rightlook.cpp
 	ops/functions/bool/predicate.cpp
 	ops/functions/bool/predicates/and.cpp
+	ops/functions/bool/predicates/annsub.cpp
 	ops/functions/bool/predicates/debug.cpp
 	ops/functions/bool/predicates/isinside.cpp
 	ops/functions/bool/predicates/isoutside.cpp
diff --git a/libwccl/ops/functions/bool/predicates/annsub.cpp b/libwccl/ops/functions/bool/predicates/annsub.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bcaefc254bd9afbbc3c650f1be2788e2da39abdc
--- /dev/null
+++ b/libwccl/ops/functions/bool/predicates/annsub.cpp
@@ -0,0 +1,89 @@
+#include <libwccl/ops/functions/bool/predicates/annsub.h>
+#include <libwccl/values/match.h>
+#include <libcorpus2/ann/annotatedsentence.h>
+
+#include <sstream>
+
+namespace Wccl {
+
+/**
+ * Evaluates the predicate: true when every token between the first token of
+ * the starting match and the last token of the ending match belongs to the
+ * same non-zero segment of the channel named chan_name_.
+ * @throws InvalidArgument if the context sentence is not an AnnotatedSentence.
+ * @throws WcclError if a match points outside the sentence or the range is reversed.
+ */
+AnnSub::BaseRetValPtr AnnSub::apply_internal(const FunExecContext& context) const
+{
+	boost::shared_ptr<Corpus2::AnnotatedSentence> as
+		= boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(
+			context.sentence_context().get_sentence_ptr());
+	if (!as) {
+		throw InvalidArgument("context", "Operator needs an annotated sentence.");
+	}
+
+	boost::shared_ptr<const Match> check_from = check_from_->apply(context);
+	// The one-match form stores the same function in both members; evaluate it once.
+	boost::shared_ptr<const Match> check_to =
+		(check_from_ == check_to_) ? check_from : check_to_->apply(context);
+	int abs_left = check_from->first_token(as).get_value();
+	if (abs_left < 0) {
+		throw WcclError("Received starting match that points outside sentence.");
+	}
+	int abs_right = check_to->last_token(as).get_value();
+	if (abs_right >= context.sentence_context().size()) {
+		throw WcclError("Received ending match that points outside sentence.");
+	}
+	if (abs_left > abs_right) {
+		throw WcclError("Received starting match points after the received ending match.");
+	}
+	// A predicate should not modify the sentence: a missing channel cannot
+	// contain the range, so answer False instead of creating an empty channel.
+	if (!as->has_channel(chan_name_)) {
+		return Predicate::False(context);
+	}
+	Corpus2::AnnotationChannel& channel = as->get_channel(chan_name_);
+
+	// Segment index 0 is treated by this check as "not inside any annotation".
+	int segment_idx = channel.get_segment_at(abs_left);
+	if (segment_idx == 0) {
+		return Predicate::False(context);
+	}
+	for (int i = abs_left + 1; i <= abs_right; ++i) {
+		if (segment_idx != channel.get_segment_at(i)) {
+			return Predicate::False(context);
+		}
+	}
+	return Predicate::True(context);
+}
+
+/**
+ * Builds the textual form annsub(from[, to], "channel"); the second match
+ * is printed only when it is a different function object than the first.
+ */
+std::string AnnSub::to_string(const Corpus2::Tagset& tagset) const
+{
+	std::ostringstream ostream;
+	ostream << raw_name() << "(" << check_from_->to_string(tagset);
+	if (check_from_ != check_to_) {
+		ostream << ", " << check_to_->to_string(tagset);
+	}
+	ostream << ", \"" << chan_name_ << "\")";
+	return ostream.str();
+}
+
+/**
+ * Streams the same annsub(from[, to], "channel") form, using the operands'
+ * own stream output operator instead of their tagset-aware to_string().
+ */
+std::ostream& AnnSub::write_to(std::ostream& ostream) const
+{
+	ostream << raw_name() << "(" << *check_from_;
+	if (check_from_ != check_to_) {
+		ostream << ", " << *check_to_;
+	}
+	ostream << ", \"" << chan_name_ << "\")";
+	return ostream;
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/functions/bool/predicates/annsub.h b/libwccl/ops/functions/bool/predicates/annsub.h
new file mode 100644
index 0000000000000000000000000000000000000000..da32dc02c2f665dfccaa3bd4d094a43ca7cc648e
--- /dev/null
+++ b/libwccl/ops/functions/bool/predicates/annsub.h
@@ -0,0 +1,73 @@
+#ifndef LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_ANNSUB_H
+#define LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_ANNSUB_H
+
+#include <libwccl/ops/functions/bool/predicate.h>
+
+namespace Wccl {
+
+/**
+ * Predicate over a token range given by one or two matches: true when the whole range lies in a single non-zero segment of the named annotation channel.
+ */
+class AnnSub : public Predicate
+{
+public:
+	AnnSub(
+		const boost::shared_ptr<Function<Match> >& check_from, // match supplying the start of the checked range
+		const boost::shared_ptr<Function<Match> >& check_to, // match supplying the end of the checked range
+		const std::string& annotation_name) // name of the annotation channel to inspect
+		: check_from_(check_from),
+		check_to_(check_to),
+		chan_name_(annotation_name)
+	{
+		BOOST_ASSERT(check_from);
+		BOOST_ASSERT(check_to);
+	}
+
+	AnnSub(
+		const boost::shared_ptr<Function<Match> >& check_from_to, // one match used for both ends of the range
+		const std::string& annotation_name) // name of the annotation channel to inspect
+		: check_from_(check_from_to),
+		check_to_(check_from_to),
+		chan_name_(annotation_name)
+	{
+		BOOST_ASSERT(check_from_);
+		BOOST_ASSERT(check_to_);
+	}
+
+
+	/**
+	 * @returns Name of the function
+	 */
+	std::string raw_name() const {
+		return "annsub";
+	}
+
+	/**
+	 * @returns String representation of the predicate
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+protected:
+	/**
+	 * Checks whether the tokens spanned by the match(es) share one non-zero segment of the channel.
+	 * @returns Predicate::True or Predicate::False; the implementation throws when the sentence is not annotated or the matches fall outside it.
+	 */
+	BaseRetValPtr apply_internal(const FunExecContext& context) const;
+
+	/**
+	 * Writes string representation of the AnnSub to
+	 * an output stream.
+	 * @returns Stream written to.
+	 * @note May be incomplete and/or contain internal info.
+	 */
+	std::ostream& write_to(std::ostream& ostream) const;
+
+private:
+	const boost::shared_ptr< const Function<Match> > check_from_; // evaluated to obtain the range start
+	const boost::shared_ptr< const Function<Match> > check_to_; // evaluated to obtain the range end; same object as check_from_ in the one-match form
+	const std::string chan_name_; // annotation channel name
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_ANNSUB_H
diff --git a/libwccl/ops/match/actions/markmatch.cpp b/libwccl/ops/match/actions/markmatch.cpp
index 27933f151ae8afd92ce0985565df49a3043ac4ca..7970e710e49b84dad031566963f410992f40fde9 100644
--- a/libwccl/ops/match/actions/markmatch.cpp
+++ b/libwccl/ops/match/actions/markmatch.cpp
@@ -17,9 +17,9 @@ void MarkMatch::execute(const ActionExecContext& context) const
 
 	boost::shared_ptr<const Match> match_from = match_from_->apply(context);
 	boost::shared_ptr<const Match> match_to =
-		(match_from_.get() == match_to_.get()) ? match_from : match_to_->apply(context);
+		(match_from_ == match_to_) ? match_from : match_to_->apply(context);
 	boost::shared_ptr<const Match> head_match =
-		(match_from_.get() == head_match_.get()) ? match_from : head_match_->apply(context);
+		(match_from_ == head_match_) ? 
match_from : head_match_->apply(context);
 
 	int abs_left = match_from->first_token(as).get_value();
 	if (abs_left < 0) {
diff --git a/libwccl/ops/match/applyoperator.cpp b/libwccl/ops/match/applyoperator.cpp
index a24ce2646a08c644d5c8ae614b8bc6bce27ee3b1..9a5a9878b185a8ea5a4e254539354779d8fc70c2 100644
--- a/libwccl/ops/match/applyoperator.cpp
+++ b/libwccl/ops/match/applyoperator.cpp
@@ -20,8 +20,10 @@ ApplyOperator::ApplyOperator(
 
 void ApplyOperator::execute(const ActionExecContext &context) const
 {
-	boost::shared_ptr<MatchVector> matches =
-		boost::dynamic_pointer_cast<MatchVector>(context.variables()->get_fast(_matches));
+	MatchVector* matches =
+		dynamic_cast<MatchVector*>(
+			&context.variables()->get_fast(_matches)->get_value());
+	BOOST_ASSERT(matches);
 	context.sentence_context().goto_start();
 	while(context.sentence_context().is_current_inside()) {
 		int orig_pos = context.sentence_context().get_position();
diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g
index 959054842860e10fa49c12b15a1b981158ce2e23..95d0c3b88ef8f6840461f34e5a3178e50fe9f733 100644
--- a/libwccl/parser/grammar.g
+++ b/libwccl/parser/grammar.g
@@ -39,6 +39,7 @@ header {
 	#include <libwccl/ops/functions/bool/predicates/weakagreement.h>
 	#include <libwccl/ops/functions/bool/predicates/pointagreement.h>
 	#include <libwccl/ops/functions/bool/predicates/strongagreement.h>
+	#include <libwccl/ops/functions/bool/predicates/annsub.h>
 
 	#include <libwccl/ops/functions/strset/affix.h>
 	#include <libwccl/ops/functions/strset/getorth.h>
@@ -690,7 +691,7 @@ symset_operator
 ;
 
 // ----------------------------------------------------------------------------
-// It's wrapper for symset variable and symset value.
+// A wrapper for symset variable and symset value. 
 symset_var_val
 	[const Corpus2::Tagset& tagset, Variables& vars]
 	returns [boost::shared_ptr<Function<TSet> > op]
@@ -1060,6 +1061,8 @@ bool_operator
 	| ret = bool_agreement [tagset, vars]
 	//
 	| ret = bool_phrase [tagset, vars]
+	//
+	| ret = bool_annsub [tagset, vars]
 	// debug operators
 	| ret = debug_print_operator [tagset, vars]
 	//
@@ -1302,6 +1305,35 @@ inter_operator
 		RPAREN
 ;
 
+// ----------------------------------------------------------------------------
+// Annotation-sub operator: annsub(match_from [, match_to], "channel").
+// Builds an AnnSub predicate from one or two matches and a channel name.
+bool_annsub
+	[const Corpus2::Tagset& tagset, Variables& vars]
+	returns [boost::shared_ptr<Function<Bool> > op]
+{
+	boost::shared_ptr< Function<Match> > match_from;
+	boost::shared_ptr< Function<Match> > match_to;
+	std::string chan_name;
+}
+	: "annsub" LPAREN
+		match_from = match_fit [tagset, vars] COMMA
+		(match_to = match_fit [tagset, vars] COMMA)?
+		name : STRING
+		RPAREN
+		{
+			// Copy the channel name out of the STRING token; without this the
+			// predicate would always be built with an empty channel name.
+			// NOTE(review): if this grammar has a helper that unescapes STRING tokens, use it here instead - confirm.
+			chan_name = name->getText();
+			if (match_to) {
+				op.reset(new AnnSub(match_from, match_to, chan_name));
+			} else {
+				op.reset(new AnnSub(match_from, chan_name));
+			}
+		}
+;
+
 // ----------------------------------------------------------------------------
 // Debug printing: 
 debug_print_operator