diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt index ecf74dfecea0e4b2e8b7e4c79f1c6168f231efe3..24e9c394bdeda1bbebad5673e35a5e5b4e07ac39 100644 --- a/libwccl/CMakeLists.txt +++ b/libwccl/CMakeLists.txt @@ -35,6 +35,7 @@ SET(libwccl_STAT_SRC ops/functions/bool/iterations/rightlook.cpp ops/functions/bool/predicate.cpp ops/functions/bool/predicates/and.cpp + ops/functions/bool/predicates/ann.cpp ops/functions/bool/predicates/annsub.cpp ops/functions/bool/predicates/debug.cpp ops/functions/bool/predicates/isinside.cpp diff --git a/libwccl/ops/functions/bool/predicates/ann.cpp b/libwccl/ops/functions/bool/predicates/ann.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c2faa04cbbc6d4f45de1c480f757a51036614066 --- /dev/null +++ b/libwccl/ops/functions/bool/predicates/ann.cpp @@ -0,0 +1,74 @@ +#include <libwccl/ops/functions/bool/predicates/ann.h> +#include <libwccl/values/match.h> +#include <libcorpus2/ann/annotatedsentence.h> + +namespace Wccl { + +Ann::BaseRetValPtr Ann::apply_internal(const FunExecContext& context) const +{ + boost::shared_ptr<Corpus2::AnnotatedSentence> as + = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>( + context.sentence_context().get_sentence_ptr()); + if (!as) { + throw InvalidArgument("context", "Operator needs an annotated sentence."); + } + + boost::shared_ptr<const Match> check_from = check_from_->apply(context); + boost::shared_ptr<const Match> check_to = + (check_from_ == check_to_) ? 
check_from : check_to_->apply(context); + int abs_left = check_from->first_token(as).get_value(); + if (abs_left < 0) { + throw WcclError("Received starting match that points outside sentence."); + } + int abs_right = check_to->last_token(as).get_value(); + if (abs_right >= context.sentence_context().size()) { + throw WcclError("Received ending match that points outside sentence."); + } + if (abs_left > abs_right) { + throw WcclError("Received starting match points after the received ending match."); + } + if (!as->has_channel(chan_name_)) { + as->create_channel(chan_name_); + } + Corpus2::AnnotationChannel& channel = as->get_channel(chan_name_); + + int segment_idx = channel.get_segment_at(abs_left); + if (segment_idx == 0) { + return Predicate::False(context); //not in a segment? + } else if (channel.get_segment_at(abs_left - 1) == segment_idx) { + return Predicate::False(context); //segment extends further left; NOTE(review): this probes the index just before abs_left (-1 when abs_left is 0), confirm get_segment_at tolerates out-of-range indices + } else if (channel.get_segment_at(abs_right + 1) == segment_idx) { + return Predicate::False(context); //segment extends further right; NOTE(review): this probes the index just past abs_right, which may equal the sentence size, confirm get_segment_at tolerates it + } else { + for (int i = abs_left + 1; i <= abs_right; ++i) { + if (segment_idx != channel.get_segment_at(i)) { + return Predicate::False(context); //not a continuous segment + // between abs_left and abs_right + } + } + } + return Predicate::True(context); +} + +std::string Ann::to_string(const Corpus2::Tagset& tagset) const +{ + std::ostringstream ostream; + ostream << raw_name() << "(" << check_from_->to_string(tagset); + if (check_from_ != check_to_) { + ostream << ", " << check_to_->to_string(tagset); + } + ostream << ", \"" << chan_name_ << "\")"; + return ostream.str(); +} + +std::ostream& Ann::write_to(std::ostream& ostream) const +{ + ostream << raw_name() << "(" << *check_from_; + if (check_from_ != check_to_) { + ostream << ", " << *check_to_; + } + ostream << ", \"" << chan_name_ << "\")"; + return ostream; +} + +} /* end ns Wccl */ diff --git a/libwccl/ops/functions/bool/predicates/ann.h
b/libwccl/ops/functions/bool/predicates/ann.h new file mode 100644 index 0000000000000000000000000000000000000000..6b7d878b0fc89fd7a78dc83a4b89778c483c7696 --- /dev/null +++ b/libwccl/ops/functions/bool/predicates/ann.h @@ -0,0 +1,76 @@ +#ifndef LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_ANN_H +#define LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_ANN_H + +#include <libwccl/ops/functions/bool/predicate.h> + +namespace Wccl { + +/** + * An annotation-checking match condition: checks whether the computed + * range corresponds to an annotation within a given channel. In case + * of a disjoint annotation, it is sufficient (and only possible) to correspond + * to one of the disjoint fragments. + */ +class Ann : public Predicate +{ +public: + Ann( + const boost::shared_ptr<Function<Match> >& check_from, + const boost::shared_ptr<Function<Match> >& check_to, + const std::string& annotation_name) + : check_from_(check_from), + check_to_(check_to), + chan_name_(annotation_name) + { + BOOST_ASSERT(check_from); + BOOST_ASSERT(check_to); + } + + Ann( + const boost::shared_ptr<Function<Match> >& check_from_to, + const std::string& annotation_name) + : check_from_(check_from_to), + check_to_(check_from_to), + chan_name_(annotation_name) + { + BOOST_ASSERT(check_from_); + BOOST_ASSERT(check_to_); + } + + + /** + * @returns Name of the function + */ + std::string raw_name() const { + return "ann"; + } + + /** + * @returns String representation of the predicate + */ + std::string to_string(const Corpus2::Tagset& tagset) const; + +protected: + /** + * Checks whether the tokens from the start of check_from to the end of check_to exactly cover one continuous segment of the channel. + * @returns True if they do, False otherwise (throws if the sentence is not annotated or the matched range is invalid) + */ + BaseRetValPtr apply_internal(const FunExecContext& context) const; + + /** + * Writes string representation of the Ann to + * an output stream. + * @returns Stream written to. + * @note May be incomplete and/or contain internal info.
+ */ + std::ostream& write_to(std::ostream& ostream) const; + +private: + const boost::shared_ptr< const Function<Match> > check_from_; + const boost::shared_ptr< const Function<Match> > check_to_; + const std::string chan_name_; +}; + +} /* end ns Wccl */ + +#endif // LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_ANN_H diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index cd33823cf73faef150e026a42d2261ef5d5936b5..6d8712b0500fd7974f41b61a3a0273f4e5a97fdf 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -41,6 +41,7 @@ header { #include <libwccl/ops/functions/bool/predicates/pointagreement.h> #include <libwccl/ops/functions/bool/predicates/strongagreement.h> #include <libwccl/ops/functions/bool/predicates/annsub.h> + #include <libwccl/ops/functions/bool/predicates/ann.h> #include <libwccl/ops/functions/strset/affix.h> #include <libwccl/ops/functions/strset/getorth.h> @@ -1334,11 +1335,9 @@ bool_ann name : STRING RPAREN { if (match_to) { - // TODO - // op.reset(new Ann(match_from, match_to, chan_name)); + op.reset(new Ann(match_from, match_to, chan_name)); } else { - // TODO - // op.reset(new Ann(match_from, chan_name)); + op.reset(new Ann(match_from, chan_name)); } } ;