From dbb28904730f7c63a8971cd6aeff9f95081413de Mon Sep 17 00:00:00 2001
From: Adam Wardynski <award@.(win7-laptop)>
Date: Thu, 21 Apr 2011 17:35:33 +0200
Subject: [PATCH] is() match condition (matching an annotation at beginning of a segment).

---
 libwccl/CMakeLists.txt                        |  1 +
 .../ops/match/conditions/isannotatedas.cpp    | 73 +++++++++++++++++++
 libwccl/ops/match/conditions/isannotatedas.h  | 56 ++++++++++++++
 3 files changed, 130 insertions(+)
 create mode 100644 libwccl/ops/match/conditions/isannotatedas.cpp
 create mode 100644 libwccl/ops/match/conditions/isannotatedas.h

diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 05affc1..59dc656 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -62,6 +62,7 @@ SET(libwccl_STAT_SRC
 	ops/match/actions/unmarkmatch.cpp
 	ops/match/applyoperator.cpp
 	ops/match/conditions/conjconditions.cpp
+	ops/match/conditions/isannotatedas.cpp
 	ops/match/conditions/longest.cpp
 	ops/match/conditions/oneof.cpp
 	ops/match/conditions/optionalmatch.cpp
diff --git a/libwccl/ops/match/conditions/isannotatedas.cpp b/libwccl/ops/match/conditions/isannotatedas.cpp
new file mode 100644
index 0000000..0b6defb
--- /dev/null
+++ b/libwccl/ops/match/conditions/isannotatedas.cpp
@@ -0,0 +1,73 @@
+#include <libwccl/ops/match/conditions/isannotatedas.h>
+#include <sstream>
+
+namespace Wccl {
+
+/**
+ * Matches the annotation segment of channel chan_name_ that starts at the
+ * current sentence position and moves the position just past that segment.
+ * Returns a default-constructed MatchResult when the sentence has no such
+ * channel, when the position is outside any segment of it, or when the
+ * position is inside a segment but not on its first token.
+ * @throws InvalidArgument if the context's sentence is not an AnnotatedSentence.
+ */
+MatchResult IsAnnotatedAs::apply(const ActionExecContext& context) const
+{
+	SentenceContext& sc = context.sentence_context();
+	boost::shared_ptr<Corpus2::AnnotatedSentence> as;
+	as = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(sc.get_sentence_ptr());
+	if (!as) {
+		throw InvalidArgument("context", "Operator needs an annotated sentence.");
+	}
+	if (!as->has_channel(chan_name_)) {
+		return MatchResult();
+	}
+
+	int orig_iter = sc.get_position();
+	const Corpus2::AnnotationChannel& channel = as->get_channel(chan_name_);
+	int segment_idx = channel.get_segment_at(orig_iter);
+
+	// are we even within a segment annotated with given annotation?
+	if (segment_idx == 0) {
+		return MatchResult();
+	}
+	// ok, so are we at the beginning of the segment? If the previous token
+	// carries the same segment index, we are past its start - no match.
+	if ((orig_iter > 0) && (channel.get_segment_at(orig_iter - 1) == segment_idx)) {
+		return MatchResult();
+	}
+	// we are at the beginning of a segment with given annotation, so match it (continuous fragment at least).
+	boost::shared_ptr<AnnotationMatch> ann_match(
+		new AnnotationMatch(orig_iter, chan_name_));
+	int segment_length = 1;
+	for (
+		int i = orig_iter + 1;
+		(i < sc.size()) && (channel.get_segment_at(i) == segment_idx);
+		++i
+	) {
+		++segment_length;
+	}
+	// increase current sentence position to point after the matched segment
+	sc.set_position(orig_iter + segment_length);
+	return MatchResult(ann_match);
+}
+
+/**
+ * Builds name() followed by the channel name in parentheses; tagset is unused.
+ */
+std::string IsAnnotatedAs::to_string(const Corpus2::Tagset& tagset) const
+{
+	std::ostringstream os;
+	os << name() << "(" << chan_name_ << ")";
+	return os.str();
+}
+
+/**
+ * Writes the same text as to_string() to the given stream and returns it.
+ */
+std::ostream& IsAnnotatedAs::write_to(std::ostream& os) const
+{
+	return os << name() << "(" << chan_name_ << ")";
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/match/conditions/isannotatedas.h b/libwccl/ops/match/conditions/isannotatedas.h
new file mode 100644
index 0000000..f2dcfbb
--- /dev/null
+++ b/libwccl/ops/match/conditions/isannotatedas.h
@@ -0,0 +1,56 @@
+#ifndef LIBWCCL_OPS_MATCH_CONDITIONS_ISANNOTATEDAS_H
+#define LIBWCCL_OPS_MATCH_CONDITIONS_ISANNOTATEDAS_H
+
+#include <libwccl/ops/match/matchcondition.h>
+#include <libwccl/ops/function.h>
+
+namespace Wccl {
+
+/**
+ * is() match condition - matches an annotation segment of the given
+ * channel that begins at the current sentence position.
+ */
+class IsAnnotatedAs : public MatchCondition
+{
+public:
+	/**
+	 * @param annotation_name Name of the annotation channel to match;
+	 * asserted (BOOST_ASSERT) to be non-empty.
+	 */
+	IsAnnotatedAs(const std::string& annotation_name)
+		: chan_name_(annotation_name) {
+		BOOST_ASSERT(!chan_name_.empty());
+	}
+	/**
+	 * @returns Name of the Condition.
+	 */
+	std::string name() const {
+		return "is";
+	}
+	/**
+	 * Applies the condition to the given execution context.
+	 * If a match is found, the current sentence Position is increased
+	 * by the length of matched annotation segment.
+	 */
+	MatchResult apply(const ActionExecContext& context) const;
+
+	/**
+	 * @returns String representation of the Condition
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+protected:
+	/**
+	 * Writes the string representation of the Condition to
+	 * an output stream.
+	 * @returns Stream written to.
+	 * @note May be incomplete and/or contain internal info.
+	 */
+	std::ostream& write_to(std::ostream& ostream) const;
+private:
+	const std::string chan_name_;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_MATCH_CONDITIONS_ISANNOTATEDAS_H
-- 
GitLab