diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 5fd29ec6ab2cc2a299698609a846698601a893df..92bfe506d4a7dd9ebd1467a471c1c31432cbbb64 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -63,6 +63,7 @@ SET(libwccl_STAT_SRC
 	ops/match/actions/unmarkmatch.cpp
 	ops/match/applyoperator.cpp
 	ops/match/conditions/conjconditions.cpp
+	ops/match/conditions/isannotatedas.cpp
 	ops/match/conditions/longest.cpp
 	ops/match/conditions/oneof.cpp
 	ops/match/conditions/optionalmatch.cpp
diff --git a/libwccl/ops/match/conditions/isannotatedas.cpp b/libwccl/ops/match/conditions/isannotatedas.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0b6defb03b410872818d899f7faab0d496919533
--- /dev/null
+++ b/libwccl/ops/match/conditions/isannotatedas.cpp
@@ -0,0 +1,61 @@
+#include <libwccl/ops/match/conditions/isannotatedas.h>
+#include <sstream>
+
+namespace Wccl {
+
+MatchResult IsAnnotatedAs::apply(const ActionExecContext& context) const
+{
+	SentenceContext& sc = context.sentence_context();
+	boost::shared_ptr<Corpus2::AnnotatedSentence> as;
+	as = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(sc.get_sentence_ptr());
+	if (!as) {
+		throw InvalidArgument("context", "Operator needs an annotated sentence.");
+	}
+	// the sentence has no such channel at all, so there is nothing to match
+	if (!as->has_channel(chan_name_)) {
+		return MatchResult();
+	}
+
+	int orig_iter = sc.get_position();
+	const Corpus2::AnnotationChannel& channel = as->get_channel(chan_name_);
+	int segment_idx = channel.get_segment_at(orig_iter);
+
+	// are we even within a segment annotated with given annotation?
+	if (segment_idx == 0) {
+		return MatchResult();
+	}
+	// ok, so are we at the beginning of the segment? If the previous token
+	// belongs to the same segment, we are somewhere inside it - no match.
+	// (At position 0 there is no previous token to look at.)
+	if ((orig_iter > 0) && (channel.get_segment_at(orig_iter - 1) == segment_idx)) {
+		return MatchResult();
+	}
+	// we are at the beginning of a segment with given annotation, so match it (continuous fragment at least).
+	boost::shared_ptr<AnnotationMatch> ann_match(
+		new AnnotationMatch(orig_iter, chan_name_));
+	int segment_length = 1;
+	for (
+		int i = orig_iter + 1;
+		(i < sc.size()) && (channel.get_segment_at(i) == segment_idx);
+		++i
+	) {
+		++segment_length;
+	}
+	// increase current sentence position to point after the matched segment
+	sc.set_position(orig_iter + segment_length);
+	return MatchResult(ann_match);
+}
+
+std::string IsAnnotatedAs::to_string(const Corpus2::Tagset& tagset) const
+{
+	std::ostringstream os;
+	os << name() << "(" << chan_name_ << ")";
+	return os.str();
+}
+
+std::ostream& IsAnnotatedAs::write_to(std::ostream& os) const
+{
+	return os << name() << "(" << chan_name_ << ")";
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/match/conditions/isannotatedas.h b/libwccl/ops/match/conditions/isannotatedas.h
new file mode 100644
index 0000000000000000000000000000000000000000..f2dcfbbb0cf57c4365836f7ed6446512fe1a45a7
--- /dev/null
+++ b/libwccl/ops/match/conditions/isannotatedas.h
@@ -0,0 +1,58 @@
+#ifndef LIBWCCL_OPS_MATCH_CONDITIONS_ISANNOTATEDAS_H
+#define LIBWCCL_OPS_MATCH_CONDITIONS_ISANNOTATEDAS_H
+
+#include <libwccl/ops/match/matchcondition.h>
+#include <libwccl/ops/function.h>
+
+namespace Wccl {
+
+/**
+ * is() match condition - matches an annotation
+ */
+class IsAnnotatedAs : public MatchCondition
+{
+public:
+	/**
+	 * @param annotation_name Name of the annotation channel to match;
+	 * must not be empty (asserted).
+	 */
+	IsAnnotatedAs(const std::string& annotation_name)
+		: chan_name_(annotation_name) {
+			BOOST_ASSERT(!chan_name_.empty());
+	}
+	/**
+	 * @returns Name of the Condition.
+	 */
+	std::string name() const {
+		return "is";
+	}
+	/**
+	 * Applies the condition to the given execution context.
+	 * Matches only when the current sentence position is the first token
+	 * of a segment in the annotation channel given at construction.
+	 * If a match is found, the current sentence Position is increased
+	 * by the length of matched annotation segment.
+	 * @throws InvalidArgument if the sentence is not an annotated sentence.
+	 */
+	MatchResult apply(const ActionExecContext& context) const;
+
+	/**
+	 * @returns String representation of the Condition
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+protected:
+	/**
+	 * Writes the string representation of the Condition to
+	 * an output stream.
+	 * @returns Stream written to.
+	 * @note May be incomplete and/or contain internal info.
+	 */
+	std::ostream& write_to(std::ostream& ostream) const;
+private:
+	const std::string chan_name_;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_MATCH_CONDITIONS_ISANNOTATEDAS_H
diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g
index 0473540241f6b7ac236a752ded0ee4e2347398f2..95d0c3b88ef8f6840461f34e5a3178e50fe9f733 100644
--- a/libwccl/parser/grammar.g
+++ b/libwccl/parser/grammar.g
@@ -54,6 +54,8 @@ header {
 	#include <libwccl/ops/functions/tset/getsymbolsinrange.h>
 
 	#include <libwccl/ops/functions/position/relativeposition.h>
+	#include <libwccl/ops/functions/position/lasttoken.h>
+	#include <libwccl/ops/functions/position/firsttoken.h>
 
 	#include <libwccl/ops/functions/bool/iterations/only.h>
 	#include <libwccl/ops/functions/bool/iterations/atleast.h>
@@ -831,8 +833,10 @@ position_operator
 	[const Corpus2::Tagset& tagset, Variables& vars]
 	returns [boost::shared_ptr<Function<Position> > ret]
 	:
-	( ret = position_var_val [vars]
-	| ret = position_condition [tagset, vars]
+	( ret = position_var_val     [vars]
+	| ret = position_condition   [tagset, vars]
+	| ret = position_first_token [tagset, vars]
+	| ret = position_last_token  [tagset, vars]
 	| LPAREN ret = position_operator [tagset, vars] RPAREN
 	)
 	( // if there is SIGNED_INT after the position, it is actually a relative position
@@ -881,6 +885,32 @@ position_condition
 	}
 ;
 
+// ----------------------------------------------------------------------------
+// Taking position of a first token in a match
+// first(Match)
+position_first_token [const Corpus2::Tagset& tagset, Variables& vars]
+	returns [boost::shared_ptr<Function<Position> > ret]
+{
+	boost::shared_ptr<Function<Match> > m;
+}
+	: "first" LPAREN m = match_fit [tagset, vars] RPAREN {
+		ret.reset(new FirstToken(m));
+	}
+;
+
+// ----------------------------------------------------------------------------
+// Taking position of a last token in a match
+// last(Match)
+position_last_token [const Corpus2::Tagset& tagset, Variables& vars]
+	returns [boost::shared_ptr<Function<Position> > ret]
+{
+	boost::shared_ptr<Function<Match> > m;
+}
+	: "last" LPAREN m = match_fit [tagset, vars] RPAREN {
+		ret.reset(new LastToken(m));
+	}
+;
+
 ///////////////////////////////////////////////////////////////////////////////
 // Stiring operator
 // Returns boost::shared_ptr<Function<StrSet> >