diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 054fb849cb1dbef58703d08c2f0d62c64e6bf298..dae29550b6d000fb14ff1f3c788bba7c473136c2 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -57,13 +57,14 @@ SET(libwccl_STAT_SRC
 	ops/functions/tset/catfilter.cpp
 	ops/functions/tset/getsymbols.cpp
 	ops/functions/tset/getsymbolsinrange.cpp
+	ops/match/actions/markmatch.cpp
+	ops/match/applyoperator.cpp
 	ops/match/conditions/conjconditions.cpp
 	ops/match/conditions/longest.cpp
 	ops/match/conditions/oneof.cpp
 	ops/match/conditions/optionalmatch.cpp
 	ops/match/conditions/repeatedmatch.cpp
 	ops/match/conditions/tokencondition.cpp
-	ops/match/applyoperator.cpp
 	ops/match/matchoperator.cpp
 	ops/rulesequence.cpp
 	ops/tagaction.cpp
diff --git a/libwccl/ops/match/actions/markmatch.cpp b/libwccl/ops/match/actions/markmatch.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9c987a2f9144198e4b88ca959ebb26d9a6226183
--- /dev/null
+++ b/libwccl/ops/match/actions/markmatch.cpp
@@ -0,0 +1,105 @@
+#include <libwccl/values/match.h>
+#include <libwccl/ops/match/actions/markmatch.h>
+
+#include <sstream>
+
+
+namespace Wccl {
+
+/**
+ * Marks the tokens spanned by the evaluated matches as one new segment
+ * in the annotation channel named by chan_name_.
+ *
+ * The span runs from the first token of the starting match to the last
+ * token of the ending match, inclusive. The channel is created when the
+ * sentence lacks it, and the leftmost token of the span becomes the head.
+ *
+ * @throws InvalidArgument if the sentence in the context is not annotated.
+ * @throws WcclError if the span leaves the sentence, is inverted, or covers
+ *         a token that already has a positive segment index in the channel.
+ */
+void MarkMatch::execute(const ActionExecContext& context) const
+{
+	SentenceContext& sent_ctx = context.sentence_context();
+	boost::shared_ptr<Corpus2::AnnotatedSentence> annotated =
+		boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(
+			sent_ctx.get_sentence_ptr());
+	if (!annotated) {
+		throw InvalidArgument("context", "Operator needs an annotated sentence.");
+	}
+
+	// One-match form: both members hold the same function, so evaluate it once.
+	boost::shared_ptr<const Match> start_match = match_from_->apply(context);
+	boost::shared_ptr<const Match> end_match;
+	if (match_from_.get() == match_to_.get()) {
+		end_match = start_match;
+	} else {
+		end_match = match_to_->apply(context);
+	}
+
+	const int first_pos = start_match->first_token(annotated).get_value();
+	const int last_pos = end_match->last_token(annotated).get_value();
+	if (first_pos < 0) {
+		throw WcclError("Received starting match that points outside sentence.");
+	}
+	if (last_pos >= sent_ctx.size()) {
+		throw WcclError("Received ending match that points outside sentence.");
+	}
+	if (first_pos > last_pos) {
+		throw WcclError("Received starting match points after the received ending match.");
+	}
+	// TODO: what about head in this mark from match actions? Mark from tag actions does have it.
+	const int head_pos = first_pos;
+
+	if (!annotated->has_channel(chan_name_)) {
+		annotated->create_channel(chan_name_);
+	}
+	Corpus2::AnnotationChannel& chan = annotated->get_channel(chan_name_);
+
+	const int new_segment = chan.get_new_segment_index();
+
+	// Check the whole span first so nothing is written when any token is already annotated.
+	for (int pos = first_pos; pos <= last_pos; ++pos) {
+		if (chan.get_segment_at(pos) > 0) {
+			throw WcclError("Mark action would overwrite existing annotation");
+		}
+	}
+	for (int pos = first_pos; pos <= last_pos; ++pos) {
+		chan.set_segment_at(pos, new_segment);
+		chan.set_head_at(pos, false);
+	}
+	chan.set_head_at(head_pos, true);
+}
+
+/**
+ * Renders the action as mark(from, [to, ]"channel"); the "to" argument is
+ * left out when both bounds are the same function object.
+ */
+std::string MarkMatch::to_string(const Corpus2::Tagset& tagset) const
+{
+	const bool separate_end = (match_from_.get() != match_to_.get());
+	std::ostringstream out;
+	out << name() << "(" << match_from_->to_string(tagset) << ", ";
+	if (separate_end) {
+		out << match_to_->to_string(tagset) << ", ";
+	}
+	out << "\"" << chan_name_ << "\")";
+	return out.str();
+}
+
+/**
+ * Stream counterpart of to_string() that needs no tagset: the match
+ * functions are written with their own stream operator.
+ */
+std::ostream& MarkMatch::write_to(std::ostream& os) const
+{
+	const bool separate_end = (match_from_.get() != match_to_.get());
+	os << name() << "(" << *match_from_ << ", ";
+	if (separate_end) {
+		os << *match_to_ << ", ";
+	}
+	os << "\"" << chan_name_ << "\")";
+	return os;
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/match/actions/markmatch.h b/libwccl/ops/match/actions/markmatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..9f5b6e51377854d08181154898cfc7d3c0a3971e
--- /dev/null
+++ b/libwccl/ops/match/actions/markmatch.h
@@ -0,0 +1,88 @@
+#ifndef LIBWCCL_OPS_MATCH_ACTIONS_MARKMATCH_H
+#define LIBWCCL_OPS_MATCH_ACTIONS_MARKMATCH_H
+
+#include <libwccl/ops/match/matchaction.h>
+#include <libwccl/ops/function.h>
+
+namespace Wccl {
+
+/**
+ * Match action that marks a span of tokens, delimited by one or two
+ * matches, as a segment in a named annotation channel.
+ */
+class MarkMatch : public MatchAction
+{
+public:
+	/**
+	 * Marks from the first token of match_from to the last token of match_to.
+	 * @param match_from function yielding the match that starts the span
+	 * @param match_to function yielding the match that ends the span
+	 * @param annotation_name name of the channel to mark in
+	 */
+	MarkMatch(
+			const boost::shared_ptr<Function<Match> >& match_from,
+			const boost::shared_ptr<Function<Match> >& match_to,
+			const std::string& annotation_name)
+		: match_from_(match_from),
+		  match_to_(match_to),
+		  chan_name_(annotation_name)
+	{
+		BOOST_ASSERT(match_from_);
+		BOOST_ASSERT(match_to_);
+	}
+
+	/**
+	 * Marks the span of a single match.
+	 * @param match_from_to function yielding the match to mark
+	 * @param annotation_name name of the channel to mark in
+	 */
+	MarkMatch(
+			const boost::shared_ptr<Function<Match> >& match_from_to,
+			const std::string& annotation_name)
+		: match_from_(match_from_to),
+		  match_to_(match_from_to),
+		  chan_name_(annotation_name)
+	{
+		BOOST_ASSERT(match_from_);
+		BOOST_ASSERT(match_to_);
+	}
+
+	/**
+	 * @returns Name of the action.
+	 */
+	std::string name() const {
+		return "mark";
+	}
+
+	/**
+	 * Executes the action for the given execution context.
+	 */
+	void execute(const ActionExecContext& context) const;
+
+	/**
+	 * @returns String representation of the expression.
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+protected:
+	/**
+	 * Writes string representation of the MatchAction to
+	 * an output stream.
+	 * @returns Stream written to.
+	 * @note May be incomplete and/or contain internal info.
+	 */
+	virtual std::ostream& write_to(std::ostream& ostream) const;
+
+private:
+	// Held by value on purpose: the constructors receive references that may
+	// be bound to temporaries, and a reference member would dangle once the
+	// caller's shared_ptr is gone. The one-match constructor copies the same
+	// pointer into both members, so comparing get() still detects that form.
+	const boost::shared_ptr<Function<Match> > match_from_;
+	const boost::shared_ptr<Function<Match> > match_to_;
+	const std::string chan_name_;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_MATCH_ACTIONS_MARKMATCH_H
diff --git a/libwccl/ops/match/matchaction.h b/libwccl/ops/match/matchaction.h
index 507499cbe5caeced65dfe067c2ec5ba8820e5cb0..115e5b17d43dab54a5159b49533f7284519062ee 100644
--- a/libwccl/ops/match/matchaction.h
+++ b/libwccl/ops/match/matchaction.h
@@ -1,6 +1,9 @@
 #ifndef LIBWCCL_OPS_MATCH_MATCHACTION_H
 #define LIBWCCL_OPS_MATCH_MATCHACTION_H
 
+#include <libwccl/ops/expression.h>
+#include <libwccl/ops/actionexeccontext.h>
+
 namespace Wccl {
 
 /**