diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index dae29550b6d000fb14ff1f3c788bba7c473136c2..5466587c9b661694529a0eff7f3faf5aef994edb 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -58,6 +58,7 @@ SET(libwccl_STAT_SRC
 	ops/functions/tset/getsymbols.cpp
 	ops/functions/tset/getsymbolsinrange.cpp
 	ops/match/actions/markmatch.cpp
+	ops/match/actions/unmarkmatch.cpp
 	ops/match/applyoperator.cpp
 	ops/match/conditions/conjconditions.cpp
 	ops/match/conditions/longest.cpp
diff --git a/libwccl/ops/match/actions/unmarkmatch.cpp b/libwccl/ops/match/actions/unmarkmatch.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f1ac2a90b1c40002f1652911690a2fd0ff551062
--- /dev/null
+++ b/libwccl/ops/match/actions/unmarkmatch.cpp
@@ -0,0 +1,63 @@
+#include <libwccl/values/match.h>
+#include <libwccl/ops/match/actions/unmarkmatch.h>
+#include <libcorpus2/ann/annotatedsentence.h>
+
+#include <sstream>
+
+namespace Wccl {
+
+/**
+ * Removes the annotation in channel chan_name_ that passes through the
+ * first token of the match: every position carrying that annotation's
+ * segment index is reset to 0.
+ * @throws InvalidArgument if the sentence is not an annotated sentence
+ *         or lacks the channel.
+ * @throws WcclError if the match starts outside the sentence or there is
+ *         no annotation at that position.
+ */
+void UnmarkMatch::execute(const ActionExecContext& context) const
+{
+	SentenceContext& sc = context.sentence_context();
+	boost::shared_ptr<Corpus2::AnnotatedSentence> as =
+		boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(sc.get_sentence_ptr());
+	if (!as) {
+		throw InvalidArgument("context", "Operator needs an annotated sentence.");
+	}
+	if (!as->has_channel(chan_name_)) {
+		throw InvalidArgument("context", "Sentence does not have annotation channel \"" + chan_name_ + "\".");
+	}
+
+	const int abs_pos = match_->apply(context)->first_token(as).get_value();
+	if (sc.is_outside(abs_pos)) {
+		throw WcclError("Received starting match that points outside sentence.");
+	}
+
+	Corpus2::AnnotationChannel& channel = as->get_channel(chan_name_);
+
+	// The code treats segment index 0 as "no annotation here".
+	const int segment_idx = channel.get_segment_at(abs_pos);
+	if (segment_idx == 0) {
+		throw WcclError("No annotation \"" + chan_name_ + "\" to delete at the specified position.");
+	}
+
+	// Clear the whole annotation, not just the token the match points at.
+	for (int i = 0; i < channel.size(); ++i) {
+		if (channel.segments()[i] == segment_idx) {
+			channel.set_segment_at(i, 0);
+		}
+	}
+}
+
+std::string UnmarkMatch::to_string(const Corpus2::Tagset& tagset) const
+{
+	std::ostringstream os;
+	os << name() << "(" << match_->to_string(tagset) << ", \"" << chan_name_ << "\")";
+	return os.str();
+}
+
+std::ostream& UnmarkMatch::write_to(std::ostream& os) const
+{
+	return os << name() << "(" << *match_ << ", \"" << chan_name_ << "\")";
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/match/actions/unmarkmatch.h b/libwccl/ops/match/actions/unmarkmatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..b0cfd31e7bace0dbc916f6ab0116ed15ea2f5cfd
--- /dev/null
+++ b/libwccl/ops/match/actions/unmarkmatch.h
@@ -0,0 +1,67 @@
+#ifndef LIBWCCL_OPS_MATCH_ACTIONS_UNMARKMATCH_H
+#define LIBWCCL_OPS_MATCH_ACTIONS_UNMARKMATCH_H
+
+#include <libwccl/ops/match/matchaction.h>
+#include <libwccl/ops/function.h>
+
+namespace Wccl {
+
+/**
+ * Action to unmark (delete) an annotation passing through the first
+ * token of a Match.
+ */
+class UnmarkMatch : public MatchAction
+{
+public:
+	/**
+	 * @param match Function yielding the Match whose first token selects
+	 *        the annotation to delete; must not be null.
+	 * @param annotation_name Name of the annotation channel; must not be
+	 *        empty.
+	 */
+	UnmarkMatch(
+			const boost::shared_ptr<Function<Match> >& match,
+			const std::string& annotation_name)
+		: match_(match),
+		  chan_name_(annotation_name)
+	{
+		BOOST_ASSERT(match_);
+		BOOST_ASSERT(!chan_name_.empty());
+	}
+
+	/**
+	 * @returns Name of the action.
+	 */
+	std::string name() const {
+		return "unmark";
+	}
+
+	/**
+	 * Executes the action for the given execution context.
+	 */
+	void execute(const ActionExecContext& context) const;
+
+	/**
+	 * @returns String representation of the expression.
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+protected:
+	/**
+	 * Writes string representation of the MatchAction to
+	 * an output stream.
+	 * @returns Stream written to.
+	 * @note May be incomplete and/or contain internal info.
+	 */
+	virtual std::ostream& write_to(std::ostream& ostream) const;
+
+private:
+	// Held by value so the action shares ownership of the function; a
+	// reference member would dangle once the caller's pointer is destroyed.
+	const boost::shared_ptr<Function<Match> > match_;
+	const std::string chan_name_;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_MATCH_ACTIONS_UNMARKMATCH_H
diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g
index 9bea7459323dd907b6dd87c11b3c0ec026a1e118..3e553affe1fa62daaa202e3996b7c17469c53724 100644
--- a/libwccl/parser/grammar.g
+++ b/libwccl/parser/grammar.g
@@ -79,6 +79,7 @@ header {
 	#include <libwccl/ops/match/conditions/repeatedmatch.h>
 	#include <libwccl/ops/match/conditions/conjconditions.h>
 	#include <libwccl/ops/match/actions/markmatch.h>
+	#include <libwccl/ops/match/actions/unmarkmatch.h>
 
 	// Unicode String
 	#include <unicode/uniset.h>
@@ -1981,11 +1982,22 @@ match_mark_action
 	;
 
 // Match unmark action
-// Returns ???
+// Returns boost::shared_ptr<UnmarkMatch>
 match_unmark_action
 	[const Corpus2::Tagset& tagset, Variables& vars]
-	returns [boost::shared_ptr<MatchAction> m_act]
-	: "unmark" LPAREN /* TODO */ RPAREN
+	returns [boost::shared_ptr<UnmarkMatch> m_act]
+{
+	boost::shared_ptr<Function<Match> > match_at;
+}
+	: "unmark" LPAREN
+			match_at = match_fit[tagset, vars] COMMA
+			annotation_name : STRING
+		RPAREN {
+			m_act.reset(
+				new UnmarkMatch(
+					match_at,
+					((antlr::Token*)annotation_name)->getText()));
+		}
 	;
 
 // Match action separated by comma