diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 054fb849cb1dbef58703d08c2f0d62c64e6bf298..dae29550b6d000fb14ff1f3c788bba7c473136c2 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -57,13 +57,14 @@ SET(libwccl_STAT_SRC
 	ops/functions/tset/catfilter.cpp
 	ops/functions/tset/getsymbols.cpp
 	ops/functions/tset/getsymbolsinrange.cpp
+	ops/match/actions/markmatch.cpp
+	ops/match/applyoperator.cpp
 	ops/match/conditions/conjconditions.cpp
 	ops/match/conditions/longest.cpp
 	ops/match/conditions/oneof.cpp
 	ops/match/conditions/optionalmatch.cpp
 	ops/match/conditions/repeatedmatch.cpp
 	ops/match/conditions/tokencondition.cpp
-	ops/match/applyoperator.cpp
 	ops/match/matchoperator.cpp
 	ops/rulesequence.cpp
 	ops/tagaction.cpp
diff --git a/libwccl/ops/match/actions/markmatch.cpp b/libwccl/ops/match/actions/markmatch.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9c987a2f9144198e4b88ca959ebb26d9a6226183
--- /dev/null
+++ b/libwccl/ops/match/actions/markmatch.cpp
@@ -0,0 +1,78 @@
+#include <libwccl/values/match.h>
+#include <libwccl/ops/match/actions/markmatch.h>
+
+#include <sstream>
+
+
+namespace Wccl {
+// Marks the tokens from the first token of match_from_ to the last token of match_to_ as one new segment of channel chan_name_.
+void MarkMatch::execute(const ActionExecContext& context) const
+{
+	SentenceContext& sc = context.sentence_context();
+	boost::shared_ptr<Corpus2::AnnotatedSentence> as;
+	as = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(sc.get_sentence_ptr());
+	if (!as) { // the sentence held by the context is not an AnnotatedSentence
+		throw InvalidArgument("context", "Operator needs an annotated sentence.");
+	}
+
+	boost::shared_ptr<const Match> match_from = match_from_->apply(context);
+	boost::shared_ptr<const Match> match_to =
+		(match_from_.get() == match_to_.get()) ? match_from : match_to_->apply(context); // same operand object: reuse its result instead of applying it twice
+
+	int abs_left = match_from->first_token(as).get_value();
+	int abs_right = match_to->last_token(as).get_value();
+	if (abs_left < 0) {
+		throw WcclError("Received starting match that points outside sentence.");
+	}
+	if (abs_right >= sc.size()) {
+		throw WcclError("Received ending match that points outside sentence.");
+	}
+	if (abs_left > abs_right) {
+		throw WcclError("Received starting match points after the received ending match.");
+	}
+	// TODO: what about head in this mark from match actions? Mark from tag actions does have it.
+	int abs_head = abs_left; // until the TODO above is resolved, the head is the leftmost marked token
+
+	if (!as->has_channel(chan_name_)) {
+		as->create_channel(chan_name_);
+	}
+	Corpus2::AnnotationChannel& channel = as->get_channel(chan_name_);
+
+	int segment_idx = channel.get_new_segment_index();
+
+	for (int i = abs_left; i <= abs_right; ++i) { // first pass: check the whole span before any token is modified
+		if (channel.get_segment_at(i) > 0) { // NOTE(review): presumably a positive value means "already annotated" - confirm against Corpus2::AnnotationChannel
+			throw WcclError("Mark action would overwrite existing annotation");
+		}
+	}
+	for (int i = abs_left; i <= abs_right; ++i) { // second pass: assign the new segment index and clear every head flag
+		channel.set_segment_at(i, segment_idx);
+		channel.set_head_at(i, false);
+	}
+	channel.set_head_at(abs_head, true); // exactly one token of the new segment is flagged as head
+}
+// Renders the action as name(from, [to, ]"channel"); the second operand is printed only when it is a different object than the first.
+std::string MarkMatch::to_string(const Corpus2::Tagset& tagset) const
+{
+	std::ostringstream os;
+	os << name() << "("
+		<< match_from_->to_string(tagset) << ", ";
+	if (match_from_.get() != match_to_.get()) {
+		os << match_to_->to_string(tagset) << ", ";
+	}
+	os << "\"" << chan_name_ << "\")";
+	return os.str();
+}
+// Same layout as to_string(), but streams the operands with operator<< instead of calling to_string(tagset).
+std::ostream& MarkMatch::write_to(std::ostream& os) const
+{
+	os << name() << "("
+		<< *match_from_ << ", ";
+	if (match_from_.get() != match_to_.get()) {
+		os << *match_to_ << ", ";
+	}
+	os << "\"" << chan_name_ << "\")";
+	return os;
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/match/actions/markmatch.h b/libwccl/ops/match/actions/markmatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..9f5b6e51377854d08181154898cfc7d3c0a3971e
--- /dev/null
+++ b/libwccl/ops/match/actions/markmatch.h
@@ -0,0 +1,84 @@
+#ifndef LIBWCCL_OPS_MATCH_ACTIONS_MARKMATCH_H
+#define LIBWCCL_OPS_MATCH_ACTIONS_MARKMATCH_H
+
+#include <libwccl/ops/match/matchaction.h>
+#include <libwccl/ops/function.h>
+
+namespace Wccl {
+
+/**
+ * Match action "mark": marks the tokens spanned by one or two match
+ * operands as a segment in a named annotation channel.
+ */
+class MarkMatch : public MatchAction
+{
+public:
+	/**
+	 * Two-operand form: the span starts at match_from and ends at match_to.
+	 * Both operands must be non-null (checked with BOOST_ASSERT).
+	 * @param annotation_name Name of the annotation channel to mark in.
+	 */
+	MarkMatch(
+		const boost::shared_ptr<Function<Match> >& match_from,
+		const boost::shared_ptr<Function<Match> >& match_to,
+		const std::string& annotation_name)
+		: match_from_(match_from),
+		  match_to_(match_to),
+		  chan_name_(annotation_name)
+	{
+		BOOST_ASSERT(match_from_);
+		BOOST_ASSERT(match_to_);
+	}
+
+	/**
+	 * Single-operand form: the same operand gives both ends of the span.
+	 * The operand must be non-null (checked with BOOST_ASSERT).
+	 * @param annotation_name Name of the annotation channel to mark in.
+	 */
+	MarkMatch(
+		const boost::shared_ptr<Function<Match> >& match_from_to,
+		const std::string& annotation_name)
+		: match_from_(match_from_to),
+		  match_to_(match_from_to),
+		  chan_name_(annotation_name)
+	{
+		BOOST_ASSERT(match_from_);
+		BOOST_ASSERT(match_to_);
+	}
+	/**
+	 * @returns Name of the action.
+	 */
+	std::string name() const {
+		return "mark";
+	}
+
+	/**
+	 * Executes the action for the given execution context.
+	 */
+	void execute(const ActionExecContext& context) const;
+
+	/**
+	 * @returns String representation of the expression.
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+protected:
+	/**
+	 * Writes string representation of the MatchAction to
+	 * an output stream.
+	 * @returns Stream written to.
+	 * @note May be incomplete and/or contain internal info.
+	 */
+	virtual std::ostream& write_to(std::ostream& ostream) const;
+private:
+	// Operands are stored as owning shared_ptr copies, not references: the
+	// constructor arguments belong to the caller and may be destroyed right after
+	// construction. Copies share the pointee, so comparing .get() still works.
+	const boost::shared_ptr<Function<Match> > match_from_;
+	const boost::shared_ptr<Function<Match> > match_to_;
+	const std::string chan_name_;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_MATCH_ACTIONS_MARKMATCH_H
diff --git a/libwccl/ops/match/matchaction.h b/libwccl/ops/match/matchaction.h
index 507499cbe5caeced65dfe067c2ec5ba8820e5cb0..115e5b17d43dab54a5159b49533f7284519062ee 100644
--- a/libwccl/ops/match/matchaction.h
+++ b/libwccl/ops/match/matchaction.h
@@ -1,6 +1,9 @@
 #ifndef LIBWCCL_OPS_MATCH_MATCHACTION_H
 #define LIBWCCL_OPS_MATCH_MATCHACTION_H
 
+#include <libwccl/ops/expression.h>
+#include <libwccl/ops/actionexeccontext.h>
+
 namespace Wccl {
 
 /**
diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g
index 4326f696ce7c1d85aaa348209121b29417ad6c9c..ba4f99c0fdb5fba5a1f0acfe110e574891085b6a 100644
--- a/libwccl/parser/grammar.g
+++ b/libwccl/parser/grammar.g
@@ -78,6 +79,7 @@ header {
 	#include <libwccl/ops/match/conditions/optionalmatch.h>
 	#include <libwccl/ops/match/conditions/repeatedmatch.h>
 	#include <libwccl/ops/match/conditions/conjconditions.h>
+	#include <libwccl/ops/match/actions/markmatch.h>
 
 	// Unicode String
 	#include <unicode/uniset.h>
@@ -407,41 +408,53 @@ position_value
 // ----------------------------------------------------------------------------
 // Value used into match operator such as TOK[position] and ANN[position, name]
 // Returns boost::shared_ptr<Match>
-match_value
+match_literal
 	returns [boost::shared_ptr<Match> val]
 {
 	boost::shared_ptr<MatchData> m;
 }
-	: m = match_data_value {
+	: m = match_data_literal {
 		val.reset(new Match(m));
 	}
 ;
 
+// Constant match value
+// Returns boost::shared_ptr<Constant<Match> >
+match_value_const
+	returns [boost::shared_ptr<Constant<Match> > val]
+{
+	boost::shared_ptr<Match> m;
+}
+	: m = match_literal {
+		val.reset(new Constant<Match>(*m));
+	}
+;
+
 // 
----------------------------------------------------------------------------
 // Value used into match operator such as TOK[position] and ANN[position, name]
 // Returns boost::shared_ptr<MatchData>
-match_data_value
+match_data_literal
 	returns [boost::shared_ptr<MatchData> val]
-	: val = token_match_value
-	| val = ann_match_value
-	| val = match_vector_value
+	: val = token_match_literal
+	| val = ann_match_literal
+	| val = match_vector_literal
 ;
 
-// token match value
+// token match literal - TOK[position]
 // Returns boost::shared_ptr<TokenMatch>
-token_match_value
+token_match_literal
 	returns [boost::shared_ptr<TokenMatch> val]
 {
 	boost::shared_ptr<Position> p;
 }
-	: "TOK"LBRACKET p = position_literal RBRACKET {
+	: "TOK" LBRACKET p = position_literal RBRACKET {
 		val.reset(new TokenMatch(*p));
 	}
 ;
 
-// annotation match value
+// annotation match literal - ANN[position, name]
 // Returns boost::shared_ptr<AnnotationMatch>
-ann_match_value
+ann_match_literal
 	returns [boost::shared_ptr<AnnotationMatch> val]
 {
 	boost::shared_ptr<Position> p;
@@ -451,28 +464,28 @@ ann_match_value
 	}
 ;
 
-// annotation match vector: MATCH() or MATCH(token, ann, MATCH())
+// annotation match vector literal: MATCH() or MATCH(token, ann, MATCH())
 // Returns boost::shared_ptr<MatchVector>
-match_vector_value
+match_vector_literal
 	returns [boost::shared_ptr<MatchVector> val]
 {
 	val.reset(new MatchVector());
 }
-	: "MATCH" LPAREN (match_vector_value_item[val])? RPAREN
+	: "MATCH" LPAREN (match_vector_literal_item[val])? RPAREN
 ;
 
 // Body of the MATCH value. It only adds vector items to the MatchVector
 // Item may be single or multiple
-match_vector_value_item [boost::shared_ptr<MatchVector>& mvector]
+match_vector_literal_item [boost::shared_ptr<MatchVector>& mvector]
 {
 	boost::shared_ptr<Match> m_val;
 }
-	: m_val = match_value {
+	: m_val = match_literal {
 		mvector->append(m_val);
 	}
 	(
 		COMMA
-		m_val = match_value {
+		m_val = match_literal {
 			mvector->append(m_val);
 		}
 	)*
@@ -1902,16 +1915,16 @@ match_cond_optional
 	}
 ;
 
-// Match condition - repleace
-// Returns boost::shared_ptr<OptionalMatch>
+// Match condition - repeat
+// Returns boost::shared_ptr<RepeatedMatch>
 match_cond_repeate
 	[const Corpus2::Tagset& tagset, Variables& vars]
-	returns [boost::shared_ptr<OptionalMatch> mtch]
+	returns [boost::shared_ptr<RepeatedMatch> mtch]
 {
 	boost::shared_ptr<ConjConditions> m_cond;
 }
-	: "repeate" LPAREN m_cond = match_condition [tagset, vars] RPAREN {
-		mtch.reset(new OptionalMatch(m_cond));
+	: "repeat" LPAREN m_cond = match_condition [tagset, vars] RPAREN {
+		mtch.reset(new RepeatedMatch(m_cond));
 	}
 ;
 
@@ -1927,11 +1940,32 @@ match_action
 ;
 
 // Match mark action
-// Returns ???
+// Returns boost::shared_ptr<MarkMatch>
 match_mark_action
 	[const Corpus2::Tagset& tagset, Variables& vars]
-	returns [boost::shared_ptr<MatchAction> m_act]
-	: "mark" LPAREN /* TODO */ RPAREN
+	returns [boost::shared_ptr<MarkMatch> m_act]
+{
+	boost::shared_ptr<Function<Match> > match_to;
+	boost::shared_ptr<Function<Match> > match_from;
+}
+	: "mark" LPAREN
+		match_from = match_fit[tagset, vars] COMMA
+		(match_to = match_fit[tagset, vars] COMMA) ? // optional second operand; match_to stays null when it is omitted
+		annotation_name : STRING
+	RPAREN {
+		if (!match_to) { // second operand was not given: build the single-operand mark
+			m_act.reset(
+				new MarkMatch(
+					match_from,
+					((antlr::Token*)annotation_name)->getText()));
+		} else {
+			m_act.reset(
+				new MarkMatch(
+					match_from,
+					match_to,
+					((antlr::Token*)annotation_name)->getText()));
+		}
+	}
 ;
 
 // Match unmark action
@@ -1964,6 +1998,19 @@ match_action_comma_sep
 	)*
 ;
 
+
+// Match operand of an action: either a match vector variable or a constant match literal
+// Returns boost::shared_ptr<Function<Match> >
+match_fit
+	[const Corpus2::Tagset& tagset, Variables& vars]
+	returns [boost::shared_ptr<Function<Match> > ret]
+{
+	// no local declarations needed
+}
+	: ret = match_vector_variable [vars]
+	| ret = match_value_const
+;
+
 ///////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////
 // ANTLR LEXER
diff --git a/libwccl/values/match.h b/libwccl/values/match.h
index d2ac17fbd1ecbd0dfe17ff7f1bbff26dda816e3d..6685ce3d8f376ead79ecdd2518911f511a765cef 100644
--- a/libwccl/values/match.h
+++ b/libwccl/values/match.h
@@ -49,6 +49,16 @@ public:
 	{
 	}
 
+	Match(const Match& match)
+		: match_(match.match_->clone()) // the copy gets its own clone of the other's MatchData
+	{
+	}
+
+	Match& operator=(const Match& match) {
+		match_ = match.match_->clone(); // replace our MatchData with a clone of the other's
+		return *this;
+	}
+
 	const MatchData& get_value() const {
 		return *match_;
 	}
@@ -61,7 +71,7 @@ public:
 	 * Check if the match is empty (matches nothing). Match objects themselves
 	 * are by definition empty, child classes are sometimes or always non-empty.
 	 */
-	virtual bool empty() const {
+	bool empty() const {
 		return match_->empty();
 	}
 
@@ -69,7 +79,7 @@ public:
 	 * Getter for the first token matched. If the match is empty, must return
 	 * Nowhere.
 	 */
-	virtual Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const {
+	Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const {
 		return match_->first_token(s);
 	}
 
@@ -77,7 +87,7 @@ public:
 	 * Getter for the last token matched. If the match is empty, must return
 	 * Nowhere.
 	 */
-	virtual Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const {
+	Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const {
 		return match_->last_token(s);
 	}
 
diff --git a/libwccl/values/matchdata.h b/libwccl/values/matchdata.h
index c8bd50d85881cd127547c66d10b14866019c2b81..1062c42792ba92e2fb82c0e63aa279db0bea2d6b 100644
--- a/libwccl/values/matchdata.h
+++ b/libwccl/values/matchdata.h
@@ -11,7 +11,7 @@ namespace Wccl {
  * - VectorMatch, TokenMatch or AnnotationMatch.
  * (empty VectorMatch should be default option)
  */
-class MatchData// : boost::noncopyable
+class MatchData
 {
 public:
 
@@ -38,6 +38,8 @@ public:
 
 	virtual std::string to_raw_string() const = 0;
 
+	virtual ~MatchData() {} // virtual destructor for this abstract base (it declares pure virtual members)
+
 protected:
 	virtual MatchData* clone_internal() const = 0;
 };
diff --git a/tests/isempty.cpp b/tests/isempty.cpp
index 11fcee8fdfb9d2db2442732c16901f406302f07f..a55593a3b92d03b955d7e1ccfdb8e9b1baf21c02 100644
--- a/tests/isempty.cpp
+++ b/tests/isempty.cpp
@@ -90,7 +90,7 @@ BOOST_FIXTURE_TEST_CASE(empty_matchvector, IsEmptyFix)
 	Match v_match(v);
 	boost::shared_ptr<Function<Match> > match_expr(new Constant<Match>(v_match));
 	IsEmpty<Match> e(match_expr);
-	BOOST_CHECK(!e.apply(cx)->get_value());
+	BOOST_CHECK(e.apply(cx)->get_value());
 }
 
 BOOST_FIXTURE_TEST_CASE(full_matchvector, IsEmptyFix)
@@ -100,7 +100,7 @@ BOOST_FIXTURE_TEST_CASE(full_matchvector, IsEmptyFix)
 	Match v_match(v);
 	boost::shared_ptr<Function<Match> > match_expr(new Constant<Match>(v_match));
 	IsEmpty<Match> e(match_expr);
-	BOOST_CHECK(e.apply(cx)->get_value());
+	BOOST_CHECK(!e.apply(cx)->get_value());
 }
 
 //------------ To string ----------