From a3897913ea0ac318cc9d80c07514942da827e5f4 Mon Sep 17 00:00:00 2001 From: Adam Wardynski <award@.(win7-laptop)> Date: Tue, 19 Apr 2011 09:15:04 +0200 Subject: [PATCH] Redesign Match to act more like other Value types. MatchVector, AnnotationMatch, TokenMatch are now MatchData MatchData is value_type of Match which now has get_value/set_value Match has also copy construction working properly now Default value for Match holds MatchVector so adding $m:_M var in grammar works as intended. --- .../ops/match/conditions/tokencondition.cpp | 2 +- libwccl/ops/match/matchresult.h | 28 +++++++++ libwccl/parser/grammar.g | 15 ++++- libwccl/values/annotationmatch.cpp | 7 --- libwccl/values/annotationmatch.h | 19 +++--- libwccl/values/match.cpp | 5 -- libwccl/values/match.h | 59 +++++++++++++++++-- libwccl/values/matchdata.h | 46 +++++++++++++++ libwccl/values/matchvector.cpp | 28 ++++++--- libwccl/values/matchvector.h | 28 ++++++--- libwccl/values/tokenmatch.cpp | 7 --- libwccl/values/tokenmatch.h | 19 +++--- tests/match.cpp | 2 +- 13 files changed, 209 insertions(+), 56 deletions(-) create mode 100644 libwccl/values/matchdata.h diff --git a/libwccl/ops/match/conditions/tokencondition.cpp b/libwccl/ops/match/conditions/tokencondition.cpp index e7f670f..533a2ab 100644 --- a/libwccl/ops/match/conditions/tokencondition.cpp +++ b/libwccl/ops/match/conditions/tokencondition.cpp @@ -23,7 +23,7 @@ MatchResult TokenCondition::apply(const ActionExecContext& context) const { int orig_iter = context.sentence_context().get_position(); if (_predicate->apply(context)->get_value()) { - boost::shared_ptr<Match> match(new TokenMatch(context.sentence_context().get_position())); + boost::shared_ptr<TokenMatch> match(new TokenMatch(context.sentence_context().get_position())); // increase current sentence position by one after successful token match context.sentence_context().set_position(orig_iter + 1); return MatchResult(match); diff --git a/libwccl/ops/match/matchresult.h b/libwccl/ops/match/matchresult.h index 84cdea5..a8b0745 100644 --- a/libwccl/ops/match/matchresult.h +++ b/libwccl/ops/match/matchresult.h @@ -13,6 +13,10 @@ class MatchResult { public: MatchResult(const boost::shared_ptr<Match>& match); + MatchResult(const boost::shared_ptr<MatchData>& match); + MatchResult(const boost::shared_ptr<MatchVector>& match); + MatchResult(const boost::shared_ptr<TokenMatch>& match); + MatchResult(const boost::shared_ptr<AnnotationMatch>& match); MatchResult(); boost::shared_ptr<Match> get_match() const; bool matched() const; @@ -35,6 +39,30 @@ MatchResult::MatchResult(const boost::shared_ptr<Match>& match) _match(match) { } +inline +MatchResult::MatchResult(const boost::shared_ptr<MatchData>& match) + : _matched(true), + _match(new Match(match)) { +} + +inline +MatchResult::MatchResult(const boost::shared_ptr<TokenMatch>& match) + : _matched(true), + _match(new Match(match)) { +} + +inline +MatchResult::MatchResult(const boost::shared_ptr<MatchVector>& match) + : _matched(true), + _match(new Match(match)) { +} + +inline +MatchResult::MatchResult(const boost::shared_ptr<AnnotationMatch>& match) + : _matched(true), + _match(new Match(match)) { +} + inline bool MatchResult::matched() const { return _matched; diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index 4892a65..f6c5c4d 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -108,7 +108,7 @@ options { private: // const UnicodeString token_ref_to_ustring(antlr::RefToken& rstr) const { - return UnicodeString::fromUTF8(((antlr::Token*)rstr)->getText()).unescape(); + return UnicodeString::fromUTF8(((antlr::Token*)rstr)->getText().c_str()).unescape(); } /* const UnicodeString str_token_ref_to_ustring(antlr::RefToken& rstr) const { @@ -409,6 +409,19 @@ position_value // Returns boost::shared_ptr<Match> match_value returns [boost::shared_ptr<Match> val] +{ + boost::shared_ptr<MatchData> m; +} + : m = match_data_value { + val.reset(new Match(m)); + } +; + +// ---------------------------------------------------------------------------- +// Value used into match operator such as TOK[position] and ANN[position, name] +// Returns boost::shared_ptr<MatchData> +match_data_value + returns [boost::shared_ptr<MatchData> val] : val = token_match_value | val = ann_match_value | val = match_vector_value diff --git a/libwccl/values/annotationmatch.cpp b/libwccl/values/annotationmatch.cpp index adca5db..54fb777 100644 --- a/libwccl/values/annotationmatch.cpp +++ b/libwccl/values/annotationmatch.cpp @@ -2,18 +2,11 @@ namespace Wccl { -const char* AnnotationMatch::type_name = "AnnotationMatch"; - std::string AnnotationMatch::to_raw_string() const { return "ANN[" + position_.to_raw_string() + "," + channel_ + "]"; } -std::string AnnotationMatch::var_repr(const std::string &var_name) -{ - return Match::var_repr(var_name); -} - Position AnnotationMatch::first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { size_t fpos = position_.get_value(); diff --git a/libwccl/values/annotationmatch.h b/libwccl/values/annotationmatch.h index 43c8885..90015fe 100644 --- a/libwccl/values/annotationmatch.h +++ b/libwccl/values/annotationmatch.h @@ -1,14 +1,13 @@ #ifndef LIBWCCL_VALUES_ANNOTATIONMATCH_H #define LIBWCCL_VALUES_ANNOTATIONMATCH_H -#include <libwccl/values/match.h> +#include <libwccl/values/matchdata.h> namespace Wccl { -class AnnotationMatch : public Match +class AnnotationMatch : public MatchData { public: - WCCL_VALUE_PREAMBLE explicit AnnotationMatch(Position position, const std::string channel) : position_(position), channel_(channel) @@ -22,20 +21,26 @@ public: BOOST_ASSERT(position_.get_value() != Position::Nowhere); } - /// Match override. + /// MatchData override. bool empty() const { return false; } - /// Match override. + /// MatchData override. Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; - /// Match override. + /// MatchData override. Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; - /// Value override + /// MatchData override std::string to_raw_string() const; +protected: + /// MatchData override + AnnotationMatch* clone_internal() const { + return new AnnotationMatch(*this); + } + private: Position position_; std::string channel_; diff --git a/libwccl/values/match.cpp b/libwccl/values/match.cpp index a35cf8f..a0f5cbc 100644 --- a/libwccl/values/match.cpp +++ b/libwccl/values/match.cpp @@ -5,11 +5,6 @@ namespace Wccl { const char* Match::type_name = "Match"; -std::string Match::to_raw_string() const -{ - return "MATCH()"; -} - std::string Match::var_repr(const std::string &var_name) { std::stringstream ss; diff --git a/libwccl/values/match.h b/libwccl/values/match.h index 0b05c44..d2ac17f 100644 --- a/libwccl/values/match.h +++ b/libwccl/values/match.h @@ -3,6 +3,9 @@ #include <libwccl/values/position.h> #include <libcorpus2/ann/annotatedsentence.h> +#include <libwccl/values/matchvector.h> +#include <libwccl/values/annotationmatch.h> +#include <libwccl/values/tokenmatch.h> namespace Wccl { @@ -11,36 +14,80 @@ class Match : public Value public: WCCL_VALUE_PREAMBLE + typedef MatchData value_type; + + /** + * The default data held is an empty MatchVector + */ Match() + : match_(new MatchVector()) + { + } + + Match(const boost::shared_ptr<MatchData>& data) + : match_(data) + { + } + + Match(const boost::shared_ptr<TokenMatch>& data) + : match_(data) + { + } + + Match(const boost::shared_ptr<AnnotationMatch>& data) + : match_(data) { } + Match(const boost::shared_ptr<MatchVector>& data) + : match_(data) + { + } + + Match(const MatchData& data) + : match_(data.clone()) + { + } + + const MatchData& get_value() const { + return *match_; + } + + void set_value(const MatchData& m) { + match_ = m.clone(); + } + /** * Check if the match is empty (matches nothing). Match objects themselves * are by definition empty, child classes are sometimes or always non-empty. */ virtual bool empty() const { - return true; + return match_->empty(); } /** * Getter for the first token matched. If the match is empty, must return * Nowhere. */ - virtual Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const { - return Position(Position::Nowhere); + virtual Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { + return match_->first_token(s); } /** * Getter for the last token matched. If the match is empty, must return * Nowhere. */ - virtual Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const { - return Position(Position::Nowhere); + virtual Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { + return match_->last_token(s); } /// Value override - std::string to_raw_string() const; + std::string to_raw_string() const { + return match_->to_raw_string(); + } + +private: + boost::shared_ptr<MatchData> match_; }; } /* end ns Wccl */ diff --git a/libwccl/values/matchdata.h b/libwccl/values/matchdata.h new file mode 100644 index 0000000..c8bd50d --- /dev/null +++ b/libwccl/values/matchdata.h @@ -0,0 +1,46 @@ +#ifndef LIBWCCL_VALUES_MATCHDATA_H +#define LIBWCCL_VALUES_MATCHDATA_H + +#include <libwccl/values/position.h> +#include <libcorpus2/ann/annotatedsentence.h> + +namespace Wccl { + +/** + * Base abstract class for data held by a Match Value + * - VectorMatch, TokenMatch or AnnotationMatch. + * (empty VectorMatch should be default option) + */ +class MatchData// : boost::noncopyable +{ +public: + + /** + * Check if the match is empty (matches nothing). Match objects themselves + * are by definition empty, child classes are sometimes or always non-empty. + */ + virtual bool empty() const = 0; + /** + * Getter for the first token matched. If the match is empty, must return + * Nowhere. + */ + virtual Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const = 0; + + /** + * Getter for the last token matched. If the match is empty, must return + * Nowhere. + */ + virtual Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const = 0; + + boost::shared_ptr<MatchData> clone() const { + return boost::shared_ptr<MatchData>(clone_internal()); + } + + virtual std::string to_raw_string() const = 0; + +protected: + virtual MatchData* clone_internal() const = 0; +}; + +} +#endif // LIBWCCL_VALUES_MATCHDATA_H diff --git a/libwccl/values/matchvector.cpp b/libwccl/values/matchvector.cpp index faf4459..6ad6096 100644 --- a/libwccl/values/matchvector.cpp +++ b/libwccl/values/matchvector.cpp @@ -1,12 +1,11 @@ #include <libwccl/values/matchvector.h> +#include <libwccl/values/match.h> #include <libpwrutils/foreach.h> #include <sstream> #include <libwccl/exception.h> namespace Wccl { -const char* MatchVector::type_name = "MatchVector"; - std::string MatchVector::to_raw_string() const { std::stringstream ss; @@ -23,11 +22,6 @@ std::string MatchVector::to_raw_string() const return ss.str(); } -std::string MatchVector::var_repr(const std::string &var_name) -{ - return Match::var_repr(var_name); -} - Position MatchVector::first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { if (matches_.empty()) { @@ -75,6 +69,26 @@ void MatchVector::append(const boost::shared_ptr<Match> &m) matches_.push_back(m); } +void MatchVector::append(const boost::shared_ptr<MatchVector> &m) +{ + matches_.push_back(boost::shared_ptr<Match>(new Match(m))); +} + +void MatchVector::append(const boost::shared_ptr<TokenMatch> &m) +{ + matches_.push_back(boost::shared_ptr<Match>(new Match(m))); +} + +void MatchVector::append(const boost::shared_ptr<AnnotationMatch> &m) +{ + matches_.push_back(boost::shared_ptr<Match>(new Match(m))); +} + +void MatchVector::append(const boost::shared_ptr<MatchData> &m) +{ + matches_.push_back(boost::shared_ptr<Match>(new Match(m))); +} + const boost::shared_ptr<Match>& MatchVector::submatch(size_t idx) { if (idx < matches_.size()) { diff --git a/libwccl/values/matchvector.h b/libwccl/values/matchvector.h index d25f799..795ef86 100644 --- a/libwccl/values/matchvector.h +++ b/libwccl/values/matchvector.h @@ -1,36 +1,45 @@ #ifndef LIBWCCL_VALUES_MATCHVECTOR_H #define LIBWCCL_VALUES_MATCHVECTOR_H -#include <libwccl/values/match.h> +#include <libwccl/values/matchdata.h> #include <boost/shared_ptr.hpp> #include <vector> namespace Wccl { -class MatchVector : public Match +class Match; +class MatchData; +class MatchVector; +class TokenMatch; +class AnnotationMatch; + +class MatchVector : public MatchData { public: - WCCL_VALUE_PREAMBLE MatchVector() { } - /// Match override. A MatchVector is empty if it contains no sub-matches, + /// MatchData override. A MatchVector is empty if it contains no sub-matches, /// or if they are all empty. bool empty() const; - /// Match override. + /// MatchData override. Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; - /// Match override. + /// MatchData override. Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; - /// Value override + /// MatchData override std::string to_raw_string() const; /// Append a sub-match void append(const boost::shared_ptr<Match>& m); + void append(const boost::shared_ptr<MatchData>& m); + void append(const boost::shared_ptr<MatchVector>& m); + void append(const boost::shared_ptr<TokenMatch>& m); + void append(const boost::shared_ptr<AnnotationMatch>& m); /// Size (number of direct sub-matches) size_t size() const { @@ -54,6 +63,11 @@ public: matches_.clear(); } +protected: + MatchVector* clone_internal() const { + return new MatchVector(*this); + } + private: std::vector< boost::shared_ptr<Match> > matches_; }; diff --git a/libwccl/values/tokenmatch.cpp b/libwccl/values/tokenmatch.cpp index 1af5e1a..0852ad9 100644 --- a/libwccl/values/tokenmatch.cpp +++ b/libwccl/values/tokenmatch.cpp @@ -2,16 +2,9 @@ namespace Wccl { -const char* TokenMatch::type_name = "TokenMatch"; - std::string TokenMatch::to_raw_string() const { return "TOK[" + position_.to_raw_string() + "]"; } -std::string TokenMatch::var_repr(const std::string &var_name) -{ - return Match::var_repr(var_name); -} - } /* end ns Wccl */ diff --git a/libwccl/values/tokenmatch.h b/libwccl/values/tokenmatch.h index f220c2d..46e085c 100644 --- a/libwccl/values/tokenmatch.h +++ b/libwccl/values/tokenmatch.h @@ -1,14 +1,13 @@ #ifndef LIBWCCL_VALUES_TOKENMATCH_H #define LIBWCCL_VALUES_TOKENMATCH_H -#include <libwccl/values/match.h> +#include <libwccl/values/matchdata.h> namespace Wccl { -class TokenMatch : public Match +class TokenMatch : public MatchData { public: - WCCL_VALUE_PREAMBLE explicit TokenMatch(Position position) : position_(position) @@ -22,24 +21,30 @@ public: BOOST_ASSERT(position_.get_value() != Position::Nowhere); } - /// Match override. + /// MatchData override. bool empty() const { return false; } - /// Match override. + /// MatchData override. Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const { return position_; } - /// Match override. + /// MatchData override. Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const { return position_; } - /// Value override + /// MatchData override std::string to_raw_string() const; +protected: + /// MatchData override + TokenMatch* clone_internal() const { + return new TokenMatch(*this); + } + private: Position position_; }; diff --git a/tests/match.cpp b/tests/match.cpp index 15b0790..72ea47d 100644 --- a/tests/match.cpp +++ b/tests/match.cpp @@ -104,7 +104,7 @@ BOOST_AUTO_TEST_CASE(varmatch) { boost::shared_ptr<Corpus2::AnnotatedSentence> ptr; Wccl::Variables v; - v.put<Wccl::Match>("a", new Wccl::TokenMatch(1)); + v.put<Wccl::Match>("a", new Wccl::Match(boost::shared_ptr<MatchData>(new Wccl::TokenMatch(1)))); BOOST_CHECK_EQUAL(v.get<Wccl::Match>("a")->first_token(ptr).get_value(), 1); BOOST_CHECK(v.get_put<Wccl::Match>("b")->empty()); BOOST_CHECK_EQUAL(v.get_put<Wccl::Match>("b")->first_token(ptr).get_value(), -- GitLab