From f4cdbdeed38bdde57bad8b332a65269c0a788275 Mon Sep 17 00:00:00 2001 From: ilor <kailoran@gmail.com> Date: Wed, 9 Mar 2011 10:59:38 +0100 Subject: [PATCH] refactor Match last/first token, fix issues --- libwccl/values/annotationmatch.cpp | 37 ++++++++++++---- libwccl/values/annotationmatch.h | 4 +- libwccl/values/match.h | 5 ++- libwccl/values/matchvector.cpp | 18 ++++---- libwccl/values/matchvector.h | 4 +- libwccl/values/tokenmatch.h | 4 +- tests/match.cpp | 70 ++++++++++++++++-------------- 7 files changed, 84 insertions(+), 58 deletions(-) diff --git a/libwccl/values/annotationmatch.cpp b/libwccl/values/annotationmatch.cpp index a8419e3..adca5db 100644 --- a/libwccl/values/annotationmatch.cpp +++ b/libwccl/values/annotationmatch.cpp @@ -14,19 +14,38 @@ std::string AnnotationMatch::var_repr(const std::string &var_name) return Match::var_repr(var_name); } - -Position AnnotationMatch::first_token() const +Position AnnotationMatch::first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { - Position p; - // TODO - return p; + size_t fpos = position_.get_value(); + const Corpus2::AnnotationChannel& chan = s->get_channel(channel_); + int seg = chan.get_segment_at(fpos); + if (seg > 0) { + for (size_t i = 0; i < fpos; ++i) { + if (chan.get_segment_at(i) == seg) { + return Position(i); + } + } + return position_; + } else { + return Position(); + } } -Position AnnotationMatch::last_token() const +Position AnnotationMatch::last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { - Position p; - // TODO - return p; + size_t fpos = position_.get_value(); + const Corpus2::AnnotationChannel& chan = s->get_channel(channel_); + int seg = chan.get_segment_at(fpos); + if (seg > 0) { + for (size_t i = s->size() - 1; i > fpos; --i) { + if (chan.get_segment_at(i) == seg) { + return Position(i); + } + } + return position_; + } else { + return Position(); + } } diff --git a/libwccl/values/annotationmatch.h b/libwccl/values/annotationmatch.h index e0d85b8..43c8885 100644 --- a/libwccl/values/annotationmatch.h +++ b/libwccl/values/annotationmatch.h @@ -28,10 +28,10 @@ public: } /// Match override. - Position first_token() const; + Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; /// Match override. - Position last_token() const; + Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; /// Value override std::string to_raw_string() const; diff --git a/libwccl/values/match.h b/libwccl/values/match.h index 2b80b9f..0b05c44 100644 --- a/libwccl/values/match.h +++ b/libwccl/values/match.h @@ -2,6 +2,7 @@ #define LIBWCCL_VALUES_MATCH_H #include <libwccl/values/position.h> +#include <libcorpus2/ann/annotatedsentence.h> namespace Wccl { @@ -26,7 +27,7 @@ public: * Getter for the first token matched. If the match is empty, must return * Nowhere. */ - virtual Position first_token() const { + virtual Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const { return Position(Position::Nowhere); } @@ -34,7 +35,7 @@ public: * Getter for the last token matched. If the match is empty, must return * Nowhere. */ - virtual Position last_token() const { + virtual Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const { return Position(Position::Nowhere); } diff --git a/libwccl/values/matchvector.cpp b/libwccl/values/matchvector.cpp index 87a0f35..faf4459 100644 --- a/libwccl/values/matchvector.cpp +++ b/libwccl/values/matchvector.cpp @@ -28,14 +28,14 @@ std::string MatchVector::var_repr(const std::string &var_name) return Match::var_repr(var_name); } -Position MatchVector::first_token() const +Position MatchVector::first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { if (matches_.empty()) { return Position(Position::Nowhere); } else { - Position p = matches_.front()->first_token(); - for (size_t i = 1; i < matches_.end(); ++i) { - Position c = matches_[i]->first_token(); + Position p = matches_.front()->first_token(s); + for (size_t i = 1; i < matches_.size(); ++i) { + Position c = matches_[i]->first_token(s); if (c.get_value() < p.get_value()) { p = c; } @@ -44,14 +44,14 @@ Position MatchVector::first_token() const } } -Position MatchVector::last_token() const +Position MatchVector::last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { if (matches_.empty()) { return Position(Position::Nowhere); } else { - Position p = matches_.front()->last_token(); - for (size_t i = 1; i < matches_.end(); ++i) { - Position c = matches_[i]->last_token(); + Position p = matches_.front()->last_token(s); + for (size_t i = 1; i < matches_.size(); ++i) { + Position c = matches_[i]->last_token(s); if (c.get_value() > p.get_value()) { p = c; } @@ -78,7 +78,7 @@ void MatchVector::append(const boost::shared_ptr<Match> &m) const boost::shared_ptr<Match>& MatchVector::submatch(size_t idx) { if (idx < matches_.size()) { - return matches_[size]; + return matches_[idx]; } else { throw Wccl::WcclError("Match vector index out of range"); } diff --git a/libwccl/values/matchvector.h b/libwccl/values/matchvector.h index bcf2650..1293525 100644 --- a/libwccl/values/matchvector.h +++ b/libwccl/values/matchvector.h @@ -21,10 +21,10 @@ public: bool empty() const; /// Match override. - Position first_token() const; + Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; /// Match override. - Position last_token() const; + Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; /// Value override std::string to_raw_string() const; diff --git a/libwccl/values/tokenmatch.h b/libwccl/values/tokenmatch.h index f7b340d..f220c2d 100644 --- a/libwccl/values/tokenmatch.h +++ b/libwccl/values/tokenmatch.h @@ -28,12 +28,12 @@ public: } /// Match override. - Position first_token() const { + Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const { return position_; } /// Match override. - Position last_token() const { + Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const { return position_; } diff --git a/tests/match.cpp b/tests/match.cpp index c8fae95..3e66277 100644 --- a/tests/match.cpp +++ b/tests/match.cpp @@ -19,91 +19,97 @@ BOOST_AUTO_TEST_SUITE(match_value) BOOST_AUTO_TEST_CASE(empty) { + boost::shared_ptr<Corpus2::AnnotatedSentence> ptr; Wccl::Match m; BOOST_CHECK(m.empty()); - BOOST_CHECK_EQUAL(m.first_token().get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m.last_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH()"); } BOOST_AUTO_TEST_CASE(token) { + boost::shared_ptr<Corpus2::AnnotatedSentence> ptr; Wccl::TokenMatch m(1); BOOST_CHECK(!m.empty()); - BOOST_CHECK_EQUAL(m.first_token().get_value(), 1); - BOOST_CHECK_EQUAL(m.last_token().get_value(), 1); + BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), 1); + BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), 1); BOOST_CHECK_EQUAL(m.to_raw_string(), "TOK[1]"); } BOOST_AUTO_TEST_CASE(vector1) { + boost::shared_ptr<Corpus2::AnnotatedSentence> ptr; Wccl::MatchVector m; BOOST_CHECK(m.empty()); - BOOST_CHECK_EQUAL(m.first_token().get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m.last_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH()"); m.append(boost::make_shared<Wccl::MatchVector>()); BOOST_CHECK(m.empty()); - BOOST_CHECK_EQUAL(m.first_token().get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m.last_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(MATCH())"); m.append(boost::make_shared<Wccl::MatchVector>()); BOOST_CHECK(m.empty()); - BOOST_CHECK_EQUAL(m.first_token().get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m.last_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(MATCH(),MATCH())"); Wccl::MatchVector m2; m2.append(boost::make_shared<Wccl::MatchVector>(m)); BOOST_CHECK(m2.empty()); - BOOST_CHECK_EQUAL(m2.first_token().get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m2.last_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m2.to_raw_string(), "MATCH(MATCH(MATCH(),MATCH()))"); m2.append(boost::make_shared<Wccl::MatchVector>()); BOOST_CHECK(m2.empty()); - BOOST_CHECK_EQUAL(m2.first_token().get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m2.last_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m2.to_raw_string(), "MATCH(MATCH(MATCH(),MATCH()),MATCH())"); } BOOST_AUTO_TEST_CASE(matchvector_first_last) { + boost::shared_ptr<Corpus2::AnnotatedSentence> ptr; Wccl::MatchVector m; m.append(boost::make_shared<TokenMatch>(5)); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(TOK[5])"); - BOOST_CHECK_EQUAL(m.first_token().get_value(), 5); - BOOST_CHECK_EQUAL(m.last_token().get_value(), 5); + BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), 5); + BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), 5); m.append(boost::make_shared<TokenMatch>(6)); - BOOST_CHECK_EQUAL(m.first_token().get_value(), 5); - BOOST_CHECK_EQUAL(m.last_token().get_value(), 6); + BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), 5); + BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), 6); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(TOK[5],TOK[6])"); m.append(boost::make_shared<TokenMatch>(4)); - BOOST_CHECK_EQUAL(m.first_token().get_value(), 4); - BOOST_CHECK_EQUAL(m.last_token().get_value(), 6); + BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), 4); + BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), 6); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(TOK[5],TOK[6],TOK[4])"); Wccl::MatchVector m2; m2.append(boost::make_shared<TokenMatch>(5)); - m2.append(boost::make_shared<Wccl::MatchVector>(m)); - BOOST_CHECK_EQUAL(m2.first_token().get_value(), 4); - BOOST_CHECK_EQUAL(m2.last_token().get_value(), 6); + boost::shared_ptr<Wccl::MatchVector> m1 = boost::make_shared<Wccl::MatchVector>(m); + m2.append(m1); + BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), 4); + BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), 6); BOOST_CHECK_EQUAL(m2.to_raw_string(), "MATCH(TOK[5],MATCH(TOK[5],TOK[6],TOK[4]))"); m2.append(boost::make_shared<TokenMatch>(2)); - BOOST_CHECK_EQUAL(m2.first_token().get_value(), 2); - BOOST_CHECK_EQUAL(m2.last_token().get_value(), 6); + BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), 2); + BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), 6); m2.append(boost::make_shared<TokenMatch>(7)); - BOOST_CHECK_EQUAL(m2.first_token().get_value(), 2); - BOOST_CHECK_EQUAL(m2.last_token().get_value(), 7); - m.append(boost::make_shared<TokenMatch>(1)); - BOOST_CHECK_EQUAL(m2.first_token().get_value(), 1); - BOOST_CHECK_EQUAL(m2.last_token().get_value(), 7); + BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), 2); + BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), 7); + m1->append(boost::make_shared<TokenMatch>(1)); + BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), 1); + BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), 7); } BOOST_AUTO_TEST_CASE(varmatch) { + boost::shared_ptr<Corpus2::AnnotatedSentence> ptr; Wccl::Variables v; v.put<Wccl::Match>("a", new Wccl::TokenMatch(1)); - BOOST_CHECK_EQUAL(v.get<Wccl::Match>("a")->first_token().get_value(), 1); + BOOST_CHECK_EQUAL(v.get<Wccl::Match>("a")->first_token(ptr).get_value(), 1); BOOST_CHECK(v.get_put<Wccl::Match>("b")->empty()); - BOOST_CHECK_EQUAL(v.get_put<Wccl::Match>("b")->first_token().get_value(), + BOOST_CHECK_EQUAL(v.get_put<Wccl::Match>("b")->first_token(ptr).get_value(), Wccl::Position::Nowhere); } -- GitLab