// Patch summary (unified git diff; line breaks inside hunks have been collapsed).
//
// Purpose: Match::first_token() / Match::last_token() and every override
// (AnnotationMatch, MatchVector, TokenMatch) gain one parameter,
// `const boost::shared_ptr<Corpus2::AnnotatedSentence>&`, and match.h now
// includes <libcorpus2/ann/annotatedsentence.h> to declare that type.
//
// Per-file changes:
//  - annotationmatch.cpp: replaces the TODO stubs. first_token() reads the
//    segment number at position_ in channel channel_; when it is > 0 it scans
//    indices 0 .. fpos-1 and returns the first index carrying the same segment,
//    falling back to position_. last_token() does the mirror scan from
//    s->size()-1 down to fpos+1. When the segment number is <= 0 both return a
//    default-constructed Position.
//  - match.h / tokenmatch.h: the sentence argument is accepted but unused.
//  - matchvector.cpp: forwards `s` to every sub-match; also fixes the loop bound
//    (`matches_.end()` -> `matches_.size()`) and the index used by submatch()
//    (`matches_[size]` -> `matches_[idx]`).
//  - tests/match.cpp: every call passes a default-constructed (empty) shared_ptr;
//    matchvector_first_last keeps a handle `m1` to the vector appended to m2 and
//    appends TokenMatch(1) through it, rather than to the separate object `m`.
//
// NOTE(review): AnnotationMatch::first_token/last_token dereference `s`
//   unconditionally; the tests only pass an empty pointer to Match, TokenMatch
//   and MatchVector -- confirm callers never pass an empty pointer here.
// NOTE(review): the else branches return `Position()` whereas the other classes
//   return `Position(Position::Nowhere)`; presumably the default is Nowhere --
//   TODO confirm against position.h.
// NOTE(review): in last_token(), `s->size() - 1` stored in a size_t becomes a
//   huge index if the sentence is empty -- verify that cannot happen when
//   seg > 0.
diff --git a/libwccl/values/annotationmatch.cpp b/libwccl/values/annotationmatch.cpp index a8419e3d7d3aca5d4f56d2c367b6e6bc264e7b53..adca5dbb8eb6201a81c44ff2df70e6e0067e55a4 100644 --- a/libwccl/values/annotationmatch.cpp +++ b/libwccl/values/annotationmatch.cpp @@ -14,19 +14,38 @@ std::string AnnotationMatch::var_repr(const std::string &var_name) return Match::var_repr(var_name); } - -Position AnnotationMatch::first_token() const +Position AnnotationMatch::first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { - Position p; - // TODO - return p; + size_t fpos = position_.get_value(); + const Corpus2::AnnotationChannel& chan = s->get_channel(channel_); + int seg = chan.get_segment_at(fpos); + if (seg > 0) { + for (size_t i = 0; i < fpos; ++i) { + if (chan.get_segment_at(i) == seg) { + return Position(i); + } + } + return position_; + } else { + return Position(); + } } -Position AnnotationMatch::last_token() const +Position AnnotationMatch::last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { - Position p; - // TODO - return p; + size_t fpos = position_.get_value(); + const Corpus2::AnnotationChannel& chan = s->get_channel(channel_); + int seg = chan.get_segment_at(fpos); + if (seg > 0) { + for (size_t i = s->size() - 1; i > fpos; --i) { + if (chan.get_segment_at(i) == seg) { + return Position(i); + } + } + return position_; + } else { + return Position(); + } } diff --git a/libwccl/values/annotationmatch.h b/libwccl/values/annotationmatch.h index e0d85b84a011828da45a506d55b1a56ce4358fb4..43c88858b043f7219e8042d58d92f710bda8d050 100644 --- a/libwccl/values/annotationmatch.h +++ b/libwccl/values/annotationmatch.h @@ -28,10 +28,10 @@ public: } /// Match override. - Position first_token() const; + Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; /// Match override. 
- Position last_token() const; + Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; /// Value override std::string to_raw_string() const; diff --git a/libwccl/values/match.h b/libwccl/values/match.h index 2b80b9f7212da4171ada8be5dc49742561dc285b..0b05c44c857ee3f4d4556747adbe4abc0fa48b03 100644 --- a/libwccl/values/match.h +++ b/libwccl/values/match.h @@ -2,6 +2,7 @@ #define LIBWCCL_VALUES_MATCH_H #include <libwccl/values/position.h> +#include <libcorpus2/ann/annotatedsentence.h> namespace Wccl { @@ -26,7 +27,7 @@ public: * Getter for the first token matched. If the match is empty, must return * Nowhere. */ - virtual Position first_token() const { + virtual Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const { return Position(Position::Nowhere); } @@ -34,7 +35,7 @@ public: * Getter for the last token matched. If the match is empty, must return * Nowhere. */ - virtual Position last_token() const { + virtual Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const { return Position(Position::Nowhere); } diff --git a/libwccl/values/matchvector.cpp b/libwccl/values/matchvector.cpp index 87a0f352aa537fbbac14a53488158c884423e4a0..faf4459f628efcfb3ce8e13a130d6f20cab832f1 100644 --- a/libwccl/values/matchvector.cpp +++ b/libwccl/values/matchvector.cpp @@ -28,14 +28,14 @@ std::string MatchVector::var_repr(const std::string &var_name) return Match::var_repr(var_name); } -Position MatchVector::first_token() const +Position MatchVector::first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { if (matches_.empty()) { return Position(Position::Nowhere); } else { - Position p = matches_.front()->first_token(); - for (size_t i = 1; i < matches_.end(); ++i) { - Position c = matches_[i]->first_token(); + Position p = matches_.front()->first_token(s); + for (size_t i = 1; i < matches_.size(); ++i) { + Position c = matches_[i]->first_token(s); if (c.get_value() < p.get_value()) { p = 
c; } @@ -44,14 +44,14 @@ Position MatchVector::first_token() const } } -Position MatchVector::last_token() const +Position MatchVector::last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { if (matches_.empty()) { return Position(Position::Nowhere); } else { - Position p = matches_.front()->last_token(); - for (size_t i = 1; i < matches_.end(); ++i) { - Position c = matches_[i]->last_token(); + Position p = matches_.front()->last_token(s); + for (size_t i = 1; i < matches_.size(); ++i) { + Position c = matches_[i]->last_token(s); if (c.get_value() > p.get_value()) { p = c; } @@ -78,7 +78,7 @@ void MatchVector::append(const boost::shared_ptr<Match> &m) const boost::shared_ptr<Match>& MatchVector::submatch(size_t idx) { if (idx < matches_.size()) { - return matches_[size]; + return matches_[idx]; } else { throw Wccl::WcclError("Match vector index out of range"); } diff --git a/libwccl/values/matchvector.h b/libwccl/values/matchvector.h index bcf26505db95f6cc343701d6c0256d02c49b660f..129352506a39929305b90cad019179a24cd2c587 100644 --- a/libwccl/values/matchvector.h +++ b/libwccl/values/matchvector.h @@ -21,10 +21,10 @@ public: bool empty() const; /// Match override. - Position first_token() const; + Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; /// Match override. - Position last_token() const; + Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; /// Value override std::string to_raw_string() const; diff --git a/libwccl/values/tokenmatch.h b/libwccl/values/tokenmatch.h index f7b340df65a0491b606cd8ac340c004a2a3c7203..f220c2d7c306c18e328f7b66db45250e31c697fb 100644 --- a/libwccl/values/tokenmatch.h +++ b/libwccl/values/tokenmatch.h @@ -28,12 +28,12 @@ public: } /// Match override. - Position first_token() const { + Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const { return position_; } /// Match override. 
- Position last_token() const { + Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const { return position_; } diff --git a/tests/match.cpp b/tests/match.cpp index c8fae952771ce849a35a3c47ec8bcde3a4954fd6..3e662775bea02e4803951b584869a769357336cf 100644 --- a/tests/match.cpp +++ b/tests/match.cpp @@ -19,91 +19,97 @@ BOOST_AUTO_TEST_SUITE(match_value) BOOST_AUTO_TEST_CASE(empty) { + boost::shared_ptr<Corpus2::AnnotatedSentence> ptr; Wccl::Match m; BOOST_CHECK(m.empty()); - BOOST_CHECK_EQUAL(m.first_token().get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m.last_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH()"); } BOOST_AUTO_TEST_CASE(token) { + boost::shared_ptr<Corpus2::AnnotatedSentence> ptr; Wccl::TokenMatch m(1); BOOST_CHECK(!m.empty()); - BOOST_CHECK_EQUAL(m.first_token().get_value(), 1); - BOOST_CHECK_EQUAL(m.last_token().get_value(), 1); + BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), 1); + BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), 1); BOOST_CHECK_EQUAL(m.to_raw_string(), "TOK[1]"); } BOOST_AUTO_TEST_CASE(vector1) { + boost::shared_ptr<Corpus2::AnnotatedSentence> ptr; Wccl::MatchVector m; BOOST_CHECK(m.empty()); - BOOST_CHECK_EQUAL(m.first_token().get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m.last_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH()"); m.append(boost::make_shared<Wccl::MatchVector>()); BOOST_CHECK(m.empty()); - BOOST_CHECK_EQUAL(m.first_token().get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m.last_token().get_value(), Wccl::Position::Nowhere); + 
BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(MATCH())"); m.append(boost::make_shared<Wccl::MatchVector>()); BOOST_CHECK(m.empty()); - BOOST_CHECK_EQUAL(m.first_token().get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m.last_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(MATCH(),MATCH())"); Wccl::MatchVector m2; m2.append(boost::make_shared<Wccl::MatchVector>(m)); BOOST_CHECK(m2.empty()); - BOOST_CHECK_EQUAL(m2.first_token().get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m2.last_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m2.to_raw_string(), "MATCH(MATCH(MATCH(),MATCH()))"); m2.append(boost::make_shared<Wccl::MatchVector>()); BOOST_CHECK(m2.empty()); - BOOST_CHECK_EQUAL(m2.first_token().get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m2.last_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m2.to_raw_string(), "MATCH(MATCH(MATCH(),MATCH()),MATCH())"); } BOOST_AUTO_TEST_CASE(matchvector_first_last) { + boost::shared_ptr<Corpus2::AnnotatedSentence> ptr; Wccl::MatchVector m; m.append(boost::make_shared<TokenMatch>(5)); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(TOK[5])"); - BOOST_CHECK_EQUAL(m.first_token().get_value(), 5); - BOOST_CHECK_EQUAL(m.last_token().get_value(), 5); + BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), 5); + 
BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), 5); m.append(boost::make_shared<TokenMatch>(6)); - BOOST_CHECK_EQUAL(m.first_token().get_value(), 5); - BOOST_CHECK_EQUAL(m.last_token().get_value(), 6); + BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), 5); + BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), 6); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(TOK[5],TOK[6])"); m.append(boost::make_shared<TokenMatch>(4)); - BOOST_CHECK_EQUAL(m.first_token().get_value(), 4); - BOOST_CHECK_EQUAL(m.last_token().get_value(), 6); + BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), 4); + BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), 6); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(TOK[5],TOK[6],TOK[4])"); Wccl::MatchVector m2; m2.append(boost::make_shared<TokenMatch>(5)); - m2.append(boost::make_shared<Wccl::MatchVector>(m)); - BOOST_CHECK_EQUAL(m2.first_token().get_value(), 4); - BOOST_CHECK_EQUAL(m2.last_token().get_value(), 6); + boost::shared_ptr<Wccl::MatchVector> m1 = boost::make_shared<Wccl::MatchVector>(m); + m2.append(m1); + BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), 4); + BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), 6); BOOST_CHECK_EQUAL(m2.to_raw_string(), "MATCH(TOK[5],MATCH(TOK[5],TOK[6],TOK[4]))"); m2.append(boost::make_shared<TokenMatch>(2)); - BOOST_CHECK_EQUAL(m2.first_token().get_value(), 2); - BOOST_CHECK_EQUAL(m2.last_token().get_value(), 6); + BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), 2); + BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), 6); m2.append(boost::make_shared<TokenMatch>(7)); - BOOST_CHECK_EQUAL(m2.first_token().get_value(), 2); - BOOST_CHECK_EQUAL(m2.last_token().get_value(), 7); - m.append(boost::make_shared<TokenMatch>(1)); - BOOST_CHECK_EQUAL(m2.first_token().get_value(), 1); - BOOST_CHECK_EQUAL(m2.last_token().get_value(), 7); + BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), 2); + BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), 7); + m1->append(boost::make_shared<TokenMatch>(1)); + 
BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), 1); + BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), 7); } BOOST_AUTO_TEST_CASE(varmatch) { + boost::shared_ptr<Corpus2::AnnotatedSentence> ptr; Wccl::Variables v; v.put<Wccl::Match>("a", new Wccl::TokenMatch(1)); - BOOST_CHECK_EQUAL(v.get<Wccl::Match>("a")->first_token().get_value(), 1); + BOOST_CHECK_EQUAL(v.get<Wccl::Match>("a")->first_token(ptr).get_value(), 1); BOOST_CHECK(v.get_put<Wccl::Match>("b")->empty()); - BOOST_CHECK_EQUAL(v.get_put<Wccl::Match>("b")->first_token().get_value(), + BOOST_CHECK_EQUAL(v.get_put<Wccl::Match>("b")->first_token(ptr).get_value(), Wccl::Position::Nowhere); }