From f4cdbdeed38bdde57bad8b332a65269c0a788275 Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Wed, 9 Mar 2011 10:59:38 +0100
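Subject: [PATCH] refactor Match last/first token, fix issues

Match::first_token() and last_token(), together with the TokenMatch,
MatchVector and AnnotationMatch overrides, now take the annotated
sentence as an argument. AnnotationMatch uses it to look up the segment
at its position in its channel, replacing the previous TODO stubs.

Issues fixed:
* MatchVector::first_token()/last_token() compared the loop index
  against matches_.end() instead of matches_.size().
* MatchVector::submatch() indexed matches_ with `size` instead of `idx`.
* tests/match.cpp: matchvector_first_last appended to `m` after it had
  been copied into m2; it now appends to the copy that m2 holds.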

---
 libwccl/values/annotationmatch.cpp | 37 ++++++++++++----
 libwccl/values/annotationmatch.h   |  4 +-
 libwccl/values/match.h             |  5 ++-
 libwccl/values/matchvector.cpp     | 18 ++++----
 libwccl/values/matchvector.h       |  4 +-
 libwccl/values/tokenmatch.h        |  4 +-
 tests/match.cpp                    | 70 ++++++++++++++++--------------
 7 files changed, 84 insertions(+), 58 deletions(-)

diff --git a/libwccl/values/annotationmatch.cpp b/libwccl/values/annotationmatch.cpp
index a8419e3..adca5db 100644
--- a/libwccl/values/annotationmatch.cpp
+++ b/libwccl/values/annotationmatch.cpp
@@ -14,19 +14,38 @@ std::string AnnotationMatch::var_repr(const std::string &var_name)
 	return Match::var_repr(var_name);
 }
 
-
-Position AnnotationMatch::first_token() const
+Position AnnotationMatch::first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const
 {
-	Position p;
-	// TODO
-	return p;
+	size_t fpos = position_.get_value();
+	const Corpus2::AnnotationChannel& chan = s->get_channel(channel_);
+	int seg = chan.get_segment_at(fpos);
+	if (seg > 0) {
+		for (size_t i = 0; i < fpos; ++i) {
+			if (chan.get_segment_at(i) == seg) {
+				return Position(i);
+			}
+		}
+		return position_;
+	} else {
+		return Position();
+	}
 }
 
-Position AnnotationMatch::last_token() const
+Position AnnotationMatch::last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const
 {
-	Position p;
-	// TODO
-	return p;
+	size_t fpos = position_.get_value();
+	const Corpus2::AnnotationChannel& chan = s->get_channel(channel_);
+	int seg = chan.get_segment_at(fpos);
+	if (seg > 0) {
+		for (size_t i = s->size() - 1; i > fpos; --i) {
+			if (chan.get_segment_at(i) == seg) {
+				return Position(i);
+			}
+		}
+		return position_;
+	} else {
+		return Position();
+	}
 }
 
 
diff --git a/libwccl/values/annotationmatch.h b/libwccl/values/annotationmatch.h
index e0d85b8..43c8885 100644
--- a/libwccl/values/annotationmatch.h
+++ b/libwccl/values/annotationmatch.h
@@ -28,10 +28,10 @@ public:
 	}
 
 	/// Match override.
-	Position first_token() const;
+	Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const;
 
 	/// Match override.
-	Position last_token() const;
+	Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const;
 
 	/// Value override
 	std::string to_raw_string() const;
diff --git a/libwccl/values/match.h b/libwccl/values/match.h
index 2b80b9f..0b05c44 100644
--- a/libwccl/values/match.h
+++ b/libwccl/values/match.h
@@ -2,6 +2,7 @@
 #define LIBWCCL_VALUES_MATCH_H
 
 #include <libwccl/values/position.h>
+#include <libcorpus2/ann/annotatedsentence.h>
 
 namespace Wccl {
 
@@ -26,7 +27,7 @@ public:
 	 * Getter for the first token matched. If the match is empty, must return
 	 * Nowhere.
 	 */
-	virtual Position first_token() const {
+	virtual Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const {
 		return Position(Position::Nowhere);
 	}
 
@@ -34,7 +35,7 @@ public:
 	 * Getter for the last token matched. If the match is empty, must return
 	 * Nowhere.
 	 */
-	virtual Position last_token() const {
+	virtual Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const {
 		return Position(Position::Nowhere);
 	}
 
diff --git a/libwccl/values/matchvector.cpp b/libwccl/values/matchvector.cpp
index 87a0f35..faf4459 100644
--- a/libwccl/values/matchvector.cpp
+++ b/libwccl/values/matchvector.cpp
@@ -28,14 +28,14 @@ std::string MatchVector::var_repr(const std::string &var_name)
 	return Match::var_repr(var_name);
 }
 
-Position MatchVector::first_token() const
+Position MatchVector::first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const
 {
 	if (matches_.empty()) {
 		return Position(Position::Nowhere);
 	} else {
-		Position p = matches_.front()->first_token();
-		for (size_t i = 1; i < matches_.end(); ++i) {
-			Position c = matches_[i]->first_token();
+		Position p = matches_.front()->first_token(s);
+		for (size_t i = 1; i < matches_.size(); ++i) {
+			Position c = matches_[i]->first_token(s);
 			if (c.get_value() < p.get_value()) {
 				p = c;
 			}
@@ -44,14 +44,14 @@ Position MatchVector::first_token() const
 	}
 }
 
-Position MatchVector::last_token() const
+Position MatchVector::last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const
 {
 	if (matches_.empty()) {
 		return Position(Position::Nowhere);
 	} else {
-		Position p = matches_.front()->last_token();
-		for (size_t i = 1; i < matches_.end(); ++i) {
-			Position c = matches_[i]->last_token();
+		Position p = matches_.front()->last_token(s);
+		for (size_t i = 1; i < matches_.size(); ++i) {
+			Position c = matches_[i]->last_token(s);
 			if (c.get_value() > p.get_value()) {
 				p = c;
 			}
@@ -78,7 +78,7 @@ void MatchVector::append(const boost::shared_ptr<Match> &m)
 const boost::shared_ptr<Match>& MatchVector::submatch(size_t idx)
 {
 	if (idx < matches_.size()) {
-		return matches_[size];
+		return matches_[idx];
 	} else {
 		throw Wccl::WcclError("Match vector index out of range");
 	}
diff --git a/libwccl/values/matchvector.h b/libwccl/values/matchvector.h
index bcf2650..1293525 100644
--- a/libwccl/values/matchvector.h
+++ b/libwccl/values/matchvector.h
@@ -21,10 +21,10 @@ public:
 	bool empty() const;
 
 	/// Match override.
-	Position first_token() const;
+	Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const;
 
 	/// Match override.
-	Position last_token() const;
+	Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const;
 
 	/// Value override
 	std::string to_raw_string() const;
diff --git a/libwccl/values/tokenmatch.h b/libwccl/values/tokenmatch.h
index f7b340d..f220c2d 100644
--- a/libwccl/values/tokenmatch.h
+++ b/libwccl/values/tokenmatch.h
@@ -28,12 +28,12 @@ public:
 	}
 
 	/// Match override.
-	Position first_token() const {
+	Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const {
 		return position_;
 	}
 
 	/// Match override.
-	Position last_token() const {
+	Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const {
 		return position_;
 	}
 
diff --git a/tests/match.cpp b/tests/match.cpp
index c8fae95..3e66277 100644
--- a/tests/match.cpp
+++ b/tests/match.cpp
@@ -19,91 +19,97 @@ BOOST_AUTO_TEST_SUITE(match_value)
 
 BOOST_AUTO_TEST_CASE(empty)
 {
+	boost::shared_ptr<Corpus2::AnnotatedSentence> ptr;
 	Wccl::Match m;
 	BOOST_CHECK(m.empty());
-	BOOST_CHECK_EQUAL(m.first_token().get_value(), Wccl::Position::Nowhere);
-	BOOST_CHECK_EQUAL(m.last_token().get_value(), Wccl::Position::Nowhere);
+	BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), Wccl::Position::Nowhere);
+	BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), Wccl::Position::Nowhere);
 	BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH()");
 }
 
 BOOST_AUTO_TEST_CASE(token)
 {
+	boost::shared_ptr<Corpus2::AnnotatedSentence> ptr;
 	Wccl::TokenMatch m(1);
 	BOOST_CHECK(!m.empty());
-	BOOST_CHECK_EQUAL(m.first_token().get_value(), 1);
-	BOOST_CHECK_EQUAL(m.last_token().get_value(), 1);
+	BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), 1);
+	BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), 1);
 	BOOST_CHECK_EQUAL(m.to_raw_string(), "TOK[1]");
 }
 
 BOOST_AUTO_TEST_CASE(vector1)
 {
+	boost::shared_ptr<Corpus2::AnnotatedSentence> ptr;
 	Wccl::MatchVector m;
 	BOOST_CHECK(m.empty());
-	BOOST_CHECK_EQUAL(m.first_token().get_value(), Wccl::Position::Nowhere);
-	BOOST_CHECK_EQUAL(m.last_token().get_value(), Wccl::Position::Nowhere);
+	BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), Wccl::Position::Nowhere);
+	BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), Wccl::Position::Nowhere);
 	BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH()");
 	m.append(boost::make_shared<Wccl::MatchVector>());
 	BOOST_CHECK(m.empty());
-	BOOST_CHECK_EQUAL(m.first_token().get_value(), Wccl::Position::Nowhere);
-	BOOST_CHECK_EQUAL(m.last_token().get_value(), Wccl::Position::Nowhere);
+	BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), Wccl::Position::Nowhere);
+	BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), Wccl::Position::Nowhere);
 	BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(MATCH())");
 	m.append(boost::make_shared<Wccl::MatchVector>());
 	BOOST_CHECK(m.empty());
-	BOOST_CHECK_EQUAL(m.first_token().get_value(), Wccl::Position::Nowhere);
-	BOOST_CHECK_EQUAL(m.last_token().get_value(), Wccl::Position::Nowhere);
+	BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), Wccl::Position::Nowhere);
+	BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), Wccl::Position::Nowhere);
 	BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(MATCH(),MATCH())");
 	Wccl::MatchVector m2;
 	m2.append(boost::make_shared<Wccl::MatchVector>(m));
 	BOOST_CHECK(m2.empty());
-	BOOST_CHECK_EQUAL(m2.first_token().get_value(), Wccl::Position::Nowhere);
-	BOOST_CHECK_EQUAL(m2.last_token().get_value(), Wccl::Position::Nowhere);
+	BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), Wccl::Position::Nowhere);
+	BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), Wccl::Position::Nowhere);
 	BOOST_CHECK_EQUAL(m2.to_raw_string(), "MATCH(MATCH(MATCH(),MATCH()))");
 	m2.append(boost::make_shared<Wccl::MatchVector>());
 	BOOST_CHECK(m2.empty());
-	BOOST_CHECK_EQUAL(m2.first_token().get_value(), Wccl::Position::Nowhere);
-	BOOST_CHECK_EQUAL(m2.last_token().get_value(), Wccl::Position::Nowhere);
+	BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), Wccl::Position::Nowhere);
+	BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), Wccl::Position::Nowhere);
 	BOOST_CHECK_EQUAL(m2.to_raw_string(), "MATCH(MATCH(MATCH(),MATCH()),MATCH())");
 }
 
 BOOST_AUTO_TEST_CASE(matchvector_first_last)
 {
+	boost::shared_ptr<Corpus2::AnnotatedSentence> ptr;
 	Wccl::MatchVector m;
 	m.append(boost::make_shared<TokenMatch>(5));
 	BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(TOK[5])");
-	BOOST_CHECK_EQUAL(m.first_token().get_value(), 5);
-	BOOST_CHECK_EQUAL(m.last_token().get_value(), 5);
+	BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), 5);
+	BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), 5);
 	m.append(boost::make_shared<TokenMatch>(6));
-	BOOST_CHECK_EQUAL(m.first_token().get_value(), 5);
-	BOOST_CHECK_EQUAL(m.last_token().get_value(), 6);
+	BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), 5);
+	BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), 6);
 	BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(TOK[5],TOK[6])");
 	m.append(boost::make_shared<TokenMatch>(4));
-	BOOST_CHECK_EQUAL(m.first_token().get_value(), 4);
-	BOOST_CHECK_EQUAL(m.last_token().get_value(), 6);
+	BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), 4);
+	BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), 6);
 	BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(TOK[5],TOK[6],TOK[4])");
 	Wccl::MatchVector m2;
 	m2.append(boost::make_shared<TokenMatch>(5));
-	m2.append(boost::make_shared<Wccl::MatchVector>(m));
-	BOOST_CHECK_EQUAL(m2.first_token().get_value(), 4);
-	BOOST_CHECK_EQUAL(m2.last_token().get_value(), 6);
+	boost::shared_ptr<Wccl::MatchVector> m1 = boost::make_shared<Wccl::MatchVector>(m);
+	m2.append(m1);
+	BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), 4);
+	BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), 6);
 	BOOST_CHECK_EQUAL(m2.to_raw_string(), "MATCH(TOK[5],MATCH(TOK[5],TOK[6],TOK[4]))");
 	m2.append(boost::make_shared<TokenMatch>(2));
-	BOOST_CHECK_EQUAL(m2.first_token().get_value(), 2);
-	BOOST_CHECK_EQUAL(m2.last_token().get_value(), 6);
+	BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), 2);
+	BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), 6);
 	m2.append(boost::make_shared<TokenMatch>(7));
-	BOOST_CHECK_EQUAL(m2.first_token().get_value(), 2);
-	BOOST_CHECK_EQUAL(m2.last_token().get_value(), 7);
-	m.append(boost::make_shared<TokenMatch>(1));
-	BOOST_CHECK_EQUAL(m2.first_token().get_value(), 1);
-	BOOST_CHECK_EQUAL(m2.last_token().get_value(), 7);
+	BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), 2);
+	BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), 7);
+	m1->append(boost::make_shared<TokenMatch>(1));
+	BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), 1);
+	BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), 7);
 }
 
 BOOST_AUTO_TEST_CASE(varmatch)
 {
+	boost::shared_ptr<Corpus2::AnnotatedSentence> ptr;
 	Wccl::Variables v;
 	v.put<Wccl::Match>("a", new Wccl::TokenMatch(1));
-	BOOST_CHECK_EQUAL(v.get<Wccl::Match>("a")->first_token().get_value(), 1);
+	BOOST_CHECK_EQUAL(v.get<Wccl::Match>("a")->first_token(ptr).get_value(), 1);
 	BOOST_CHECK(v.get_put<Wccl::Match>("b")->empty());
-	BOOST_CHECK_EQUAL(v.get_put<Wccl::Match>("b")->first_token().get_value(),
+	BOOST_CHECK_EQUAL(v.get_put<Wccl::Match>("b")->first_token(ptr).get_value(),
 		Wccl::Position::Nowhere);
 }
 
-- 
GitLab