From 1270ffe8237eff041bd8745f503e3e90982402c3 Mon Sep 17 00:00:00 2001 From: Adam Wardynski <award@.(win7-laptop)> Date: Tue, 26 Apr 2011 16:50:25 +0200 Subject: [PATCH] Fix MatchVector::first_token to ignore negative values. It was looking for the minimum value, and Position::Nowhere is the smallest possible value (min_int). So if there was an empty Match in a vector, first_token for the vector would always end up returning Position::Nowhere, which was wrong (empty submatches should have been ignored, with Position::Nowhere returned only if there were no non-empty alternatives). --- libwccl/values/matchvector.cpp | 13 +++++++++++-- tests/match.cpp | 12 ++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/libwccl/values/matchvector.cpp b/libwccl/values/matchvector.cpp index bf80136..6415ed0 100644 --- a/libwccl/values/matchvector.cpp +++ b/libwccl/values/matchvector.cpp @@ -27,12 +27,21 @@ Position MatchVector::first_token(const boost::shared_ptr<Corpus2::AnnotatedSent if (matches_.empty()) { return Position(Position::Nowhere); } else { + // Negative positions are invalid, including specials like Nowhere, + // so we can't just find minimum value but minimum *non-negative* value. + // Note: yes, the code assumes the special values like Nowhere are indeed negative. 
Position p = matches_.front()->first_token(s); - for (size_t i = 1; i < matches_.size(); ++i) { + size_t i = 1; + while ((p.get_value() < 0) && (i < matches_.size())) { + p = matches_[i]->first_token(s); + ++i; + } + while (i < matches_.size()) { Position c = matches_[i]->first_token(s); - if (c.get_value() < p.get_value()) { + if ((c.get_value() >= 0) && (c.get_value() < p.get_value())) { p = c; } + ++i; } return p; } diff --git a/tests/match.cpp b/tests/match.cpp index 72ea47d..9b3ab09 100644 --- a/tests/match.cpp +++ b/tests/match.cpp @@ -98,6 +98,18 @@ BOOST_AUTO_TEST_CASE(matchvector_first_last) m1->append(boost::make_shared<TokenMatch>(1)); BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), 1); BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), 7); + Wccl::MatchVector m3; + BOOST_CHECK_EQUAL(m3.to_raw_string(), "MATCH()"); + BOOST_CHECK_EQUAL(m3.first_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m3.last_token(ptr).get_value(), Wccl::Position::Nowhere); + m3.append(boost::make_shared<MatchVector>()); + BOOST_CHECK_EQUAL(m3.to_raw_string(), "MATCH(MATCH())"); + BOOST_CHECK_EQUAL(m3.first_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m3.last_token(ptr).get_value(), Wccl::Position::Nowhere); + m3.append(boost::make_shared<TokenMatch>(1)); + BOOST_CHECK_EQUAL(m3.to_raw_string(), "MATCH(MATCH(),TOK[1])"); + BOOST_CHECK_EQUAL(m3.first_token(ptr).get_value(), 1); + BOOST_CHECK_EQUAL(m3.last_token(ptr).get_value(), 1); } BOOST_AUTO_TEST_CASE(varmatch) -- GitLab