From 5769ec06ee4b3da2d6134550dbe7b6a3ed0bba61 Mon Sep 17 00:00:00 2001 From: ilor <kailoran@gmail.com> Date: Thu, 3 Mar 2011 08:48:59 +0100 Subject: [PATCH] First draft of Match value type with some tests. MatchVector does not yet properly implement first/last token, hence a test fails, and AnnotationMatch is pretty much a stub. --- libwccl/CMakeLists.txt | 4 ++ libwccl/values/annotationmatch.cpp | 33 ++++++++++++ libwccl/values/annotationmatch.h | 46 +++++++++++++++++ libwccl/values/match.cpp | 20 ++++++++ libwccl/values/match.h | 47 +++++++++++++++++ libwccl/values/matchvector.cpp | 60 ++++++++++++++++++++++ libwccl/values/matchvector.h | 41 +++++++++++++++ libwccl/values/tokenmatch.cpp | 17 +++++++ libwccl/values/tokenmatch.h | 49 ++++++++++++++++++ libwccl/variables.h | 6 ++- tests/CMakeLists.txt | 1 + tests/match.cpp | 82 ++++++++++++++++++++++++++++++ 12 files changed, 404 insertions(+), 2 deletions(-) create mode 100644 libwccl/values/annotationmatch.cpp create mode 100644 libwccl/values/annotationmatch.h create mode 100644 libwccl/values/match.cpp create mode 100644 libwccl/values/match.h create mode 100644 libwccl/values/matchvector.cpp create mode 100644 libwccl/values/matchvector.h create mode 100644 libwccl/values/tokenmatch.cpp create mode 100644 libwccl/values/tokenmatch.h create mode 100644 tests/match.cpp diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt index f9dc761..f5f5796 100644 --- a/libwccl/CMakeLists.txt +++ b/libwccl/CMakeLists.txt @@ -65,9 +65,13 @@ SET(libwccl_STAT_SRC parser/Parser.cpp parser/ParserException.cpp sentencecontext.cpp + values/annotationmatch.cpp values/bool.cpp + values/match.cpp + values/matchvector.cpp values/position.cpp values/strset.cpp + values/tokenmatch.cpp values/tset.cpp values/value.cpp variables.cpp diff --git a/libwccl/values/annotationmatch.cpp b/libwccl/values/annotationmatch.cpp new file mode 100644 index 0000000..a8419e3 --- /dev/null +++ b/libwccl/values/annotationmatch.cpp @@ -0,0 +1,33 @@ +#include <libwccl/values/annotationmatch.h> + +namespace Wccl { + +const char* AnnotationMatch::type_name = "AnnotationMatch"; + +std::string AnnotationMatch::to_raw_string() const +{ + return "ANN[" + position_.to_raw_string() + "," + channel_ + "]"; +} + +std::string AnnotationMatch::var_repr(const std::string &var_name) +{ + return Match::var_repr(var_name); +} + + +Position AnnotationMatch::first_token() const +{ + Position p; + // TODO + return p; +} + +Position AnnotationMatch::last_token() const +{ + Position p; + // TODO + return p; +} + + +} /* end ns Wccl */ diff --git a/libwccl/values/annotationmatch.h b/libwccl/values/annotationmatch.h new file mode 100644 index 0000000..e0d85b8 --- /dev/null +++ b/libwccl/values/annotationmatch.h @@ -0,0 +1,46 @@ +#ifndef LIBWCCL_VALUES_ANNOTATIONMATCH_H +#define LIBWCCL_VALUES_ANNOTATIONMATCH_H + +#include <libwccl/values/match.h> + +namespace Wccl { + +class AnnotationMatch : public Match +{ +public: + WCCL_VALUE_PREAMBLE + + explicit AnnotationMatch(Position position, const std::string channel) + : position_(position), channel_(channel) + { + BOOST_ASSERT(position_.get_value() != Position::Nowhere); + } + + explicit AnnotationMatch(int pos,const std::string channel) + : position_(pos), channel_(channel) + { + BOOST_ASSERT(position_.get_value() != Position::Nowhere); + } + + /// Match override. + bool empty() const { + return false; + } + + /// Match override. + Position first_token() const; + + /// Match override. + Position last_token() const; + + /// Value override + std::string to_raw_string() const; + +private: + Position position_; + std::string channel_; +}; + +} /* end ns Wccl */ + +#endif // LIBWCCL_VALUES_ANNOTATIONMATCH_H diff --git a/libwccl/values/match.cpp b/libwccl/values/match.cpp new file mode 100644 index 0000000..a35cf8f --- /dev/null +++ b/libwccl/values/match.cpp @@ -0,0 +1,20 @@ +#include <libwccl/values/match.h> +#include <sstream> + +namespace Wccl { + +const char* Match::type_name = "Match"; + +std::string Match::to_raw_string() const +{ + return "MATCH()"; +} + +std::string Match::var_repr(const std::string &var_name) +{ + std::stringstream ss; + ss << "$m:" << var_name; + return ss.str(); +} + +} /* end ns Wccl */ diff --git a/libwccl/values/match.h b/libwccl/values/match.h new file mode 100644 index 0000000..2b80b9f --- /dev/null +++ b/libwccl/values/match.h @@ -0,0 +1,47 @@ +#ifndef LIBWCCL_VALUES_MATCH_H +#define LIBWCCL_VALUES_MATCH_H + +#include <libwccl/values/position.h> + +namespace Wccl { + +class Match : public Value +{ +public: + WCCL_VALUE_PREAMBLE + + Match() + { + } + + /** + * Check if the match is empty (matches nothing). Match objects themselves + * are by definition empty, child classes are sometimes or always non-empty. + */ + virtual bool empty() const { + return true; + } + + /** + * Getter for the first token matched. If the match is empty, must return + * Nowhere. + */ + virtual Position first_token() const { + return Position(Position::Nowhere); + } + + /** + * Getter for the last token matched. If the match is empty, must return + * Nowhere. + */ + virtual Position last_token() const { + return Position(Position::Nowhere); + } + + /// Value override + std::string to_raw_string() const; +}; + +} /* end ns Wccl */ + +#endif // LIBWCCL_VALUES_MATCH_H diff --git a/libwccl/values/matchvector.cpp b/libwccl/values/matchvector.cpp new file mode 100644 index 0000000..a0f1b3b --- /dev/null +++ b/libwccl/values/matchvector.cpp @@ -0,0 +1,60 @@ +#include <libwccl/values/matchvector.h> +#include <libpwrutils/foreach.h> +#include <sstream> + +namespace Wccl { + +const char* MatchVector::type_name = "MatchVector"; + +std::string MatchVector::to_raw_string() const +{ + std::stringstream ss; + ss << "MATCH("; + bool comma = false; + foreach (const boost::shared_ptr<Match>& m, matches_) { + if (comma) { + ss << ","; + } + ss << m->to_raw_string(); + comma = true; + } + ss << ")"; + return ss.str(); +} + +std::string MatchVector::var_repr(const std::string &var_name) +{ + return Match::var_repr(var_name); +} + +Position MatchVector::first_token() const +{ + Position p; + // TODO + return p; +} + +Position MatchVector::last_token() const +{ + Position p; + // TODO + return p; +} + +bool MatchVector::empty() const +{ + foreach (const boost::shared_ptr<Match>& m, matches_) { + if (!m->empty()) { + return false; + } + } + return true; +} + +void MatchVector::append(const boost::shared_ptr<Match> &m) +{ + matches_.push_back(m); +} + + +} /* end ns Wccl */ diff --git a/libwccl/values/matchvector.h b/libwccl/values/matchvector.h new file mode 100644 index 0000000..c4912de --- /dev/null +++ b/libwccl/values/matchvector.h @@ -0,0 +1,41 @@ +#ifndef LIBWCCL_VALUES_MATCHVECTOR_H +#define LIBWCCL_VALUES_MATCHVECTOR_H + +#include <libwccl/values/match.h> +#include <boost/shared_ptr.hpp> +#include <vector> + +namespace Wccl { + +class MatchVector : public Match +{ +public: + WCCL_VALUE_PREAMBLE + + MatchVector() + { + } + + /// Match override. A MatchVector is empty if it contains no sub-matches, + /// or if they are all empty. + bool empty() const; + + /// Match override. + Position first_token() const; + + /// Match override. + Position last_token() const; + + /// Value override + std::string to_raw_string() const; + + /// Append a sub-match + void append(const boost::shared_ptr<Match>& m); + +private: + std::vector< boost::shared_ptr<Match> > matches_; +}; + +} /* end ns Wccl */ + +#endif // LIBWCCL_VALUES_MATCHVECTOR_H diff --git a/libwccl/values/tokenmatch.cpp b/libwccl/values/tokenmatch.cpp new file mode 100644 index 0000000..1af5e1a --- /dev/null +++ b/libwccl/values/tokenmatch.cpp @@ -0,0 +1,17 @@ +#include <libwccl/values/tokenmatch.h> + +namespace Wccl { + +const char* TokenMatch::type_name = "TokenMatch"; + +std::string TokenMatch::to_raw_string() const +{ + return "TOK[" + position_.to_raw_string() + "]"; +} + +std::string TokenMatch::var_repr(const std::string &var_name) +{ + return Match::var_repr(var_name); +} + +} /* end ns Wccl */ diff --git a/libwccl/values/tokenmatch.h b/libwccl/values/tokenmatch.h new file mode 100644 index 0000000..f7b340d --- /dev/null +++ b/libwccl/values/tokenmatch.h @@ -0,0 +1,49 @@ +#ifndef LIBWCCL_VALUES_TOKENMATCH_H +#define LIBWCCL_VALUES_TOKENMATCH_H + +#include <libwccl/values/match.h> + +namespace Wccl { + +class TokenMatch : public Match +{ +public: + WCCL_VALUE_PREAMBLE + + explicit TokenMatch(Position position) + : position_(position) + { + BOOST_ASSERT(position_.get_value() != Position::Nowhere); + } + + explicit TokenMatch(int pos) + : position_(pos) + { + BOOST_ASSERT(position_.get_value() != Position::Nowhere); + } + + /// Match override. + bool empty() const { + return false; + } + + /// Match override. + Position first_token() const { + return position_; + } + + /// Match override. + Position last_token() const { + return position_; + } + + /// Value override + std::string to_raw_string() const; + +private: + Position position_; +}; + +} /* end ns Wccl */ + +#endif // LIBWCCL_VALUES_TOKENMATCH_H diff --git a/libwccl/variables.h b/libwccl/variables.h index 35a99d3..d836bab 100644 --- a/libwccl/variables.h +++ b/libwccl/variables.h @@ -1,8 +1,9 @@ #ifndef LIBWCCL_VARIABLES_H #define LIBWCCL_VARIABLES_H -#include <libwccl/values/bool.h> #include <libwccl/exception.h> +#include <libwccl/values/bool.h> +#include <libwccl/values/match.h> #include <libwccl/values/position.h> #include <libwccl/values/strset.h> #include <libwccl/values/tset.h> @@ -152,13 +153,14 @@ class Variables : detail::Vmap<Value> , detail::Vmap<Position> , detail::Vmap<StrSet> , detail::Vmap<TSet> + , detail::Vmap<Match> , boost::noncopyable { public: /// Valid value types, should match the inheritance. /// the type Value must be first, order of other items is not important typedef boost::mpl::list<Value, - Bool, Position, StrSet, TSet> types; + Bool, Position, StrSet, TSet, Match> types; /// Constructor, creates an empty instance. Variables(); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 55e7aa8..5ff0f38 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -27,6 +27,7 @@ add_executable(tests getorth.cpp logicalpredicates.cpp main.cpp + match.cpp position.cpp positionpredicates.cpp regex.cpp diff --git a/tests/match.cpp b/tests/match.cpp new file mode 100644 index 0000000..1cbb430 --- /dev/null +++ b/tests/match.cpp @@ -0,0 +1,82 @@ +#include <boost/test/unit_test.hpp> +#include <boost/bind.hpp> +#include <libcorpus2/tagsetmanager.h> + +#include <libwccl/sentencecontext.h> +#include <libwccl/variables.h> + +#include <libwccl/values/tokenmatch.h> +#include <libwccl/values/annotationmatch.h> +#include <libwccl/values/matchvector.h> + +#include <iostream> + +using namespace Wccl; + +BOOST_AUTO_TEST_SUITE(match_value) + +BOOST_AUTO_TEST_CASE(empty) +{ + Wccl::Match m; + BOOST_CHECK(m.empty()); + BOOST_CHECK_EQUAL(m.first_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.last_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH()"); +} + +BOOST_AUTO_TEST_CASE(token) +{ + Wccl::TokenMatch m(1); + BOOST_CHECK(!m.empty()); + BOOST_CHECK_EQUAL(m.first_token().get_value(), 1); + BOOST_CHECK_EQUAL(m.last_token().get_value(), 1); + BOOST_CHECK_EQUAL(m.to_raw_string(), "TOK[1]"); +} + +BOOST_AUTO_TEST_CASE(vector1) +{ + Wccl::MatchVector m; + BOOST_CHECK(m.empty()); + BOOST_CHECK_EQUAL(m.first_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.last_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH()"); + m.append(boost::make_shared<Wccl::MatchVector>()); + BOOST_CHECK(m.empty()); + BOOST_CHECK_EQUAL(m.first_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.last_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(MATCH())"); + m.append(boost::make_shared<Wccl::MatchVector>()); + BOOST_CHECK(m.empty()); + BOOST_CHECK_EQUAL(m.first_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.last_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(MATCH(),MATCH())"); + Wccl::MatchVector m2; + m2.append(boost::make_shared<Wccl::MatchVector>(m)); + BOOST_CHECK(m2.empty()); + BOOST_CHECK_EQUAL(m2.first_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m2.last_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m2.to_raw_string(), "MATCH(MATCH(MATCH(),MATCH()))"); + m2.append(boost::make_shared<Wccl::MatchVector>()); + BOOST_CHECK(m2.empty()); + BOOST_CHECK_EQUAL(m2.first_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m2.last_token().get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m2.to_raw_string(), "MATCH(MATCH(MATCH(),MATCH()),MATCH())"); +} + +BOOST_AUTO_TEST_CASE(vector2) +{ + Wccl::MatchVector m; + m.append(boost::make_shared<TokenMatch>(5)); + BOOST_CHECK_EQUAL(m.first_token().get_value(), 5); + BOOST_CHECK_EQUAL(m.last_token().get_value(), 5); + BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(TOK[5])"); + m.append(boost::make_shared<TokenMatch>(6)); + BOOST_CHECK_EQUAL(m.first_token().get_value(), 5); + BOOST_CHECK_EQUAL(m.last_token().get_value(), 6); + BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(TOK[5],TOK[6])"); + m.append(boost::make_shared<TokenMatch>(4)); + BOOST_CHECK_EQUAL(m.first_token().get_value(), 4); + BOOST_CHECK_EQUAL(m.last_token().get_value(), 6); + BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(TOK[5],TOK[6],TOK[4])"); +} +BOOST_AUTO_TEST_SUITE_END() -- GitLab