From 8d796a6e0a16e4e3ac50842e0e0fe1e68df9e198 Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Mon, 6 Dec 2010 10:36:01 +0100
Subject: [PATCH] Data-driven tests of operator outputs, example test cases in
 tests/data/*.ccl

---
 tests/CMakeLists.txt   |   1 +
 tests/data/basic.ccl   |  38 +++++++++
 tests/data/nest_if.ccl |  17 ++++
 tests/datadriven.cpp   | 176 +++++++++++++++++++++++++++++++++++++++++
 tests/datadriven.h     |   8 ++
 tests/main.cpp         |  16 +++-
 6 files changed, 255 insertions(+), 1 deletion(-)
 create mode 100644 tests/data/basic.ccl
 create mode 100644 tests/data/nest_if.ccl
 create mode 100644 tests/datadriven.cpp
 create mode 100644 tests/datadriven.h

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 8b0d92d..190919b 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -8,6 +8,7 @@ add_executable(tests
 	conditional.cpp
 	constant.cpp
 	context.cpp
+	datadriven.cpp
 	getlemmas.cpp
 	getsymbols.cpp
 	getsymbolsinrange.cpp
diff --git a/tests/data/basic.ccl b/tests/data/basic.ccl
new file mode 100644
index 0000000..64877f4
--- /dev/null
+++ b/tests/data/basic.ccl
@@ -0,0 +1,38 @@
+tagset=kipi
+---
+True
+
+True
+---
+False
+
+False
+---
+[]
+
+[]
+---
+{}
+
+{}
+---
+["a"]
+
+["a"]
+---
+{subst}
+
+{subst}
+---
+["a", "a"]
+
+["a"]
+---
+{ign, ign}
+
+{ign}
+---
+{sg, pl}
+
+{nmb}
+---
diff --git a/tests/data/nest_if.ccl b/tests/data/nest_if.ccl
new file mode 100644
index 0000000..0d0606b
--- /dev/null
+++ b/tests/data/nest_if.ccl
@@ -0,0 +1,17 @@
+---
+if(True, False, True)
+
+False
+---
+if(False, ["a"], ["b"])
+
+["b"]
+---
+if(if(True, False, True), {subst}, {ger})
+
+{ger}
+---
+if(equal(if(if(True, False, True), {subst}, {ger}), {ger}), "ger", "nie")
+
+["ger"]
+---
diff --git a/tests/datadriven.cpp b/tests/datadriven.cpp
new file mode 100644
index 0000000..fe8eced
--- /dev/null
+++ b/tests/datadriven.cpp
@@ -0,0 +1,176 @@
+#include "datadriven.h"
+
+#include <libpwrutils/util.h>
+#include <libpwrutils/foreach.h>
+#include <libpwrutils/pathsearch.h>
+#include <libcorpus2/util/settings.h>
+#include <libcorpus2/tagsetmanager.h>
+#include <libcorpus2/io/xcesreader.h>
+
+#include <libwccl/sentencecontext.h>
+#include <libwccl/parser/Parser.h>
+#include <libwccl/ops/funexeccontext.h>
+
+#include <fstream>
+#include <boost/filesystem/fstream.hpp>
+#include <iostream>
+#include <iomanip>
+#include <map>
+#include <set>
+#include <sstream>
+
+#include <boost/algorithm/string.hpp>
+#include <boost/bind.hpp>
+#include <boost/filesystem.hpp>
+#include <boost/foreach.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/test/unit_test.hpp>
+#include <boost/test/parameterized_test.hpp>
+
+using boost::filesystem::directory_iterator;
+using boost::filesystem::exists;
+using boost::filesystem::is_directory;
+using boost::filesystem::path;
+using boost::filesystem::ifstream;
+
+struct compare_test // one data-driven case: a .ccl file plus the directory it was found in
+{
+	path in_file; // the .ccl test file that gets opened and parsed
+	path search_path; // directory that the header's sentence= filename is joined onto
+};
+
+// Runs every case of one .ccl file. Layout: "key=value" header lines (tagset, sentence) up to
+// "---", then per case: operator text, an empty line, one expected-output line and "---".
+void test_one_item_actual(const compare_test& c)
+{
+	ifstream ifs_in(c.in_file);
+	// Fail loudly: an unreadable file would otherwise run zero cases and pass silently.
+	BOOST_REQUIRE_MESSAGE(ifs_in.good(), "Cannot open " << c.in_file.string());
+	std::string tagset_name = "kipi";
+	std::string sentence_filename;
+	std::string line;
+	int line_no = 0; // counts header lines too, so reported numbers are real file lines
+	while (ifs_in.good() && line != "---") {
+		++line_no;
+		std::getline(ifs_in, line);
+		std::vector<std::string> fields;
+		boost::algorithm::split(fields, line, boost::is_any_of("="));
+		if (fields.size() == 2) {
+			if (fields[0] == "tagset") {
+				tagset_name = fields[1];
+			} else if (fields[0] == "sentence") {
+				sentence_filename = fields[1];
+			}
+		}
+	}
+	const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_name);
+	boost::shared_ptr<Corpus2::Sentence> sentence(new Corpus2::Sentence);
+	if (!sentence_filename.empty()) {
+		path sentence_fullpath = c.search_path / sentence_filename;
+		Corpus2::XcesReader reader(tagset, sentence_fullpath.string());
+		sentence.reset(reader.get_next_sentence());
+		BOOST_REQUIRE(sentence);
+	}
+	Wccl::SentenceContext sc(sentence);
+	std::string operator_string, expected_output;
+	while (ifs_in.good()) {
+		++line_no;
+		std::getline(ifs_in, line);
+		if (line == "" && operator_string != "") { // empty line ends the operator text
+			++line_no;
+			std::getline(ifs_in, line);
+			expected_output = line;
+			Wccl::Parser parser(tagset);
+			boost::shared_ptr<ANTLRParserResultBase> parsed;
+			try {
+				parsed = parser.parseAnyOperator(operator_string);
+			} catch (Wccl::WcclError& e) {
+				std::cerr << e.info() << "\n---\n" << operator_string << "\n---\n";
+				throw;
+			}
+			Wccl::FunExecContext fu(sc, parsed->variables);
+			std::string output = parsed->get_op_base()->apply_internal(fu)->to_string(tagset);
+			if (output != expected_output) {
+				BOOST_ERROR("Mismatch on line " << line_no << ":\n"
+				 << "Expected: " << expected_output << "\n"
+				 << "Actual  : " << output << "\n"
+				 << "Operator: " << operator_string);
+			}
+			operator_string = "";
+			++line_no;
+			std::getline(ifs_in, line);
+			BOOST_REQUIRE(line == "---" || line == ""); // "---" separator; an empty line is tolerated
+		} else {
+			operator_string += line + "\n";
+		}
+	}
+}
+
+
+// Appends a compare_test to tests for every *.ccl file in dir, then recurses into its
+// subdirectories; returns the number added. ps is a path prefix used only for logging.
+int init_subdir(const path& dir, std::string ps, std::vector<compare_test>& tests)
+{
+	int count = 0;
+	ps += dir.string();
+	if (!ps.empty()) {
+		ps += Corpus2::Path::Instance().get_path_separator();
+	}
+
+	directory_iterator end_itr; // default-constructed is past-the-end
+	std::set<std::string> txt_tests; // std::set iterates in sorted order
+	std::set<path> subdirs;
+
+	for (directory_iterator itr(dir); itr != end_itr; ++itr) {
+		if (is_directory(itr->status())) {
+			subdirs.insert(itr->path());
+		} else if (itr->path().extension() == ".ccl") {
+			txt_tests.insert(itr->path().string());
+		}
+	}
+	foreach (const std::string& s, txt_tests) {
+		compare_test c;
+		c.in_file = s;
+		c.search_path = dir;
+		tests.push_back(c);
+		++count;
+	}
+	BOOST_TEST_MESSAGE("Found " << count << " valid data test case"
+		<< (count != 1 ? "s" : "")
+		<< " in " << dir
+		<< " [" << ps << "]"
+		);
+	foreach (const path& s, subdirs) {
+		count += init_subdir(s, ps, tests);
+	}
+	return count;
+}
+
+void test_one_item(const compare_test& c) // callable bound into each test case by init_data_suite
+{
+	try {
+		test_one_item_actual(c);
+	} catch (PwrNlp::PwrNlpError& e) { // turn a library exception into a reported test error
+		BOOST_ERROR("Caught " << e.scope() << " exception: \n" << e.info());
+	}
+}
+
+// Adds to ts one "data_test:<file path minus the directory prefix>" case per .ccl file found
+// under path, or under LIBWCCL_TEST_DATA_DIR "data" when path is empty.
+void init_data_suite(boost::unit_test::test_suite *ts, const std::string& path)
+{
+	std::string subdir_name = path.empty() ? std::string(LIBWCCL_TEST_DATA_DIR "data") : path;
+	// NOTE(review): a missing directory is only logged; init_subdir below is still called.
+	if (!exists(subdir_name)) {
+		BOOST_TEST_MESSAGE("Test data subdir does not exist");
+	}
+	std::vector<compare_test> cases;
+	init_subdir(subdir_name, "", cases);
+	foreach (const compare_test& item, cases) {
+		std::string case_name = "data_test:" + boost::algorithm::replace_first_copy(
+				item.in_file.string(), subdir_name, "");
+		boost::unit_test::test_case* tc = boost::unit_test::make_test_case(
+			boost::bind(test_one_item, item), case_name);
+		ts->add(tc);
+	}
+}
diff --git a/tests/datadriven.h b/tests/datadriven.h
new file mode 100644
index 0000000..513e899
--- /dev/null
+++ b/tests/datadriven.h
@@ -0,0 +1,8 @@
+#ifndef LIBWCCL_TESTS_DATADRIVEN_H
+#define LIBWCCL_TESTS_DATADRIVEN_H
+
+#include <boost/test/unit_test.hpp>
+#include <string>
+// Adds to ts one test case per .ccl file under path (a built-in data dir when path is empty).
+void init_data_suite(boost::unit_test::test_suite* ts, const std::string& path);
+#endif // LIBWCCL_TESTS_DATADRIVEN_H
diff --git a/tests/main.cpp b/tests/main.cpp
index 0cbc1e4..beb1c01 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -1,6 +1,7 @@
 //#define BOOST_TEST_MODULE master
 #include <boost/test/included/unit_test.hpp>
 #include <unicode/uclean.h>
+#include "datadriven.h"
 
 BOOST_AUTO_TEST_CASE(test_test)
 {
@@ -22,7 +23,20 @@ static struct ICU_cleaner
 	}
 } the_cleaner;
 
-boost::unit_test::test_suite* init_unit_test_suite(int /*argc*/, char* /*argv*/[])
+boost::unit_test::test_suite* init_unit_test_suite(int argc, char* argv[])
 {
+	// Registers the data-driven "compare" suite. "--compare-tests-dir DIR" selects the
+	// data directory; otherwise an empty path is handed to init_data_suite.
+	boost::unit_test::test_suite* ts1 = BOOST_TEST_SUITE("compare");
+	std::string compare_path;
+	const std::string dir_option = "--compare-tests-dir"; // std::string compare, no strcmp needed
+	for (int i = 0; i + 1 < argc; ++i) { // the option is only honoured when a value follows it
+		if (dir_option == argv[i]) {
+			compare_path = argv[++i];
+		}
+	}
+	init_data_suite(ts1, compare_path);
+	boost::unit_test::framework::master_test_suite().add(ts1);
+
 	return 0;
 }
-- 
GitLab