// datadriven.cpp
#include "datadriven.h"

#include <libpwrutils/util.h>
#include <libpwrutils/foreach.h>
#include <libpwrutils/pathsearch.h>
#include <libcorpus2/util/settings.h>
#include <libcorpus2/tagsetmanager.h>
#include <libcorpus2/io/xcesreader.h>

#include <libwccl/sentencecontext.h>
#include <libwccl/parser/Parser.h>
#include <libwccl/ops/funexeccontext.h>

#include <fstream>
#include <boost/filesystem/fstream.hpp>
#include <iostream>
#include <iomanip>
#include <map>
#include <set>
#include <sstream>

#include <boost/algorithm/string.hpp>
#include <boost/bind.hpp>
#include <boost/filesystem.hpp>
#include <boost/foreach.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/test/unit_test.hpp>
#include <boost/test/parameterized_test.hpp>

using boost::filesystem::directory_iterator;
using boost::filesystem::exists;
using boost::filesystem::is_directory;
using boost::filesystem::path;
using boost::filesystem::ifstream;

/// Description of one data-driven test case discovered on disk.
struct compare_test
{
	/// Test description file; opened and parsed by test_one_item_actual().
	path in_file;
	/// Directory against which the "sentence" file name given in the
	/// test file header is resolved.
	path search_path;
};

void test_one_item_actual(const compare_test& c)
{
	ifstream ifs_in(c.in_file);

	std::string separators = "=";
	std::string tagset_name = "kipi";
	std::string sentence_filename;
	std::string line;
	while (ifs_in.good() && line != "---") {
		std::getline(ifs_in, line);
		std::vector<std::string> fields;
		boost::algorithm::split(fields, line, boost::is_any_of(separators));
		if (fields.size() == 2) {
			if (fields[0] == "tagset") {
				tagset_name = fields[1];
			} else if (fields[0] == "sentence") {
				sentence_filename = fields[1];
			}
		}
	}
	const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_name);
	boost::shared_ptr<Corpus2::Sentence> sentence(new Corpus2::Sentence);
	if (!sentence_filename.empty()) {
		path sentence_fullpath = c.search_path / sentence_filename;
		Corpus2::XcesReader reader(tagset, sentence_fullpath.string());
		sentence.reset(reader.get_next_sentence());
		BOOST_REQUIRE(sentence);
	}
	Wccl::SentenceContext sc(sentence);

	std::string operator_string, expected_output;

	int line_no = 0;
	while (ifs_in.good()) {
		++line_no;
		std::getline(ifs_in, line);
		if (line == "" && operator_string != "") {
			++line_no;
			std::getline(ifs_in, line);
			expected_output = line;
			Wccl::Parser parser(tagset);
			boost::shared_ptr<ANTLRParserResultBase> parsed;
			try {
				parsed = parser.parseAnyOperator(operator_string);
			} catch (Wccl::WcclError& e) {
				std::cerr << e.info() << "\n---\n" << operator_string << "\n---\n";
				throw;
			}
			Wccl::FunExecContext fu(sc, parsed->variables);
			std::string output = parsed->get_op_base()->apply_internal(fu)->to_string(tagset);
			if (output != expected_output) {
				BOOST_ERROR("Mismatch on line " << line_no << ":\n"
				 << "Expected: " << expected_output << "\n"
				 << "Actual  : " << output << "\n"
				 << "Operator: " << operator_string);
			}
			expected_output = "";
			operator_string = "";
			++line_no;
			std::getline(ifs_in, line);
			BOOST_REQUIRE(line == "---" || line == "");
		} else {
			if (operator_string.empty() && line.substr(0, 9) == "position=") {
				std::string new_position = line.substr(9);
				sc.set_position(atoi(new_position.c_str()));
			} else {
				operator_string += line + "\n";
			}
		}
	}
}


int init_subdir(const path& dir, std::string ps, std::vector<compare_test>& tests)
{
	int count = 0;
	ps += dir.string();
	if (!ps.empty()) {
		ps += Corpus2::Path::Instance().get_path_separator();
	}

	directory_iterator end_itr; // default-constructed is past-the-end
	std::set<std::string> txt_tests;
	std::set<path> subdirs;

	for (directory_iterator itr(dir); itr != end_itr; ++itr) {
		if (is_directory(itr->status())) {
			subdirs.insert(itr->path());
		} else {
			if (itr->path().extension() == ".ccl") {
				txt_tests.insert(itr->path().string());
			}
		}
	}
	foreach (const std::string& s, txt_tests) {
		compare_test c;
		c.in_file = s;
		c.search_path = dir;
		tests.push_back(c);
		++count;
	}
	BOOST_TEST_MESSAGE("Found " << count << " valid data test case"
		<< (count > 1 ? "s" : "")
		<< " in " << dir
		<< " [" << ps << "]"
		);
	foreach (const path& s, subdirs) {
		count += init_subdir(s, ps, tests);
	}
	return count;
}

void test_one_item(const compare_test& c)
{
	try {
		test_one_item_actual(c);
	} catch (PwrNlp::PwrNlpError& e) {
		BOOST_ERROR("Caught " << e.scope() << " exception: \n" << e.info());
	}
}

/**
 * Register one Boost.Test case per data file found below the data directory.
 *
 * The directory scanned is @p path when it is non-empty, otherwise
 * LIBWCCL_TEST_DATA_DIR "data". Each discovered file is registered under
 * the name "data_test:" followed by the file path with the first occurrence
 * of the data directory name removed.
 *
 * If the directory does not exist a message is logged and no test cases are
 * added.
 *
 * @param ts    suite to which the test cases are added
 * @param path  data directory override; empty means use the default
 */
void init_data_suite(boost::unit_test::test_suite *ts, const std::string& path)
{
	std::string subdir_name = LIBWCCL_TEST_DATA_DIR "data";
	if (!path.empty()) {
		subdir_name = path;
	}
	if (!exists(subdir_name)) {
		BOOST_TEST_MESSAGE("Test data subdir does not exist");
		// Scanning a missing directory would make directory_iterator
		// throw, so stop here with nothing registered.
		return;
	}
	std::vector<compare_test> compares;
	init_subdir(subdir_name, "", compares);
	foreach (const compare_test& ci, compares) {
		std::string rel_path = boost::algorithm::replace_first_copy(
				ci.in_file.string(), subdir_name, "");
		std::string name = "data_test:" + rel_path;
		ts->add(boost::unit_test::make_test_case(
			boost::bind(test_one_item, ci), name));
	}
}