#include "datadriven.h"
#include <libpwrutils/util.h>
#include <libpwrutils/foreach.h>
#include <libpwrutils/pathsearch.h>
#include <libcorpus2/util/settings.h>
#include <libcorpus2/tagsetmanager.h>
#include <libcorpus2/io/xcesreader.h>
#include <libwccl/sentencecontext.h>
#include <libwccl/parser/Parser.h>
#include <libwccl/ops/funexeccontext.h>
#include <fstream>
#include <boost/filesystem/fstream.hpp>
#include <iostream>
#include <iomanip>
#include <map>
#include <set>
#include <sstream>
#include <boost/algorithm/string.hpp>
#include <boost/bind.hpp>
#include <boost/filesystem.hpp>
#include <boost/foreach.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/test/unit_test.hpp>
#include <boost/test/parameterized_test.hpp>

using boost::filesystem::directory_iterator;
using boost::filesystem::exists;
using boost::filesystem::is_directory;
using boost::filesystem::path;
using boost::filesystem::ifstream;

/**
 * Description of one data-driven test case.
 */
struct compare_test
{
    /// Test description file (collected from files with a ".ccl" extension).
    path in_file;
    /// Directory against which the "sentence=" file name is resolved.
    path search_path;
};

namespace {

/**
 * Read the next line of @p is into @p line and advance @p line_no.
 *
 * Keeping the counter next to every read guarantees that the line numbers
 * printed in diagnostics match the physical lines of the test file.
 */
void read_counted_line(std::istream& is, std::string& line, int& line_no)
{
    std::getline(is, line);
    ++line_no;
}

} // namespace

/**
 * Run a single data-driven test file.
 *
 * File layout, as parsed below:
 *  - a header of "key=value" lines terminated by a line equal to "---";
 *    recognised keys are "tagset" (defaults to "kipi") and "sentence"
 *    (a file name relative to c.search_path, read with XcesReader; only
 *    the first sentence is used). Lines that do not split into exactly
 *    two fields on '=' are ignored;
 *  - a sequence of cases. Operator text is accumulated line by line; a
 *    "position=N" line seen while no operator text is pending moves the
 *    sentence context position instead. An empty line ends the operator
 *    text, the following line is the expected output, and subsequent
 *    "name=value" lines are compared against the variables of the
 *    execution context until an empty line or "---" is read.
 *
 * Mismatches are reported with BOOST_ERROR (non-fatal); a missing input
 * file, a missing sentence or a missing case terminator aborts the test
 * through BOOST_REQUIRE. A Wccl::WcclError raised while parsing is logged
 * to std::cerr together with the offending operator text and rethrown.
 *
 * @param c the test file and the directory used to locate its sentence file
 */
void test_one_item_actual(const compare_test& c)
{
    ifstream ifs_in(c.in_file);
    // Without this check an unreadable file would skip both loops below and
    // the test would pass without having verified anything.
    BOOST_REQUIRE_MESSAGE(ifs_in.is_open(),
        "Cannot open test file " << c.in_file.string());

    std::string separators = "=";
    std::string tagset_name = "kipi";
    std::string sentence_filename;
    std::string line;
    // Physical line number of the most recently read line (1-based).
    int line_no = 0;

    // Header: key=value settings up to the "---" separator.
    while (ifs_in.good() && line != "---") {
        read_counted_line(ifs_in, line, line_no);
        std::vector<std::string> fields;
        boost::algorithm::split(fields, line, boost::is_any_of(separators));
        if (fields.size() == 2) {
            if (fields[0] == "tagset") {
                tagset_name = fields[1];
            } else if (fields[0] == "sentence") {
                sentence_filename = fields[1];
            }
        }
    }

    const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_name);
    // An empty sentence is used when the header names no sentence file.
    boost::shared_ptr<Corpus2::Sentence> sentence(new Corpus2::Sentence);
    if (!sentence_filename.empty()) {
        path sentence_fullpath = c.search_path / sentence_filename;
        Corpus2::XcesReader reader(tagset, sentence_fullpath.string());
        sentence.reset(reader.get_next_sentence());
        BOOST_REQUIRE(sentence);
    }
    Wccl::SentenceContext sc(sentence);

    std::string operator_string, expected_output;
    while (ifs_in.good()) {
        read_counted_line(ifs_in, line, line_no);
        if (line == "" && operator_string != "") {
            // Blank line after operator text: the next line holds the
            // expected output of that operator.
            read_counted_line(ifs_in, line, line_no);
            expected_output = line;

            Wccl::Parser parser(tagset);
            boost::shared_ptr<ANTLRParserResultBase> parsed;
            try {
                parsed = parser.parseAnyOperator(operator_string);
            } catch (Wccl::WcclError& e) {
                std::cerr << e.info() << "\n---\n"
                    << operator_string << "\n---\n";
                throw;
            }

            Wccl::FunExecContext fu(sc, parsed->variables);
            std::string output =
                parsed->get_op_base()->apply_internal(fu)->to_string(tagset);
            if (output != expected_output) {
                BOOST_ERROR("Mismatch on line " << line_no << ":\n"
                    << "Expected: " << expected_output << "\n"
                    << "Actual  : " << output << "\n"
                    << "Operator: " << operator_string);
            }
            expected_output = "";
            operator_string = "";

            // Optional name=value assertions on the variables left behind by
            // the operator, up to an empty line or "---". The loop condition
            // is first evaluated against the expected-output line itself.
            while (ifs_in.good() && line != "---" && line != "") {
                read_counted_line(ifs_in, line, line_no);
                std::vector<std::string> fields;
                boost::algorithm::split(fields, line,
                    boost::is_any_of(separators));
                if (fields.size() == 2) {
                    boost::shared_ptr<Wccl::Value> v;
                    v = fu.variables()->get<Wccl::Value>(fields[0]);
                    if (!v) {
                        BOOST_ERROR("Invalid variable name in test: "
                            << fields[0] << " on line " << line_no);
                    } else if (v->to_string(tagset) != fields[1]) {
                        BOOST_ERROR("Variable " << fields[0]
                            << " value mismatch on line "
                            << line_no << "\n: expected " << fields[1]
                            << " got " << v->to_string(tagset));
                    }
                }
            }
            // Every case must be closed by a separator or an empty line.
            BOOST_REQUIRE(line == "---" || line == "");
        } else {
            if (operator_string.empty() && line.substr(0, 9) == "position=") {
                // Only honoured between cases, i.e. before any operator text.
                std::string new_position = line.substr(9);
                sc.set_position(atoi(new_position.c_str()));
            } else {
                operator_string += line + "\n";
            }
        }
    }
}

/**
 * Recursively collect the ".ccl" test files found under @p dir.
 *
 * Files of a directory are appended to @p tests in sorted order, followed by
 * the files of its subdirectories (also visited in sorted order). Each test
 * uses the directory it was found in as its search path.
 *
 * @param dir   directory to scan
 * @param ps    path string accumulated over the directories visited so far;
 *              it is only used in the log message emitted here
 * @param tests output vector the discovered tests are appended to
 * @return number of tests found in @p dir and all of its subdirectories
 */
int init_subdir(const path& dir, std::string ps, std::vector<compare_test>& tests)
{
    int count = 0;
    ps += dir.string();
    if (!ps.empty()) {
        ps += Corpus2::Path::Instance().get_path_separator();
    }

    directory_iterator end_itr; // default-constructed is past-the-end
    // Sets give a deterministic, sorted order independent of the order in
    // which the filesystem enumerates entries.
    std::set<std::string> txt_tests;
    std::set<path> subdirs;

    for (directory_iterator itr(dir); itr != end_itr; ++itr) {
        if (is_directory(itr->status())) {
            subdirs.insert(itr->path());
        } else {
            if (itr->path().extension() == ".ccl") {
                txt_tests.insert(itr->path().string());
            }
        }
    }

    foreach (const std::string& s, txt_tests) {
        compare_test c;
        c.in_file = s;
        c.search_path = dir;
        tests.push_back(c);
        ++count;
    }

    // Singular only for exactly one test ("0 ... cases", "1 ... case").
    BOOST_TEST_MESSAGE("Found " << count << " valid data test case"
        << (count != 1 ? "s" : "")
        << " in " << dir
        << " [" << ps << "]"
        );

    foreach (const path& s, subdirs) {
        count += init_subdir(s, ps, tests);
    }
    return count;
}

/**
 * Test-case entry point: runs test_one_item_actual() and reports any
 * PwrNlp::PwrNlpError it throws as a Boost.Test error instead of letting the
 * exception propagate.
 *
 * @param c the test case to run
 */
void test_one_item(const compare_test& c)
{
    try {
        test_one_item_actual(c);
    } catch (PwrNlp::PwrNlpError& e) {
        BOOST_ERROR("Caught " << e.scope() << " exception: \n" << e.info());
    }
}

/**
 * Register one Boost.Test case per data file found under the test data
 * directory.
 *
 * @param ts   suite the generated test cases are added to
 * @param path root directory of the test data; when empty,
 *             LIBWCCL_TEST_DATA_DIR "data" is used. (Inside this function
 *             the name `path` denotes this string parameter, not
 *             boost::filesystem::path.)
 *
 * If the directory does not exist a message is logged and no test case is
 * registered.
 */
void init_data_suite(boost::unit_test::test_suite *ts, const std::string& path)
{
    std::string subdir_name = LIBWCCL_TEST_DATA_DIR "data";
    if (!path.empty()) {
        subdir_name = path;
    }
    if (!exists(subdir_name)) {
        BOOST_TEST_MESSAGE("Test data subdir does not exist");
        // Scanning a missing directory would make directory_iterator throw
        // during suite initialisation, so stop here.
        return;
    }

    std::vector<compare_test> compares;
    init_subdir(subdir_name, "", compares);

    foreach (const compare_test& ci, compares) {
        // Name each test after its path relative to the data root.
        std::string rel_path = boost::algorithm::replace_first_copy(
            ci.in_file.string(), subdir_name, "");
        std::string name = "data_test:" + rel_path;
        ts->add(boost::unit_test::make_test_case(
            boost::bind(test_one_item, ci), name));
    }
}