diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 8b0d92d9c232601bce1f4a9decfbff64d076cf99..190919b1baab9c1351583919255c19132cd7d5ed 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -8,6 +8,7 @@ add_executable(tests
 	conditional.cpp
 	constant.cpp
 	context.cpp
+	datadriven.cpp
 	getlemmas.cpp
 	getsymbols.cpp
 	getsymbolsinrange.cpp
diff --git a/tests/data/basic.ccl b/tests/data/basic.ccl
new file mode 100644
index 0000000000000000000000000000000000000000..64877f43631885b716e771fadd58a67ab28ca614
--- /dev/null
+++ b/tests/data/basic.ccl
@@ -0,0 +1,38 @@
+tagset=kipi
+---
+True
+
+True
+---
+False
+
+False
+---
+[]
+
+[]
+---
+{}
+
+{}
+---
+["a"]
+
+["a"]
+---
+{subst}
+
+{subst}
+---
+["a", "a"]
+
+["a"]
+---
+{ign, ign}
+
+{ign}
+---
+{sg, pl}
+
+{nmb}
+---
diff --git a/tests/data/nest_if.ccl b/tests/data/nest_if.ccl
new file mode 100644
index 0000000000000000000000000000000000000000..0d0606bd8516a3e909746afd95ecf50eb00d14a1
--- /dev/null
+++ b/tests/data/nest_if.ccl
@@ -0,0 +1,17 @@
+---
+if(True, False, True)
+
+False
+---
+if(False, ["a"], ["b"])
+
+["b"]
+---
+if(if(True, False, True), {subst}, {ger})
+
+{ger}
+---
+if(equal(if(if(True, False, True), {subst}, {ger}), {ger}), "ger", "nie")
+
+["ger"]
+---
diff --git a/tests/datadriven.cpp b/tests/datadriven.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fe8eced153a923503a32c82953843bb0441a2401
--- /dev/null
+++ b/tests/datadriven.cpp
@@ -0,0 +1,223 @@
+#include "datadriven.h"
+
+#include <libpwrutils/util.h>
+#include <libpwrutils/foreach.h>
+#include <libpwrutils/pathsearch.h>
+#include <libcorpus2/util/settings.h>
+#include <libcorpus2/tagsetmanager.h>
+#include <libcorpus2/io/xcesreader.h>
+
+#include <libwccl/sentencecontext.h>
+#include <libwccl/parser/Parser.h>
+#include <libwccl/ops/funexeccontext.h>
+
+#include <fstream>
+#include <boost/filesystem/fstream.hpp>
+#include <iostream>
+#include <iomanip>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <boost/algorithm/string.hpp>
+#include <boost/bind.hpp>
+#include <boost/filesystem.hpp>
+#include <boost/foreach.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/test/unit_test.hpp>
+#include <boost/test/parameterized_test.hpp>
+
+using boost::filesystem::directory_iterator;
+using boost::filesystem::exists;
+using boost::filesystem::is_directory;
+using boost::filesystem::path;
+using boost::filesystem::ifstream;
+
+/// One data-driven test: a .ccl script and the directory it was found in.
+struct compare_test
+{
+	/// Path of the .ccl test script.
+	path in_file;
+	/// Directory against which a "sentence=" file name is resolved.
+	path search_path;
+};
+
+/**
+ * Runs every case of one .ccl test script.
+ *
+ * The file starts with an optional header of "key=value" lines, ended by a
+ * "---" line: "tagset" (default "kipi") names the tagset and "sentence"
+ * names an XCES file, resolved against search_path, whose first sentence
+ * becomes the context (an empty sentence is used otherwise). Each case
+ * that follows is: operator text (one or more lines), an empty line, one
+ * line of expected output, then a "---" or empty line. The operator is
+ * parsed and applied, and its string form is compared with the expected
+ * line; a mismatch is reported with BOOST_ERROR.
+ */
+void test_one_item_actual(const compare_test& c)
+{
+	ifstream ifs_in(c.in_file);
+	// An unreadable file would otherwise run zero cases and pass silently.
+	BOOST_REQUIRE_MESSAGE(ifs_in.good(),
+		"Cannot open test file " << c.in_file.string());
+
+	std::string separators = "=";
+	std::string tagset_name = "kipi";
+	std::string sentence_filename;
+	std::string line;
+	// Header lines are counted too, so reported line numbers match the file.
+	int line_no = 0;
+	while (ifs_in.good() && line != "---") {
+		++line_no;
+		std::getline(ifs_in, line);
+		std::vector<std::string> fields;
+		boost::algorithm::split(fields, line, boost::is_any_of(separators));
+		if (fields.size() == 2) {
+			if (fields[0] == "tagset") {
+				tagset_name = fields[1];
+			} else if (fields[0] == "sentence") {
+				sentence_filename = fields[1];
+			}
+		}
+	}
+	const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_name);
+	boost::shared_ptr<Corpus2::Sentence> sentence(new Corpus2::Sentence);
+	if (!sentence_filename.empty()) {
+		path sentence_fullpath = c.search_path / sentence_filename;
+		Corpus2::XcesReader reader(tagset, sentence_fullpath.string());
+		sentence.reset(reader.get_next_sentence());
+		BOOST_REQUIRE(sentence);
+	}
+	Wccl::SentenceContext sc(sentence);
+
+	std::string operator_string, expected_output;
+
+	while (ifs_in.good()) {
+		++line_no;
+		std::getline(ifs_in, line);
+		if (line == "" && operator_string != "") {
+			// An empty line ends the operator text; the expected output follows.
+			++line_no;
+			std::getline(ifs_in, line);
+			expected_output = line;
+			Wccl::Parser parser(tagset);
+			boost::shared_ptr<ANTLRParserResultBase> parsed;
+			try {
+				parsed = parser.parseAnyOperator(operator_string);
+			} catch (Wccl::WcclError& e) {
+				// Show the offending operator text before the error propagates.
+				std::cerr << e.info() << "\n---\n" << operator_string << "\n---\n";
+				throw;
+			}
+			Wccl::FunExecContext fu(sc, parsed->variables);
+			std::string output = parsed->get_op_base()->apply_internal(fu)->to_string(tagset);
+			if (output != expected_output) {
+				BOOST_ERROR("Mismatch on line " << line_no << ":\n"
+					<< "Expected: " << expected_output << "\n"
+					<< "Actual : " << output << "\n"
+					<< "Operator: " << operator_string);
+			}
+			expected_output = "";
+			operator_string = "";
+			++line_no;
+			std::getline(ifs_in, line);
+			BOOST_REQUIRE(line == "---" || line == "");
+		} else {
+			operator_string += line + "\n";
+		}
+	}
+}
+
+
+/**
+ * Collects the .ccl files directly in dir into tests, then recurses into
+ * the subdirectories of dir.
+ *
+ * ps is the chain of directories visited so far, joined with the Corpus2
+ * path separator; here it is only used in the log message. Files and
+ * subdirectories go through std::set, so they are handled in sorted order.
+ *
+ * Returns the number of test files found in dir and below.
+ */
+int init_subdir(const path& dir, std::string ps, std::vector<compare_test>& tests)
+{
+	int count = 0;
+	ps += dir.string();
+	if (!ps.empty()) {
+		ps += Corpus2::Path::Instance().get_path_separator();
+	}
+
+	directory_iterator end_itr; // default-constructed is past-the-end
+	std::set<std::string> txt_tests;
+	std::set<path> subdirs;
+
+	for (directory_iterator itr(dir); itr != end_itr; ++itr) {
+		if (is_directory(itr->status())) {
+			subdirs.insert(itr->path());
+		} else {
+			if (itr->path().extension() == ".ccl") {
+				txt_tests.insert(itr->path().string());
+			}
+		}
+	}
+	foreach (const std::string& s, txt_tests) {
+		compare_test c;
+		c.in_file = s;
+		c.search_path = dir;
+		tests.push_back(c);
+		++count;
+	}
+	BOOST_TEST_MESSAGE("Found " << count << " valid data test case"
+		<< (count != 1 ? "s" : "")
+		<< " in " << dir
+		<< " [" << ps << "]"
+		);
+	foreach (const path& s, subdirs) {
+		count += init_subdir(s, ps, tests);
+	}
+	return count;
+}
+
+/**
+ * Test-case entry point: runs test_one_item_actual and turns a
+ * PwrNlp::PwrNlpError into a test failure (BOOST_ERROR).
+ */
+void test_one_item(const compare_test& c)
+{
+	try {
+		test_one_item_actual(c);
+	} catch (PwrNlp::PwrNlpError& e) {
+		BOOST_ERROR("Caught " << e.scope() << " exception: \n" << e.info());
+	}
+}
+
+/**
+ * Registers one test case per .ccl file found under the data directory.
+ *
+ * ts receives the cases; path overrides the default directory,
+ * LIBWCCL_TEST_DATA_DIR "data", when non-empty. Each case is named
+ * "data_test:" plus the file path with the directory name removed.
+ */
+void init_data_suite(boost::unit_test::test_suite *ts, const std::string& path)
+{
+	std::string subdir_name = LIBWCCL_TEST_DATA_DIR "data";
+	if (!path.empty()) {
+		subdir_name = path;
+	}
+	if (!exists(subdir_name)) {
+		BOOST_TEST_MESSAGE("Test data subdir does not exist");
+		// Iterating a missing directory throws, so register nothing instead.
+		return;
+	}
+	std::vector<compare_test> compares;
+	init_subdir(subdir_name, "", compares);
+	foreach (const compare_test& ci, compares) {
+		std::string rel_path = boost::algorithm::replace_first_copy(
+				ci.in_file.string(), subdir_name, "");
+		std::string name = "data_test:" + rel_path;
+		ts->add(boost::unit_test::make_test_case(
+				boost::bind(test_one_item, ci), name));
+	}
+}
diff --git a/tests/datadriven.h b/tests/datadriven.h
new file mode 100644
index 0000000000000000000000000000000000000000..513e89965dd7e8e67a37cbb769a961d896d3d088
--- /dev/null
+++ b/tests/datadriven.h
@@ -0,0 +1,15 @@
+#ifndef LIBWCCL_TESTS_DATADRIVEN_H
+#define LIBWCCL_TESTS_DATADRIVEN_H
+
+#include <string>
+
+#include <boost/test/unit_test.hpp>
+
+/**
+ * Adds one test case per .ccl file found under the test data directory
+ * to the suite ts. A non-empty path overrides the default directory,
+ * LIBWCCL_TEST_DATA_DIR "data".
+ */
+void init_data_suite(boost::unit_test::test_suite* ts, const std::string& path);
+
+#endif // LIBWCCL_TESTS_DATADRIVEN_H
diff --git a/tests/main.cpp b/tests/main.cpp
index 0cbc1e4064ed5aedfa613b73afebc702bc11bee5..beb1c01bc5aa9b589fc0bbd83d9c8e6f72c572a7 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -1,6 +1,7 @@
 //#define BOOST_TEST_MODULE master
 #include <boost/test/included/unit_test.hpp>
 #include <unicode/uclean.h>
+#include "datadriven.h"
 
 BOOST_AUTO_TEST_CASE(test_test)
 {
@@ -22,7 +23,20 @@ static struct ICU_cleaner
 	}
 } the_cleaner;
 
-boost::unit_test::test_suite* init_unit_test_suite(int /*argc*/, char* /*argv*/[])
+boost::unit_test::test_suite* init_unit_test_suite(int argc, char* argv[])
 {
+	// Register the data-driven "compare" suite. Its data directory can be
+	// overridden with "--compare-tests-dir <dir>"; argv[0] is skipped and a
+	// trailing flag with no value is ignored. Compared without <cstring>.
+	boost::unit_test::test_suite* ts1 = BOOST_TEST_SUITE("compare");
+	std::string compare_path;
+	for (int i = 1; i + 1 < argc; ++i) {
+		if (std::string(argv[i]) == "--compare-tests-dir") {
+			compare_path = argv[++i];
+		}
+	}
+	init_data_suite(ts1, compare_path);
+	boost::unit_test::framework::master_test_suite().add(ts1);
+
 	return 0;
 }