Skip to content
Snippets Groups Projects
Commit 51e69c41 authored by ilor's avatar ilor
Browse files

move some tests from maca that belong in libcorpus2

parent 66596291
No related branches found
No related tags found
No related merge requests found
......@@ -6,6 +6,9 @@ add_definitions(-DLIBCORPUS2_TEST_DATA_DIR="${PROJECT_SOURCE_DIR}/")
add_executable( tests
main.cpp
basic.cpp
tag_split.cpp
tagset_parse.cpp
)
target_link_libraries ( tests maca ${Boost_LIBRARIES})
......@@ -13,6 +16,4 @@ target_link_libraries ( tests maca ${Boost_LIBRARIES})
include_directories(${Boost_INCLUDE_DIR})
link_directories(${Boost_LIBRARY_DIRS})
#configure_file(test-sanity.sh ${CMAKE_CURRENT_BINARY_DIR})
#add_custom_target(test tests COMMAND ./test-sanity.sh small)
#add_custom_target(test-large ./test-sanity.sh large)
add_custom_target(test tests)
#include <boost/test/unit_test.hpp>
#include <libcorpus2/token.h>
// Inline tagset definition text with an [ATTR] section (lines starting with
// A, B and C) followed by a [POS] section.
// NOTE(review): in the visible code this string is only referenced by the
// commented-out Tagset construction in token_dup_lexemes — confirm whether it
// is still needed.
const char tagsetstr1[] = "[ATTR]\n"
"A tag tog other a3 \n"
"B data thing tag-thing thang\n"
"C a b c \n"
"[POS]\n some A B [C]\n";
// A token built from an orth and a whitespace amount reports both back
// unchanged and starts out with no lexemes.
BOOST_AUTO_TEST_CASE( token )
{
    const UnicodeString orth = UnicodeString::fromUTF8("ZZ");
    Corpus2::Token tok(orth, PwrNlp::Whitespace::ManySpaces);

    BOOST_CHECK_EQUAL(tok.orth_utf8(), "ZZ");
    BOOST_CHECK_EQUAL(tok.wa(), PwrNlp::Whitespace::ManySpaces);
    BOOST_CHECK(tok.lexemes().empty());
}
// Exercises check_duplicate_lexemes() / remove_duplicate_lexemes() on a token
// as lexemes are added: both must report false until the same lexeme is added
// a second time, and removing the duplicate must restore equality with a copy
// taken before the duplicate was added.
BOOST_AUTO_TEST_CASE( token_dup_lexemes )
{
    Corpus2::Token tok(UnicodeString::fromUTF8("ZZ"), PwrNlp::Whitespace::ManySpaces);
    // A Tagset is not needed here; the tag is built directly from indices.
    Corpus2::Tag tag(Corpus2::tagset_idx_t(0), Corpus2::pos_idx_t(0));
    Corpus2::Lexeme lex_a(UnicodeString::fromUTF8("aaa"), tag);
    Corpus2::Lexeme lex_b(UnicodeString::fromUTF8("bbb"), tag);

    // No lexemes at all.
    BOOST_CHECK(!tok.check_duplicate_lexemes());
    BOOST_CHECK(!tok.remove_duplicate_lexemes());

    // One lexeme.
    tok.add_lexeme(lex_a);
    BOOST_CHECK(!tok.check_duplicate_lexemes());
    BOOST_CHECK(!tok.remove_duplicate_lexemes());

    // Two distinct lexemes.
    tok.add_lexeme(lex_b);
    BOOST_CHECK(!tok.check_duplicate_lexemes());
    BOOST_CHECK(!tok.remove_duplicate_lexemes());

    // Keep a duplicate-free copy, then add the first lexeme again.
    Corpus2::Token before_dup(tok);
    tok.add_lexeme(lex_a);
    BOOST_CHECK(tok != before_dup);
    BOOST_CHECK(tok.check_duplicate_lexemes());
    BOOST_CHECK(tok.remove_duplicate_lexemes());

    // After removal the token is clean again and equals the copy.
    BOOST_CHECK(!tok.check_duplicate_lexemes());
    BOOST_CHECK(!tok.remove_duplicate_lexemes());
    BOOST_CHECK(tok == before_dup);
}
#include <boost/test/unit_test.hpp>
#include <set>
#include <libpwrutils/foreach.h>
#include <libcorpus2/tagset.h>
#include <libcorpus2/token.h>
BOOST_AUTO_TEST_SUITE( tag_split )
/**
 * Fixture for the tag_split test cases.
 *
 * Owns a small tagset built from an inline definition and provides
 * check_split(), which turns a tag string into lexemes on a token and
 * compares the resulting tags against an expected set of tag strings.
 */
struct F {
    F() {
        const char tagset_string[] = "[ATTR]\n"
            "A tag tog other a3 \n"
            "B data thing tag-thing thang\n"
            "C a b c \n"
            "[POS]\n some A B [C]\n";
        tagset.reset(new Corpus2::Tagset(tagset_string));
    }

    /// Tagset every test case in this suite works against.
    boost::shared_ptr<Corpus2::Tagset> tagset;

    /**
     * Feeds the tag string @p s through Tagset::lexemes_into_token() and
     * checks that the string forms of the resulting tags match @p expect.
     *
     * Both arguments are taken by const reference so that callers' strings
     * and sets are not copied on every call.
     *
     * @param s      tag string to process
     * @param expect string forms of the tags that should come out
     * @return the tags found on the token, in lexeme order
     *
     * Exceptions raised by lexemes_into_token() propagate to the caller;
     * the bad_value and bad_pos cases rely on that.
     */
    std::vector<Corpus2::Tag> check_split(const std::string& s,
            const std::set<std::string>& expect)
    {
        std::set<std::string> actual;
        std::vector<Corpus2::Tag> tags;
        Corpus2::Token t;
        tagset->lexemes_into_token(t, UnicodeString(), s);
        foreach (const Corpus2::Lexeme& lex, t.lexemes()) {
            const Corpus2::Tag& tag = lex.tag();
            // Only a warning: a tag failing validation does not fail the test.
            BOOST_WARN(tagset->validate_tag(tag, false));
            actual.insert(tagset->tag_to_string(tag));
            tags.push_back(tag);
        }
        BOOST_CHECK_EQUAL_COLLECTIONS(actual.begin(), actual.end(), expect.begin(), expect.end());
        return tags;
    }
};
// A single tag string with no alternatives is expected back as-is.
BOOST_FIXTURE_TEST_CASE( plain, F )
{
    const std::string input("some:tag:data");
    std::set<std::string> expected;
    expected.insert(input);
    check_split(input, expected);
}
// Two tag strings joined with '+' are expected to yield both tags.
BOOST_FIXTURE_TEST_CASE( plus, F )
{
    const char* const wanted[] = {
        "some:tag:data",
        "some:other:tag-thing"
    };
    std::set<std::string> expected(wanted, wanted + sizeof(wanted) / sizeof(wanted[0]));
    check_split("some:tag:data+some:other:tag-thing", expected);
}
// A '.'-separated pair of values in one position is expected to yield one
// tag per value.
BOOST_FIXTURE_TEST_CASE( dot, F )
{
    const char* const wanted[] = {
        "some:tag:data",
        "some:tog:data"
    };
    std::set<std::string> expected(wanted, wanted + sizeof(wanted) / sizeof(wanted[0]));
    check_split("some:tag.tog:data", expected);
}
// Dotted alternatives in two positions are expected to yield every
// combination (2 x 3 tags).
BOOST_FIXTURE_TEST_CASE( dots, F )
{
    const char* const wanted[] = {
        "some:tag:data:a",
        "some:tog:data:a",
        "some:tag:data:b",
        "some:tog:data:b",
        "some:tag:data:c",
        "some:tog:data:c"
    };
    std::set<std::string> expected(wanted, wanted + sizeof(wanted) / sizeof(wanted[0]));
    check_split("some:tag.tog:data:a.b.c", expected);
}
// Combines '+' with dotted alternatives: each '+'-separated part is expected
// to expand independently.
BOOST_FIXTURE_TEST_CASE( dots_plus, F )
{
    const char* const wanted[] = {
        "some:tag:data:a",
        "some:tog:data:a",
        "some:tag:data:b",
        "some:tog:data:b",
        "some:other:thing",
        "some:other:thang"
    };
    std::set<std::string> expected(wanted, wanted + sizeof(wanted) / sizeof(wanted[0]));
    check_split("some:tag.tog:data:a.b+some:other:thing.thang", expected);
}
// A tag string that leaves out one value is expected to come back with an
// empty slot in that position ("some::data").
BOOST_FIXTURE_TEST_CASE( missing, F )
{
    const std::string input("some:data");
    std::set<std::string> expected;
    expected.insert("some::data");
    check_split(input, expected);
}
// A value that the fixture tagset does not define ("bad") must make the
// split throw TagParseError.
BOOST_FIXTURE_TEST_CASE( bad_value, F )
{
    const std::string input("some:bad:data");
    std::set<std::string> none;
    BOOST_CHECK_THROW(check_split(input, none), Corpus2::TagParseError);
}
// A leading symbol that is not a POS of the fixture tagset ("something")
// must make the split throw TagParseError.
BOOST_FIXTURE_TEST_CASE( bad_pos, F )
{
    const std::string input("something:data");
    std::set<std::string> none;
    BOOST_CHECK_THROW(check_split(input, none), Corpus2::TagParseError);
}
// An '_' in a position is expected to expand to every value of that
// position's attribute (tag, tog, other, a3).
BOOST_FIXTURE_TEST_CASE( underscore, F )
{
    const char* const wanted[] = {
        "some:tag:data",
        "some:tog:data",
        "some:other:data",
        "some:a3:data"
    };
    std::set<std::string> expected(wanted, wanted + sizeof(wanted) / sizeof(wanted[0]));
    check_split("some:_:data", expected);
}
// Two '_' placeholders: every expansion of the first position is expected to
// be combined with every value a, b and c of the last one.
BOOST_FIXTURE_TEST_CASE( underscores, F )
{
    const char* const prefixes[] = {
        "some:tag:data",
        "some:tog:data",
        "some:other:data",
        "some:a3:data"
    };
    const std::size_t prefix_count = sizeof(prefixes) / sizeof(prefixes[0]);

    std::set<std::string> expected;
    for (std::size_t i = 0; i < prefix_count; ++i) {
        const std::string prefix(prefixes[i]);
        expected.insert(prefix + ":a");
        expected.insert(prefix + ":b");
        expected.insert(prefix + ":c");
    }
    check_split("some:_:data:_", expected);
}
// An '_' placeholder combined with dotted alternatives "c.a": every expansion
// of the placeholder is expected with both the a and the c value.
BOOST_FIXTURE_TEST_CASE( underscore_dots, F )
{
    const char* const prefixes[] = {
        "some:tag:data",
        "some:tog:data",
        "some:other:data",
        "some:a3:data"
    };
    const std::size_t prefix_count = sizeof(prefixes) / sizeof(prefixes[0]);

    std::set<std::string> expected;
    for (std::size_t i = 0; i < prefix_count; ++i) {
        const std::string prefix(prefixes[i]);
        expected.insert(prefix + ":a");
        expected.insert(prefix + ":c");
    }
    check_split("some:_:data:c.a", expected);
}
BOOST_AUTO_TEST_SUITE_END()
#include <boost/test/unit_test.hpp>
#include <set>
#include <libpwrutils/foreach.h>
#include <libcorpus2/tagsetparser.h>
#include <libcorpus2/tagsetmanager.h>
#include <iostream>
BOOST_AUTO_TEST_SUITE( tagset_parse );
// Helper: runs Corpus2::TagsetParser::load_ini() on the given tagset
// definition text and returns the resulting tagset. Parser exceptions
// propagate to the caller.
Corpus2::Tagset parse(const char* s)
{
    const std::string text(s);
    std::stringstream ini(text);
    return Corpus2::TagsetParser::load_ini(ini);
}
// String-literal building blocks for the tagset definitions used below.
// They are combined with neighbouring literals by adjacent-literal
// concatenation, which inserts nothing between the pieces: a literal placed
// right before POSA must end in "\n" itself for "[POS]" to start a new line.
// PRE opens the [ATTR] section; POSA opens the [POS] section with one entry.
#define PRE "[ATTR]\n"
#define POSA "[POS]\n POS1\n"
// Empty input must be rejected with TagsetParseError.
BOOST_AUTO_TEST_CASE( empty )
{
    const char* const ini = "";
    BOOST_CHECK_THROW(parse(ini), Corpus2::TagsetParseError);
}
// The smallest definition used in this suite (empty [ATTR] section, one POS)
// must parse; a TagsetParseError fails the test with the error's info text.
BOOST_AUTO_TEST_CASE( minimal )
{
    const char* const ini = PRE POSA;
    try {
        parse(ini);
    } catch (Corpus2::TagsetParseError& err) {
        BOOST_FAIL(err.info());
    }
}
// Same as the minimal case, but the input does not end with a newline; it
// must still parse without a TagsetParseError.
BOOST_AUTO_TEST_CASE( minimal_nonewline )
{
    const char* const ini = PRE "[POS]\n POS1";
    try {
        parse(ini);
    } catch (Corpus2::TagsetParseError& err) {
        BOOST_FAIL(err.info());
    }
}
// An attribute line listing the same value twice ("A a a") must be rejected.
BOOST_AUTO_TEST_CASE( dupe_val )
{
    const char* const ini = PRE "A a a " POSA;
    BOOST_CHECK_THROW(parse(ini), Corpus2::TagsetParseError);
}
// Presumably meant to cover a value ("a") reused by a second attribute.
// NOTE(review): the literal before POSA has no trailing newline, so the
// concatenated text contains "C x a[POS]" on one line; the value there is
// "a[POS]", not "a". The expected throw may therefore have a different
// cause than a duplicate value — confirm against the parser.
BOOST_AUTO_TEST_CASE( dupe_val2 )
{
    const char* const ini = PRE "A a b\nB c d\n C x a" POSA;
    BOOST_CHECK_THROW(parse(ini), Corpus2::TagsetParseError);
}
// The third attribute line starts with "a", which is already a value of
// attribute A; the definition must be rejected.
BOOST_AUTO_TEST_CASE( dupe_sym )
{
    const char* const ini = PRE "A a b\nB c d\n a x z" POSA;
    BOOST_CHECK_THROW(parse(ini), Corpus2::TagsetParseError);
}
// Presumably meant to cover a POS entry ("A B") whose name clashes with
// attribute A.
// NOTE(review): the literal before POSA has no trailing newline, so the
// concatenated text contains "B c d[POS]" and no line consisting of "[POS]"
// alone. The expected throw may come from that rather than from the clash —
// confirm against the parser.
BOOST_AUTO_TEST_CASE( dupe_sym2 )
{
    const char* const ini = PRE "A a b\nB c d" POSA "A B";
    BOOST_CHECK_THROW(parse(ini), Corpus2::TagsetParseError);
}
// Expects TagsetParseError for the definition below.
// NOTE(review): the case name suggests a duplicated attribute, but the three
// attribute lines start with distinct names (A, B, C). Also, the literal
// before POSA has no trailing newline, so the text contains "C x z[POS]" on
// one line. The throw is probably caused by that malformed line rather than
// by a duplicate — confirm and adjust the input if so.
BOOST_AUTO_TEST_CASE( dupe_attr )
{
    const char* const ini = PRE "A a b\nB c d\n C x z" POSA;
    BOOST_CHECK_THROW(parse(ini), Corpus2::TagsetParseError);
}
// The [POS] section names P1 twice; the definition must be rejected.
BOOST_AUTO_TEST_CASE( dupe_pos )
{
    const char* const ini = PRE "A a b\n" POSA "P1 A\n P1\n";
    BOOST_CHECK_THROW(parse(ini), Corpus2::TagsetParseError);
}
// POS entry P2 refers to "ZZ", which the [ATTR] section does not define;
// the definition must be rejected.
BOOST_AUTO_TEST_CASE( bad_pos_attr )
{
    const char* const ini = PRE "A a b\n" POSA "P1 A\n P2 ZZ\n";
    BOOST_CHECK_THROW(parse(ini), Corpus2::TagsetParseError);
}
// An attribute line whose first symbol is "@@" must be rejected.
BOOST_AUTO_TEST_CASE( bad_value_name )
{
    const char* const ini = PRE "@@ a b\n" POSA "P1 A\n P2";
    BOOST_CHECK_THROW(parse(ini), Corpus2::TagsetParseError);
}
// Minimal tagset (no attributes, one POS): size() and size_extra() are both 1.
BOOST_AUTO_TEST_CASE( size1 )
{
    const char* const ini = PRE POSA;
    Corpus2::Tagset tagset = parse(ini);
    BOOST_CHECK_EQUAL(tagset.size(), 1);
    BOOST_CHECK_EQUAL(tagset.size_extra(), 1);
}
// No attributes, two POS entries: size() and size_extra() are both 2.
BOOST_AUTO_TEST_CASE( size2 )
{
    const char* const ini = PRE POSA " POS2\n";
    Corpus2::Tagset tagset = parse(ini);
    BOOST_CHECK_EQUAL(tagset.size(), 2);
    BOOST_CHECK_EQUAL(tagset.size_extra(), 2);
}
// One two-valued attribute, POS1 without attributes and POS2 using A:
// size() is 3 and size_extra() is 6.
BOOST_AUTO_TEST_CASE( size3 )
{
    const char* const ini = PRE "A a b\n" POSA "POS2 A";
    Corpus2::Tagset tagset = parse(ini);
    BOOST_CHECK_EQUAL(tagset.size(), 3);
    BOOST_CHECK_EQUAL(tagset.size_extra(), 6);
}
// Two two-valued attributes and three POS entries (POS1 bare, POS2 with A,
// POS3 with bracketed [A]): size() is 6 and size_extra() is 27.
BOOST_AUTO_TEST_CASE( size6 )
{
    const char* const ini = PRE "A a b \n B c d\n" POSA "POS2 A\n POS3 [A]\n";
    Corpus2::Tagset tagset = parse(ini);
    BOOST_CHECK_EQUAL(tagset.size(), 6);
    BOOST_CHECK_EQUAL(tagset.size_extra(), 27);
}
// Loading a tagset by name: "test" must load without throwing, and an
// unknown name must raise Corpus2::FileNotFound.
BOOST_AUTO_TEST_CASE( load_named )
{
// The inner try/catch prints the error's info text to stderr before
// rethrowing, so that a failure of the no-throw check shows the cause.
BOOST_CHECK_NO_THROW(
try {
Corpus2::get_named_tagset("test");
}catch(Corpus2::Error& e) {
std::cerr << e.info();
throw;
}
);
// A name that is not expected to match any tagset.
BOOST_CHECK_THROW(
Corpus2::get_named_tagset("__nonexistant_9867s8t"),
Corpus2::FileNotFound);
}
BOOST_AUTO_TEST_SUITE_END();
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment