Skip to content
Snippets Groups Projects
Commit 137da8c3 authored by omekr's avatar omekr
Browse files

Add functional tests for mwreader

parent 66d3eb6d
No related branches found
No related tags found
No related merge requests found
Showing
with 2059 additions and 8 deletions
......@@ -31,15 +31,14 @@ add_library(corpus2_mwereader SHARED mwereader.cpp mwe.cpp mweparser.cpp)
# Link the shared MWE reader library against the project-wide dependency list.
target_link_libraries(corpus2_mwereader ${LIBS})

# Stand-alone driver executable. It compiles mweparser.cpp itself in addition
# to linking corpus2_mwereader, which also lists mweparser.cpp as a source.
add_executable(mwertest
  mwertest.cpp
  mweparser.cpp
)
target_link_libraries(mwertest
  corpus2_mwereader
  ${LIBS}
  antlr
)

# Shared-object versioning: full version is "<major>.<minor>", the SONAME
# carries the major number only.
set_target_properties(corpus2_mwereader
  PROPERTIES
    VERSION "${libmwereader_major}.${libmwereader_minor}"
    SOVERSION ${libmwereader_major}
)

# Only the library is installed, and only on UNIX-like platforms.
# NOTE(review): a disabled install rule for a `c2pqtest` runtime target used to
# sit here; no such target is defined in the visible part of this file.
if(UNIX)
  install(TARGETS corpus2_mwereader LIBRARY DESTINATION lib)
endif()

# Functional tests live in their own subdirectory.
add_subdirectory(tests)
......@@ -29,7 +29,11 @@ public:
* sentences.
*/
boost::shared_ptr<Chunk> get_next_chunk();
/**
* Sets a reader option passed as a single string.
*
* Setting an "inner:..." option immediately creates the inner reader.
* If the filename given to the constructor is not valid, setting the
* "inner" option results in an error.
*
* @param option the option string, e.g. one starting with "inner:"
**/
void set_option(const std::string& option);
/**
......
# Functional test suite for the MWE reader library.
project(mwtest)

# Headers are resolved relative to the top of the whole source tree.
# CMAKE_SOURCE_DIR is kept on purpose: after project() above,
# PROJECT_SOURCE_DIR points at this tests directory instead.
include_directories(${CMAKE_SOURCE_DIR})

# Pass this directory to the test sources as a string-literal macro so they
# can locate their data files independently of the working directory.
add_definitions(-DMWE_READER_TEST_DATA_DIR="${CMAKE_CURRENT_SOURCE_DIR}")

# The mode keyword must be upper-case STATUS; a lower-case `status` is not a
# keyword and would be printed as part of the message text.
message(STATUS "${CMAKE_CURRENT_SOURCE_DIR}")

add_executable(mwtests
  main.cpp
  mwefunctional.cpp
)
target_link_libraries(mwtests corpus2_mwereader ${LIBS} antlr)

# `make mwtest` runs the test binary with Boost.Test message-level logging.
add_custom_target(mwtest mwtests --log_level=message)
<?xml version='1.0' encoding='utf-8'?>
<units_description tagset='kipi'>
<!--
  MWE definitions for the 'kipi' tagset: a single group, AdjSubstFix
  (type "fix", class "subst"), with one instance.
  The condition binds position 0 to the Adj variable (class adj) and
  position 1 to the Subst variable (class subst, ger or depr), and applies
  agrpp(0,1,{nmb,gnd,cas}) to the two positions.
  NOTE(review): the instance below is labelled with name="...", while the
  SubstAdjSgFlex and SubstSubstFix instances elsewhere in this changeset use
  base="..."; confirm which attribute the parser actually reads.
-->
<mwegroup name="AdjSubstFix" type="fix" class="subst">
<condition>
and(
inter(base[0],$s:Adj),
inter(base[1],$s:Subst),
setvar($Pos1, 0),
setvar($Pos2, 1),
inter(class[0],{adj}),
inter(class[1],{subst,ger,depr}),
agrpp(0,1,{nmb,gnd,cas})
)
</condition>
<instances>
<MWE name="dobra wola">
<var name="Adj">dobry</var>
<var name="Subst">wola</var>
<head>inter(class[0],{subst,ger,depr})</head>
</MWE>
</instances>
</mwegroup>
</units_description>
<?xml version='1.0' encoding='utf-8'?>
<units_description tagset='kipi'>
<!--
  MWE definitions for the 'kipi' tagset: a single group, AdjSubstFix
  (type "fix", class "subst"), with one instance ("dobra wola").
  Position 0 is matched against the Adj variable and class adj, position 1
  against the Subst variable and class subst, ger or depr;
  agrpp(0,1,{nmb,gnd,cas}) is applied to the pair.
  NOTE(review): the instance uses name="..." where other groups in this
  changeset use base="..."; confirm the attribute expected by the parser.
-->
<mwegroup name="AdjSubstFix" type="fix" class="subst">
<condition>
and(
inter(base[0],$s:Adj),
inter(base[1],$s:Subst),
setvar($Pos1, 0),
setvar($Pos2, 1),
inter(class[0],{adj}),
inter(class[1],{subst,ger,depr}),
agrpp(0,1,{nmb,gnd,cas})
)
</condition>
<instances>
<MWE name="dobra wola">
<var name="Adj">dobry</var>
<var name="Subst">wola</var>
<head>inter(class[0],{subst,ger,depr})</head>
</MWE>
</instances>
</mwegroup>
</units_description>
<?xml version='1.0' encoding='utf-8'?>
<units_description tagset='kipi'>
<!--
  MWE definitions for the 'kipi' tagset: a single group, SubstAdjSgFlex
  (type "flex", class "subst"), with one instance ("dzień dobry").
  The condition is an or() of two orderings:
    1. Subst at position 0 and Adj at position 1 ($Pos1 = 1, $Pos2 = 0);
    2. Adj at position 0 and Subst at position 1 ($Pos1 = 0, $Pos2 = 1).
  In both orderings the Subst position must have nmb equal to {sg} and a
  class in {subst,ger,depr}, and agrpp(0,1,{nmb,gnd,cas}) is applied.
-->
<mwegroup name="SubstAdjSgFlex" type="flex" class="subst">
<condition>
or(
and(
inter(base[1],$s:Adj),
inter(base[0],$s:Subst),
setvar($Pos1, 1),
setvar($Pos2, 0),
inter(class[1],{adj}),
equal(nmb[0], {sg}),
in(class[0],{subst,ger,depr}),
agrpp(0,1,{nmb,gnd,cas})
),//and
and(
inter(base[0],$s:Adj),
inter(base[1],$s:Subst),
setvar($Pos1, 0),
setvar($Pos2, 1),
inter(class[0],{adj}),
equal(nmb[1],{sg}),
in(class[1],{subst,ger,depr}),
agrpp(0,1,{nmb,gnd,cas})
)
)//or
</condition>
<instances>
<MWE base="dzień dobry">
<var name="Subst">dzień</var>
<var name="Adj">dobry</var>
<head>in(class[0],{subst,ger,depr})</head>
</MWE>
</instances>
</mwegroup>
</units_description>
<?xml version='1.0' encoding='utf-8'?>
<units_description tagset='kipi'>
<!--
  MWE definitions for the 'kipi' tagset: a single group, SubstAdjSgFlex
  (type "flex", class "subst"), with one instance ("dzień dobry").
  The or() condition accepts either word order: Subst then Adj
  ($Pos1 = 1, $Pos2 = 0) or Adj then Subst ($Pos1 = 0, $Pos2 = 1).
  Each branch requires nmb equal to {sg} and a class in {subst,ger,depr}
  at the Subst position, class adj at the Adj position, and applies
  agrpp(0,1,{nmb,gnd,cas}).
-->
<mwegroup name="SubstAdjSgFlex" type="flex" class="subst">
<condition>
or(
and(
inter(base[1],$s:Adj),
inter(base[0],$s:Subst),
setvar($Pos1, 1),
setvar($Pos2, 0),
inter(class[1],{adj}),
equal(nmb[0], {sg}),
in(class[0],{subst,ger,depr}),
agrpp(0,1,{nmb,gnd,cas})
),//and
and(
inter(base[0],$s:Adj),
inter(base[1],$s:Subst),
setvar($Pos1, 0),
setvar($Pos2, 1),
inter(class[0],{adj}),
equal(nmb[1],{sg}),
in(class[1],{subst,ger,depr}),
agrpp(0,1,{nmb,gnd,cas})
)
)//or
</condition>
<instances>
<MWE base="dzień dobry">
<var name="Subst">dzień</var>
<var name="Adj">dobry</var>
<head>in(class[0],{subst,ger,depr})</head>
</MWE>
</instances>
</mwegroup>
</units_description>
This diff is collapsed.
<?xml version="1.0" encoding="utf-8"?><!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd"><cesAna type="pre_morph" version="PWR 0.1" xmlns:xlink="http://www.w3.org/1999/xlink"><chunkList xml:base="text.xml">
<chunk type="p" xlink:href="#dv1p0">Chrostowski nie dostrzega przejawów dobrej woli, także po stronie żydowskiej.</chunk>
<chunk type="p" xlink:href="#dv1p1">Zrazu delikatnie - gdy ludzie mijają się na klatce schodowej, oprócz rytualnego "dzień dobry" wymieniają uwagi o brzydkim zapachu.</chunk>
<chunk type="p" xlink:href="#dv1p1">Zrazu delikatnie - gdy ludzie mijają się na klatce schodowej, oprócz rytualnego "dobry dzień" wymieniają uwagi o brzydkim zapachu.</chunk>
<chunk type="p" xlink:href="#dv1p1">Projekt rozporządzenia Ministra Edukacji Narodowej w sprawie podstaw programowych kształcenia w zawodach: górnik eksploatacji podziemnej, górnik odkrywkowej eksploatacji złóż, monter instalacji gazowych, monter instrumentów całkowicie muzycznych, monter sieci komunalnych, stolarz, technik hydrolog, technik instrumentów muzycznych, technik meteorolog i technik papiernictwa Projekt rozporządzenia Ministra Edukacji Narodowej w sprawie sposobu i trybu organizowania indywidualnego obowiązkowego rocznego przygotowania przedszkolnego i indywidualnego nauczania dzieci i młodzieży Projekt rozporządzenia Ministra Edukacji Narodowej w sprawie rodzajów innych form wychowania przedszkolnego, warunków tworzenia i organizowania tych form oraz sposobu ich działania Projekt rozporządzenia Ministra Edukacji Narodowej w sprawie rodzajów innych form wychowania przedszkolnego, warunków tworzenia i organizowania tych form oraz sposobu ich działania.</chunk>
<chunk type="p" xlink:href="#dv1p1">Pozycja, mimo iż dotyczy trudnych zagadnień, zawiera jasne i klarowne pytania, zrozumiałe dla wszystkich tych, którzy zajmują się immunologią z racji studiów, pracy nie całkiem naukowej czy zawodowej.</chunk>
<chunk type="p" xlink:href="#dv1p1">Pozycja, mimo iż dotyczy trudnych zagadnień, zawiera jasne i klarowne pytania, zrozumiałe dla wszystkich tych, którzy zajmują się immunologią z racji studiów, naukowej nie całkiem pracy czy zawodowej.</chunk>
</chunkList></cesAna>
<?xml version='1.0' encoding='utf-8'?>
<units_description tagset='kipi'>
<!--
  SubstSubstFix (type "fix", class "subst"): two adjacent tokens, position 0
  bound to Subst1 and position 1 bound to Subst2, both with a class in
  {subst,ger,depr}.
  The case-agreement test inter(cas[0], cas[1]) is commented out inside the
  condition; the accompanying Polish remark calls it a silly restriction and
  gives "Debatowali nad ceną netto" as the counter-example.
  NOTE(review): the head tests of the first two instances match their own
  Subst1 ("jan", "waga"), but "ratyfikacja traktatu" tests base[0] against
  "traktat", which is its Subst2; confirm this is intended.
-->
<mwegroup name="SubstSubstFix" type="fix" class="subst">
<condition>
and(
inter(base[0],$s:Subst1),
inter(base[1],$s:Subst2),
setvar($Pos1, 0),
setvar($Pos2, 1),
inter(class[0],{subst,ger,depr}),
inter(class[1],{subst,ger,depr})//,
//inter(cas[0], cas[1]) - głupie ograniczenie, np. Debatowali nad ceną netto
)
</condition>
<instances>
<MWE base="Jan Paweł">
<var name="Subst1">jan</var>
<var name="Subst2">paweł</var>
<head>inter(base[0], "jan")</head>
</MWE>
<MWE base="waga netto">
<var name="Subst1">waga</var>
<var name="Subst2">netto</var>
<head>inter(base[0], "waga")</head>
</MWE>
<MWE base="ratyfikacja traktatu">
<var name="Subst1">ratyfikacja</var>
<var name="Subst2">traktat</var>
<head>inter(base[0], "traktat")</head>
</MWE>
</instances>
</mwegroup>
<!--
  AdjSubstFix (type "fix", class "subst"): Adj at position 0 (class adj)
  followed by Subst at position 1 (class subst, ger or depr), with
  agrpp(0,1,{nmb,gnd,cas}) applied to the pair. One instance: "dobre imię".
  NOTE(review): this instance uses name="..." while the sibling groups in the
  same document use base="..."; confirm which attribute the parser reads.
-->
<mwegroup name="AdjSubstFix" type="fix" class="subst">
<condition>
and(
inter(base[0],$s:Adj),
inter(base[1],$s:Subst),
setvar($Pos1, 0),
setvar($Pos2, 1),
inter(class[0],{adj}),
inter(class[1],{subst,ger,depr}),
agrpp(0,1,{nmb,gnd,cas})
)
</condition>
<instances>
<MWE name="dobre imię">
<var name="Adj">dobry</var>
<var name="Subst">imię</var>
<head>inter(class[0],{subst,ger,depr})</head>
</MWE>
</instances>
</mwegroup>
<!--
  SubstAdjSgFlex (type "flex", class "subst"): a Subst/Adj pair accepted in
  either order. The first and() branch has Subst at position 0 and Adj at
  position 1; the second has Adj at position 0 and Subst at position 1.
  Both branches require nmb equal to {sg} at the Subst position and apply
  agrpp(0,1,{nmb,gnd,cas}).
  Instances: "akt notarialny", "areszt tymczasowy".
-->
<mwegroup name="SubstAdjSgFlex" type="flex" class="subst">
<condition>
or(
and(
inter(base[1],$s:Adj),
inter(base[0],$s:Subst),
setvar($Pos1, 1),
setvar($Pos2, 0),
inter(class[1],{adj}),
equal(nmb[0], {sg}),
in(class[0],{subst,ger,depr}),
agrpp(0,1,{nmb,gnd,cas})
),//and
and(
inter(base[0],$s:Adj),
inter(base[1],$s:Subst),
setvar($Pos1, 0),
setvar($Pos2, 1),
inter(class[0],{adj}),
equal(nmb[1],{sg}),
in(class[1],{subst,ger,depr}),
agrpp(0,1,{nmb,gnd,cas})
)
)//or
</condition>
<instances>
<MWE base="akt notarialny">
<var name="Subst">akt</var>
<var name="Adj">notarialny</var>
<head>in(class[0],{subst,ger,depr})</head>
</MWE>
<MWE base="areszt tymczasowy">
<var name="Subst">areszt</var>
<var name="Adj">tymczasowy</var>
<head>in(class[0],{subst,ger,depr})</head>
</MWE>
</instances>
</mwegroup>
</units_description>
#define BOOST_TEST_MODULE master
#include <boost/test/included/unit_test.hpp>
#include <boost/test/included/unit_test_framework.hpp>
/// Smoke test: checks a trivially true condition so the master test module
/// always contains at least one passing case.
BOOST_AUTO_TEST_CASE(test_test)
{
    const int zero = 0;
    BOOST_CHECK(zero == 0);
}
/**
*Tests for mwe.h
*
*/
#include <boost/test/unit_test.hpp>
#include <boost/filesystem.hpp>
#include "libmwereader/mwe.h"
#include <libcorpus2/io/reader.h>
BOOST_AUTO_TEST_SUITE( mwe_basic );
/**
 * Test fixture for the mwe_basic suite.
 *
 * On construction it resolves the test data directory, loads the "kipi"
 * tagset into `tset` and creates a reader for the test corpus.
 */
struct Fixture{
    Fixture()
    {
        // Called from the constructor, so an override in a derived fixture is
        // not used at this point.
        fill_sentences();
    }

    /**
     * Locates test1.kipi.xml and creates a "kipi" reader for it.
     * The reader is local to this function and is released on return.
     */
    virtual void fill_sentences()
    {
#ifdef MWE_READER_TEST_DATA_DIR
        // The macro names the tests directory; the data files sit in its
        // "data" subdirectory, matching the fallback branch below and the
        // fixture of the mwe_functional suite.
        boost::filesystem::path data_dir(MWE_READER_TEST_DATA_DIR);
        data_dir /= "data";
#else
        boost::filesystem::path data_dir("./data");
#endif
        boost::filesystem::path test_corpus = data_dir / "test1.kipi.xml";
        tset = Corpus2::get_named_tagset("kipi");
        // Pass the path as a plain string, as the functional tests do.
        boost::shared_ptr<Corpus2::TokenReader> reader =
            Corpus2::TokenReader::create_path_reader("kipi", tset, test_corpus.string());
    }

    Corpus2::Tagset tset; // copy of the "kipi" tagset
};
// Placeholder: the body is empty, so this case only exercises the
// construction and destruction of Fixture.
BOOST_FIXTURE_TEST_CASE( fix_no_gap , Fixture)
{
}
// Placeholder: the body is empty, so this case only exercises the
// construction and destruction of Fixture.
BOOST_FIXTURE_TEST_CASE( flex_no_gap , Fixture)
{
}
BOOST_AUTO_TEST_SUITE_END()
#include <boost/test/unit_test.hpp>
#include <boost/filesystem.hpp>
#include "libmwereader/mwe.h"
#include "libmwereader/mweparser.h"
#include "libmwereader/mwereader.h"
#include <libcorpus2/tagsetmanager.h>
BOOST_AUTO_TEST_SUITE( mwe_functional );
/**
 * Test fixture for the mwe_functional suite.
 *
 * Resolves the data directory and the path of the test corpus; both are
 * exposed as members for the test cases to use.
 */
struct Fixture{
    Fixture()
    {
        // Called from the constructor, so an override in a derived fixture is
        // not used at this point.
        fill_sentences();
    }

    /**
     * Fills `data_dir` and `test_corpus`.
     * With MWE_READER_TEST_DATA_DIR defined, the data directory is its "data"
     * subdirectory; otherwise it is "data" relative to the working directory.
     */
    virtual void fill_sentences()
    {
#ifdef MWE_READER_TEST_DATA_DIR
        data_dir = boost::filesystem::path(MWE_READER_TEST_DATA_DIR);
        data_dir /= "data";
#else
        // Assign the member. Declaring a local here would shadow it and leave
        // the member empty for the test cases that build MWE file paths.
        data_dir = boost::filesystem::path("data");
#endif
        test_corpus = data_dir / "test1.kipi.xml";
    }

    boost::filesystem::path test_corpus, data_dir;
};
// The token at index 4 of the first sentence must have a preferred lexeme
// with a non-empty lemma when the fixed-MWE definitions are loaded.
BOOST_FIXTURE_TEST_CASE( preferred_lexeme, Fixture)
{
    BOOST_MESSAGE("test: finding preferred lexeme");

    const Corpus2::Tagset& kipi = Corpus2::get_named_tagset("kipi");
    const std::string mwe_path = (data_dir / "fix_mwe.xml").string();

    Corpus2::MWEReader reader(kipi, test_corpus.string());
    reader.set_option("inner:xces");
    reader.set_option("mwefile:" + mwe_path);

    Corpus2::Sentence::Ptr first = reader.get_next_sentence();
    Corpus2::Token* unit = (*first)[4];
    BOOST_CHECK(unit->get_preferred_lexeme(kipi).lemma_utf8() != "");
}
// The orth of the token at index 4 of the first sentence must carry neither
// a trailing nor a leading space around "dobrej woli".
BOOST_FIXTURE_TEST_CASE( lexeme_no_white_spaces, Fixture)
{
    BOOST_MESSAGE("=====================\ntest: no white space after or before");

    const Corpus2::Tagset& kipi = Corpus2::get_named_tagset("kipi");
    const std::string mwe_path = (data_dir / "fix_mwe.xml").string();

    Corpus2::MWEReader reader(kipi, test_corpus.string());
    reader.set_option("inner:xces");
    reader.set_option("mwefile:" + mwe_path);

    Corpus2::Sentence::Ptr first = reader.get_next_sentence();
    Corpus2::Token* unit = (*first)[4];
    const std::string orth = unit->orth_utf8();
    BOOST_CHECK(orth != "dobrej woli ");
    BOOST_CHECK(orth != " dobrej woli");
}
// Fixed-order MWE without a gap: the token at index 4 of the first sentence
// must have the orth "dobrej woli" and the lemma "dobra wola".
BOOST_FIXTURE_TEST_CASE( fix_no_gap , Fixture)
{
    BOOST_MESSAGE("=====================\ntest: finding fixed mwe");

    const Corpus2::Tagset& kipi = Corpus2::get_named_tagset("kipi");
    const std::string mwe_path = (data_dir / "fix_mwe.xml").string();

    Corpus2::MWEReader reader(kipi, test_corpus.string());
    reader.set_option("inner:xces");
    reader.set_option("mwefile:" + mwe_path);

    Corpus2::Sentence::Ptr first = reader.get_next_sentence();
    Corpus2::Token* unit = (*first)[4];
    const std::string orth = unit->orth_utf8();
    BOOST_CHECK(orth == "dobrej woli");
    BOOST_CHECK(unit->get_preferred_lexeme(kipi).lemma_utf8() == "dobra wola");
}
// Flexible-order MWE without a gap: the second and third sentences must both
// yield, at token index 13, a unit with the lemma "dzień dobry", with the
// orth "dzień dobry" and "dobry dzień" respectively.
BOOST_FIXTURE_TEST_CASE( flex_no_gap , Fixture)
{
    BOOST_MESSAGE("=====================\ntest: finding flex mwe");

    const Corpus2::Tagset& kipi = Corpus2::get_named_tagset("kipi");
    const std::string mwe_path = (data_dir / "flex_mwe.xml").string();

    Corpus2::MWEReader reader(kipi, test_corpus.string());
    reader.set_option("inner:xces");
    reader.set_option("mwefile:" + mwe_path);

    reader.get_next_sentence(); // skip the first sentence
    Corpus2::Sentence::Ptr second = reader.get_next_sentence();
    Corpus2::Sentence::Ptr third = reader.get_next_sentence();

    Corpus2::Token* straight = (*second)[13];
    BOOST_CHECK(straight->orth_utf8() == "dzień dobry");
    BOOST_CHECK(straight->get_preferred_lexeme(kipi).lemma_utf8() == "dzień dobry");

    Corpus2::Token* swapped = (*third)[13];
    BOOST_CHECK(swapped->orth_utf8() == "dobry dzień");
    BOOST_CHECK(swapped->get_preferred_lexeme(kipi).lemma_utf8() == "dzień dobry");
}
// Fixed-order MWE with a gap: the token at index 27 of the fourth sentence
// must be the unit "instrumentów muzycznych" with the lemma
// "instrument muzyczny".
// NOTE(review): none of the MWE definition documents visible alongside this
// test defines "instrument muzyczny"; confirm that fix_gap_mwe.xml does.
BOOST_FIXTURE_TEST_CASE( fix_gap , Fixture)
{
    BOOST_MESSAGE("=====================\ntest: finding fix_gap mwe");
    const Corpus2::Tagset& tset = Corpus2::get_named_tagset("kipi");
    Corpus2::MWEReader mwr(tset, test_corpus.string());
    mwr.set_option("inner:xces");
    mwr.set_option("mwefile:"+ (data_dir / "fix_gap_mwe.xml").string());

    // Skip the first three sentences.
    mwr.get_next_sentence();
    mwr.get_next_sentence();
    mwr.get_next_sentence();

    Corpus2::Sentence::Ptr s4 = mwr.get_next_sentence();
    // Abort this case cleanly instead of dereferencing a null sentence.
    BOOST_REQUIRE(s4);

    Corpus2::Token* mwu = s4->operator[](27);
    // The expected orth previously read "instumentów" (missing "r"), which
    // can never match the corpus word "instrumentów".
    BOOST_CHECK(mwu->orth_utf8() == "instrumentów muzycznych");
    BOOST_CHECK(mwu->get_preferred_lexeme(tset).lemma_utf8() == "instrument muzyczny");
}
// Flexible-order MWE with a gap: the fifth and sixth sentences must both
// yield, at token index 27, a unit with the lemma "praca naukowa", with the
// orth "pracy naukowej" and "naukowej pracy" respectively.
// NOTE(review): this case loads "flex_mwe.xml" although the fix_gap case
// loads a dedicated "fix_gap_mwe.xml"; confirm the file name is not a
// copy-paste slip.
BOOST_FIXTURE_TEST_CASE( flex_gap , Fixture)
{
    BOOST_MESSAGE("=====================\ntest: finding flex_gap mwe");

    const Corpus2::Tagset& kipi = Corpus2::get_named_tagset("kipi");
    const std::string mwe_path = (data_dir / "flex_mwe.xml").string();

    Corpus2::MWEReader reader(kipi, test_corpus.string());
    reader.set_option("inner:xces");
    reader.set_option("mwefile:" + mwe_path);

    // Skip the first four sentences.
    for (int skipped = 0; skipped < 4; ++skipped) {
        reader.get_next_sentence();
    }
    Corpus2::Sentence::Ptr fifth = reader.get_next_sentence();
    Corpus2::Sentence::Ptr sixth = reader.get_next_sentence();

    Corpus2::Token* straight = (*fifth)[27];
    BOOST_CHECK(straight->orth_utf8() == "pracy naukowej");
    BOOST_CHECK(straight->get_preferred_lexeme(kipi).lemma_utf8() == "praca naukowa");

    Corpus2::Token* swapped = (*sixth)[27];
    BOOST_CHECK(swapped->orth_utf8() == "naukowej pracy");
    BOOST_CHECK(swapped->get_preferred_lexeme(kipi).lemma_utf8() == "praca naukowa");
}
BOOST_AUTO_TEST_SUITE_END()
#include <boost/test/unit_test.hpp>
#include <boost/filesystem.hpp>
#include "libmwereader/mwe.h"
#include "libmwereader/mweparser.h"
// Placeholder suite for the MWE parser; it contains no test cases yet.
BOOST_AUTO_TEST_SUITE( mwe_parser );
BOOST_AUTO_TEST_SUITE_END()
#include <boost/test/unit_test.hpp>
#include <boost/filesystem.hpp>
#include "libmwereader/mwe.h"
#include "libmwereader/mweparser.h"
#include "libmwereader/mwereader.h"
// Placeholder suite for the MWE reader; it contains no test cases yet.
BOOST_AUTO_TEST_SUITE( mwe_reader);
BOOST_AUTO_TEST_SUITE_END()
......@@ -54,4 +54,6 @@ include_directories(${Boost_INCLUDE_DIR})
link_directories(${Boost_LIBRARY_DIRS})

# `make test` runs the `tests` command (presumably an executable target
# defined outside this view) and first builds/runs the MWE reader suite
# through the `mwtest` target.
add_custom_target(test tests)
add_dependencies(test mwtest)

# Verbose variant, defined exactly once: a second add_custom_target() with
# the same name is a configure error. It invokes the `tests` command itself;
# `test` is a custom target name, not an executable, so using it as the
# command would run the shell's `test` utility instead of the suite.
add_custom_target(test-verbose tests --log_level=message)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment