Skip to content
Snippets Groups Projects
Commit 75c79a2f authored by Adam Wardynski's avatar Adam Wardynski
Browse files

GetLemmas, the base[pos] operator realisation.

parent ccb85ebf
No related merge requests found
......@@ -36,6 +36,7 @@ SET(libwccl_STAT_SRC
ops/functions/bool/predicates/regex.cpp
ops/functions/position/relativeposition.cpp
ops/functions/strset/affix.cpp
ops/functions/strset/getlemmas.cpp
ops/functions/strset/getorth.cpp
ops/functions/strset/tolower.cpp
ops/functions/strset/toupper.cpp
......
#include <libwccl/ops/functions/strset/getlemmas.h>
#include <libwccl/ops/formatters.h>
#include <libwccl/ops/functions/constant.h>
namespace Wccl {
/**
 * Renders this operator followed by its position argument enclosed in
 * square brackets, delegating the formatting to UnaryFunctionFormatter.
 */
std::string GetLemmas::to_string(const Corpus2::Tagset& tagset) const
{
    std::string rendered =
        UnaryFunctionFormatter::to_string(tagset, *this, *pos_expr_, "[", "]");
    return rendered;
}
/**
 * Tagset-free counterpart of to_string(): renders this operator followed
 * by its position argument enclosed in square brackets.
 */
std::string GetLemmas::to_raw_string() const
{
    std::string rendered =
        UnaryFunctionFormatter::to_raw_string(*this, *pos_expr_, "[", "]");
    return rendered;
}
/**
 * Evaluates the position argument and collects the lemmas of every lexeme
 * of the token found there. When the position falls outside the sentence,
 * or the current position of the sentence context is itself outside the
 * sentence, the default StrSet value is returned instead.
 */
GetLemmas::BaseRetValPtr GetLemmas::apply_internal(const FunExecContext& context) const
{
    const boost::shared_ptr<const Position>& target = pos_expr_->apply(context);
    const SentenceContext& sentence_ctx = context.sentence_context();

    if (!target->is_outside(sentence_ctx) && sentence_ctx.is_current_inside()) {
        boost::shared_ptr<StrSet> lemmas = boost::make_shared<StrSet>();
        foreach (const Corpus2::Lexeme& lex, sentence_ctx.at(*target)->lexemes()) {
            lemmas->insert(lex.lemma());
        }
        return lemmas;
    }

    // Nothing to read from: fall back to the default value for StrSet.
    return detail::DefaultFunction<StrSet>()->apply(context);
}
} /* end ns Wccl */
#ifndef LIBWCCL_OPS_FUNCTIONS_STRSET_GETBASE_H
#define LIBWCCL_OPS_FUNCTIONS_STRSET_GETBASE_H
#include <libwccl/values/strset.h>
#include <libwccl/values/position.h>
#include <libwccl/ops/function.h>
namespace Wccl {
/**
* Operator that takes a position, gets word pointed by the
* position and returns the lemmas of the word.
* Returns empty string set if position pointed outside of
* the sentence boundaries.
*/
class GetLemmas : public Function<StrSet> {
public:
/** Shared pointer to the function that yields the position argument. */
typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
/**
* Creates the operator over the given position expression.
* @param pos_expr expression evaluated to obtain the position;
* asserted to be non-null.
*/
GetLemmas(const PosFunctionPtr& pos_expr)
: pos_expr_(pos_expr)
{
BOOST_ASSERT(pos_expr_);
}
/**
* @returns String representation of the function in the form of:
* "base[pos_expr_string]"
*/
std::string to_string(const Corpus2::Tagset& tagset) const;
/**
* @returns String representation of the function in the form of:
* "base[pos_expr_raw_string]"
* @note This version does not require tagset, but may be incomplete
* and/or contain internal info.
*/
std::string to_raw_string() const;
/**
* @returns Name of the function: "base"
*/
std::string raw_name() const {
return "base";
}
protected:
/** Expression providing the position whose lemmas are returned. */
const PosFunctionPtr pos_expr_;
/**
* Gets a position from the argument expression, then gets
* word at that position from Sentence in the SentenceContext,
* then gets the lemmas of all lexemes of the word and returns them.
* @returns Lemmas of the word pointed to, if both the position and the
* current position of the SentenceContext lie within boundaries of the
* Sentence. The default StrSet value otherwise (the unit tests expect
* it to be an empty string set).
*/
BaseRetValPtr apply_internal(const FunExecContext& context) const;
};
} /* end ns Wccl */
#endif // LIBWCCL_OPS_FUNCTIONS_STRSET_GETBASE_H
......@@ -8,6 +8,7 @@ add_executable(tests
conditional.cpp
constant.cpp
context.cpp
getlemmas.cpp
getorth.cpp
logicalpredicates.cpp
main.cpp
......
#include <boost/test/unit_test.hpp>
#include <boost/bind.hpp>
#include <boost/shared_ptr.hpp>
#include <libcorpus2/sentence.h>
#include <libwccl/ops/functions/constant.h>
#include <libwccl/ops/functions/strset/getlemmas.h>
using namespace Wccl;
BOOST_AUTO_TEST_SUITE(relative_position)
/**
 * Fixture shared by the GetLemmas test cases.
 *
 * Builds a two-token sentence: token "One" with lemmas {"aaa", "bbb"} and
 * token "Two" with lemmas {"ccc", "ddd"}. Also provides constant position
 * functions (0, 1, -1, nowhere, begin, end) and the string sets the test
 * cases compare against.
 */
struct LemmasPredFix
{
    LemmasPredFix()
        : s(boost::make_shared<Corpus2::Sentence>()),
          sc(s),
          tagset(),
          cx(sc, boost::make_shared<Variables>()),
          pos_zero(0),
          pos_one(1),
          pos_minus_one(-1),
          nowhere(Position::Nowhere),
          begin(Position::Begin),
          end(Position::End),
          pos_zero_constant(new Constant<Position>(pos_zero)),
          pos_one_constant(new Constant<Position>(pos_one)),
          pos_minus_one_constant(new Constant<Position>(pos_minus_one)),
          nowhere_constant(new Constant<Position>(nowhere)),
          begin_constant(new Constant<Position>(begin)),
          end_constant(new Constant<Position>(end)),
          empty_set(),
          first_lemmas(),
          second_lemmas()
    {
        // Expected results for the first and the second token.
        first_lemmas.insert("aaa");
        first_lemmas.insert("bbb");
        second_lemmas.insert("ccc");
        second_lemmas.insert("ddd");

        // The sentence itself; tokens are handed over to it via append().
        s->append(make_token("One", "aaa", "bbb"));
        s->append(make_token("Two", "ccc", "ddd"));
    }

    /**
     * Allocates a token with the given orth and two lexemes that carry
     * the given lemmas and a tag built from a zero mask.
     */
    static Corpus2::Token* make_token(
        const char* orth, const char* lemma_a, const char* lemma_b)
    {
        Corpus2::Token* token = new Corpus2::Token(
            orth,
            PwrNlp::Whitespace::ManySpaces);
        Corpus2::Tag zero_tag(Corpus2::mask_t(0));
        Corpus2::Lexeme lexeme_a(lemma_a, zero_tag);
        Corpus2::Lexeme lexeme_b(lemma_b, zero_tag);
        token->add_lexeme(lexeme_a);
        token->add_lexeme(lexeme_b);
        return token;
    }

    // Sentence under test and the contexts wrapped around it.
    boost::shared_ptr<Corpus2::Sentence> s;
    SentenceContext sc;
    Corpus2::Tagset tagset;
    FunExecContext cx;

    // Position values used to build the constant functions below.
    Position pos_zero;
    Position pos_one;
    Position pos_minus_one;
    Position nowhere;
    Position begin;
    Position end;

    // Constant position functions passed to GetLemmas.
    boost::shared_ptr<Function<Position> > pos_zero_constant;
    boost::shared_ptr<Function<Position> > pos_one_constant;
    boost::shared_ptr<Function<Position> > pos_minus_one_constant;
    boost::shared_ptr<Function<Position> > nowhere_constant;
    boost::shared_ptr<Function<Position> > begin_constant;
    boost::shared_ptr<Function<Position> > end_constant;

    // Expected string sets.
    StrSet empty_set;
    StrSet first_lemmas;
    StrSet second_lemmas;
};
// "nowhere" is expected to yield the empty set at every current position.
BOOST_FIXTURE_TEST_CASE(lemmas_nowhere, LemmasPredFix)
{
    GetLemmas base_op(nowhere_constant);

    // Current position: first token.
    BOOST_CHECK(base_op.apply(cx)->equals(empty_set));

    // Current position: second token.
    sc.advance();
    BOOST_CHECK(base_op.apply(cx)->equals(empty_set));

    // Current position: past the last token.
    sc.advance();
    BOOST_CHECK(base_op.apply(cx)->equals(empty_set));

    sc.goto_start();
}
// "begin" is expected to yield the first token's lemmas while the current
// position is inside the sentence, and the empty set once it is past the end.
BOOST_FIXTURE_TEST_CASE(lemmas_begin, LemmasPredFix)
{
    GetLemmas base_op(begin_constant);

    // Current position: first token.
    BOOST_CHECK(base_op.apply(cx)->equals(first_lemmas));

    // Current position: second token.
    sc.advance();
    BOOST_CHECK(base_op.apply(cx)->equals(first_lemmas));

    // Current position: past the last token.
    sc.advance();
    BOOST_CHECK(base_op.apply(cx)->equals(empty_set));

    sc.goto_start();
}
// "end" is expected to yield the last token's lemmas while the current
// position is inside the sentence, and the empty set once it is past the end.
BOOST_FIXTURE_TEST_CASE(lemmas_end, LemmasPredFix)
{
    GetLemmas base_op(end_constant);

    // Current position: first token.
    BOOST_CHECK(base_op.apply(cx)->equals(second_lemmas));

    // Current position: second token.
    sc.advance();
    BOOST_CHECK(base_op.apply(cx)->equals(second_lemmas));

    // Current position: past the last token.
    sc.advance();
    BOOST_CHECK(base_op.apply(cx)->equals(empty_set));

    sc.goto_start();
}
// Offset 0 is expected to yield the lemmas of the token at the current position.
BOOST_FIXTURE_TEST_CASE(lemmas_zero, LemmasPredFix)
{
    GetLemmas base_op(pos_zero_constant);

    // Current position: first token.
    BOOST_CHECK(base_op.apply(cx)->equals(first_lemmas));

    // Current position: second token.
    sc.advance();
    BOOST_CHECK(base_op.apply(cx)->equals(second_lemmas));

    // Current position: past the last token.
    sc.advance();
    BOOST_CHECK(base_op.apply(cx)->equals(empty_set));

    sc.goto_start();
}
// Offset +1 is expected to yield the lemmas of the token after the current one.
BOOST_FIXTURE_TEST_CASE(lemmas_one, LemmasPredFix)
{
    GetLemmas base_op(pos_one_constant);

    // Current position: first token, so +1 is the second token.
    BOOST_CHECK(base_op.apply(cx)->equals(second_lemmas));

    // Current position: second token, so +1 is past the end.
    sc.advance();
    BOOST_CHECK(base_op.apply(cx)->equals(empty_set));

    // Current position: past the last token.
    sc.advance();
    BOOST_CHECK(base_op.apply(cx)->equals(empty_set));

    sc.goto_start();
}
// Offset -1 is expected to yield the lemmas of the token before the current one.
BOOST_FIXTURE_TEST_CASE(lemmas_minus_one, LemmasPredFix)
{
    GetLemmas base_op(pos_minus_one_constant);

    // Current position: first token, so -1 is before the sentence start.
    BOOST_CHECK(base_op.apply(cx)->equals(empty_set));

    // Current position: second token, so -1 is the first token.
    sc.advance();
    BOOST_CHECK(base_op.apply(cx)->equals(first_lemmas));

    // Current position: past the last token.
    sc.advance();
    BOOST_CHECK(base_op.apply(cx)->equals(empty_set));

    sc.goto_start();
}
//------ to_string test cases -------
// The tagset-aware rendering is expected to be "base[<position>]".
BOOST_FIXTURE_TEST_CASE(lemmas_to_string, LemmasPredFix)
{
    GetLemmas base_op(begin_constant);
    BOOST_CHECK_EQUAL("base[begin]", base_op.to_string(tagset));
}
// Checks the tagset-free rendering. This case previously called
// to_string(tagset), which left to_raw_string() untested; it now calls
// to_raw_string() and expects the same "base[<position>]" form.
BOOST_FIXTURE_TEST_CASE(lemmas_to_raw_string, LemmasPredFix)
{
    GetLemmas lemmas(end_constant);
    BOOST_CHECK_EQUAL("base[end]", lemmas.to_raw_string());
    GetLemmas lemmas2(pos_minus_one_constant);
    BOOST_CHECK_EQUAL("base[-1]", lemmas2.to_raw_string());
}
BOOST_AUTO_TEST_SUITE_END()
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment