diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index d9cf19efb2f0b2befcc72f1b4381d8a31fa25c53..70729be5912b3fa4fa4c0aff6676e8c1c63240db 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -36,6 +36,7 @@ SET(libwccl_STAT_SRC
 	ops/functions/bool/predicates/regex.cpp
 	ops/functions/position/relativeposition.cpp
 	ops/functions/strset/affix.cpp
+	ops/functions/strset/getorth.cpp
 	ops/functions/strset/tolower.cpp
 	ops/functions/strset/toupper.cpp
 	parser/grammar.g
diff --git a/libwccl/ops/functions/strset/getorth.cpp b/libwccl/ops/functions/strset/getorth.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3bddb13612cfc761af0e3405272cad087a1f62f3
--- /dev/null
+++ b/libwccl/ops/functions/strset/getorth.cpp
@@ -0,0 +1,32 @@
+#include <libwccl/ops/functions/strset/getorth.h>
+#include <libwccl/ops/formatters.h>
+#include <libwccl/ops/functions/constant.h>
+#include <boost/make_shared.hpp>
+
+namespace Wccl {
+
+std::string GetOrth::to_string(const Corpus2::Tagset& tagset) const
+{
+	return UnaryFunctionFormatter::to_string(tagset, *this, *pos_expr_, "[", "]");
+}
+
+std::string GetOrth::to_raw_string() const {
+	return UnaryFunctionFormatter::to_raw_string(*this, *pos_expr_, "[", "]");
+}
+
+GetOrth::BaseRetValPtr GetOrth::apply_internal(const FunExecContext& context) const
+{
+	const boost::shared_ptr<const Position>& pos = pos_expr_->apply(context);
+	const SentenceContext& sc = context.sentence_context();
+	// No token can be fetched when either the requested position or the
+	// current position lies outside the sentence; fall back to the default.
+	if(pos->is_outside(sc) || !sc.is_current_inside()) {
+		return detail::DefaultFunction<StrSet>()->apply(context);
+	}
+	// The result is a one-element set holding the orth of the token found.
+	boost::shared_ptr<StrSet> u_set = boost::make_shared<StrSet>();
+	u_set->insert(sc.at(*pos)->orth());
+	return u_set;
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/functions/strset/getorth.h b/libwccl/ops/functions/strset/getorth.h
new file mode 100644
index 0000000000000000000000000000000000000000..5a1e670dc48d8f3837e6dba4bdfda43beadc987d
--- /dev/null
+++ b/libwccl/ops/functions/strset/getorth.h
@@ -0,0 +1,68 @@
+#ifndef LIBWCCL_OPS_FUNCTIONS_STRSET_GETORTH_H
+#define LIBWCCL_OPS_FUNCTIONS_STRSET_GETORTH_H
+
+#include <libwccl/values/strset.h>
+#include <libwccl/values/position.h>
+#include <libwccl/ops/function.h>
+
+namespace Wccl {
+
+/**
+ * Operator that takes a position, gets word pointed by the
+ * position and returns the orthographical form of the word
+ * (as it was originally written in the sentence).
+ * Returns empty string set if the position points outside of
+ * the sentence boundaries or the current position is outside.
+ */
+class GetOrth : public Function<StrSet> {
+public:
+	typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
+
+	/**
+	 * @param pos_expr Expression yielding the position of the word
+	 * whose orth is requested; must not be null.
+	 */
+	GetOrth(const PosFunctionPtr& pos_expr)
+		: pos_expr_(pos_expr)
+	{
+		BOOST_ASSERT(pos_expr_);
+	}
+
+	/**
+	 * @returns String representation of the function in the form of:
+	 * "orth[pos_expr_string]"
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+	/**
+	 * @returns String representation of the function in the form of:
+	 * "orth[pos_expr_raw_string]"
+	 * @note This version does not require tagset, but may be incomplete
+	 * and/or contain internal info.
+	 */
+	std::string to_raw_string() const;
+
+	/**
+	 * @returns Name of the function: "orth"
+	 */
+	std::string raw_name() const {
+		return "orth";
+	}
+protected:
+	const PosFunctionPtr pos_expr_;
+
+	/**
+	 * Gets a position from the argument expression, then gets
+	 * word at that position from Sentence in the SentenceContext,
+	 * then gets the orthographical form of the word (as it was
+	 * originally written in the sentence).
+	 * @returns Orthographical form of the word if position pointed to
+	 * lies within boundaries of the Sentence. Empty string set otherwise,
+	 * and also when the current position is outside of the Sentence.
+	 */
+	BaseRetValPtr apply_internal(const FunExecContext& context) const;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_FUNCTIONS_STRSET_GETORTH_H
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index d965a98803a0f21a4e099c2827cc7792dc86b1ff..b2e582a82be446ccb304d1ef76c9e02b70d28823 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -8,6 +8,7 @@ add_executable(tests
 	conditional.cpp
 	constant.cpp
 	context.cpp
+	getorth.cpp
 	logicalpredicates.cpp
 	main.cpp
 	position.cpp
diff --git a/tests/getorth.cpp b/tests/getorth.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..947e9bb5e1e16f217efb5513828079df0846ad8a
--- /dev/null
+++ b/tests/getorth.cpp
@@ -0,0 +1,174 @@
+#include <boost/test/unit_test.hpp>
+#include <boost/bind.hpp>
+#include <boost/make_shared.hpp>
+#include <boost/shared_ptr.hpp>
+#include <libcorpus2/sentence.h>
+
+#include <libwccl/ops/functions/constant.h>
+#include <libwccl/ops/functions/strset/getorth.h>
+
+using namespace Wccl;
+
+BOOST_AUTO_TEST_SUITE(get_orth)
+
+/**
+ * Fixture with a two-token sentence ("One", "two") and constant
+ * position expressions used as arguments for GetOrth.
+ */
+struct OrthPredFix
+{
+	OrthPredFix()
+		: s(boost::make_shared<Corpus2::Sentence>()),
+		sc(s),
+		tagset(),
+		cx(sc, boost::make_shared<Variables>()),
+		pos_zero(0),
+		pos_one(1),
+		pos_minus_one(-1),
+		nowhere(Position::Nowhere),
+		begin(Position::Begin),
+		end(Position::End),
+		pos_zero_constant(new Constant<Position>(pos_zero)),
+		pos_one_constant(new Constant<Position>(pos_one)),
+		pos_minus_one_constant(new Constant<Position>(pos_minus_one)),
+		nowhere_constant(new Constant<Position>(nowhere)),
+		begin_constant(new Constant<Position>(begin)),
+		end_constant(new Constant<Position>(end)),
+		empty_set(),
+		first_orth(),
+		second_orth()
+	{
+		first_orth.insert("One");
+		second_orth.insert("two");
+		Corpus2::Token* the_token = new Corpus2::Token(
+				*first_orth.contents().begin(),
+				PwrNlp::Whitespace::ManySpaces);
+		Corpus2::Tag t1(Corpus2::mask_t(0));
+		Corpus2::Lexeme l1("aaa", t1);
+		Corpus2::Lexeme l2("bbb", t1);
+		the_token->add_lexeme(l1);
+		the_token->add_lexeme(l2);
+		s->append(the_token);
+		Corpus2::Token* another_token = new Corpus2::Token(
+				*second_orth.contents().begin(),
+				PwrNlp::Whitespace::ManySpaces);
+		Corpus2::Tag t2(Corpus2::mask_t(0));
+		Corpus2::Lexeme l3("ccc", t2);
+		Corpus2::Lexeme l4("ddd", t2);
+		another_token->add_lexeme(l3);
+		another_token->add_lexeme(l4);
+		s->append(another_token);
+	}
+
+	boost::shared_ptr<Corpus2::Sentence> s;
+	SentenceContext sc;
+	Corpus2::Tagset tagset;
+
+	FunExecContext cx;
+	Position pos_zero;
+	Position pos_one;
+	Position pos_minus_one;
+	Position nowhere;
+	Position begin;
+	Position end;
+	boost::shared_ptr<Function<Position> > pos_zero_constant;
+	boost::shared_ptr<Function<Position> > pos_one_constant;
+	boost::shared_ptr<Function<Position> > pos_minus_one_constant;
+	boost::shared_ptr<Function<Position> > nowhere_constant;
+	boost::shared_ptr<Function<Position> > begin_constant;
+	boost::shared_ptr<Function<Position> > end_constant;
+	StrSet empty_set;
+	StrSet first_orth;
+	StrSet second_orth;
+
+};
+
+// "nowhere" is expected to give an empty set at every current position.
+BOOST_FIXTURE_TEST_CASE(orth_nowhere, OrthPredFix)
+{
+	GetOrth orth(nowhere_constant);
+	BOOST_CHECK(orth.apply(cx)->equals(empty_set));
+	sc.advance();
+	BOOST_CHECK(orth.apply(cx)->equals(empty_set));
+	sc.advance();
+	BOOST_CHECK(orth.apply(cx)->equals(empty_set));
+	sc.goto_start();
+}
+
+// "begin" gives the first orth until the context is advanced twice.
+BOOST_FIXTURE_TEST_CASE(orth_begin, OrthPredFix)
+{
+	GetOrth orth(begin_constant);
+	BOOST_CHECK(orth.apply(cx)->equals(first_orth));
+	sc.advance();
+	BOOST_CHECK(orth.apply(cx)->equals(first_orth));
+	sc.advance();
+	BOOST_CHECK(orth.apply(cx)->equals(empty_set));
+	sc.goto_start();
+}
+
+// "end" gives the second orth until the context is advanced twice.
+BOOST_FIXTURE_TEST_CASE(orth_end, OrthPredFix)
+{
+	GetOrth orth(end_constant);
+	BOOST_CHECK(orth.apply(cx)->equals(second_orth));
+	sc.advance();
+	BOOST_CHECK(orth.apply(cx)->equals(second_orth));
+	sc.advance();
+	BOOST_CHECK(orth.apply(cx)->equals(empty_set));
+	sc.goto_start();
+}
+
+// Position 0 is expected to give the orth of the token the context is at.
+BOOST_FIXTURE_TEST_CASE(orth_zero, OrthPredFix)
+{
+	GetOrth orth(pos_zero_constant);
+	BOOST_CHECK(orth.apply(cx)->equals(first_orth));
+	sc.advance();
+	BOOST_CHECK(orth.apply(cx)->equals(second_orth));
+	sc.advance();
+	BOOST_CHECK(orth.apply(cx)->equals(empty_set));
+	sc.goto_start();
+}
+
+// Position 1 is expected to give the orth one token ahead, if there is one.
+BOOST_FIXTURE_TEST_CASE(orth_one, OrthPredFix)
+{
+	GetOrth orth(pos_one_constant);
+	BOOST_CHECK(orth.apply(cx)->equals(second_orth));
+	sc.advance();
+	BOOST_CHECK(orth.apply(cx)->equals(empty_set));
+	sc.advance();
+	BOOST_CHECK(orth.apply(cx)->equals(empty_set));
+	sc.goto_start();
+}
+
+// Position -1 is expected to give the orth one token back, if there is one.
+BOOST_FIXTURE_TEST_CASE(orth_minus_one, OrthPredFix)
+{
+	GetOrth orth(pos_minus_one_constant);
+	BOOST_CHECK(orth.apply(cx)->equals(empty_set));
+	sc.advance();
+	BOOST_CHECK(orth.apply(cx)->equals(first_orth));
+	sc.advance();
+	BOOST_CHECK(orth.apply(cx)->equals(empty_set));
+	sc.goto_start();
+}
+//------ to_string test cases -------
+
+// Checks the tagset-aware string form: name followed by "[argument]".
+BOOST_FIXTURE_TEST_CASE(orth_to_string, OrthPredFix)
+{
+	GetOrth orth(begin_constant);
+	BOOST_CHECK_EQUAL("orth[begin]", orth.to_string(tagset));
+}
+
+// Checks the tagset-free string form by calling to_raw_string() itself.
+BOOST_FIXTURE_TEST_CASE(orth_to_raw_string, OrthPredFix)
+{
+	GetOrth orth(end_constant);
+	BOOST_CHECK_EQUAL("orth[end]", orth.to_raw_string());
+	GetOrth orth2(pos_minus_one_constant);
+	BOOST_CHECK_EQUAL("orth[-1]", orth2.to_raw_string());
+}
+BOOST_AUTO_TEST_SUITE_END()