From ffbcfed6f8194257761ee243c1cbe892c668f438 Mon Sep 17 00:00:00 2001 From: Adam Wardynski <award@.(B-4.4.46a)> Date: Thu, 2 Dec 2010 17:58:46 +0100 Subject: [PATCH] GetWordClass - to get word class/part-of-speech tags It is called "flex" atm but it is subject to change. --- libwccl/ops/functions/tset/getwordclass.h | 37 ++++++++++++++++++ tests/getsymbols.cpp | 46 +++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 libwccl/ops/functions/tset/getwordclass.h diff --git a/libwccl/ops/functions/tset/getwordclass.h b/libwccl/ops/functions/tset/getwordclass.h new file mode 100644 index 0000000..57bd6bd --- /dev/null +++ b/libwccl/ops/functions/tset/getwordclass.h @@ -0,0 +1,37 @@ +#ifndef LIBWCCL_OPS_FUNCTIONS_TSET_GETWORDCLASS_H +#define LIBWCCL_OPS_FUNCTIONS_TSET_GETWORDCLASS_H + +#include <libwccl/ops/functions/tset/getsymbols.h> + +namespace Wccl { + +/** + * Operator that gets word class/part-of-speech symbols from a token at the given position. + * It is a GetSymbols constructed with Corpus2::Tag(-1). NOTE(review): presumably Tag(-1) selects every word-class symbol -- confirm against GetSymbols and Corpus2::Tag. + */ +class GetWordClass : public GetSymbols { +public: + typedef boost::shared_ptr<Function<Position> > PosFunctionPtr; + + GetWordClass(const PosFunctionPtr& pos_expr) /* pos_expr is forwarded to GetSymbols together with Corpus2::Tag(-1) */ + : GetSymbols(Corpus2::Tag(-1), pos_expr) + { + } + + /** + * @returns Name of the operator, "flex"; the tagset argument is not used + */ + std::string name(const Corpus2::Tagset&) const { + return "flex"; + } + /** + * @returns Raw name of the operator, also "flex"; this variant takes no tagset + */ + std::string raw_name() const { + return "flex"; + } +}; + +} /* end ns Wccl */ + +#endif // LIBWCCL_OPS_FUNCTIONS_TSET_GETWORDCLASS_H diff --git a/tests/getsymbols.cpp b/tests/getsymbols.cpp index ad5ac6a..8fb6fed 100644 --- a/tests/getsymbols.cpp +++ b/tests/getsymbols.cpp @@ -6,6 +6,7 @@ #include <libwccl/ops/functions/constant.h> #include <libwccl/ops/functions/tset/getsymbols.h> +#include <libwccl/ops/functions/tset/getwordclass.h> using namespace Wccl; @@ -73,6 +74,13 @@ BOOST_FIXTURE_TEST_CASE(symbols_nowhere, SymbolsFix) BOOST_CHECK(s3.apply(cx)->equals(empty)); } 
+BOOST_FIXTURE_TEST_CASE(symbols_outside, SymbolsFix) /* GetSymbols over gnd at constant Position(-1) must equal the fixture value `empty`; presumably -1 lies outside the sentence -- confirm against SymbolsFix */ +{ + GetSymbols symbols(gnd, boost::shared_ptr<Function<Position> >( + new Constant<Position>(Position(-1)))); + BOOST_CHECK(symbols.apply(cx)->equals(empty)); +} + BOOST_FIXTURE_TEST_CASE(get_gnd, SymbolsFix) { GetSymbols symbols(gnd, pos_zero_constant); @@ -113,5 +121,43 @@ BOOST_FIXTURE_TEST_CASE(get_pos, SymbolsFix) BOOST_CHECK_EQUAL(symbols.apply(cx)->to_string(tagset), "{}"); } +BOOST_FIXTURE_TEST_CASE(get_symbols_to_string, SymbolsFix) /* to_string(tagset) of GetSymbols over nmb must render as "nmb[0]" */ +{ + GetSymbols symbols(nmb, pos_zero_constant); + BOOST_CHECK_EQUAL("nmb[0]", symbols.to_string(tagset)); +} + +BOOST_FIXTURE_TEST_CASE(get_symbols_to_raw_string, SymbolsFix) /* to_raw_string() must equal nmb.raw_dump() followed by "[0]" */ +{ + GetSymbols symbols(nmb, pos_zero_constant); + std::string expected = nmb.raw_dump() + "[0]"; + BOOST_CHECK_EQUAL(expected, symbols.to_raw_string()); +} + +// +// ---------- GetWordClass cases ---------- +// + +BOOST_FIXTURE_TEST_CASE(get_word_class, SymbolsFix) /* applies GetWordClass with pos_zero_constant, calling sc.advance() between checks; expects {subst}, then {adja,prep,subst}, then {} */ +{ + GetWordClass wclass(pos_zero_constant); + BOOST_CHECK_EQUAL(wclass.apply(cx)->to_string(tagset), "{subst}"); + sc.advance(); + BOOST_CHECK_EQUAL(wclass.apply(cx)->to_string(tagset), "{adja,prep,subst}"); + sc.advance(); + BOOST_CHECK_EQUAL(wclass.apply(cx)->to_string(tagset), "{}"); +} + +BOOST_FIXTURE_TEST_CASE(get_word_class_to_string, SymbolsFix) /* to_string(tagset) of GetWordClass must render as "flex[0]" */ +{ + GetWordClass wclass(pos_zero_constant); + BOOST_CHECK_EQUAL("flex[0]", wclass.to_string(tagset)); +} + +BOOST_FIXTURE_TEST_CASE(get_word_class_to_raw_string, SymbolsFix) /* to_raw_string() of GetWordClass must also render as "flex[0]" */ +{ + GetWordClass wclass(pos_zero_constant); + BOOST_CHECK_EQUAL("flex[0]", wclass.to_raw_string()); +} BOOST_AUTO_TEST_SUITE_END() -- GitLab