diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt index a1d869794ff87c66b71bab02fafa6fa4cc38dbb2..c969bf7866504908bfc591e752162d76419d1d12 100644 --- a/libwccl/CMakeLists.txt +++ b/libwccl/CMakeLists.txt @@ -4,7 +4,7 @@ PROJECT(wccl) include_directories( ${CMAKE_CURRENT_BINARY_DIR}/include/ ) -find_package(Corpus2 0.1.2 REQUIRED) +find_package(Corpus2 1.0.2 REQUIRED) set(LIBS ${LIBS} ${Corpus2_LIBRARY}) find_package(PwrUtils 0.0.3 REQUIRED) @@ -40,6 +40,7 @@ SET(libwccl_STAT_SRC ops/functions/strset/getorth.cpp ops/functions/strset/tolower.cpp ops/functions/strset/toupper.cpp + ops/functions/tset/getsymbols.cpp parser/grammar.g parser/Parser.cpp parser/ParserException.cpp diff --git a/libwccl/ops/function.h b/libwccl/ops/function.h index f2e4842d3f7d0541a4f674ea697ce14ce047167e..3aa008325f83f49f4371353afb150028d4a76559 100644 --- a/libwccl/ops/function.h +++ b/libwccl/ops/function.h @@ -21,7 +21,7 @@ public: /** * @returns Name of the function. By default it is same as raw name. 
*/ - virtual const std::string name(const Corpus2::Tagset&) const { + virtual std::string name(const Corpus2::Tagset&) const { return raw_name(); } /** diff --git a/libwccl/ops/functions/tset/getsymbols.cpp b/libwccl/ops/functions/tset/getsymbols.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fe5d1fa35e48ac2390c00ea7f742d36db918d694 --- /dev/null +++ b/libwccl/ops/functions/tset/getsymbols.cpp @@ -0,0 +1,42 @@ +#include <libwccl/ops/functions/tset/getsymbols.h> +#include <libwccl/ops/formatters.h> +#include <libwccl/ops/functions/constant.h> + +namespace Wccl { + +std::string GetSymbols::to_string(const Corpus2::Tagset& tagset) const +{ + return UnaryFunctionFormatter::to_string(tagset, *this, *pos_expr_, "[", "]"); +} + +std::string GetSymbols::to_raw_string() const { + return UnaryFunctionFormatter::to_raw_string(*this, *pos_expr_, "[", "]"); +} + +std::string GetSymbols::name(const Corpus2::Tagset &tagset) const +{ + return tagset.get_attribute_name(mask_.get_values()); +} + +std::string GetSymbols::raw_name() const +{ + return mask_.raw_dump(); +} + +GetSymbols::BaseRetValPtr GetSymbols::apply_internal(const FunExecContext& context) const +{ + const boost::shared_ptr<const Position>& pos = pos_expr_->apply(context); + const SentenceContext& sc = context.sentence_context(); + if(pos->is_outside(sc) || !sc.is_current_inside()) { + return detail::DefaultFunction<TSet>()->apply(context); + } + + boost::shared_ptr<TSet> tset = boost::make_shared<TSet>(); + const Corpus2::Token* token = sc.at(*pos); + foreach (const Corpus2::Lexeme& lexeme, token->lexemes()) { + tset->combine_with(lexeme.tag()); + } + return tset; +} + +} /* end ns Wccl */ diff --git a/libwccl/ops/functions/tset/getsymbols.h b/libwccl/ops/functions/tset/getsymbols.h new file mode 100644 index 0000000000000000000000000000000000000000..2c492016924d8024d39fb52d8a413de8d43c8272 --- /dev/null +++ b/libwccl/ops/functions/tset/getsymbols.h @@ -0,0 +1,56 @@ +#ifndef 
LIBWCCL_OPS_FUNCTIONS_TSET_GETSYMBOLS_H +#define LIBWCCL_OPS_FUNCTIONS_TSET_GETSYMBOLS_H + +#include <libwccl/values/tset.h> +#include <libwccl/values/position.h> +#include <libwccl/ops/function.h> + +namespace Wccl { + +class GetSymbols : public Function<TSet> { +public: + typedef boost::shared_ptr<Function<Position> > PosFunctionPtr; + + GetSymbols(const PosFunctionPtr& pos_expr, const Corpus2::Tag& mask) + : pos_expr_(pos_expr), mask_(mask) + { + BOOST_ASSERT(pos_expr_); + } + + /** + * @returns String representation of the function in the form of: + * "attribute_name[pos_expr_string]" + */ + std::string to_string(const Corpus2::Tagset& tagset) const; + + /** + * @returns String representation of the function in the form of: + * "attribute_name[pos_expr_string]" + * @note This version does not require tagset, but will be incomplete + * and/or contain internal info. + */ + std::string to_raw_string() const; + + std::string raw_name() const; + + std::string name(const Corpus2::Tagset& tagset) const; + +protected: + const PosFunctionPtr pos_expr_; + + Corpus2::Tag mask_; + + /** + * Gets a position from the argument expression, then gets the + * word at that position from the Sentence in the SentenceContext, + * then gets the tagset symbols matching the mask of the word. + * + * @returns A tagset symbol set of the word if position pointed to + * lies within boundaries of the Sentence. Empty Tset otherwise. + */ + BaseRetValPtr apply_internal(const FunExecContext& context) const; +}; + +} /* end ns Wccl */ + +#endif // LIBWCCL_OPS_FUNCTIONS_TSET_GETSYMBOLS_H diff --git a/libwccl/sentencecontext.h b/libwccl/sentencecontext.h index 6ff68a3351077c988cc6a73d7f2fd357fddee7ca..7451d78c17bacf179c454ae02af0906a1a5028f5 100644 --- a/libwccl/sentencecontext.h +++ b/libwccl/sentencecontext.h @@ -75,7 +75,7 @@ public: /// Token access convenience function - const. 
/// Will return NULL if the passed position is not valid in this Sentence const Corpus2::Token* at(int abs_pos) const { - if (is_current_inside()) { + if (is_inside(abs_pos)) { return get_sentence()[abs_pos]; } else { return NULL; @@ -90,7 +90,7 @@ public: /// Token access convenience function. /// Will return NULL if the passed position is not valid in this Sentence Corpus2::Token* at(int abs_pos) { - if (is_current_inside()) { + if (is_inside(abs_pos)) { return get_sentence()[abs_pos]; } else { return NULL; diff --git a/libwccl/values/tset.h b/libwccl/values/tset.h index 0378628128aae708f119a46790c0a8c4de790e48..47533f126b335ddc9917daf766d76f5cd1b4c8dc 100644 --- a/libwccl/values/tset.h +++ b/libwccl/values/tset.h @@ -61,6 +61,15 @@ public: return tag_ == other.tag_; } + void combine_with(const Corpus2::Tag& other) { + tag_.combine_with(other); + } + + void combine_with(const TSet& other) { + tag_.combine_with(other.get_value()); + } + + std::string to_string(const Corpus2::Tagset &) const; std::string to_raw_string() const; diff --git a/tests/values.cpp b/tests/values.cpp index 6f9e5c391c56d3a61c65d43ab741d7748aa0fce8..d5f02c842f5fa3a531c7a2a39a254bbc01cb269c 100644 --- a/tests/values.cpp +++ b/tests/values.cpp @@ -123,8 +123,8 @@ BOOST_AUTO_TEST_CASE(position_ops) v.push_back(minus_one); v.push_back(minus_two); - for(int i = 0; i < v.size(); ++i) { - for(int j = 0; j < v.size(); ++j) { + for(size_t i = 0; i < v.size(); ++i) { + for(size_t j = 0; j < v.size(); ++j) { BOOST_CHECK_EQUAL(i == j, v[i].equals(v[j])); BOOST_CHECK_EQUAL(i == j, v[j].equals(v[i])); if(i >= 2 && j >= 2) { //nowhere, zero, one, minus_one, minus_two