From 098e822150c30b9b3335a3a26190ed0d1f168f79 Mon Sep 17 00:00:00 2001
From: Adam Wardynski <award@.(win7-laptop)>
Date: Tue, 5 Apr 2011 12:48:11 +0200
Subject: [PATCH] first(), last() ops: Position of first/last token of a Match.

---
 libwccl/CMakeLists.txt                        |  2 +
 libwccl/ops/functions/position/firsttoken.cpp | 39 +++++++++++++
 libwccl/ops/functions/position/firsttoken.h   | 56 +++++++++++++++++++
 libwccl/ops/functions/position/lasttoken.cpp  | 38 +++++++++++++
 libwccl/ops/functions/position/lasttoken.h    | 56 +++++++++++++++++++
 5 files changed, 191 insertions(+)
 create mode 100644 libwccl/ops/functions/position/firsttoken.cpp
 create mode 100644 libwccl/ops/functions/position/firsttoken.h
 create mode 100644 libwccl/ops/functions/position/lasttoken.cpp
 create mode 100644 libwccl/ops/functions/position/lasttoken.h

diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 4e2017b..fd26bba 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -44,6 +44,8 @@ SET(libwccl_STAT_SRC
 	ops/functions/bool/predicates/regex.cpp
 	ops/functions/bool/predicates/strongagreement.cpp
 	ops/functions/bool/predicates/weakagreement.cpp
+	ops/functions/position/firsttoken.cpp
+	ops/functions/position/lasttoken.cpp
 	ops/functions/position/relativeposition.cpp
 	ops/functions/strset/affix.cpp
 	ops/functions/strset/getlemmas.cpp
diff --git a/libwccl/ops/functions/position/firsttoken.cpp b/libwccl/ops/functions/position/firsttoken.cpp
new file mode 100644
index 0000000..8c90075
--- /dev/null
+++ b/libwccl/ops/functions/position/firsttoken.cpp
@@ -0,0 +1,39 @@
+#include <libwccl/ops/functions/position/firsttoken.h>
+#include <libwccl/ops/functions/constant.h>
+#include <libwccl/exception.h>
+
+#include <sstream>
+
+namespace Wccl {
+/** Evaluates the Match argument and returns the Position of its first token. */
+FirstToken::BaseRetValPtr FirstToken::apply_internal(
+	const FunExecContext &context) const
+{
+	boost::shared_ptr<Corpus2::AnnotatedSentence> s =
+		boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(
+			context.sentence_context().get_sentence_ptr());
+	if (!s) { // null when the sentence is absent or is not an AnnotatedSentence
+		throw InvalidArgument(
+			"context",
+			"Supplied context does not have valid Corpus2::AnnotatedSentence.");
+	}
+	const Function<Match>::RetValPtr match = match_expr_->apply(context);
+	if(match->empty()) { // no tokens to point at: use the default Position function's result
+		return detail::DefaultFunction<Position>()->apply(context);
+	}
+	return boost::make_shared<Position>(match->first_token(s));
+}
+/** Renders name(tagset) followed by the argument's to_string(tagset) in parentheses. */
+std::string FirstToken::to_string(const Corpus2::Tagset &tagset) const
+{
+	std::ostringstream ostream;
+	ostream << name(tagset) << "(" << match_expr_->to_string(tagset) << ")";
+	return ostream.str();
+}
+/** Streams raw_name() followed by the argument expression in parentheses; takes no tagset. */
+std::ostream& FirstToken::write_to(std::ostream& ostream) const
+{
+	return ostream << raw_name() << "(" << *match_expr_ << ")";
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/functions/position/firsttoken.h b/libwccl/ops/functions/position/firsttoken.h
new file mode 100644
index 0000000..3081574
--- /dev/null
+++ b/libwccl/ops/functions/position/firsttoken.h
@@ -0,0 +1,56 @@
+#ifndef LIBWCCL_OPS_FUNCTIONS_POSITION_FIRSTTOKEN_H
+#define LIBWCCL_OPS_FUNCTIONS_POSITION_FIRSTTOKEN_H
+
+#include <libwccl/ops/function.h>
+#include <libwccl/values/match.h>
+
+namespace Wccl {
+
+/**
+ * Operator that takes a Match and returns the Position of the first token
+ * within the given Match.
+ */
+class FirstToken : public Function<Position>
+{
+public:
+	typedef boost::shared_ptr<Function<Match> > MatchFunctionPtr;
+	/** Stores the Match-valued argument expression; asserted to be non-null. */
+	FirstToken(const MatchFunctionPtr& match_expr)
+		: match_expr_(match_expr)
+	{
+		BOOST_ASSERT(match_expr_);
+	}
+
+	/**
+	 * @returns String representation: name(tagset), then the argument's to_string(tagset) in parentheses.
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+	/**
+	 * @returns Raw name of the function, "first".
+	 */
+	std::string raw_name() const {
+		return "first";
+	}
+
+protected:
+	const MatchFunctionPtr match_expr_; ///< Expression that yields the Match argument.
+
+	/**
+	 * Writes raw_name(), then the argument expression in parentheses, to the stream.
+	 * @returns Stream written to.
+	 */
+	std::ostream& write_to(std::ostream& ostream) const;
+
+	/**
+	 * Evaluates the argument expression and returns the Position of the first token of its Match.
+	 * @returns Position corresponding to the first token of the Match; Nowhere in case of an empty Match.
+	 * @throws InvalidArgument if the context's sentence is not a Corpus2::AnnotatedSentence; this is
+	 * checked before the Match is evaluated, so it is thrown for an empty Match too.
+	 */
+	BaseRetValPtr apply_internal(const FunExecContext& context) const;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_FUNCTIONS_POSITION_FIRSTTOKEN_H
diff --git a/libwccl/ops/functions/position/lasttoken.cpp b/libwccl/ops/functions/position/lasttoken.cpp
new file mode 100644
index 0000000..6d4ca00
--- /dev/null
+++ b/libwccl/ops/functions/position/lasttoken.cpp
@@ -0,0 +1,38 @@
+#include <libwccl/ops/functions/position/lasttoken.h>
+#include <libwccl/ops/functions/constant.h>
+#include <libwccl/exception.h>
+#include <sstream>
+
+namespace Wccl {
+/** Evaluates the Match argument and returns the Position of its last token. */
+LastToken::BaseRetValPtr LastToken::apply_internal(
+	const FunExecContext &context) const
+{
+	boost::shared_ptr<Corpus2::AnnotatedSentence> s =
+		boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(
+			context.sentence_context().get_sentence_ptr());
+	if (!s) { // null when the sentence is absent or is not an AnnotatedSentence
+		throw InvalidArgument(
+			"context",
+			"Supplied context does not have valid Corpus2::AnnotatedSentence.");
+	}
+	const Function<Match>::RetValPtr match = match_expr_->apply(context);
+	if(match->empty()) { // no tokens to point at: use the default Position function's result
+		return detail::DefaultFunction<Position>()->apply(context);
+	}
+	return boost::make_shared<Position>(match->last_token(s));
+}
+/** Renders name(tagset) followed by the argument's to_string(tagset) in parentheses. */
+std::string LastToken::to_string(const Corpus2::Tagset &tagset) const
+{
+	std::ostringstream ostream;
+	ostream << name(tagset) << "(" << match_expr_->to_string(tagset) << ")";
+	return ostream.str();
+}
+/** Streams raw_name() followed by the argument expression in parentheses; takes no tagset. */
+std::ostream& LastToken::write_to(std::ostream& ostream) const
+{
+	return ostream << raw_name() << "(" << *match_expr_ << ")";
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/functions/position/lasttoken.h b/libwccl/ops/functions/position/lasttoken.h
new file mode 100644
index 0000000..995949e
--- /dev/null
+++ b/libwccl/ops/functions/position/lasttoken.h
@@ -0,0 +1,56 @@
+#ifndef LIBWCCL_OPS_FUNCTIONS_POSITION_LASTTOKEN_H
+#define LIBWCCL_OPS_FUNCTIONS_POSITION_LASTTOKEN_H
+
+#include <libwccl/ops/function.h>
+#include <libwccl/values/match.h>
+
+namespace Wccl {
+
+/**
+ * Operator that takes a Match and returns the Position of the last token
+ * within the given Match.
+ */
+class LastToken : public Function<Position>
+{
+public:
+	typedef boost::shared_ptr<Function<Match> > MatchFunctionPtr;
+	/** Stores the Match-valued argument expression; asserted to be non-null. */
+	LastToken(const MatchFunctionPtr& match_expr)
+		: match_expr_(match_expr)
+	{
+		BOOST_ASSERT(match_expr_);
+	}
+
+	/**
+	 * @returns String representation: name(tagset), then the argument's to_string(tagset) in parentheses.
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+	/**
+	 * @returns Raw name of the function, "last".
+	 */
+	std::string raw_name() const {
+		return "last";
+	}
+
+protected:
+	const MatchFunctionPtr match_expr_; ///< Expression that yields the Match argument.
+
+	/**
+	 * Writes raw_name(), then the argument expression in parentheses, to the stream.
+	 * @returns Stream written to.
+	 */
+	std::ostream& write_to(std::ostream& ostream) const;
+
+	/**
+	 * Evaluates the argument expression and returns the Position of the last token of its Match.
+	 * @returns Position corresponding to the last token of the Match; Nowhere in case of an empty Match.
+	 * @throws InvalidArgument if the context's sentence is not a Corpus2::AnnotatedSentence; this is
+	 * checked before the Match is evaluated, so it is thrown for an empty Match too.
+	 */
+	BaseRetValPtr apply_internal(const FunExecContext& context) const;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_FUNCTIONS_POSITION_LASTTOKEN_H
-- 
GitLab