diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt index 4e2017bf3e2316f3e40890c89829639ff78cd467..fd26bbabad1c768d7d76643f4460fa6ba8009c23 100644 --- a/libwccl/CMakeLists.txt +++ b/libwccl/CMakeLists.txt @@ -44,6 +44,8 @@ SET(libwccl_STAT_SRC ops/functions/bool/predicates/regex.cpp ops/functions/bool/predicates/strongagreement.cpp ops/functions/bool/predicates/weakagreement.cpp + ops/functions/position/firsttoken.cpp + ops/functions/position/lasttoken.cpp ops/functions/position/relativeposition.cpp ops/functions/strset/affix.cpp ops/functions/strset/getlemmas.cpp diff --git a/libwccl/ops/functions/position/firsttoken.cpp b/libwccl/ops/functions/position/firsttoken.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8c900756afc3c6029c85b8a75c73d4b3bdb3bf63 --- /dev/null +++ b/libwccl/ops/functions/position/firsttoken.cpp @@ -0,0 +1,39 @@ +#include <libwccl/ops/functions/position/firsttoken.h> +#include <libwccl/ops/functions/constant.h> +#include <libwccl/exception.h> + +#include <sstream> + +namespace Wccl { + +FirstToken::BaseRetValPtr FirstToken::apply_internal( + const FunExecContext &context) const +{ /* Evaluates match_expr_ against the context and returns the Position of the first token of the resulting Match. */ + boost::shared_ptr<Corpus2::AnnotatedSentence> s = + boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>( + context.sentence_context().get_sentence_ptr()); + if (!s) { /* null after the dynamic cast: the sentence pointer is null or is not a Corpus2::AnnotatedSentence */ + throw InvalidArgument( + "context", + "Supplied context does not have valid Corpus2::AnnotatedSentence."); + } + const Function<Match>::RetValPtr match = match_expr_->apply(context); + if(match->empty()) { /* empty Match: return the default Position (documented in the header as Nowhere) instead of asking for a token */ + return detail::DefaultFunction<Position>()->apply(context); + } + return boost::make_shared<Position>(match->first_token(s)); +} + +std::string FirstToken::to_string(const Corpus2::Tagset &tagset) const +{ /* Builds name(tagset), then the argument rendered with the same tagset in parentheses. */ + std::ostringstream ostream; + ostream << name(tagset) << "(" << match_expr_->to_string(tagset) << ")"; + return ostream.str(); +} + +std::ostream& FirstToken::write_to(std::ostream& ostream) const +{ /* Tagset-free variant: streams raw_name() and the argument expression in parentheses, returns the same stream. */ + return ostream << raw_name() << "(" << *match_expr_ 
<< ")"; +} + +} /* end ns Wccl */ diff --git a/libwccl/ops/functions/position/firsttoken.h b/libwccl/ops/functions/position/firsttoken.h new file mode 100644 index 0000000000000000000000000000000000000000..3081574ab4d1112bc787ef9636f8d6300db95df3 --- /dev/null +++ b/libwccl/ops/functions/position/firsttoken.h @@ -0,0 +1,56 @@ +#ifndef LIBWCCL_OPS_FUNCTIONS_POSITION_FIRSTTOKEN_H +#define LIBWCCL_OPS_FUNCTIONS_POSITION_FIRSTTOKEN_H + +#include <libwccl/ops/function.h> +#include <libwccl/values/match.h> + +namespace Wccl { + +/** + * Operator that takes a Match and returns Position of the first token + * within the given Match. + */ +class FirstToken : public Function<Position> +{ +public: + typedef boost::shared_ptr<Function<Match> > MatchFunctionPtr; /* shared pointer to the Match-valued argument expression */ + + FirstToken(const MatchFunctionPtr& match_expr) /* match_expr is expected to be non-null; checked with BOOST_ASSERT below */ + : match_expr_(match_expr) + { + BOOST_ASSERT(match_expr_); + } + + /** + * @returns String representation of the function + */ + std::string to_string(const Corpus2::Tagset& tagset) const; + + /** + * @returns Name of the function + */ + std::string raw_name() const { + return "first"; + } + +protected: + const MatchFunctionPtr match_expr_; /* argument expression that yields the Match to inspect */ + + /** + * Writes string representation of the function + * @returns Stream written to. + */ + std::ostream& write_to(std::ostream& ostream) const; + + /** + * Takes the value of a Match from argument expression, and returns + * a Position that corresponds to the first token of the given Match. + * @returns Position corresponding to the first token of a Match. + * Nowhere in case of an empty Match. 
+ */ + BaseRetValPtr apply_internal(const FunExecContext& context) const; +}; + +} /* end ns Wccl */ + +#endif // LIBWCCL_OPS_FUNCTIONS_POSITION_FIRSTTOKEN_H diff --git a/libwccl/ops/functions/position/lasttoken.cpp b/libwccl/ops/functions/position/lasttoken.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6d4ca00193da63f17cd35f717054eb21d93ffbd0 --- /dev/null +++ b/libwccl/ops/functions/position/lasttoken.cpp @@ -0,0 +1,38 @@ +#include <libwccl/ops/functions/position/lasttoken.h> +#include <libwccl/ops/functions/constant.h> +#include <libwccl/exception.h> +#include <sstream> + +namespace Wccl { + +LastToken::BaseRetValPtr LastToken::apply_internal( + const FunExecContext &context) const +{ /* Evaluates match_expr_ against the context and returns the Position of the last token of the resulting Match. */ + boost::shared_ptr<Corpus2::AnnotatedSentence> s = + boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>( + context.sentence_context().get_sentence_ptr()); + if (!s) { /* null after the dynamic cast: the sentence pointer is null or is not a Corpus2::AnnotatedSentence */ + throw InvalidArgument( + "context", + "Supplied context does not have valid Corpus2::AnnotatedSentence."); + } + const Function<Match>::RetValPtr match = match_expr_->apply(context); + if(match->empty()) { /* empty Match: return the default Position (documented in the header as Nowhere) instead of asking for a token */ + return detail::DefaultFunction<Position>()->apply(context); + } + return boost::make_shared<Position>(match->last_token(s)); +} + +std::string LastToken::to_string(const Corpus2::Tagset &tagset) const +{ /* Builds name(tagset), then the argument rendered with the same tagset in parentheses. */ + std::ostringstream ostream; + ostream << name(tagset) << "(" << match_expr_->to_string(tagset) << ")"; + return ostream.str(); +} + +std::ostream& LastToken::write_to(std::ostream& ostream) const +{ /* Tagset-free variant: streams raw_name() and the argument expression in parentheses, returns the same stream. */ + return ostream << raw_name() << "(" << *match_expr_ << ")"; +} + +} /* end ns Wccl */ diff --git a/libwccl/ops/functions/position/lasttoken.h b/libwccl/ops/functions/position/lasttoken.h new file mode 100644 index 0000000000000000000000000000000000000000..995949eb2f4abf468691e96fafe63897b94c8942 --- /dev/null +++ b/libwccl/ops/functions/position/lasttoken.h @@ -0,0 +1,56 @@ +#ifndef LIBWCCL_OPS_FUNCTIONS_POSITION_LASTTOKEN_H +#define LIBWCCL_OPS_FUNCTIONS_POSITION_LASTTOKEN_H + 
+#include <libwccl/ops/function.h> +#include <libwccl/values/match.h> + +namespace Wccl { + +/** + * Operator that takes a Match and returns Position of the last token + * within the given Match. + */ +class LastToken : public Function<Position> +{ +public: + typedef boost::shared_ptr<Function<Match> > MatchFunctionPtr; /* shared pointer to the Match-valued argument expression */ + + LastToken(const MatchFunctionPtr& match_expr) /* match_expr is expected to be non-null; checked with BOOST_ASSERT below */ + : match_expr_(match_expr) + { + BOOST_ASSERT(match_expr_); + } + + /** + * @returns String representation of the function + */ + std::string to_string(const Corpus2::Tagset& tagset) const; + + /** + * @returns Name of the function + */ + std::string raw_name() const { + return "last"; + } + +protected: + const MatchFunctionPtr match_expr_; /* argument expression that yields the Match to inspect */ + + /** + * Writes string representation of the function + * @returns Stream written to. + */ + std::ostream& write_to(std::ostream& ostream) const; + + /** + * Takes the value of a Match from argument expression, and returns + * a Position that corresponds to the last token of the given Match. + * @returns Position corresponding to the last token of a Match. + * Nowhere in case of an empty Match. + */ + BaseRetValPtr apply_internal(const FunExecContext& context) const; +}; + +} /* end ns Wccl */ + +#endif // LIBWCCL_OPS_FUNCTIONS_POSITION_LASTTOKEN_H diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index 998e823e330625a7131e946742465a924a79acad..376de76f7c442bc3b23bcd5effa3d3b4cad3552a 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -248,7 +248,7 @@ parse_match_rule returns [boost::shared_ptr<Expression> ret_match] { Variables vars; - vars.get_put<Match>("$m:_M"); + vars.get_put<Match>("_M"); /* NOTE(review): same variable name as the create_accessor call in match_apply_operator; keep the two in sync */ } : ret_match = match_rule_operator[tagset, vars] ; @@ -644,7 +644,7 @@ symset_operator [const Corpus2::Tagset& tagset, Variables& vars] returns [boost::shared_ptr<Function<TSet> > ret] : ret = symset_condition [tagset, vars] - | (SYMBOL LBRACKET) => (ret = symset_getsymbol [tagset, vars]) + | {LA(1)==SYMBOL && LA(2)==LBRACKET}? 
(ret = symset_getsymbol [tagset, vars]) /* alternative is guarded by the lookahead test on this line: next token SYMBOL, then LBRACKET */ | ret = symset_var_val [tagset, vars] | ret = symset_class [tagset, vars] | ret = symset_range [tagset, vars] @@ -1718,7 +1718,7 @@ match_apply_operator [const Corpus2::Tagset& tagset, Variables& vars] returns [boost::shared_ptr<ApplyOperator> ret_op] { - VariableAccessor<Match> matches = vars.create_accessor<Match>("$m:_M");; + VariableAccessor<Match> matches = vars.create_accessor<Match>("_M");; /* NOTE(review): same variable name as the get_put call in parse_match_rule; keep the two in sync */ boost::shared_ptr<const MatchOperator> match_op; boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > actions; boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > conditions;