diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index fd6bd63180a43b86cf917ce70366707e9a424eb7..e1e07c7bd9134be8390156d3de4211d49bd0ab87 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -40,13 +40,17 @@ SET(libwccl_STAT_SRC
 	ops/functions/bool/predicates/logicalpredicate.cpp
 	ops/functions/bool/predicates/nor.cpp
 	ops/functions/bool/predicates/or.cpp
+	ops/functions/bool/predicates/pointagreement.cpp
 	ops/functions/bool/predicates/regex.cpp
+	ops/functions/bool/predicates/strongagreement.cpp
+	ops/functions/bool/predicates/weakagreement.cpp
 	ops/functions/position/relativeposition.cpp
 	ops/functions/strset/affix.cpp
 	ops/functions/strset/getlemmas.cpp
 	ops/functions/strset/getorth.cpp
 	ops/functions/strset/tolower.cpp
 	ops/functions/strset/toupper.cpp
+	ops/functions/tset/agrfilter.cpp
 	ops/functions/tset/catfilter.cpp
 	ops/functions/tset/getsymbols.cpp
 	ops/functions/tset/getsymbolsinrange.cpp
diff --git a/libwccl/ops/functions/bool/predicates/pointagreement.cpp b/libwccl/ops/functions/bool/predicates/pointagreement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1c7e8591d9622cd943a96b8b00f8b6ae5b39709d
--- /dev/null
+++ b/libwccl/ops/functions/bool/predicates/pointagreement.cpp
@@ -0,0 +1,46 @@
+#include <libwccl/ops/functions/bool/predicates/pointagreement.h>
+
+namespace Wccl {
+
+std::string PointAgreement::to_string(const Corpus2::Tagset& tagset) const
+{
+	std::ostringstream ss;
+	ss << name(tagset) << "("
+		<< pos1_expr_->to_string(tagset) << ", "
+		<< pos2_expr_->to_string(tagset) << ", "
+		<< attribs_expr_->to_string(tagset) << ")";
+	return ss.str();
+}
+
+std::ostream& PointAgreement::write_to(std::ostream& os) const
+{
+	return os << raw_name() << "("
+		<< *pos1_expr_ << ", "
+		<< *pos2_expr_ << ", "
+		<< *attribs_expr_ << ")";
+}
+
+PointAgreement::BaseRetValPtr PointAgreement::apply_internal(const FunExecContext& context) const
+{
+	const SentenceContext& sc = context.sentence_context();
+
+	const boost::shared_ptr<const Position>& pos1 = pos1_expr_->apply(context);
+	if (pos1->is_outside(sc)) {
+		return Predicate::False(context);
+	}
+	const boost::shared_ptr<const Position>& pos2 = pos2_expr_->apply(context);
+	if (pos2->is_outside(sc)) {
+		return Predicate::False(context);
+	}
+
+	const boost::shared_ptr<const TSet>& attribs_tset = attribs_expr_->apply(context);
+	const Corpus2::Tag& attribs = attribs_tset->get_value();
+
+	// @todo: implement the actual agreement test over `attribs`.
+	// Until that is done the predicate reports False for every
+	// pair of positions, including ones inside the sentence.
+
+	return Predicate::False(context);
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/functions/bool/predicates/pointagreement.h b/libwccl/ops/functions/bool/predicates/pointagreement.h
new file mode 100644
index 0000000000000000000000000000000000000000..b9b58e910e707f0d821b571f14e3114174769765
--- /dev/null
+++ b/libwccl/ops/functions/bool/predicates/pointagreement.h
@@ -0,0 +1,74 @@
+#ifndef LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_POINTAGREEMENT_H
+#define LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_POINTAGREEMENT_H
+
+#include <libwccl/ops/functions/bool/predicate.h>
+#include <libwccl/values/position.h>
+
+namespace Wccl {
+
+/**
+ * Predicate checking point-to-point strong agreement
+ * between tokens on two positions.
+ */
+class PointAgreement : public Predicate
+{
+public:
+	typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
+	typedef boost::shared_ptr<Function<TSet> > TSetFunctionPtr;
+
+	PointAgreement(
+		const PosFunctionPtr& pos1_expr,
+		const PosFunctionPtr& pos2_expr,
+		const TSetFunctionPtr& attribs_expr)
+		: pos1_expr_(pos1_expr),
+		  pos2_expr_(pos2_expr),
+		  attribs_expr_(attribs_expr)
+	{
+		BOOST_ASSERT(pos1_expr_);
+		BOOST_ASSERT(pos2_expr_);
+		BOOST_ASSERT(attribs_expr_);
+	}
+	/**
+	 * @returns Name of the function: "agrpp"
+	 */
+	std::string raw_name() const {
+		return "agrpp";
+	}
+
+	/**
+	 * @returns String representation of the predicate in form of
+	 * "agrpp(pos1_expr, pos2_expr, attribs_expr)"
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+protected:
+	const PosFunctionPtr pos1_expr_;
+	const PosFunctionPtr pos2_expr_;
+	const TSetFunctionPtr attribs_expr_;
+
+	/**
+	 * Gets two positions from arguments. If either of them
+	 * is Nowhere, or points outside the sentence, False is returned.
+	 * Otherwise, when both positions point inside the sentence,
+	 * strong agreement on specified attributes is checked between
+	 * tokens on the positions and result of this test is returned.
+	 * @returns True if the tokens at specified positions are in strong
+	 * agreement for specified attributes. False otherwise, including
+	 * cases when either of the positions points outside the sentence.
+	 */
+	BaseRetValPtr apply_internal(const FunExecContext& context) const;
+
+	/**
+	 * Writes raw string representation of the predicate in form of
+	 * "agrpp(pos1_raw_expr, pos2_raw_expr, attribs_raw_expr)"
+	 * @note This version doesn't require a Tagset, but will
+	 * be incomplete and/or contain internal info.
+	 * @returns Stream written to.
+	 */
+	std::ostream& write_to(std::ostream& ostream) const;
+
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_POINTAGREEMENT_H
diff --git a/libwccl/ops/functions/bool/predicates/strongagreement.cpp b/libwccl/ops/functions/bool/predicates/strongagreement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..82b27fd6afab98b5b6962e23db9088e87fdbd562
--- /dev/null
+++ b/libwccl/ops/functions/bool/predicates/strongagreement.cpp
@@ -0,0 +1,60 @@
+#include <libwccl/ops/functions/bool/predicates/strongagreement.h>
+
+namespace Wccl {
+
+std::string StrongAgreement::to_string(const Corpus2::Tagset& tagset) const
+{
+	std::ostringstream ss;
+	ss << name(tagset) << "("
+		<< left_pos_expr_->to_string(tagset) << ", "
+		<< right_pos_expr_->to_string(tagset) << ", "
+		<< attribs_expr_->to_string(tagset) << ")";
+	return ss.str();
+}
+
+std::ostream& StrongAgreement::write_to(std::ostream& os) const
+{
+	return os << raw_name() << "("
+		<< *left_pos_expr_ << ", "
+		<< *right_pos_expr_ << ", "
+		<< *attribs_expr_ << ")";
+}
+
+StrongAgreement::BaseRetValPtr StrongAgreement::apply_internal(const FunExecContext& context) const
+{
+	const SentenceContext& sc = context.sentence_context();
+
+	const boost::shared_ptr<const Position>& range_left = left_pos_expr_->apply(context);
+	if (range_left->get_value() == Position::Nowhere) {
+		return Predicate::False(context);
+	}
+	const boost::shared_ptr<const Position>& range_right = right_pos_expr_->apply(context);
+	if (range_right->get_value() == Position::Nowhere) {
+		return Predicate::False(context);
+	}
+	// Get absolute values for left and right extremes of the range.
+	int abs_left = sc.get_abs_position(*range_left);
+	int abs_right = sc.get_abs_position(*range_right);
+	// Trim range to sentence boundaries
+	if (abs_left < 0) {
+		abs_left = 0;
+	}
+	if (abs_right >= sc.size()) {
+		abs_right = sc.size() - 1;
+	}
+	// Proceed only if range isn't empty (range outside of sentence or empty sentence are covered)
+	if (abs_left > abs_right) {
+		return Predicate::False(context);
+	}
+
+	const boost::shared_ptr<const TSet>& attribs_tset = attribs_expr_->apply(context);
+	const Corpus2::Tag& attribs = attribs_tset->get_value();
+
+	// @todo: implement the actual strong agreement test over `attribs`
+	// for tokens in [abs_left, abs_right]. Until that is done the
+	// predicate reports False for every range, including non-empty ones.
+
+	return Predicate::False(context);
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/functions/bool/predicates/strongagreement.h b/libwccl/ops/functions/bool/predicates/strongagreement.h
new file mode 100644
index 0000000000000000000000000000000000000000..c59ee02a08dcfa47736cf938aff6920063cbe804
--- /dev/null
+++ b/libwccl/ops/functions/bool/predicates/strongagreement.h
@@ -0,0 +1,75 @@
+#ifndef LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_STRONGAGREEMENT_H
+#define LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_STRONGAGREEMENT_H
+
+#include <libwccl/ops/functions/bool/predicate.h>
+#include <libwccl/values/position.h>
+
+namespace Wccl {
+
+/**
+ * Predicate checking strong agreement on a range of tokens.
+ */
+class StrongAgreement : public Predicate
+{
+public:
+	typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
+	typedef boost::shared_ptr<Function<TSet> > TSetFunctionPtr;
+
+	StrongAgreement(
+		const PosFunctionPtr& left_pos_expr,
+		const PosFunctionPtr& right_pos_expr,
+		const TSetFunctionPtr& attribs_expr)
+		: left_pos_expr_(left_pos_expr),
+		  right_pos_expr_(right_pos_expr),
+		  attribs_expr_(attribs_expr)
+	{
+		BOOST_ASSERT(left_pos_expr_);
+		BOOST_ASSERT(right_pos_expr_);
+		BOOST_ASSERT(attribs_expr_);
+	}
+
+	/**
+	 * @returns Name of the function: "agr"
+	 */
+	std::string raw_name() const {
+		return "agr";
+	}
+
+	/**
+	 * @returns String representation of the predicate in form of
+	 * "agr(left_pos_expr, right_pos_expr, attribs_expr)"
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+protected:
+	const PosFunctionPtr left_pos_expr_;
+	const PosFunctionPtr right_pos_expr_;
+	const TSetFunctionPtr attribs_expr_;
+
+	/**
+	 * Gets start and end positions for range from arguments.
+	 * Range is trimmed to sentence boundaries.
+	 * If either of the positions is Nowhere, or range is empty, False
+	 * is returned.
+	 * Otherwise, strong agreement for specified attributes is checked
+	 * on the specified range of tokens and result of this test is returned.
+	 * @returns True if the tokens in specified range are in strong
+	 * agreement for given attributes. False otherwise, including cases
+	 * of empty range or either of the positions being Nowhere.
+	 */
+	BaseRetValPtr apply_internal(const FunExecContext& context) const;
+
+	/**
+	 * Writes raw string representation of the predicate in form of
+	 * "agr(left_pos_raw_expr, right_pos_raw_expr, attribs_raw_expr)"
+	 * @note This version doesn't require a Tagset, but will
+	 * be incomplete and/or contain internal info.
+	 * @returns Stream written to.
+	 */
+	std::ostream& write_to(std::ostream& ostream) const;
+
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_STRONGAGREEMENT_H
diff --git a/libwccl/ops/functions/bool/predicates/weakagreement.cpp b/libwccl/ops/functions/bool/predicates/weakagreement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ec373186957592ab6c8de77187b68e77526cfed6
--- /dev/null
+++ b/libwccl/ops/functions/bool/predicates/weakagreement.cpp
@@ -0,0 +1,60 @@
+#include <libwccl/ops/functions/bool/predicates/weakagreement.h>
+
+namespace Wccl {
+
+std::string WeakAgreement::to_string(const Corpus2::Tagset& tagset) const
+{
+	std::ostringstream ss;
+	ss << name(tagset) << "("
+		<< left_pos_expr_->to_string(tagset) << ", "
+		<< right_pos_expr_->to_string(tagset) << ", "
+		<< attribs_expr_->to_string(tagset) << ")";
+	return ss.str();
+}
+
+std::ostream& WeakAgreement::write_to(std::ostream& os) const
+{
+	return os << raw_name() << "("
+		<< *left_pos_expr_ << ", "
+		<< *right_pos_expr_ << ", "
+		<< *attribs_expr_ << ")";
+}
+
+WeakAgreement::BaseRetValPtr WeakAgreement::apply_internal(const FunExecContext& context) const
+{
+	const SentenceContext& sc = context.sentence_context();
+
+	const boost::shared_ptr<const Position>& range_left = left_pos_expr_->apply(context);
+	if (range_left->get_value() == Position::Nowhere) {
+		return Predicate::False(context);
+	}
+	const boost::shared_ptr<const Position>& range_right = right_pos_expr_->apply(context);
+	if (range_right->get_value() == Position::Nowhere) {
+		return Predicate::False(context);
+	}
+	// Get absolute values for left and right extremes of the range.
+	int abs_left = sc.get_abs_position(*range_left);
+	int abs_right = sc.get_abs_position(*range_right);
+	// Trim range to sentence boundaries
+	if (abs_left < 0) {
+		abs_left = 0;
+	}
+	if (abs_right >= sc.size()) {
+		abs_right = sc.size() - 1;
+	}
+	// Proceed only if range isn't empty (range outside of sentence or empty sentence are covered)
+	if (abs_left > abs_right) {
+		return Predicate::False(context);
+	}
+
+	const boost::shared_ptr<const TSet>& attribs_tset = attribs_expr_->apply(context);
+	const Corpus2::Tag& attribs = attribs_tset->get_value();
+
+	// @todo: implement the actual weak agreement test over `attribs`
+	// for tokens in [abs_left, abs_right]. Until that is done the
+	// predicate reports False for every range, including non-empty ones.
+
+	return Predicate::False(context);
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/functions/bool/predicates/weakagreement.h b/libwccl/ops/functions/bool/predicates/weakagreement.h
new file mode 100644
index 0000000000000000000000000000000000000000..f3f58838001b3032875a685a48414c98b510c305
--- /dev/null
+++ b/libwccl/ops/functions/bool/predicates/weakagreement.h
@@ -0,0 +1,75 @@
+#ifndef LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_WEAKAGREEMENT_H
+#define LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_WEAKAGREEMENT_H
+
+#include <libwccl/ops/functions/bool/predicate.h>
+#include <libwccl/values/position.h>
+
+namespace Wccl {
+
+/**
+ * Predicate checking weak agreement on a range of tokens.
+ */
+class WeakAgreement : public Predicate
+{
+public:
+	typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
+	typedef boost::shared_ptr<Function<TSet> > TSetFunctionPtr;
+
+	WeakAgreement(
+		const PosFunctionPtr& left_pos_expr,
+		const PosFunctionPtr& right_pos_expr,
+		const TSetFunctionPtr& attribs_expr)
+		: left_pos_expr_(left_pos_expr),
+		  right_pos_expr_(right_pos_expr),
+		  attribs_expr_(attribs_expr)
+	{
+		BOOST_ASSERT(left_pos_expr_);
+		BOOST_ASSERT(right_pos_expr_);
+		BOOST_ASSERT(attribs_expr_);
+	}
+
+	/**
+	 * @returns Name of the function: "wagr"
+	 */
+	std::string raw_name() const {
+		return "wagr";
+	}
+
+	/**
+	 * @returns String representation of the predicate in form of
+	 * "wagr(left_pos_expr, right_pos_expr, attribs_expr)"
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+protected:
+	const PosFunctionPtr left_pos_expr_;
+	const PosFunctionPtr right_pos_expr_;
+	const TSetFunctionPtr attribs_expr_;
+
+	/**
+	 * Gets start and end positions for range from arguments.
+	 * Range is trimmed to sentence boundaries.
+	 * If either of the positions is Nowhere, or range is empty, False
+	 * is returned.
+	 * Otherwise, weak agreement for given attributes is checked
+	 * on the specified range of tokens and result of this test is returned.
+	 * @returns True if the tokens in specified range are in weak
+	 * agreement for specified attributes. False otherwise, including
+	 * cases of empty range or either of the positions being Nowhere.
+	 */
+	BaseRetValPtr apply_internal(const FunExecContext& context) const;
+
+	/**
+	 * Writes raw string representation of the predicate in form of
+	 * "wagr(left_pos_raw_expr, right_pos_raw_expr, attribs_raw_expr)"
+	 * @note This version doesn't require a Tagset, but will
+	 * be incomplete and/or contain internal info.
+	 * @returns Stream written to.
+	 */
+	std::ostream& write_to(std::ostream& ostream) const;
+
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_WEAKAGREEMENT_H
diff --git a/libwccl/ops/functions/tset/agrfilter.cpp b/libwccl/ops/functions/tset/agrfilter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2ce6440aa3a3046ee8e609953850f7c475dd1d66
--- /dev/null
+++ b/libwccl/ops/functions/tset/agrfilter.cpp
@@ -0,0 +1,66 @@
+#include <libwccl/ops/functions/tset/agrfilter.h>
+#include <libwccl/ops/functions/constant.h>
+
+namespace Wccl {
+
+std::string AgrFilter::to_string(const Corpus2::Tagset& tagset) const
+{
+	std::ostringstream os;
+	os << name(tagset) << "("
+		<< left_pos_expr_->to_string(tagset) << ", "
+		<< right_pos_expr_->to_string(tagset) << ", "
+		<< attribs_expr_->to_string(tagset) << ", "
+		<< mask_expr_->to_string(tagset) << ")";
+	return os.str();
+}
+
+std::ostream& AgrFilter::write_to(std::ostream& os) const
+{
+	return os << raw_name() << "("
+		<< *left_pos_expr_ << ", "
+		<< *right_pos_expr_ << ", "
+		<< *attribs_expr_ << ", "
+		<< *mask_expr_ << ")";
+}
+
+AgrFilter::BaseRetValPtr AgrFilter::apply_internal(const FunExecContext& context) const
+{
+	const SentenceContext& sc = context.sentence_context();
+
+	const boost::shared_ptr<const Position>& range_left = left_pos_expr_->apply(context);
+	if (range_left->get_value() == Position::Nowhere) {
+		return detail::DefaultFunction<TSet>()->apply(context);
+	}
+	const boost::shared_ptr<const Position>& range_right = right_pos_expr_->apply(context);
+	if (range_right->get_value() == Position::Nowhere) {
+		return detail::DefaultFunction<TSet>()->apply(context);
+	}
+	// Get absolute values for left and right extremes of the range.
+	int abs_left = sc.get_abs_position(*range_left);
+	int abs_right = sc.get_abs_position(*range_right);
+	// Trim range to sentence boundaries
+	if (abs_left < 0) {
+		abs_left = 0;
+	}
+	if (abs_right >= sc.size()) {
+		abs_right = sc.size() - 1;
+	}
+	// Proceed only if range isn't empty (range outside of sentence or empty sentence are covered)
+	if (abs_left > abs_right) {
+		return detail::DefaultFunction<TSet>()->apply(context);
+	}
+
+	const boost::shared_ptr<const TSet>& attribs_tset = attribs_expr_->apply(context);
+	const boost::shared_ptr<const TSet>& mask_tset = mask_expr_->apply(context);
+	const Corpus2::Tag& attribs = attribs_tset->get_value();
+	const Corpus2::Tag& mask = mask_tset->get_value();
+
+	boost::shared_ptr<TSet> tset = boost::make_shared<TSet>();
+	// @todo: implement computation of the agreement symbols over
+	// `attribs`. Until then the result is just a default-constructed
+	// TSet with the mask applied.
+	tset->tag_ref().mask_with(mask);
+	return tset;
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/functions/tset/agrfilter.h b/libwccl/ops/functions/tset/agrfilter.h
new file mode 100644
index 0000000000000000000000000000000000000000..44b3e35704cbbe0735301be8edebf24d9ce91843
--- /dev/null
+++ b/libwccl/ops/functions/tset/agrfilter.h
@@ -0,0 +1,83 @@
+#ifndef LIBWCCL_OPS_FUNCTIONS_TSET_AGRFILTER_H
+#define LIBWCCL_OPS_FUNCTIONS_TSET_AGRFILTER_H
+
+#include <libwccl/values/tset.h>
+#include <libwccl/values/position.h>
+#include <libwccl/ops/function.h>
+
+namespace Wccl {
+
+/**
+ * Operator that returns a filtered set of tagset symbols that
+ * describe weak agreement of tokens within specified range,
+ * provided that the tokens actually are in weak agreement.
+ */
+class AgrFilter : public Function<TSet>
+{
+public:
+	typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
+	typedef boost::shared_ptr<Function<TSet> > TSetFunctionPtr;
+
+	AgrFilter(
+		const PosFunctionPtr& left_pos_expr,
+		const PosFunctionPtr& right_pos_expr,
+		const TSetFunctionPtr& attribs_expr,
+		const TSetFunctionPtr& mask_expr)
+		: left_pos_expr_(left_pos_expr),
+		  right_pos_expr_(right_pos_expr),
+		  attribs_expr_(attribs_expr),
+		  mask_expr_(mask_expr)
+	{
+		BOOST_ASSERT(left_pos_expr_);
+		BOOST_ASSERT(right_pos_expr_);
+		BOOST_ASSERT(attribs_expr_);
+		BOOST_ASSERT(mask_expr_);
+	}
+
+	/**
+	 * @returns String representation of the function in the form of:
+	 * "agrflt(left_pos_expr, right_pos_expr, attribs_expr, mask_expr)"
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+	/**
+	 * @returns Name of the operator, "agrflt".
+	 */
+	std::string raw_name() const {
+		return "agrflt";
+	}
+
+protected:
+	const PosFunctionPtr left_pos_expr_;
+	const PosFunctionPtr right_pos_expr_;
+
+	const TSetFunctionPtr attribs_expr_;
+	const TSetFunctionPtr mask_expr_;
+
+	/**
+	 * Gets range specified via Position expressions. Trims the range
+	 * to sentence boundaries if needed. Then computes
+	 * the set that describes weak agreement of tokens within the range,
+	 * over specified attributes, then filters that set and returns the result.
+	 * @returns Tagset symbol set for tokens at specified range,
+	 * describing weak agreement of tokens at that range taking into account
+	 * specified attributes, after application of given filter. Empty TSet
+	 * if range is invalid (either of the positions pointing Nowhere
+	 * or the range is empty). Also empty TSet if tokens within the range
+	 * are not in weak agreement over specified attributes.
+	 */
+	BaseRetValPtr apply_internal(const FunExecContext& context) const;
+
+	/**
+	 * Writes raw string representation of the function in the form of:
+	 * "agrflt(left_pos_raw_expr, right_pos_raw_expr, attribs_raw_expr, mask_raw_expr)"
+	 * @note This version does not require tagset, but will be incomplete
+	 * and/or contain internal info.
+	 * @returns Stream written to.
+	 */
+	std::ostream& write_to(std::ostream& ostream) const;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_FUNCTIONS_TSET_AGRFILTER_H