Skip to content
Snippets Groups Projects
Commit aabc6962 authored by Adam Wardynski's avatar Adam Wardynski
Browse files

Initial versions of various agreement operators.

The agreement stuff isn't implemented yet.
parent 1d5fd937
Branches
No related merge requests found
......@@ -40,13 +40,17 @@ SET(libwccl_STAT_SRC
ops/functions/bool/predicates/logicalpredicate.cpp
ops/functions/bool/predicates/nor.cpp
ops/functions/bool/predicates/or.cpp
ops/functions/bool/predicates/pointagreement.cpp
ops/functions/bool/predicates/regex.cpp
ops/functions/bool/predicates/strongagreement.cpp
ops/functions/bool/predicates/weakagreement.cpp
ops/functions/position/relativeposition.cpp
ops/functions/strset/affix.cpp
ops/functions/strset/getlemmas.cpp
ops/functions/strset/getorth.cpp
ops/functions/strset/tolower.cpp
ops/functions/strset/toupper.cpp
ops/functions/tset/agrfilter.cpp
ops/functions/tset/catfilter.cpp
ops/functions/tset/getsymbols.cpp
ops/functions/tset/getsymbolsinrange.cpp
......
#include <libwccl/ops/functions/bool/predicates/pointagreement.h>
namespace Wccl {
/**
 * Renders the predicate as "<name>(<pos1>, <pos2>, <attribs>)", where the
 * name comes from name(tagset) and each argument is produced by the
 * to_string(tagset) of the corresponding argument expression.
 */
std::string PointAgreement::to_string(const Corpus2::Tagset& tagset) const
{
	std::ostringstream out;
	out << name(tagset) << "(";
	out << pos1_expr_->to_string(tagset) << ", ";
	out << pos2_expr_->to_string(tagset) << ", ";
	out << attribs_expr_->to_string(tagset) << ")";
	return out.str();
}
/**
 * Streams the raw form "<raw_name>(<pos1>, <pos2>, <attribs>)" to the given
 * stream, using the stream insertion operator of each argument expression.
 * @returns The stream that was written to.
 */
std::ostream& PointAgreement::write_to(std::ostream& os) const
{
	os << raw_name() << "(";
	os << *pos1_expr_ << ", ";
	os << *pos2_expr_ << ", ";
	os << *attribs_expr_ << ")";
	return os;
}
/**
 * Evaluates both position arguments (first, then second) and returns False
 * as soon as one of them is reported as outside the sentence context.
 * The attribute set is evaluated afterwards, but the agreement test itself
 * is not implemented yet, so False is returned in every case.
 */
PointAgreement::BaseRetValPtr PointAgreement::apply_internal(const FunExecContext& context) const
{
	const SentenceContext& sentence = context.sentence_context();

	// The second position is evaluated only when the first one is inside.
	const boost::shared_ptr<const Position> first = pos1_expr_->apply(context);
	if (first->is_outside(sentence)) {
		return Predicate::False(context);
	}

	const boost::shared_ptr<const Position> second = pos2_expr_->apply(context);
	if (second->is_outside(sentence)) {
		return Predicate::False(context);
	}

	const boost::shared_ptr<const TSet> attribs_value = attribs_expr_->apply(context);
	const Corpus2::Tag& attribs = attribs_value->get_value();
	(void) attribs; // not used until the agreement check is implemented

	//
	// @todo: implement
	//
	return Predicate::False(context);
}
} /* end ns Wccl */
#ifndef LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_POINTAGREEMENT_H
#define LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_POINTAGREEMENT_H
#include <libwccl/ops/functions/bool/predicate.h>
#include <libwccl/values/position.h>
namespace Wccl {
/**
* Predicate checking point-to-point strong agreement
* between tokens on two positions.
*/
class PointAgreement : public Predicate
{
public:
typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
typedef boost::shared_ptr<Function<TSet> > TSetFunctionPtr;
PointAgreement(
const PosFunctionPtr& pos1_expr,
const PosFunctionPtr& pos2_expr,
const TSetFunctionPtr& attribs_expr)
: pos1_expr_(pos1_expr),
pos2_expr_(pos2_expr),
attribs_expr_(attribs_expr)
{
BOOST_ASSERT(pos1_expr_);
BOOST_ASSERT(pos2_expr_);
BOOST_ASSERT(attribs_expr);
}
/**
* @returns Name of the function: "agrpp"
*/
std::string raw_name() const {
return "agrpp";
}
/**
* @returns String reperesentation of the predicate in form of
* "agrpp(pos1_expr, pos2_expr, attrib_expr)"
*/
std::string to_string(const Corpus2::Tagset& tagset) const;
protected:
const PosFunctionPtr pos1_expr_;
const PosFunctionPtr pos2_expr_;
const TSetFunctionPtr attribs_expr_;
/**
* Gets two positions from arguments. If either of them
* is Nowhere, or points outside the sentence, False is returned.
* Otherwise, when both positions point inside the sentence,
* strong agreement on specified attriutes is checked between
* tokens on the positions and result of this test is returned.
* @returns True if the tokens at specified positions are in strong
* agreement for specified attributes. False otherwise, including
* cases when either of the positions points outside the sentence.
*/
BaseRetValPtr apply_internal(const FunExecContext& context) const;
/**
* Writes raw string reperesentation of the predicate in form of
* "agrpp(pos1_raw_expr, pos2_raw_expr, attribs_raw_expr)"
* @note This version doesn't require a Tagset, but will
* be incomplete and/or contain internal info.
* @returns Stream written to.
*/
std::ostream& write_to(std::ostream& ostream) const;
};
} /* end ns Wccl */
#endif // LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_POINTAGREEMENT_H
#include <libwccl/ops/functions/bool/predicates/strongagreement.h>
namespace Wccl {
/**
 * Renders the predicate as "<name>(<left>, <right>, <attribs>)", where the
 * name comes from name(tagset) and each argument is produced by the
 * to_string(tagset) of the corresponding argument expression.
 */
std::string StrongAgreement::to_string(const Corpus2::Tagset& tagset) const
{
	std::ostringstream out;
	out << name(tagset) << "(";
	out << left_pos_expr_->to_string(tagset) << ", ";
	out << right_pos_expr_->to_string(tagset) << ", ";
	out << attribs_expr_->to_string(tagset) << ")";
	return out.str();
}
/**
 * Streams the raw form "<raw_name>(<left>, <right>, <attribs>)" to the given
 * stream, using the stream insertion operator of each argument expression.
 * @returns The stream that was written to.
 */
std::ostream& StrongAgreement::write_to(std::ostream& os) const
{
	os << raw_name() << "(";
	os << *left_pos_expr_ << ", ";
	os << *right_pos_expr_ << ", ";
	os << *attribs_expr_ << ")";
	return os;
}
/**
 * Evaluates the left and right range positions (in that order), returning
 * False as soon as one of them equals Position::Nowhere. The range is then
 * converted to absolute indices, trimmed to the sentence boundaries, and
 * False is returned if the trimmed range is empty.
 * The attribute set is evaluated afterwards, but the agreement test itself
 * is not implemented yet, so False is returned in every case.
 */
StrongAgreement::BaseRetValPtr StrongAgreement::apply_internal(const FunExecContext& context) const
{
	const SentenceContext& sentence = context.sentence_context();

	// The right end is evaluated only when the left end is not Nowhere.
	const boost::shared_ptr<const Position> left_end = left_pos_expr_->apply(context);
	if (left_end->get_value() == Position::Nowhere) {
		return Predicate::False(context);
	}

	const boost::shared_ptr<const Position> right_end = right_pos_expr_->apply(context);
	if (right_end->get_value() == Position::Nowhere) {
		return Predicate::False(context);
	}

	// Absolute indices of both extremes of the range.
	int first = sentence.get_abs_position(*left_end);
	int last = sentence.get_abs_position(*right_end);

	// Clip the range so that it does not reach beyond the sentence.
	if (first < 0) {
		first = 0;
	}
	if (last >= sentence.size()) {
		last = sentence.size() - 1;
	}

	// An empty range (entirely outside the sentence, or an empty sentence)
	// cannot be in agreement.
	if (last < first) {
		return Predicate::False(context);
	}

	const boost::shared_ptr<const TSet> attribs_value = attribs_expr_->apply(context);
	const Corpus2::Tag& attribs = attribs_value->get_value();
	(void) attribs; // not used until the agreement check is implemented

	//
	// @todo: implement
	//
	return Predicate::False(context);
}
} /* end ns Wccl */
#ifndef LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_STRONGAGREEMENT_H
#define LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_STRONGAGREEMENT_H
#include <libwccl/ops/functions/bool/predicate.h>
#include <libwccl/values/position.h>
namespace Wccl {
/**
* Predicate checking strong agreement on a range of tokens.
*/
class StrongAgreement : public Predicate
{
public:
typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
typedef boost::shared_ptr<Function<TSet> > TSetFunctionPtr;
StrongAgreement(
const PosFunctionPtr& left_pos_expr,
const PosFunctionPtr& right_pos_expr,
const TSetFunctionPtr& attribs_expr)
: left_pos_expr_(left_pos_expr),
right_pos_expr_(right_pos_expr),
attribs_expr_(attribs_expr)
{
BOOST_ASSERT(left_pos_expr_);
BOOST_ASSERT(right_pos_expr_);
BOOST_ASSERT(attribs_expr);
}
/**
* @returns Name of the function: "agr"
*/
std::string raw_name() const {
return "agr";
}
/**
* @returns String reperesentation of the predicate in form of
* "agr(pos1_expr, pos2_expr, attrib_expr)"
*/
std::string to_string(const Corpus2::Tagset& tagset) const;
protected:
const PosFunctionPtr left_pos_expr_;
const PosFunctionPtr right_pos_expr_;
const TSetFunctionPtr attribs_expr_;
/**
* Gets start and end positions for range from arguments.
* Range is trimmed to sentence boundaries.
* If either of the positions is Nowhere, or range is empty, False
* is returned.
* Otherwise, strong agreement for specified attributes is checked
* on the specified range of tokens and result of this test is returned.
* @returns True if the tokens in specified range are in strong
* agreement for given attributes. False otherwise, including cases
* of empty range or either of the positions being Nowhere.
*/
BaseRetValPtr apply_internal(const FunExecContext& context) const;
/**
* Writes raw string reperesentation of the predicate in form of
* "agr(pos1_raw_expr, pos2_raw_expr, attribs_raw_expr)"
* @note This version doesn't require a Tagset, but will
* be incomplete and/or contain internal info.
* @returns Stream written to.
*/
std::ostream& write_to(std::ostream& ostream) const;
};
} /* end ns Wccl */
#endif // LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_STRONGAGREEMENT_H
#include <libwccl/ops/functions/bool/predicates/weakagreement.h>
namespace Wccl {
/**
 * Renders the predicate as "<name>(<left>, <right>, <attribs>)", where the
 * name comes from name(tagset) and each argument is produced by the
 * to_string(tagset) of the corresponding argument expression.
 */
std::string WeakAgreement::to_string(const Corpus2::Tagset& tagset) const
{
	std::ostringstream out;
	out << name(tagset) << "(";
	out << left_pos_expr_->to_string(tagset) << ", ";
	out << right_pos_expr_->to_string(tagset) << ", ";
	out << attribs_expr_->to_string(tagset) << ")";
	return out.str();
}
/**
 * Streams the raw form "<raw_name>(<left>, <right>, <attribs>)" to the given
 * stream, using the stream insertion operator of each argument expression.
 * @returns The stream that was written to.
 */
std::ostream& WeakAgreement::write_to(std::ostream& os) const
{
	os << raw_name() << "(";
	os << *left_pos_expr_ << ", ";
	os << *right_pos_expr_ << ", ";
	os << *attribs_expr_ << ")";
	return os;
}
/**
 * Evaluates the left and right range positions (in that order), returning
 * False as soon as one of them equals Position::Nowhere. The range is then
 * converted to absolute indices, trimmed to the sentence boundaries, and
 * False is returned if the trimmed range is empty.
 * The attribute set is evaluated afterwards, but the agreement test itself
 * is not implemented yet, so False is returned in every case.
 */
WeakAgreement::BaseRetValPtr WeakAgreement::apply_internal(const FunExecContext& context) const
{
	const SentenceContext& sentence = context.sentence_context();

	// The right end is evaluated only when the left end is not Nowhere.
	const boost::shared_ptr<const Position> left_end = left_pos_expr_->apply(context);
	if (left_end->get_value() == Position::Nowhere) {
		return Predicate::False(context);
	}

	const boost::shared_ptr<const Position> right_end = right_pos_expr_->apply(context);
	if (right_end->get_value() == Position::Nowhere) {
		return Predicate::False(context);
	}

	// Absolute indices of both extremes of the range.
	int first = sentence.get_abs_position(*left_end);
	int last = sentence.get_abs_position(*right_end);

	// Clip the range so that it does not reach beyond the sentence.
	if (first < 0) {
		first = 0;
	}
	if (last >= sentence.size()) {
		last = sentence.size() - 1;
	}

	// An empty range (entirely outside the sentence, or an empty sentence)
	// cannot be in agreement.
	if (last < first) {
		return Predicate::False(context);
	}

	const boost::shared_ptr<const TSet> attribs_value = attribs_expr_->apply(context);
	const Corpus2::Tag& attribs = attribs_value->get_value();
	(void) attribs; // not used until the agreement check is implemented

	//
	// @todo: implement
	//
	return Predicate::False(context);
}
} /* end ns Wccl */
#ifndef LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_WEAKAGREEMENT_H
#define LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_WEAKAGREEMENT_H
#include <libwccl/ops/functions/bool/predicate.h>
#include <libwccl/values/position.h>
namespace Wccl {
/**
* Predicate checking weak agreement on a range of tokens.
*/
class WeakAgreement : public Predicate
{
public:
typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
typedef boost::shared_ptr<Function<TSet> > TSetFunctionPtr;
WeakAgreement(
const PosFunctionPtr& left_pos_expr,
const PosFunctionPtr& right_pos_expr,
const TSetFunctionPtr& attribs_expr)
: left_pos_expr_(left_pos_expr),
right_pos_expr_(right_pos_expr),
attribs_expr_(attribs_expr)
{
BOOST_ASSERT(left_pos_expr_);
BOOST_ASSERT(right_pos_expr_);
BOOST_ASSERT(attribs_expr);
}
/**
* @returns Name of the function: "wagr"
*/
std::string raw_name() const {
return "wagr";
}
/**
* @returns String reperesentation of the predicate in form of
* "wagr(pos1_expr, pos2_expr, attrib_expr)"
*/
std::string to_string(const Corpus2::Tagset& tagset) const;
protected:
const PosFunctionPtr left_pos_expr_;
const PosFunctionPtr right_pos_expr_;
const TSetFunctionPtr attribs_expr_;
/**
* Gets start and end positions for range from arguments.
* Range is trimmed to sentence boundaries.
* If either of the positions is Nowhere, or range is empty, False
* is returned.
* Otherwise, weak agreement for given attributes is checked
* on the specified range of tokens and result of this test is returned.
* @returns True if the tokens in specified range are in weak
* agreement for specified attributes. False otherwise, including
* cases of empty range or either of the positions being Nowhere.
*/
BaseRetValPtr apply_internal(const FunExecContext& context) const;
/**
* Writes raw string reperesentation of the predicate in form of
* "wagr(pos1_raw_expr, pos2_raw_expr, attribs_raw_expr)"
* @note This version doesn't require a Tagset, but will
* be incomplete and/or contain internal info.
* @returns Stream written to.
*/
std::ostream& write_to(std::ostream& ostream) const;
};
} /* end ns Wccl */
#endif // LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATE_WEAKAGREEMENT_H
#include <libwccl/ops/functions/tset/agrfilter.h>
#include <libwccl/ops/functions/constant.h>
namespace Wccl {
/**
 * Renders the operator as "<name>(<left>, <right>, <attribs>, <mask>)",
 * where the name comes from name(tagset) and each argument is produced by
 * the to_string(tagset) of the corresponding argument expression.
 */
std::string AgrFilter::to_string(const Corpus2::Tagset& tagset) const
{
	std::ostringstream out;
	out << name(tagset) << "(";
	out << left_pos_expr_->to_string(tagset) << ", ";
	out << right_pos_expr_->to_string(tagset) << ", ";
	out << attribs_expr_->to_string(tagset) << ", ";
	out << mask_expr_->to_string(tagset) << ")";
	return out.str();
}
/**
 * Streams the raw form "<raw_name>(<left>, <right>, <attribs>, <mask>)" to
 * the given stream, using the stream insertion operator of each argument
 * expression.
 * @returns The stream that was written to.
 */
std::ostream& AgrFilter::write_to(std::ostream& os) const
{
	os << raw_name() << "(";
	os << *left_pos_expr_ << ", ";
	os << *right_pos_expr_ << ", ";
	os << *attribs_expr_ << ", ";
	os << *mask_expr_ << ")";
	return os;
}
/**
 * Evaluates the left and right range positions (in that order). If either
 * equals Position::Nowhere, or the range trimmed to the sentence boundaries
 * is empty, the result of detail::DefaultFunction<TSet>() is returned.
 * Otherwise the attribute and mask sets are evaluated; the agreement
 * computation is not implemented yet, so the result is currently a freshly
 * created TSet whose tag has been masked with the mask value.
 */
AgrFilter::BaseRetValPtr AgrFilter::apply_internal(const FunExecContext& context) const
{
	const SentenceContext& sentence = context.sentence_context();

	// The right end is evaluated only when the left end is not Nowhere.
	const boost::shared_ptr<const Position> left_end = left_pos_expr_->apply(context);
	if (left_end->get_value() == Position::Nowhere) {
		return detail::DefaultFunction<TSet>()->apply(context);
	}

	const boost::shared_ptr<const Position> right_end = right_pos_expr_->apply(context);
	if (right_end->get_value() == Position::Nowhere) {
		return detail::DefaultFunction<TSet>()->apply(context);
	}

	// Absolute indices of both extremes of the range.
	int first = sentence.get_abs_position(*left_end);
	int last = sentence.get_abs_position(*right_end);

	// Clip the range so that it does not reach beyond the sentence.
	if (first < 0) {
		first = 0;
	}
	if (last >= sentence.size()) {
		last = sentence.size() - 1;
	}

	// An empty range (entirely outside the sentence, or an empty sentence)
	// yields the default TSet.
	if (last < first) {
		return detail::DefaultFunction<TSet>()->apply(context);
	}

	// Attributes are evaluated before the mask, as in the argument order.
	const boost::shared_ptr<const TSet> attribs_value = attribs_expr_->apply(context);
	const boost::shared_ptr<const TSet> mask_value = mask_expr_->apply(context);
	const Corpus2::Tag& attribs = attribs_value->get_value();
	const Corpus2::Tag& mask = mask_value->get_value();
	(void) attribs; // not used until the agreement computation is implemented

	boost::shared_ptr<TSet> result = boost::make_shared<TSet>();
	//
	//@ todo: implement
	//
	result->tag_ref().mask_with(mask);
	return result;
}
} /* end ns Wccl */
#ifndef LIBWCCL_OPS_FUNCTIONS_TSET_AGRFILTER_H
#define LIBWCCL_OPS_FUNCTIONS_TSET_AGRFILTER_H
#include <libwccl/values/tset.h>
#include <libwccl/values/position.h>
#include <libwccl/ops/function.h>
namespace Wccl {
/**
 * Operator that returns a filtered set of tagset symbols that
 * describe weak agreement of tokens within specified range,
 * provided that the tokens actually are in weak agreement.
 */
class AgrFilter : public Function<TSet>
{
public:
	typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
	typedef boost::shared_ptr<Function<TSet> > TSetFunctionPtr;

	/**
	 * @param left_pos_expr Expression yielding the left end of the range;
	 * must not be null.
	 * @param right_pos_expr Expression yielding the right end of the range;
	 * must not be null.
	 * @param attribs_expr Expression yielding the set of attributes to
	 * consider for agreement; must not be null.
	 * @param mask_expr Expression yielding the mask applied to the result;
	 * must not be null.
	 */
	AgrFilter(
		const PosFunctionPtr& left_pos_expr,
		const PosFunctionPtr& right_pos_expr,
		const TSetFunctionPtr& attribs_expr,
		const TSetFunctionPtr& mask_expr)
		: left_pos_expr_(left_pos_expr),
		  right_pos_expr_(right_pos_expr),
		  attribs_expr_(attribs_expr),
		  mask_expr_(mask_expr)
	{
		BOOST_ASSERT(left_pos_expr_);
		BOOST_ASSERT(right_pos_expr_);
		BOOST_ASSERT(attribs_expr_);
		BOOST_ASSERT(mask_expr_);
	}

	/**
	 * @returns String representation of the function in the form of:
	 * "agrflt(left_pos_expr, right_pos_expr, attribs_expr, mask_expr)"
	 */
	std::string to_string(const Corpus2::Tagset& tagset) const;

	/**
	 * @returns Name of the operator, "agrflt".
	 */
	std::string raw_name() const {
		return "agrflt";
	}

protected:
	const PosFunctionPtr left_pos_expr_;
	const PosFunctionPtr right_pos_expr_;
	const TSetFunctionPtr attribs_expr_;
	const TSetFunctionPtr mask_expr_;

	/**
	 * Gets range specified via Position expressions. Trims the range
	 * to sentence boundaries if needed. Then computes
	 * the set that describes weak agreement of tokens within the range,
	 * over specified attributes, then filters that set and returns the result.
	 * @returns Tagset symbol set for tokens at specified range,
	 * describing weak agreement of tokens at that range taking into account
	 * specified attributes, after application of given filter. Empty TSet
	 * if range is invalid (either of the positions pointing Nowhere
	 * or the range is empty). Also empty TSet if tokens within the range
	 * are not in weak agreement over specified attributes.
	 * @note The agreement computation is still a @todo in the
	 * implementation file; until it is written, a valid range yields a
	 * newly created TSet masked with the mask value.
	 */
	BaseRetValPtr apply_internal(const FunExecContext& context) const;

	/**
	 * Writes raw string representation of the function in the form of:
	 * "agrflt(left_pos_raw_expr, right_pos_raw_expr, attribs_raw_expr, mask_raw_expr)"
	 * @note This version does not require tagset, but will be incomplete
	 * and/or contain internal info.
	 * @returns Stream written to.
	 */
	std::ostream& write_to(std::ostream& ostream) const;
};
} /* end ns Wccl */
#endif // LIBWCCL_OPS_FUNCTIONS_TSET_AGRFILTER_H
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment