diff --git a/libwccl/ops/functions/listoperator.h b/libwccl/ops/functions/listoperator.h
new file mode 100644
index 0000000000000000000000000000000000000000..0e8c8ac78568c524be22728c72715c63aa0b5bd8
--- /dev/null
+++ b/libwccl/ops/functions/listoperator.h
@@ -0,0 +1,88 @@
+#ifndef LIBWCCL_OPS_FUNCTIONS_LISTOPERATOR_H
+#define LIBWCCL_OPS_FUNCTIONS_LISTOPERATOR_H
+
+#include <libwccl/ops/function.h>
+#include <libwccl/ops/formatters.h>
+
+namespace Wccl {
+
+/**
+ * Abstract base class for operators over any number of arguments
+ * (of the same type)
+ */
+template<class T>
+class ListOperator : public Function<T>
+{
+public:
+	typedef boost::shared_ptr<Function<T> > TFunctionPtr;
+	typedef std::vector<TFunctionPtr> TFunctionPtrVector;
+
+	/**
+	 * @param expressions Argument expressions; asserted to be a non-null
+	 * pointer to a non-empty vector.
+	 */
+	ListOperator(const boost::shared_ptr<TFunctionPtrVector>& expressions)
+		: expressions_(expressions)
+	{
+		BOOST_ASSERT(expressions_);
+		BOOST_ASSERT(expressions_->size() > 0);
+	}
+
+	/**
+	 * @returns String representation of the operator, realised by default
+	 * as "operator_name(expr1_string, ..., exprn_string)"
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+protected:
+	/** Shared vector of the argument expressions. */
+	const boost::shared_ptr<TFunctionPtrVector> expressions_;
+
+	/**
+	 * Writes raw string representation of the operator, realised by default
+	 * as "raw_operator_name(raw_expr1_string, ..., raw_exprn_string)"
+	 * @note This version doesn't require tagset, but may be incomplete and/or
+	 * contain internal info.
+	 * @returns Stream written to.
+	 */
+	std::ostream& write_to(std::ostream& ostream) const;
+};
+
+//
+// ----- Implementation -----
+//
+
+template<class T> inline
+std::string ListOperator<T>::to_string(const Corpus2::Tagset& tagset) const
+{
+	std::stringstream ss;
+	ss << this->name(tagset) << "(";
+	typename TFunctionPtrVector::const_iterator it = expressions_->begin();
+	while(it != expressions_->end()) {
+		ss << (*it)->to_string(tagset);
+		if(++it != expressions_->end()) {
+			ss << ", ";
+		}
+	}
+	ss << ")";
+	return ss.str();
+}
+
+template<class T> inline
+std::ostream& ListOperator<T>::write_to(std::ostream& ostream) const
+{
+	ostream << this->raw_name() << "(";
+	typename TFunctionPtrVector::const_iterator it = expressions_->begin();
+	while(it != expressions_->end()) {
+		ostream << (*it)->to_raw_string();
+		if(++it != expressions_->end()) {
+			ostream << ", ";
+		}
+	}
+	ostream << ")";
+	return ostream;
+}
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_FUNCTIONS_LISTOPERATOR_H
diff --git a/libwccl/ops/functions/setops.h b/libwccl/ops/functions/setops.h
new file mode 100644
index 0000000000000000000000000000000000000000..bdf20594d075ee2898a3c9bef037ed87a0a67b93
--- /dev/null
+++ b/libwccl/ops/functions/setops.h
@@ -0,0 +1,164 @@
+#ifndef LIBWCCL_OPS_FUNCTIONS_SETOPS_H
+#define LIBWCCL_OPS_FUNCTIONS_SETOPS_H
+
+#include <libwccl/ops/functions/listoperator.h>
+#include <libwccl/values/tset.h>
+#include <libwccl/values/strset.h>
+
+#include <algorithm>
+#include <iterator>
+
+namespace Wccl {
+
+/**
+ * Common base for operators taking a list of set-valued arguments.
+ * The compile-time assertion restricts T to StrSet or TSet.
+ */
+template<class T>
+class SetListOperator : public ListOperator<T>
+{
+	BOOST_MPL_ASSERT(( boost::mpl::count<boost::mpl::list<StrSet, TSet>, T> ));
+public:
+	SetListOperator(const boost::shared_ptr<typename SetListOperator<T>::TFunctionPtrVector>& expressions)
+		: ListOperator<T>(expressions)
+	{
+	}
+};
+
+/**
+ * Set operation: union
+ */
+template<class T>
+class SetUnion : public SetListOperator<T>
+{
+public:
+	SetUnion(const boost::shared_ptr<typename SetListOperator<T>::TFunctionPtrVector>& expressions)
+		: SetListOperator<T>(expressions)
+	{
+	}
+
+	/**
+	 * @returns Name of the function: "union".
+	 */
+	std::string raw_name() const {
+		return "union";
+	}
+
+protected:
+	FunctionBase::BaseRetValPtr apply_internal(const FunExecContext& context) const;
+};
+
+/**
+ * Set operation: intersection
+ */
+template<class T>
+class SetIntersection : public SetListOperator<T>
+{
+public:
+	SetIntersection(const boost::shared_ptr<typename SetListOperator<T>::TFunctionPtrVector>& expressions)
+		: SetListOperator<T>(expressions)
+	{
+	}
+
+	/**
+	 * @returns Name of the function: "intersection".
+	 */
+	std::string raw_name() const {
+		return "intersection";
+	}
+
+protected:
+	FunctionBase::BaseRetValPtr apply_internal(const FunExecContext& context) const;
+};
+
+//
+// ----- Implementation -----
+//
+
+/**
+ * Union of symbol sets: combines the values of all arguments into one Tag.
+ */
+template <> inline
+FunctionBase::BaseRetValPtr SetUnion<TSet>::apply_internal(const FunExecContext& context) const {
+	Corpus2::Tag out;
+	foreach(const boost::shared_ptr< Function<TSet> >& expression, *expressions_) {
+		Corpus2::Tag s = expression->apply(context)->get_value();
+		out.combine_with(s);
+	}
+	return boost::make_shared<TSet>(out);
+}
+
+/**
+ * Union of string sets. A single argument is passed through unchanged;
+ * otherwise the first two are merged and the remaining ones are inserted
+ * into the result one by one.
+ */
+template <> inline
+FunctionBase::BaseRetValPtr SetUnion<StrSet>::apply_internal(const FunExecContext& context) const {
+	if (expressions_->size() == 1) return expressions_->front()->apply(context);
+	boost::shared_ptr<StrSet> out = boost::make_shared<StrSet>();
+	if (expressions_->empty()) return out;
+	const boost::shared_ptr<const StrSet>& set1 = (*expressions_)[0]->apply(context);
+	const boost::shared_ptr<const StrSet>& set2 = (*expressions_)[1]->apply(context);
+	std::set_union(set1->contents().begin(), set1->contents().end(),
+		set2->contents().begin(), set2->contents().end(),
+		std::inserter(out->contents(), out->contents().begin()));
+
+	for (size_t i = 2; i < expressions_->size(); ++i) {
+		const boost::shared_ptr<const StrSet>& seti = (*expressions_)[i]->apply(context);
+		foreach (const UnicodeString& s, seti->contents()) {
+			out->insert(s);
+		}
+	}
+	return out;
+}
+
+/**
+ * Intersection of symbol sets: starts from the first argument's value and
+ * masks it with the value of every further argument.
+ */
+template <> inline
+FunctionBase::BaseRetValPtr SetIntersection<TSet>::apply_internal(const FunExecContext& context) const {
+	Corpus2::Tag out;
+	if (!expressions_->empty()) {
+		out = (*expressions_)[0]->apply(context)->get_value();
+		for (size_t i = 1; i < expressions_->size(); ++i) {
+			Corpus2::Tag s = (*expressions_)[i]->apply(context)->get_value();
+			out.mask_with(s);
+		}
+	}
+	return boost::make_shared<TSet>(out);
+}
+
+/**
+ * Intersection of string sets. A single argument is passed through
+ * unchanged; otherwise the first two are intersected and the result is
+ * then narrowed by every further argument in turn.
+ */
+template <> inline
+FunctionBase::BaseRetValPtr SetIntersection<StrSet>::apply_internal(const FunExecContext& context) const {
+	if (expressions_->size() == 1) return expressions_->front()->apply(context);
+	boost::shared_ptr<StrSet> out = boost::make_shared<StrSet>();
+	if (expressions_->empty()) return out;
+	const boost::shared_ptr<const StrSet>& set1 = (*expressions_)[0]->apply(context);
+	const boost::shared_ptr<const StrSet>& set2 = (*expressions_)[1]->apply(context);
+	std::set_intersection(set1->contents().begin(), set1->contents().end(),
+		set2->contents().begin(), set2->contents().end(),
+		std::inserter(out->contents(), out->contents().begin()));
+
+	for (size_t i = 2; i < expressions_->size(); ++i) {
+		boost::shared_ptr<StrSet> out2 = boost::make_shared<StrSet>();
+		const boost::shared_ptr<const StrSet>& seti = (*expressions_)[i]->apply(context);
+		// Collect into the scratch set out2: writing into out would modify the
+		// very range being read, and the swap below would then discard it.
+		std::set_intersection(seti->contents().begin(), seti->contents().end(),
+			out->contents().begin(), out->contents().end(),
+			std::inserter(out2->contents(), out2->contents().begin()));
+		out->contents().swap(out2->contents());
+	}
+	return out;
+}
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_FUNCTIONS_SETOPS_H
diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g
index 253dc3b271df1c8f8db502a06d04d0528d1020e6..c073e68415f021c689fcd9cff39a4a1b1fe84461 100644
--- a/libwccl/parser/grammar.g
+++ b/libwccl/parser/grammar.g
@@ -27,6 +27,7 @@ header {
 
 	#include <libwccl/ops/functions/vargetter.h>
 	#include <libwccl/ops/functions/conditional.h>
+	#include <libwccl/ops/functions/setops.h>
 	#include <libwccl/ops/functions/bool/varsetter.h>
 	#include <libwccl/ops/functions/bool/predicates/debug.h>
 	#include <libwccl/ops/functions/bool/predicates/ambiguous.h>
@@ -728,10 +729,34 @@ symset_operator
 	| ret = symset_range [scope]
 	| ret = symset_catflt [scope]
 	| ret = symset_agrflt [scope]
+	| ret = symset_union [scope]
+	| ret = symset_intersection [scope]
 	//
 	| LPAREN ret = symset_operator [scope] RPAREN
 ;
 
+// ----------------------------------------------------------------------------
+// comma-separated symset operators
+symset_operator_comma_sep
+	[ParsingScope& scope]
+	returns
+		[boost::shared_ptr<std::vector<boost::shared_ptr<Function<TSet> > > > ret_v]
+{
+	boost::shared_ptr<Function<TSet> > pred;
+	ret_v.reset(
+		new std::vector<boost::shared_ptr<Function<TSet> > >
+	);
+}
+	: pred = symset_operator [scope] {
+		ret_v->push_back(pred);
+	}
+	(
+		COMMA pred = symset_operator [scope] {
+			ret_v->push_back(pred);
+		}
+	)*
+;
+
 // ----------------------------------------------------------------------------
 // Wrapper from Function<TSet> to Operator<TSet>
 functional_operator_symset
@@ -879,6 +904,35 @@ symset_agrflt
 	}
 ;
 
+// ----------------------------------------------------------------------------
+// Union operator
+symset_union
+	[ParsingScope& scope]
+	returns [boost::shared_ptr<Function<TSet> > ret]
+{
+	boost::shared_ptr< std::vector< boost::shared_ptr<Function<TSet> > > > sets;
+}
+	: "union" LPAREN
+		sets = symset_operator_comma_sep [scope]
+	RPAREN {
+		ret.reset(new SetUnion<TSet>(sets));
+	}
+;
+
+// ----------------------------------------------------------------------------
+// Intersection operator
+symset_intersection
+	[ParsingScope& scope]
+	returns [boost::shared_ptr<Function<TSet> > ret]
+{
+	boost::shared_ptr< std::vector< boost::shared_ptr<Function<TSet> > > > sets;
+}
+	: "intersection" LPAREN
+		sets = symset_operator_comma_sep [scope]
+	RPAREN {
+		ret.reset(new SetIntersection<TSet>(sets));
+	}
+;
 
 ///////////////////////////////////////////////////////////////////////////////
 // Position operator
@@ -992,11 +1046,36 @@ strset_operator [ParsingScope& scope]
 	| ret = strset_affix [scope]
 	| ret = strset_var_val [scope]
 	| ret = strset_condition [scope]
+	| ret = strset_union [scope]
+	| ret = strset_intersection [scope]
 	| ret = strset_lex [scope]
 	//
 	| LPAREN ret = strset_operator [scope] RPAREN
 ;
 
+// ----------------------------------------------------------------------------
+// comma-separated strset operators
+strset_operator_comma_sep
+	[ParsingScope& scope]
+	returns
+		[boost::shared_ptr< std::vector<boost::shared_ptr<Function<StrSet> > > > ret_v]
+{
+	boost::shared_ptr<Function<StrSet> > pred;
+	ret_v.reset(
+		new std::vector<boost::shared_ptr<Function<StrSet> > >
+	);
+}
+	: pred = strset_operator [scope] {
+		ret_v->push_back(pred);
+	}
+	(
+		COMMA pred = strset_operator [scope] {
+			ret_v->push_back(pred);
+		}
+	)*
+;
+
+
 // ----------------------------------------------------------------------------
 // Wrapper from Function<StrSet> to Operator<StrSet>
 functional_operator_strset
@@ -1116,6 +1195,36 @@ strset_condition
 	}
 ;
 
+// ----------------------------------------------------------------------------
+// Union operator, strset
+strset_union
+	[ParsingScope& scope]
+	returns [boost::shared_ptr<Function<StrSet> > ret]
+{
+	boost::shared_ptr<std::vector<boost::shared_ptr<Function<StrSet> > > > sets;
+}
+	: "union" LPAREN
+		sets = strset_operator_comma_sep [scope]
+	RPAREN {
+		ret.reset(new SetUnion<StrSet>(sets));
+	}
+;
+
+// ----------------------------------------------------------------------------
+// Intersection operator, strset
+strset_intersection
+	[ParsingScope& scope]
+	returns [boost::shared_ptr<Function<StrSet> > ret]
+{
+	boost::shared_ptr<std::vector<boost::shared_ptr<Function<StrSet> > > > sets;
+}
+	: "intersection" LPAREN
+		sets = strset_operator_comma_sep [scope]
+	RPAREN {
+		ret.reset(new SetIntersection<StrSet>(sets));
+	}
+;
+
 strset_lex
 	[ParsingScope& scope]
 	returns [boost::shared_ptr<Function<StrSet> > op]