diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt index 8901362518e67bfa6f3745c424bcce9b567a47a2..123bf257549ba0e2b2e72c4f9805feb62af305f5 100644 --- a/libwccl/CMakeLists.txt +++ b/libwccl/CMakeLists.txt @@ -38,12 +38,14 @@ SET(libwccl_STAT_SRC ops/functions/bool/iterations/only.cpp ops/functions/bool/iterations/rightlook.cpp ops/functions/bool/predicate.cpp + ops/functions/bool/predicates/ambiguous.cpp ops/functions/bool/predicates/and.cpp ops/functions/bool/predicates/ann.cpp ops/functions/bool/predicates/annsub.cpp ops/functions/bool/predicates/debug.cpp ops/functions/bool/predicates/isinside.cpp ops/functions/bool/predicates/isoutside.cpp + ops/functions/bool/predicates/issingular.cpp ops/functions/bool/predicates/logicalpredicate.cpp ops/functions/bool/predicates/nor.cpp ops/functions/bool/predicates/or.cpp diff --git a/libwccl/ops/functions/bool/predicates/ambiguous.cpp b/libwccl/ops/functions/bool/predicates/ambiguous.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9e9fdc4a93016520ddb3f59b3289edecbf5f83c1 --- /dev/null +++ b/libwccl/ops/functions/bool/predicates/ambiguous.cpp @@ -0,0 +1,5 @@ +#include <libwccl/ops/functions/bool/predicates/ambiguous.h> + +namespace Wccl { + +} /* end ns Wccl */ diff --git a/libwccl/ops/functions/bool/predicates/ambiguous.h b/libwccl/ops/functions/bool/predicates/ambiguous.h new file mode 100644 index 0000000000000000000000000000000000000000..f21ab2de9aee5ebbc78141f007e8fa21e8ca0915 --- /dev/null +++ b/libwccl/ops/functions/bool/predicates/ambiguous.h @@ -0,0 +1,95 @@ +#ifndef LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_AMBIGUOUS_H +#define LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_AMBIGUOUS_H + +#include <libwccl/ops/functions/bool/predicate.h> +#include <libwccl/values/position.h> +#include <libwccl/ops/formatters.h> + + +namespace Wccl { + +class StrSet; +class TSet; +class Position; + +/** + * Predicate that checks for ambiguity of a set type (size > 1). 
+ * Can also check for ambiguity of a token at a position --- equivalent + * to checking if there is more than one possible lexeme. + */ +template <class T> +class IsAmbiguous : public Predicate +{ + BOOST_MPL_ASSERT(( boost::mpl::count<boost::mpl::list< + StrSet, TSet, Position>, T> )); +public: + typedef boost::shared_ptr<Function<T> > ArgFunctionPtr; + + IsAmbiguous(const ArgFunctionPtr& arg_expr) + : arg_expr_(arg_expr) + { + BOOST_ASSERT(arg_expr_); + } + + /** + * @returns String representation of the function + */ + std::string to_string(const Corpus2::Tagset& tagset) const; + + /** + * @returns Name of the function + */ + std::string raw_name() const { + return "ambiguous"; + } + +protected: + const ArgFunctionPtr arg_expr_; + + /** + * Take value of argument and return True if it is ambiguous, False otherwise. A set is ambiguous when its size is greater than 1; a position is ambiguous when the token at it has more than one lexeme (a position outside the sentence yields False). + */ + BaseRetValPtr apply_internal(const FunExecContext& context) const; + + /** + * Writes raw string representation of the function + * @note This version does not require tagset but may be incomplete + * and/or contain internal info. + * @returns Stream written to. 
+ */ + std::ostream& write_to(std::ostream& ostream) const; +}; + +// +// ----- Implementation ----- +// + +template <class T> +inline FunctionBase::BaseRetValPtr IsAmbiguous<T>::apply_internal(const FunExecContext& context) const { + return Predicate::evaluate(this->arg_expr_->apply(context)->size() > 1, context); +} + +template <> +inline FunctionBase::BaseRetValPtr IsAmbiguous<Position>::apply_internal(const FunExecContext& context) const { + const SentenceContext& sc = context.sentence_context(); + const boost::shared_ptr<const Position>& pos = arg_expr_->apply(context); + if (sc.is_outside(*pos)) { + return Predicate::False(context); + } + const Corpus2::Token* tok = sc.at(*pos); + return Predicate::evaluate(tok->lexemes().size() > 1, context); +} + +template <class T> +inline std::string IsAmbiguous<T>::to_string(const Corpus2::Tagset &tagset) const { + return UnaryFunctionFormatter::to_string(tagset, *this, *arg_expr_); +} + +template <class T> +inline std::ostream& IsAmbiguous<T>::write_to(std::ostream &os) const { + return os << this->raw_name() << "(" << *this->arg_expr_ << ")"; +} + +} /* end ns Wccl */ + +#endif // LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_AMBIGUOUS_H diff --git a/libwccl/ops/functions/bool/predicates/issingular.cpp b/libwccl/ops/functions/bool/predicates/issingular.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ef9e653bda7a1783a6ea176968bdb68a9f270e0c --- /dev/null +++ b/libwccl/ops/functions/bool/predicates/issingular.cpp @@ -0,0 +1,19 @@ +#include <libwccl/ops/functions/bool/predicates/issingular.h> +#include <libwccl/ops/formatters.h> + +namespace Wccl { + +FunctionBase::BaseRetValPtr IsSingular::apply_internal(const FunExecContext& context) const { + const boost::shared_ptr<const TSet>& tag = arg_expr_->apply(context); + return Predicate::evaluate(tagset_.tag_is_singular(tag->get_value()), context); +} + +std::string IsSingular::to_string(const Corpus2::Tagset &tagset) const { + return 
UnaryFunctionFormatter::to_string(tagset, *this, *arg_expr_); +} + +std::ostream& IsSingular::write_to(std::ostream &os) const { + return os << this->raw_name() << "(" << *this->arg_expr_ << ")"; +} + +} /* end ns Wccl */ diff --git a/libwccl/ops/functions/bool/predicates/issingular.h b/libwccl/ops/functions/bool/predicates/issingular.h new file mode 100644 index 0000000000000000000000000000000000000000..8e5f51c3d62bc1ea38ab2fc4acaa3ef3688a17e2 --- /dev/null +++ b/libwccl/ops/functions/bool/predicates/issingular.h @@ -0,0 +1,56 @@ +#ifndef LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_ISSINGULAR_H +#define LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_ISSINGULAR_H + +#include <libwccl/ops/functions/bool/predicate.h> +#include <libwccl/values/tset.h> + +namespace Wccl { + +/** + * Predicate that checks for tag singularity: at most one value + * chosen for each attribute. + */ +class IsSingular : public Predicate +{ +public: + typedef boost::shared_ptr<Function<TSet> > ArgFunctionPtr; + + IsSingular(const ArgFunctionPtr& arg_expr, const Corpus2::Tagset& tagset) + : arg_expr_(arg_expr), tagset_(tagset) + { + BOOST_ASSERT(arg_expr_); + } + + /** + * @returns String representation of the function + */ + std::string to_string(const Corpus2::Tagset& tagset) const; + + /** + * @returns Name of the function + */ + std::string raw_name() const { + return "singular"; + } + +protected: + const ArgFunctionPtr arg_expr_; + const Corpus2::Tagset& tagset_; + + /** + * Take value of argument and return True if the tagset reports that tag as singular (via tag_is_singular), False otherwise. + */ + BaseRetValPtr apply_internal(const FunExecContext& context) const; + + /** + * Writes raw string representation of the function + * @note This version does not require tagset but may be incomplete + * and/or contain internal info. + * @returns Stream written to. 
+ */ + std::ostream& write_to(std::ostream& ostream) const; +}; + +} /* end ns Wccl */ + +#endif // LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_ISSINGULAR_H diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index f342038bb7c8fec032db8cc17222478275fa8e08..253dc3b271df1c8f8db502a06d04d0528d1020e6 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -29,6 +29,8 @@ header { #include <libwccl/ops/functions/bool/varsetter.h> #include <libwccl/ops/functions/bool/predicates/debug.h> + #include <libwccl/ops/functions/bool/predicates/ambiguous.h> + #include <libwccl/ops/functions/bool/predicates/issingular.h> #include <libwccl/ops/functions/bool/predicates/or.h> #include <libwccl/ops/functions/bool/predicates/nor.h> #include <libwccl/ops/functions/bool/predicates/and.h> @@ -1161,6 +1163,9 @@ bool_operator // annotation | ret = bool_ann [scope] | ret = bool_annsub [scope] + // singular/amb + | ret = bool_ambiguous [scope] + | ret = bool_singular [scope] // debug operators | ret = debug_print_operator [scope] // @@ -1462,6 +1467,58 @@ bool_annsub } ; +// ---------------------------------------------------------------------------- +// Ambiguity checking operator +bool_ambiguous + [ParsingScope& scope] + returns [boost::shared_ptr<Function<Bool> > ret] +{ + boost::shared_ptr<Function<TSet> > tf; + boost::shared_ptr<Function<StrSet> > sf; + boost::shared_ptr<Function<Position> > pf; +} + : "ambiguous" LPAREN + ( + (position_operator [scope]) => + ( + pf = position_operator [scope] { + ret.reset(new IsAmbiguous<Position>(pf)); + } + ) + | + (symset_operator [scope]) => + ( + tf = symset_operator [scope] { + ret.reset(new IsAmbiguous<TSet>(tf)); + } + ) + | + (strset_operator [scope]) => + ( + sf = strset_operator [scope] { + ret.reset(new IsAmbiguous<StrSet>(sf)); + } + ) + ) + RPAREN +; + +// ---------------------------------------------------------------------------- +// Tag singularity checking operator +bool_singular + [ParsingScope& scope] + 
returns [boost::shared_ptr<Function<Bool> > ret] +{ + boost::shared_ptr< Function<TSet> > v; +} + : "singular" LPAREN + v = symset_operator [scope] + RPAREN + { + ret.reset(new IsSingular(v, scope.tagset())); + } +; + // ---------------------------------------------------------------------------- // Debug printing: debug_print_operator diff --git a/libwccl/values/tset.h b/libwccl/values/tset.h index 11f227932f65bee65b6286af079562f93fd202cd..73ed1f3326d73633982f2a446b6709d70dd461f1 100644 --- a/libwccl/values/tset.h +++ b/libwccl/values/tset.h @@ -62,6 +62,10 @@ public: return tag_.is_null(); } + size_t size() const { + return tag_.pos_count() + PwrNlp::count_bits_set(tag_.get_values()); + } + /** * @return true if each tagset symbol from this set exists in the other set * (note that an empty set is a subset of anything) diff --git a/tests/data/singular.ccl b/tests/data/singular.ccl new file mode 100644 index 0000000000000000000000000000000000000000..2739598edbd9f292c265e3d1e127abf6f9bd200e --- /dev/null +++ b/tests/data/singular.ccl @@ -0,0 +1,49 @@ +--- +singular({sg,pl}) + +False +--- +singular({}) + +True +--- +singular({praet,subst}) + +False +--- +singular({subst,nom,sg,ter,imperf}) + +True +--- +singular(cas) + +False +--- +ambiguous({}) + +False +--- +ambiguous([]) + +False +--- +ambiguous({subst}) + +False +--- +ambiguous({subst,sg}) + +True +--- +ambiguous("aaa") + +False +--- +ambiguous(["a", "b"]) + +True +--- +ambiguous(cas) + +True +---