diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt index c501eb5382eeed6cef1a127464d1c559f8ad7793..fd6bd63180a43b86cf917ce70366707e9a424eb7 100644 --- a/libwccl/CMakeLists.txt +++ b/libwccl/CMakeLists.txt @@ -47,6 +47,7 @@ SET(libwccl_STAT_SRC ops/functions/strset/getorth.cpp ops/functions/strset/tolower.cpp ops/functions/strset/toupper.cpp + ops/functions/tset/catfilter.cpp ops/functions/tset/getsymbols.cpp ops/functions/tset/getsymbolsinrange.cpp parser/grammar.g diff --git a/libwccl/ops/functions/tset/catfilter.cpp b/libwccl/ops/functions/tset/catfilter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..76fe1f40fa33136269cad718ae237d6b84cdb610 --- /dev/null +++ b/libwccl/ops/functions/tset/catfilter.cpp @@ -0,0 +1,43 @@ +#include <libwccl/ops/functions/tset/catfilter.h> +#include <libwccl/ops/functions/constant.h> + +namespace Wccl { + +std::string CatFilter::to_string(const Corpus2::Tagset& tagset) const +{ + std::ostringstream os; + os << name(tagset) << "(" + << pos_expr_->to_string(tagset) << ", " + << tagset.get_attribute_name(selector_.get_values()) << ", " + << tagset.get_attribute_name(mask_.get_values()) << ")"; + return os.str(); +} + +std::ostream& CatFilter::write_to(std::ostream& os) const +{ + return os << raw_name() << "(" + << *pos_expr_ << ", " + << selector_.raw_dump() << ", " + << mask_.raw_dump() << ")"; +} + +CatFilter::BaseRetValPtr CatFilter::apply_internal(const FunExecContext& context) const +{ + const boost::shared_ptr<const Position>& pos = pos_expr_->apply(context); + const SentenceContext& sc = context.sentence_context(); + if (pos->is_outside(sc)) { /* position is outside the sentence: defer to the default TSet function */ + return detail::DefaultFunction<TSet>()->apply(context); + } + + boost::shared_ptr<TSet> tset = boost::make_shared<TSet>(); + const Corpus2::Token* token = sc.at(*pos); + foreach (const Corpus2::Lexeme& lexeme, token->lexemes()) { + if (!lexeme.tag().get_masked(selector_).is_null()) { /* lexeme is kept when its tag, masked by the selector, is non-null */ + tset->combine_with(lexeme.tag()); + } + } + 
+ tset->tag_ref().mask_with(mask_); /* apply the mask to the tag combined from the kept lexemes */ + return tset; +} + +} /* end ns Wccl */ diff --git a/libwccl/ops/functions/tset/catfilter.h b/libwccl/ops/functions/tset/catfilter.h new file mode 100644 index 0000000000000000000000000000000000000000..d03cb6edca7164fe8abc6aca72daab15da8e41eb --- /dev/null +++ b/libwccl/ops/functions/tset/catfilter.h @@ -0,0 +1,68 @@ +#ifndef LIBWCCL_OPS_FUNCTIONS_TSET_CATFILTER_H +#define LIBWCCL_OPS_FUNCTIONS_TSET_CATFILTER_H + +#include <libwccl/values/tset.h> +#include <libwccl/values/position.h> +#include <libwccl/ops/function.h> + +namespace Wccl { + +/** + * Operator that gets tagset symbols from a token at a given position, + * but only from its tags that match the selector, narrowed by the mask. + */ +class CatFilter : public Function<TSet> +{ +public: + typedef boost::shared_ptr<Function<Position> > PosFunctionPtr; + + CatFilter(const PosFunctionPtr& pos_expr, const Corpus2::Tag& selector, const Corpus2::Tag& mask) + : selector_(selector), mask_(mask), pos_expr_(pos_expr) + { + BOOST_ASSERT(pos_expr_); /* the position expression must not be null */ + } + + /** + * @returns String representation of the function in the form of: + * "catflt(pos_expr, selector, mask)" + */ + std::string to_string(const Corpus2::Tagset& tagset) const; + + /** + * @returns Name of the operator, "catflt". + */ + std::string raw_name() const { + return "catflt"; + } + +protected: + Corpus2::Tag selector_; /* a lexeme is taken into account when its tag masked by this is non-null */ + Corpus2::Tag mask_; /* applied to the tag combined from the selected lexemes */ + + const PosFunctionPtr pos_expr_; /* yields the position of the token to inspect */ + + /** + * Gets a position from the argument expression, then gets the + * word at that position from the Sentence in the SentenceContext, + * then selects tags for the word that match the selector, + * then gets the tagset symbols matching the mask. + * + * @returns A tagset symbol set of the word, but only for the + * selected tags, if position pointed to lies within boundaries + * of the Sentence. Otherwise the value of the default TSet function (presumably an empty TSet). 
+ */ + BaseRetValPtr apply_internal(const FunExecContext& context) const; + + /** + * Writes raw string representation of the function in the form of: + * "catflt(pos_expr_raw_string, raw_selector, raw_mask)" + * @note This version does not require a tagset, but will be incomplete + * and/or contain internal info. + * @returns Stream written to. + */ + std::ostream& write_to(std::ostream& ostream) const; +}; + +} /* end ns Wccl */ + +#endif // LIBWCCL_OPS_FUNCTIONS_TSET_CATFILTER_H