Skip to content
Snippets Groups Projects
Commit aceed854 authored by Adam Wardynski's avatar Adam Wardynski
Browse files

CatFilter, returns tagset symbols for selected tags.

parent 4701b06c
No related merge requests found
......@@ -47,6 +47,7 @@ SET(libwccl_STAT_SRC
ops/functions/strset/getorth.cpp
ops/functions/strset/tolower.cpp
ops/functions/strset/toupper.cpp
ops/functions/tset/catfilter.cpp
ops/functions/tset/getsymbols.cpp
ops/functions/tset/getsymbolsinrange.cpp
parser/grammar.g
......
#include <libwccl/ops/functions/tset/catfilter.h>
#include <libwccl/ops/functions/constant.h>
namespace Wccl {
/**
 * Renders the operator using names taken from the given tagset, as
 * "<name>(<position expression>, <selector>, <mask>)".
 *
 * The selector and mask are printed as whatever attribute name the
 * tagset reports for their value bits.
 */
std::string CatFilter::to_string(const Corpus2::Tagset& tagset) const
{
	std::ostringstream out;
	// Operator name and opening bracket.
	out << name(tagset) << "(";
	// First argument: the position expression.
	out << pos_expr_->to_string(tagset) << ", ";
	// Second and third arguments: selector and mask.
	out << tagset.get_attribute_name(selector_.get_values()) << ", ";
	out << tagset.get_attribute_name(mask_.get_values()) << ")";
	return out.str();
}
/**
 * Writes the tagset-independent representation of the operator,
 * "<raw name>(<position expression>, <selector dump>, <mask dump>)",
 * to the given stream.
 *
 * @returns The stream that was written to.
 */
std::ostream& CatFilter::write_to(std::ostream& os) const
{
	os << raw_name() << "(" << *pos_expr_;
	os << ", " << selector_.raw_dump();
	os << ", " << mask_.raw_dump() << ")";
	return os;
}
/**
 * Evaluates the position expression and collects tagset symbols of the
 * token found there, restricted to lexemes picked by the selector.
 *
 * A lexeme is taken into account when its tag masked with the selector
 * is not null. Tags of all such lexemes are combined and the combined
 * tag is finally masked with the mask.
 *
 * @returns The filtered symbol set, or the result of the default TSet
 * function when the position lies outside of the sentence.
 */
CatFilter::BaseRetValPtr CatFilter::apply_internal(const FunExecContext& context) const
{
	const boost::shared_ptr<const Position>& position = pos_expr_->apply(context);
	const SentenceContext& sentence = context.sentence_context();
	if (position->is_outside(sentence)) {
		// No token to inspect: fall back to the default TSet value.
		return detail::DefaultFunction<TSet>()->apply(context);
	}

	const Corpus2::Token* word = sentence.at(*position);
	boost::shared_ptr<TSet> symbols = boost::make_shared<TSet>();
	foreach (const Corpus2::Lexeme& lex, word->lexemes()) {
		// Keep only lexemes whose tag shares something with the selector.
		const bool selected = !lex.tag().get_masked(selector_).is_null();
		if (selected) {
			symbols->combine_with(lex.tag());
		}
	}
	// Narrow the accumulated symbols down to the requested mask.
	symbols->tag_ref().mask_with(mask_);
	return symbols;
}
} /* end ns Wccl */
#ifndef LIBWCCL_OPS_FUNCTIONS_TSET_CATFILTER_H
#define LIBWCCL_OPS_FUNCTIONS_TSET_CATFILTER_H
#include <libwccl/values/tset.h>
#include <libwccl/values/position.h>
#include <libwccl/ops/function.h>
namespace Wccl {
/**
 * Operator that reads the token at a given position and returns its
 * tagset symbols, taking into account only tags from a specified
 * category (chosen by a selector) and keeping only symbols covered
 * by a mask.
 */
class CatFilter : public Function<TSet>
{
public:
	typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;

	/**
	 * @param pos_expr Expression yielding the position of the token to
	 *                 inspect; asserted to be non-null.
	 * @param selector Tag used to select which tags of the token count.
	 * @param mask     Tag used to mask the combined result.
	 */
	CatFilter(
		const PosFunctionPtr& pos_expr,
		const Corpus2::Tag& selector,
		const Corpus2::Tag& mask)
		: selector_(selector),
		  mask_(mask),
		  pos_expr_(pos_expr)
	{
		BOOST_ASSERT(pos_expr_);
	}

	/**
	 * @returns String representation of the function in the form of:
	 * "catflt(pos_expr, selector, mask)"
	 */
	std::string to_string(const Corpus2::Tagset& tagset) const;

	/**
	 * @returns Name of the operator, "catflt".
	 */
	std::string raw_name() const
	{
		return "catflt";
	}

protected:
	/**
	 * Evaluates the position expression, fetches the word at that
	 * position from the Sentence held by the SentenceContext, picks
	 * the word's tags that match the selector and extracts from them
	 * the tagset symbols matching the mask.
	 *
	 * @returns A tagset symbol set of the word, limited to the selected
	 * tags, if the position lies within boundaries of the Sentence.
	 * Empty TSet otherwise.
	 */
	BaseRetValPtr apply_internal(const FunExecContext& context) const;

	/**
	 * Writes raw string representation of the function in the form of:
	 * "catflt(pos_expr_raw_string, raw_selector, raw_mask)"
	 * @note This version does not require a tagset, but will be
	 * incomplete and/or contain internal info.
	 * @returns Stream written to.
	 */
	std::ostream& write_to(std::ostream& ostream) const;

	// Declared in this order on purpose: it is the order the
	// constructor's initializer list follows.
	Corpus2::Tag selector_;
	Corpus2::Tag mask_;
	const PosFunctionPtr pos_expr_;
};
} /* end ns Wccl */
#endif // LIBWCCL_OPS_FUNCTIONS_TSET_CATFILTER_H
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment