Skip to content
Snippets Groups Projects
getsymbolsinrange.h 2.52 KiB
#ifndef LIBWCCL_OPS_FUNCTIONS_TSET_GETSYMBOLSINRANGE_H
#define LIBWCCL_OPS_FUNCTIONS_TSET_GETSYMBOLSINRANGE_H

#include <libwccl/values/tset.h>
#include <libwccl/values/position.h>
#include <libwccl/ops/function.h>

namespace Wccl {

/**
 * Operator that gets tagset symbols from tokens in given range.
 */
class GetSymbolsInRange : public Function<TSet>
{
public:
	typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;

	GetSymbolsInRange(
			const Corpus2::Tag& mask,
			const PosFunctionPtr& range_begin_expr,
			const PosFunctionPtr& range_end_expr)
		: mask_(mask),
		  rbegin_expr_(range_begin_expr),
		  rend_expr_(range_end_expr)
	{
		BOOST_ASSERT(rbegin_expr_);
		BOOST_ASSERT(rend_expr_);
	}

	/**
	 * @returns String representation of the function in the form of:
	 * "range(tagset_symbol, range_begin_expr, range_end_expr)
	 */
	std::string to_string(const Corpus2::Tagset& tagset) const;

	/**
	 * @returns The operator name, "range"
	 */
	std::string raw_name() const {
		return "range";
	}

protected:
	Corpus2::Tag mask_;

	const PosFunctionPtr rbegin_expr_;
	const PosFunctionPtr rend_expr_;

	/**
	 * Gets positions for beginning and end of range we are
	 * interested in (from corresponding argument expressions).
	 * The range is trimmed to boundaries of the sentence we are working on.
	 * An empty set is returned if any of the positions points
	 * to nowhere, or when the range doesn't overlap with the sentence,
	 * or when supplied begin is actually after the supplied end.
	 * Otherwise we have a valid range, and a symbol set is returned,
	 * which is the sum of tagset symbols for words within the range
	 * (inclusive). The selection of symbols is based on the mask.
	 * The main intention is to supply masks that correspond
	 * directly to a single selected attribute, but the code accepts any
	 * valid mask (i.e. any combination of attributes and their values,
	 * and even includes the part-of-speech part).
	 * @returns A tagset symbol set for the words within range, given
	 * the mask, if the range is valid. An empty Tset otherwise.
	 */
	BaseRetValPtr apply_internal(const FunExecContext& context) const;

	/**
	 * Writes raw string representation of the function in the form of:
	 * "range(raw_tagset_symbol, range_begin_expr_raw, range_end_expr_raw)
	 * @note This version does not require tagset, but will be inclomplete
	 * and/or contain internal info.
	 * @returns The stream written to.
	 */
	std::ostream& write_to(std::ostream& ostream) const;
};

} /* end ns Wccl */

#endif // LIBWCCL_OPS_FUNCTIONS_TSET_GETSYMBOLSINRANGE_H