#include <libwccl/ops/functions/tset/getsymbolsinrange.h>
#include <libwccl/ops/functions/constant.h>
#include <sstream>

namespace Wccl {

// Builds the tagset-aware textual form of this operator:
//   <name>(<attribute name>, <range begin expr>, <range end expr>)
// The attribute name is looked up in the given tagset from the values
// of the mask this operator holds.
std::string GetSymbolsInRange::to_string(const Corpus2::Tagset& tagset) const
{
    std::ostringstream out;
    out << name(tagset) << "(";
    out << tagset.get_attribute_name(mask_.get_values()) << ", ";
    out << rbegin_expr_->to_string(tagset) << ", ";
    out << rend_expr_->to_string(tagset) << ")";
    return out.str();
}

// Builds the tagset-independent textual form of this operator:
//   <raw name>(<raw dump of the mask>, <range begin expr>, <range end expr>)
std::string GetSymbolsInRange::to_raw_string() const
{
    std::ostringstream out;
    out << raw_name() << "(";
    out << mask_.raw_dump() << ", ";
    out << rbegin_expr_->to_raw_string() << ", ";
    out << rend_expr_->to_raw_string() << ")";
    return out.str();
}

// Evaluates both range expressions in the given context, combines the tags
// of all lexemes of all tokens lying between the two resulting positions
// (inclusive) and returns that combination masked with mask_.
//
// The range is clipped to the sentence before it is walked. When either
// endpoint resolves to Position::Nowhere, or the clipped range is empty,
// the result of the default TSet function is returned instead.
GetSymbolsInRange::BaseRetValPtr GetSymbolsInRange::apply_internal(const FunExecContext& context) const
{
    const boost::shared_ptr<const Position>& begin_pos = rbegin_expr_->apply(context);
    const boost::shared_ptr<const Position>& end_pos = rend_expr_->apply(context);
    const SentenceContext& sentence = context.sentence_context();

    int first = sentence.get_abs_position(*begin_pos);
    int last = sentence.get_abs_position(*end_pos);

    // An endpoint that cannot be resolved makes the whole range empty.
    if ((first == Position::Nowhere) || (last == Position::Nowhere)) {
        return detail::DefaultFunction<TSet>()->apply(context);
    }

    // Clip both endpoints so that they do not stick out of the sentence.
    if (first < 0) {
        first = 0;
    }
    if (last >= sentence.size()) {
        last = sentence.size() - 1;
    }

    // Nothing left after clipping: the range did not overlap the sentence
    // at all (this is also what happens for an empty sentence), or the
    // begin lies after the end.
    if (first > last) {
        return detail::DefaultFunction<TSet>()->apply(context);
    }

    boost::shared_ptr<TSet> symbols = boost::make_shared<TSet>();
    for (int pos = first; pos <= last; ++pos) {
        const Corpus2::Token* tok = sentence.at(pos);
        foreach (const Corpus2::Lexeme& lex, tok->lexemes()) {
            symbols->combine_with(lex.tag());
        }
    }
    // Keep only the part of the combined tag selected by the mask.
    symbols->tag_ref().mask_with(mask_);
    return symbols;
}

} /* end ns Wccl */