From 6cb1d99332d61edf1377d98a1570bafa4c9a1779 Mon Sep 17 00:00:00 2001 From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl> Date: Mon, 1 Oct 2012 13:55:41 +0200 Subject: [PATCH] forgotten anninter sources --- libwccl/ops/functions/strset/anninter.cpp | 78 +++++++++++++++++++++ libwccl/ops/functions/strset/anninter.h | 83 +++++++++++++++++++++++ 2 files changed, 161 insertions(+) create mode 100644 libwccl/ops/functions/strset/anninter.cpp create mode 100644 libwccl/ops/functions/strset/anninter.h diff --git a/libwccl/ops/functions/strset/anninter.cpp b/libwccl/ops/functions/strset/anninter.cpp new file mode 100644 index 0000000..adcd06f --- /dev/null +++ b/libwccl/ops/functions/strset/anninter.cpp @@ -0,0 +1,78 @@ +/* + Copyright (C) 2011 Adam Wardyński, Tomasz Śniatowski, Paweł Kędzia, + Adam Radziszewski, Bartosz Broda + Part of the WCCL project + + This program is free software; you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published by the Free +Software Foundation; either version 3 of the License, or (at your option) +any later version. + + This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. + + See the LICENSE, COPYING.LESSER and COPYING files for more details. 
+*/
+
+#include <libwccl/ops/functions/strset/anninter.h>
+#include <libcorpus2/ann/annotatedsentence.h>
+
+#include <libpwrutils/util.h>
+
+#include <boost/foreach.hpp>
+#include <boost/make_shared.hpp>
+
+#include <sstream>
+
+namespace Wccl {
+
+/**
+ * Evaluates the operator: of the channel names yielded by strset_expr_,
+ * keeps those that exist in the sentence and have an annotation covering
+ * the token at the position yielded by pos_expr_.
+ *
+ * @returns A new StrSet; empty when the position lies outside the sentence.
+ * @throws InvalidArgument when the sentence in the context is not
+ *         a Corpus2::AnnotatedSentence.
+ */
+AnnInter::BaseRetValPtr AnnInter::apply_internal(const FunExecContext& context) const
+{
+	boost::shared_ptr<Corpus2::AnnotatedSentence> as
+		= boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(
+			context.sentence_context().get_sentence_ptr());
+	if (!as) {
+		throw InvalidArgument("context", "Operator needs an annotated sentence.");
+	}
+
+	const boost::shared_ptr<const Position>& pos = pos_expr_->apply(context);
+	const SentenceContext& sc = context.sentence_context();
+	// the result starts out empty and is filled with matching channel names
+	boost::shared_ptr<StrSet> out_set = boost::make_shared<StrSet>();
+
+	// out of bounds: nothing can cross this position, return the empty set
+	if (sc.is_outside(*pos)) {
+		return out_set;
+	}
+	// resolved only after the bounds check, so it is never used for
+	// a position outside of the sentence
+	const int abs_idx = sc.get_abs_position(*pos);
+
+	// the set of channel names to consider
+	const boost::shared_ptr<const StrSet>& in_set = strset_expr_->apply(context);
+
+	BOOST_FOREACH (const UnicodeString& u_chan_name, in_set->contents()) {
+		const std::string chan_name(PwrNlp::to_utf8(u_chan_name));
+		// names of channels absent from the sentence are silently skipped
+		if (!as->has_channel(chan_name)) {
+			continue;
+		}
+		const Corpus2::AnnotationChannel& chan = as->get_channel(chan_name);
+		// segment index 0 is treated as "no annotation at this token"
+		if (chan.get_segment_at(abs_idx) != 0) {
+			out_set->insert(u_chan_name);
+		}
+	}
+
+	return out_set;
+}
+
+/**
+ * Tagset-aware string representation:
+ * "anninter(pos_expr_string, strset_expr_string)".
+ * The argument expressions are rendered with the same tagset rather than
+ * through their raw stream form.
+ */
+std::string AnnInter::to_string(const Corpus2::Tagset& tagset) const
+{
+	std::ostringstream ostream;
+	ostream << raw_name() << "(" << pos_expr_->to_string(tagset)
+		<< ", " << strset_expr_->to_string(tagset) << ")";
+	return ostream.str();
+}
+
+/**
+ * Raw (tagset-less) representation written to the given stream:
+ * "anninter(pos_expr_raw_string, strset_expr_raw_string)".
+ * @returns Stream written to.
+ */
+std::ostream& AnnInter::write_to(std::ostream& ostream) const
+{
+	// exactly one separator between the two arguments
+	ostream << raw_name() << "(" << *pos_expr_
+		<< ", " << *strset_expr_ << ")";
+	return ostream;
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/functions/strset/anninter.h b/libwccl/ops/functions/strset/anninter.h
new file mode 100644
index
0000000..ecbbee4 --- /dev/null +++ b/libwccl/ops/functions/strset/anninter.h @@ -0,0 +1,83 @@ +/* + Copyright (C) 2011 Adam Wardyński, Tomasz Śniatowski, Paweł Kędzia, + Adam Radziszewski, Bartosz Broda + Part of the WCCL project + + This program is free software; you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published by the Free +Software Foundation; either version 3 of the License, or (at your option) +any later version. + + This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. + + See the LICENSE, COPYING.LESSER and COPYING files for more details. +*/ + +#ifndef LIBWCCL_OPS_FUNCTIONS_STRSET_ANNINTER_H +#define LIBWCCL_OPS_FUNCTIONS_STRSET_ANNINTER_H + +#include <libwccl/values/position.h> +#include <libwccl/values/strset.h> +#include <libwccl/ops/function.h> + +namespace Wccl { + +/** + * Operator that takes a position and a set of strings designating + * channel names and returns a subset of the given string set that + * corresponds to those channels with any annotation crossing given + * position. 
+ */
+class AnnInter : public Function<StrSet>
+{
+public:
+	typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
+	typedef boost::shared_ptr<Function<StrSet> > StrSetFunctionPtr;
+
+	/**
+	 * @param pos_expr expression yielding the position to examine
+	 * @param strset_expr expression yielding the channel names to consider
+	 * Both pointers are asserted to be non-null.
+	 */
+	AnnInter(const PosFunctionPtr& pos_expr,
+			const StrSetFunctionPtr& strset_expr)
+		: pos_expr_(pos_expr),
+		  strset_expr_(strset_expr)
+	{
+		BOOST_ASSERT(pos_expr_);
+		BOOST_ASSERT(strset_expr_);
+	}
+
+	/**
+	 * String representation of the operator in form of:
+	 * "anninter(pos_expr_string, strset_expr_string)"
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+	/**
+	 * @returns Name of the function: "anninter"
+	 */
+	std::string raw_name() const {
+		return "anninter";
+	}
+
+protected:
+	/** Expression yielding the position to examine. */
+	const PosFunctionPtr pos_expr_;
+	/** Expression yielding the set of channel names to consider. */
+	const StrSetFunctionPtr strset_expr_;
+
+	/**
+	 * Checks the channel names given by the strset_expr and returns those
+	 * that have any annotation crossing the given position.
+	 */
+	BaseRetValPtr apply_internal(const FunExecContext& context) const;
+
+	/**
+	 * Writes raw string representation of the operator in form of:
+	 * "anninter(pos_expr_raw_string, strset_expr_raw_string)"
+	 * @note This version doesn't require tagset, but may be incomplete and/or
+	 * contain internal info.
+	 * @returns Stream written to.
+	 */
+	std::ostream& write_to(std::ostream& ostream) const;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_FUNCTIONS_STRSET_ANNINTER_H
-- 
GitLab