Skip to content
Snippets Groups Projects
Commit 6cb1d993 authored by Adam Radziszewski
Browse files

forgotten anninter sources

parent c3048d5c
No related merge requests found
/*
Copyright (C) 2011 Adam Wardyński, Tomasz Śniatowski, Paweł Kędzia,
Adam Radziszewski, Bartosz Broda
Part of the WCCL project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE, COPYING.LESSER and COPYING files for more details.
*/
#include <libwccl/ops/functions/strset/anninter.h>
#include <libcorpus2/ann/annotatedsentence.h>
#include <libpwrutils/util.h>
namespace Wccl {
/**
 * Evaluates the operator: out of the channel names yielded by the string-set
 * argument, keeps those channels of the current sentence that have an
 * annotation at the position yielded by the position argument.
 *
 * @returns An empty set when the position lies outside the sentence;
 *          otherwise the subset of the given channel names described above.
 * @throws InvalidArgument when the sentence in the context is not a
 *         Corpus2::AnnotatedSentence.
 */
AnnInter::BaseRetValPtr AnnInter::apply_internal(const FunExecContext& context) const
{
	// The operator only makes sense on sentences that carry annotation channels.
	const boost::shared_ptr<Corpus2::AnnotatedSentence> annotated =
		boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(
			context.sentence_context().get_sentence_ptr());
	if (!annotated) {
		throw InvalidArgument("context", "Operator needs an annotated sentence.");
	}

	const boost::shared_ptr<const Position> position = pos_expr_->apply(context);
	const SentenceContext& sent_ctx = context.sentence_context();

	// The result starts out empty and is returned as-is for out-of-bounds positions.
	boost::shared_ptr<StrSet> result = boost::make_shared<StrSet>();
	const int abs_idx = sent_ctx.get_abs_position(*position);
	if (sent_ctx.is_outside(*position)) {
		return result;
	}

	// Channel names the caller asked us to consider.
	const boost::shared_ptr<const StrSet> requested = strset_expr_->apply(context);
	BOOST_FOREACH (const UnicodeString& u_name, requested->contents()) {
		const std::string name(PwrNlp::to_utf8(u_name));
		// Names of channels missing from the sentence are skipped; a non-zero
		// segment index at the position is treated as "annotation present".
		if (annotated->has_channel(name)
				&& annotated->get_channel(name).get_segment_at(abs_idx) != 0) {
			result->insert(u_name);
		}
	}
	return result;
}
/**
 * Builds the string representation of the operator in the form
 * "anninter(pos_expr, strset_expr)".
 *
 * NOTE(review): the tagset argument is not used here; both sub-expressions
 * are rendered through their stream operator. Confirm whether they should be
 * rendered with the tagset instead.
 */
std::string AnnInter::to_string(const Corpus2::Tagset&) const
{
	std::ostringstream repr;
	repr << raw_name();
	repr << "(" << *pos_expr_;
	repr << ", " << *strset_expr_;
	repr << ")";
	return repr.str();
}
/**
 * Writes the raw string representation of the operator in the form
 * "anninter(pos_expr_raw_string, strset_expr_raw_string)".
 *
 * @param ostream Stream to write to.
 * @returns The stream written to.
 */
std::ostream& AnnInter::write_to(std::ostream& ostream) const
{
	// Exactly one ", " separates the two arguments, matching to_string().
	ostream << raw_name() << "(" << *pos_expr_
		<< ", " << *strset_expr_ << ")";
	return ostream;
}
} /* end ns Wccl */
/*
Copyright (C) 2011 Adam Wardyński, Tomasz Śniatowski, Paweł Kędzia,
Adam Radziszewski, Bartosz Broda
Part of the WCCL project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE, COPYING.LESSER and COPYING files for more details.
*/
#ifndef LIBWCCL_OPS_FUNCTIONS_STRSET_ANNINTER_H
#define LIBWCCL_OPS_FUNCTIONS_STRSET_ANNINTER_H
#include <libwccl/values/position.h>
#include <libwccl/values/strset.h>
#include <libwccl/ops/function.h>
namespace Wccl {
/**
* Operator that takes a position and a set of strings designating
* channel names and returns a subset of the given string set that
* corresponds to those channels with any annotation crossing given
* position.
*/
class AnnInter : public Function<StrSet>
{
public:
// Shared pointer to the sub-expression yielding the position to inspect.
typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
// Shared pointer to the sub-expression yielding the channel names to check.
typedef boost::shared_ptr<Function<StrSet> > StrSetFunctionPtr;
/**
* Creates the operator from its two argument expressions.
* Both pointers are asserted (BOOST_ASSERT) to be non-null.
* @param pos_expr Expression yielding the position to inspect.
* @param strset_expr Expression yielding the set of channel names.
*/
AnnInter(const PosFunctionPtr& pos_expr,
const StrSetFunctionPtr& strset_expr)
: pos_expr_(pos_expr),
strset_expr_(strset_expr)
{
BOOST_ASSERT(pos_expr_);
BOOST_ASSERT(strset_expr_);
}
/**
* String representation of the operator in form of:
* "anninter(pos_expr_string, strset_expr_string)"
*/
std::string to_string(const Corpus2::Tagset& tagset) const;
/**
* @returns Name of the function: "anninter"
*/
std::string raw_name() const {
return "anninter";
}
protected:
// Argument expression yielding the position to inspect.
const PosFunctionPtr pos_expr_;
// Argument expression yielding the set of channel names to check.
const StrSetFunctionPtr strset_expr_;
/**
* Checks the channel names given by the strset_expr. Returns those
* that have any annotation crossing the given position.
*/
BaseRetValPtr apply_internal(const FunExecContext& context) const;
/**
* Writes raw string representation of the operator in form of:
* "anninter(pos_expr_raw_string, strset_expr_raw_string)"
* @note This version doesn't require tagset, but may be incomplete and/or
* contain internal info.
* @returns Stream written to.
*/
std::ostream& write_to(std::ostream& ostream) const;
};
} /* end ns Wccl */
#endif // LIBWCCL_OPS_FUNCTIONS_STRSET_ANNINTER_H
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment