diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 1a0736a68abab08640ba48ee8977cff720d66599..1aaf30cf1e6236d36a69535848d81c0973cec811 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -45,6 +45,7 @@ SET(libwccl_STAT_SRC
 	ops/functions/bool/predicates/and.cpp
 	ops/functions/bool/predicates/ann.cpp
 	ops/functions/bool/predicates/annsub.cpp
+	ops/functions/bool/predicates/anntoken.cpp
 	ops/functions/bool/predicates/debug.cpp
 	ops/functions/bool/predicates/isinside.cpp
 	ops/functions/bool/predicates/isoutside.cpp
diff --git a/libwccl/ops/functions/bool/predicates/anntoken.cpp b/libwccl/ops/functions/bool/predicates/anntoken.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9f2adc994efd2142d85db00de8d0e79eed5a5fd3
--- /dev/null
+++ b/libwccl/ops/functions/bool/predicates/anntoken.cpp
@@ -0,0 +1,98 @@
+/*
+    Copyright (C) 2011 Adam Wardyński, Tomasz Śniatowski, Paweł Kędzia,
+    Adam Radziszewski, Bartosz Broda
+    Part of the WCCL project
+
+    This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+    This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.
+
+    See the LICENSE and COPYING files for more details.
+*/
+
+#include <libwccl/ops/functions/bool/predicates/anntoken.h>
+#include <libcorpus2/ann/annotatedsentence.h>
+
+#include <sstream>
+
+namespace Wccl {
+
+/**
+ * Checks whether the token at the evaluated position belongs to an
+ * annotation in the channel named by chan_name_.
+ *
+ * @param context Execution context; supplies the sentence and is passed
+ *        to the position expression.
+ * @returns Predicate::False if the position is outside the sentence, if the
+ *          sentence has no channel called chan_name_, or if the channel
+ *          reports segment 0 at the position; Predicate::True otherwise.
+ * @throws InvalidArgument if the sentence is not an annotated sentence.
+ */
+AnnToken::BaseRetValPtr AnnToken::apply_internal(const FunExecContext& context) const
+{
+	boost::shared_ptr<Corpus2::AnnotatedSentence> as
+		= boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(
+			context.sentence_context().get_sentence_ptr());
+	if (!as) {
+		throw InvalidArgument("context", "Operator needs an annotated sentence.");
+	}
+
+	const boost::shared_ptr<const Position>& pos = pos_expr_->apply(context);
+	const SentenceContext& sc = context.sentence_context();
+	// return False if out of bounds
+	if (sc.is_outside(*pos)) {
+		return Predicate::False(context);
+	}
+	// return False if no such channel in sent
+	if (!as->has_channel(chan_name_)) {
+		return Predicate::False(context);
+	}
+	// channel exists, position in range, check if any annot at pos
+	int abs_idx = sc.get_abs_position(*pos);
+
+	if (as->get_channel(chan_name_).get_segment_at(abs_idx) == 0) {
+		return Predicate::False(context);
+	}
+	else {
+		return Predicate::True(context);
+	}
+}
+
+/**
+ * Builds the string form of the predicate:
+ * raw_name(<position expression>, "<channel name>").
+ *
+ * NOTE(review): the tagset is not used here; the position expression is
+ * streamed the same way as in write_to() -- confirm that is intended.
+ *
+ * @returns String representation of the predicate.
+ */
+std::string AnnToken::to_string(const Corpus2::Tagset& /* tagset */) const
+{
+	std::ostringstream ostream;
+	// a single ", " separates the two arguments
+	ostream << raw_name() << "(" << *pos_expr_
+		<< ", \"" << chan_name_ << "\")";
+	return ostream.str();
+}
+
+/**
+ * Writes the same textual form as to_string() to the given stream.
+ *
+ * @param ostream Stream to write to.
+ * @returns The stream written to.
+ */
+std::ostream& AnnToken::write_to(std::ostream& ostream) const
+{
+	// a single ", " separates the two arguments
+	ostream << raw_name() << "(" << *pos_expr_
+		<< ", \"" << chan_name_ << "\")";
+	return ostream;
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/functions/bool/predicates/anntoken.h b/libwccl/ops/functions/bool/predicates/anntoken.h
new file mode 100644
index 0000000000000000000000000000000000000000..b162764ed7437ce7e788ce2f9fb1afc188710071
--- /dev/null
+++ b/libwccl/ops/functions/bool/predicates/anntoken.h
@@ -0,0 +1,76 @@
+/*
+    Copyright (C) 2011 Adam Wardyński, Tomasz Śniatowski, Paweł Kędzia,
+    Adam Radziszewski, Bartosz Broda
+    Part of the WCCL project
+
+    This program is 
free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+    This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.
+
+    See the LICENSE and COPYING files for more details.
+*/
+
+#ifndef LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_ANNTOKEN_H
+#define LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_ANNTOKEN_H
+
+#include <libwccl/ops/functions/bool/predicate.h>
+
+namespace Wccl {
+
+/**
+ * An annotation-checking predicate: checks whether the given position belongs
+ * to any annotation in the given channel. Its operator name is "isannpart".
+ */
+class AnnToken : public Predicate
+{
+public:
+	typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
+	AnnToken(
+		const PosFunctionPtr& pos_expr, // expression giving the position to test; asserted non-null
+		const std::string& annotation_name) // channel name; asserted non-empty
+		: pos_expr_(pos_expr),
+		  chan_name_(annotation_name)
+	{
+		BOOST_ASSERT(pos_expr_);
+		BOOST_ASSERT(!annotation_name.empty());
+	}
+
+	/**
+	 * @returns Name of the function ("isannpart")
+	 */
+	std::string raw_name() const {
+		return "isannpart";
+	}
+
+	/**
+	 * @returns String representation of the predicate
+	 */
+	std::string to_string(const Corpus2::Tagset& /* tagset */) const;
+
+protected:
+	const PosFunctionPtr pos_expr_; // position expression, set once in the constructor
+	const std::string chan_name_; // name of the annotation channel to check
+
+	/**
+	 * Returns whether the given position belongs to an annotation in
+	 * a channel of the given name.
+	 */
+	BaseRetValPtr apply_internal(const FunExecContext& context) const;
+
+	/**
+	 * Writes string representation of the predicate to
+	 * an output stream.
+	 * @returns Stream written to.
+	 * @note May be incomplete and/or contain internal info.
+	 */
+	std::ostream& write_to(std::ostream& ostream) const;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_FUNCTIONS_BOOL_PREDICATES_ANNTOKEN_H
diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g
index b78762979cb8ae21bbdcf639ffc5e4b76baea4b1..2e146abfe5bd63b6a54448ea311fc1cea065c217 100644
--- a/libwccl/parser/grammar.g
+++ b/libwccl/parser/grammar.g
@@ -47,6 +47,7 @@ header {
 	#include <libwccl/ops/functions/bool/predicates/strongagreement.h>
 	#include <libwccl/ops/functions/bool/predicates/annsub.h>
 	#include <libwccl/ops/functions/bool/predicates/ann.h>
+	#include <libwccl/ops/functions/bool/predicates/anntoken.h>
 
 	#include <libwccl/ops/functions/strset/affix.h>
 	#include <libwccl/ops/functions/strset/getorth.h>
@@ -1782,33 +1783,38 @@ bool_phrase_annotation
 {
 	boost::shared_ptr<Function<Position> > lpos, rpos;
 }
-	: "phrase" LPAREN
+	: "isannpart" LPAREN
 			lpos = position_operator [scope] COMMA n1: STRING
 		RPAREN {
-			// TODO
+			ret.reset(new AnnToken(lpos, token_ref_to_std_string(n1)));
 		}
-	| "phrase_beg" LPAREN
+	| "isannbeg" LPAREN
 			lpos = position_operator [scope] COMMA n2: STRING
 		RPAREN {
 			// TODO
 		}
-	| "phrase_end" LPAREN
+	| "isannend" LPAREN
 			lpos = position_operator [scope] COMMA n3: STRING
 		RPAREN {
 			// TODO
 		}
-	| "phrase_whole" LPAREN
-			lpos = position_operator [scope] COMMA
-			rpos = position_operator [scope] COMMA n4: STRING
+	| "isannhead" LPAREN
+			lpos = position_operator [scope] COMMA n4: STRING
 		RPAREN {
 			// TODO
 		}
-	| "phrase_pp" LPAREN
+	| "isannwhole" LPAREN
 			lpos = position_operator [scope] COMMA
 			rpos = position_operator [scope] COMMA n5: STRING
 		RPAREN {
 			// TODO
 		}
+	| "isannpp" LPAREN
+			lpos = position_operator [scope] COMMA
+			rpos = position_operator [scope] COMMA n6: STRING
+		RPAREN {
+			// TODO
+		}
 ;
 
 // ----------------------------------------------------------------------------
@@ -1820,14 +1826,14 @@ bool_phrase_iteration
 	boost::shared_ptr<Function<Position> > position;
 	boost::shared_ptr<VarGetter<Position> > var_position;
 }
-	: "lphrase" 
LPAREN + : "lanniter" LPAREN position = position_operator [scope] COMMA var_position = position_variable [scope.variables()] COMMA n1: STRING RPAREN { // TODO } - | "rphrase" LPAREN + | "ranniter" LPAREN position = position_operator [scope] COMMA var_position = position_variable [scope.variables()] COMMA n2: STRING