From 3244f737dde06289ccb6d893e7046dd6c949fc27 Mon Sep 17 00:00:00 2001 From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl> Date: Thu, 14 Feb 2013 11:17:40 +0100 Subject: [PATCH] add property/token metadata getter function named prop --- CMakeLists.txt | 2 +- libwccl/CMakeLists.txt | 1 + libwccl/ops/functions/strset/propval.cpp | 73 ++++++++++++++++++++ libwccl/ops/functions/strset/propval.h | 86 ++++++++++++++++++++++++ libwccl/parser/grammar.g | 19 ++++++ 5 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 libwccl/ops/functions/strset/propval.cpp create mode 100644 libwccl/ops/functions/strset/propval.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 62edb94..028b4e2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8.0) set(wccl_ver_major "0") set(wccl_ver_minor "3") -set(wccl_ver_patch "0") +set(wccl_ver_patch "1") set(LIBWCCL_VERSION "${wccl_ver_major}.${wccl_ver_minor}.${wccl_ver_patch}") diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt index 4160351..0eb4448 100644 --- a/libwccl/CMakeLists.txt +++ b/libwccl/CMakeLists.txt @@ -70,6 +70,7 @@ SET(libwccl_STAT_SRC ops/functions/strset/tolower.cpp ops/functions/strset/toupper.cpp ops/functions/strset/anninter.cpp + ops/functions/strset/propval.cpp ops/functions/tset/agrfilter.cpp ops/functions/tset/catfilter.cpp ops/functions/tset/getsymbols.cpp diff --git a/libwccl/ops/functions/strset/propval.cpp b/libwccl/ops/functions/strset/propval.cpp new file mode 100644 index 0000000..c0a4302 --- /dev/null +++ b/libwccl/ops/functions/strset/propval.cpp @@ -0,0 +1,73 @@ +/* + Copyright (C) 2011 Adam Wardyński, Tomasz Śniatowski, Paweł Kędzia, + Adam Radziszewski, Bartosz Broda + Part of the WCCL project + + This program is free software; you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published by the Free +Software Foundation; either version 3 of the License, or (at your option) +any 
later version. + + This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. + + See the LICENSE, COPYING.LESSER and COPYING files for more details. +*/ + +#include <libwccl/ops/functions/strset/propval.h> +#include <boost/foreach.hpp> +#include <libwccl/ops/functions/constant.h> + +namespace Wccl { + +std::string PropVal::to_string(const Corpus2::Tagset& tagset) const +{ + std::stringstream str; + str << name(tagset) << "(" + << *pos_expr_ << ", " + << strset_expr_->to_string(tagset) + << ")"; + return str.str(); +} + +std::ostream& PropVal::write_to(std::ostream& os) const +{ + return os + << raw_name() + << "(" << *pos_expr_ << ", " << *strset_expr_ << ")"; +} + +PropVal::BaseRetValPtr PropVal::apply_internal(const FunExecContext& context) const +{ + // evaluate the position and check if it lies within sentence boundaries + const boost::shared_ptr<const Position>& pos = pos_expr_->apply(context); + const SentenceContext& sc = context.sentence_context(); + if (sc.is_outside(*pos)) { + // outside, return an empty set + return detail::DefaultFunction<StrSet>()->apply(context); + } + // inside the sentence + // check if the token at that position has a metadata object at all + boost::shared_ptr<Corpus2::TokenMetaData> md = sc.at(*pos)->get_metadata(); + if (!md) { + // no metadata assigned, return an empty set + return detail::DefaultFunction<StrSet>()->apply(context); + } + // a metadata object exists (it may still lack some or all wanted keys) + // create an empty output StrSet and evaluate the set of wanted keys + boost::shared_ptr<StrSet> ret_set = + boost::shared_ptr<StrSet>(new StrSet()); + const boost::shared_ptr<const StrSet>& keyset = + strset_expr_->apply(context); + // gather values of the wanted keys that are present; absent keys are skipped + BOOST_FOREACH (const UnicodeString& u_key, keyset->contents()) { + const std::string str_key(PwrNlp::to_utf8(u_key)); + if (md->has_attribute(str_key)) { + ret_set->insert_utf8(md->get_attribute(str_key)); + } + } + return 
ret_set; +} + +} /* end ns Wccl */ diff --git a/libwccl/ops/functions/strset/propval.h b/libwccl/ops/functions/strset/propval.h new file mode 100644 index 0000000..d40a771 --- /dev/null +++ b/libwccl/ops/functions/strset/propval.h @@ -0,0 +1,86 @@ +/* + Copyright (C) 2011 Adam Wardyński, Tomasz Śniatowski, Paweł Kędzia, + Adam Radziszewski, Bartosz Broda + Part of the WCCL project + + This program is free software; you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published by the Free +Software Foundation; either version 3 of the License, or (at your option) +any later version. + + This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. + + See the LICENSE, COPYING.LESSER and COPYING files for more details. +*/ + +#ifndef LIBWCCL_OPS_FUNCTIONS_STRSET_PROPVAL_H +#define LIBWCCL_OPS_FUNCTIONS_STRSET_PROPVAL_H + +#include <libwccl/values/position.h> +#include <libwccl/values/strset.h> +#include <libwccl/ops/function.h> + +namespace Wccl { + +/** + * Operator that takes a position and a set of strings denoting property keys + * and returns the union of property values obtained via the given keys from the + * metadata assigned to the token occupying the given position. + * If the position is outside sentence boundaries, an empty set is returned. Also, + * if some of the given keys are not present in the metadata (or no metadata + * is present at all at the given position), no error is raised but the keys + * are silently ignored. This may result in an empty set being returned. 
+ */ +class PropVal : public Function<StrSet> +{ +public: + typedef boost::shared_ptr<Function<Position> > PosFunctionPtr; + typedef boost::shared_ptr<Function<StrSet> > StrSetFunctionPtr; + + PropVal(const PosFunctionPtr& pos_expr, + const StrSetFunctionPtr& strset_expr) + : pos_expr_(pos_expr), + strset_expr_(strset_expr) + { + BOOST_ASSERT(pos_expr_); + BOOST_ASSERT(strset_expr_); + } + + /** + * String representation of the operator in the form of: + * "prop(pos_expr_string, strset_expr_string)" + */ + std::string to_string(const Corpus2::Tagset& tagset) const; + + /** + * @returns Name of the function: "prop" + */ + std::string raw_name() const { + return "prop"; + } + +protected: + const PosFunctionPtr pos_expr_; + const StrSetFunctionPtr strset_expr_; + + /** + * Gets a string set that aggregates all property values retrieved from + * the token at the given position using the given key set. + */ + BaseRetValPtr apply_internal(const FunExecContext& context) const; + + /** + * Writes a raw string representation of the operator in the form of: + * "prop(pos_expr_string, strset_expr_raw_string)" + * @note This version doesn't require a tagset, but may be incomplete and/or + * contain internal info. + * @returns Stream written to. 
+ */ + std::ostream& write_to(std::ostream& ostream) const; +}; + +} /* end ns Wccl */ + +#endif // LIBWCCL_OPS_FUNCTIONS_STRSET_PROPVAL_H diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index c1dd8f3..52d560d 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -55,6 +55,7 @@ header { #include <libwccl/ops/functions/strset/tolower.h> #include <libwccl/ops/functions/strset/getlemmas.h> #include <libwccl/ops/functions/strset/lextranslator.h> + #include <libwccl/ops/functions/strset/propval.h> #include <libwccl/ops/functions/strset/anninter.h> #include <libwccl/ops/functions/tset/agrfilter.h> @@ -1046,6 +1047,7 @@ strset_operator [ParsingScope& scope] returns [boost::shared_ptr<Function<StrSet> > ret] : ret = strset_orth [scope] | ret = strset_base [scope] + | ret = strset_prop [scope] | ret = strset_lower [scope] | ret = strset_upper [scope] | ret = strset_affix [scope] @@ -1120,6 +1122,23 @@ strset_base } ; +// ---------------------------------------------------------------------------- +// Token-level property value operator: "prop" with a position and a string set of keys. +strset_prop + [ParsingScope& scope] + returns [boost::shared_ptr<Function<StrSet> > ret] +{ + boost::shared_ptr<Function<Position> > pos; + boost::shared_ptr<Function<StrSet> > keys; +} + : "prop" LPAREN + pos = position_operator [scope] COMMA + keys = strset_operator [scope] + RPAREN { + ret.reset(new PropVal(pos, keys)); + } +; + // ---------------------------------------------------------------------------- // Lower operator. strset_lower -- GitLab