Skip to content
Snippets Groups Projects
Commit 3244f737 authored by Adam Radziszewski
Browse files

add property/token metadata getter function named prop

parent 27db878d
Branches
No related merge requests found
......@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8.0)
set(wccl_ver_major "0")
set(wccl_ver_minor "3")
set(wccl_ver_patch "0")
set(wccl_ver_patch "1")
set(LIBWCCL_VERSION "${wccl_ver_major}.${wccl_ver_minor}.${wccl_ver_patch}")
......
......@@ -70,6 +70,7 @@ SET(libwccl_STAT_SRC
ops/functions/strset/tolower.cpp
ops/functions/strset/toupper.cpp
ops/functions/strset/anninter.cpp
ops/functions/strset/propval.cpp
ops/functions/tset/agrfilter.cpp
ops/functions/tset/catfilter.cpp
ops/functions/tset/getsymbols.cpp
......
/*
Copyright (C) 2011 Adam Wardyński, Tomasz Śniatowski, Paweł Kędzia,
Adam Radziszewski, Bartosz Broda
Part of the WCCL project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE, COPYING.LESSER and COPYING files for more details.
*/
#include <libwccl/ops/functions/strset/propval.h>
#include <sstream>
#include <boost/foreach.hpp>
#include <libwccl/ops/functions/constant.h>
namespace Wccl {
/**
 * Builds the tagset-aware string representation of the operator in form of:
 * "prop(pos_expr_string, strset_expr_string)"
 *
 * Both operands are rendered through their own to_string(tagset) so that
 * the whole expression is formatted consistently with the given tagset;
 * the raw (tagset-less) form is produced by write_to() instead.
 *
 * @param tagset Tagset used to render the function name and both operands.
 * @returns String representation of the whole expression.
 */
std::string PropVal::to_string(const Corpus2::Tagset& tagset) const
{
    // Output-only stream: nothing is ever read back from it.
    std::ostringstream out;
    out << name(tagset) << "("
        << pos_expr_->to_string(tagset) << ", "
        << strset_expr_->to_string(tagset)
        << ")";
    return out.str();
}
/**
 * Writes the raw (tagset-less) representation of the operator to the given
 * stream, in form of "prop(pos_expr, strset_expr)", where both operands are
 * written using their own stream output operators.
 *
 * @param os Stream to write to.
 * @returns The stream that was written to.
 */
std::ostream& PropVal::write_to(std::ostream& os) const
{
    os << raw_name();
    os << "(";
    os << *pos_expr_;
    os << ", ";
    os << *strset_expr_;
    os << ")";
    return os;
}
/**
 * Evaluates the operator: gathers the metadata values stored under the
 * requested keys at the token selected by the position expression.
 *
 * The default (empty) string set is returned when the position lies outside
 * the sentence or when the token carries no metadata object. Keys that are
 * not present in the metadata are skipped without raising an error.
 *
 * @param context Execution context providing the sentence and variables.
 * @returns String set with the values found for the requested keys.
 */
PropVal::BaseRetValPtr PropVal::apply_internal(const FunExecContext& context) const
{
    const boost::shared_ptr<const Position> where = pos_expr_->apply(context);
    const SentenceContext& sentence = context.sentence_context();

    // Metadata is looked up only for positions inside the sentence;
    // for any other position the pointer simply stays null.
    boost::shared_ptr<Corpus2::TokenMetaData> meta;
    if (!sentence.is_outside(*where)) {
        meta = sentence.at(*where)->get_metadata();
    }
    if (!meta) {
        // Either out of sentence boundaries or nothing assigned to the
        // token: answer with an empty set.
        return detail::DefaultFunction<StrSet>()->apply(context);
    }

    // A metadata container exists: prepare the result set, then evaluate
    // the key expression (only now, as in the no-metadata case it is
    // never evaluated).
    boost::shared_ptr<StrSet> values(new StrSet());
    const boost::shared_ptr<const StrSet> wanted = strset_expr_->apply(context);

    BOOST_FOREACH (const UnicodeString& key, wanted->contents()) {
        const std::string utf8_key(PwrNlp::to_utf8(key));
        if (!meta->has_attribute(utf8_key)) {
            // Unknown keys are silently ignored.
            continue;
        }
        values->insert_utf8(meta->get_attribute(utf8_key));
    }
    return values;
}
} /* end ns Wccl */
/*
Copyright (C) 2011 Adam Wardyński, Tomasz Śniatowski, Paweł Kędzia,
Adam Radziszewski, Bartosz Broda
Part of the WCCL project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE, COPYING.LESSER and COPYING files for more details.
*/
#ifndef LIBWCCL_OPS_FUNCTIONS_STRSET_PROPVAL_H
#define LIBWCCL_OPS_FUNCTIONS_STRSET_PROPVAL_H
#include <libwccl/values/position.h>
#include <libwccl/values/strset.h>
#include <libwccl/ops/function.h>
namespace Wccl {
/**
* Operator that takes a position and a set of strings denoting property keys
* and returns the union of property values obtained via the given keys from
* the metadata assigned to the token occupying the given position.
* If the position is outside the sentence boundaries, an empty set is
* returned. Also, if some of the given keys are not present in the metadata
* (or no metadata is present at all at the given position), no error is
* raised and the keys are silently ignored. This may result in an empty
* value set being returned.
*/
class PropVal : public Function<StrSet>
{
public:
// Shared pointer to the expression yielding the position to inspect.
typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
// Shared pointer to the expression yielding the set of property keys.
typedef boost::shared_ptr<Function<StrSet> > StrSetFunctionPtr;
/**
* Creates the operator from its two operand expressions.
* Both pointers are checked with BOOST_ASSERT to be non-null.
* @param pos_expr Expression evaluating to the token position.
* @param strset_expr Expression evaluating to the set of property keys.
*/
PropVal(const PosFunctionPtr& pos_expr,
const StrSetFunctionPtr& strset_expr)
: pos_expr_(pos_expr),
strset_expr_(strset_expr)
{
BOOST_ASSERT(pos_expr_);
BOOST_ASSERT(strset_expr_);
}
/**
* String representation of the operator in form of:
* "prop(pos_expr_string, strset_expr_string)"
*/
std::string to_string(const Corpus2::Tagset& tagset) const;
/**
* @returns Name of the function: "prop"
*/
std::string raw_name() const {
return "prop";
}
protected:
// Operand expressions; fixed at construction time.
const PosFunctionPtr pos_expr_;
const StrSetFunctionPtr strset_expr_;
/**
* Get a string set that aggregates all property values retrieved from
* the given position using the given key set.
*/
BaseRetValPtr apply_internal(const FunExecContext& context) const;
/**
* Writes raw string representation of the operator in form of:
* "prop(pos_expr_string, strset_expr_raw_string)"
* @note This version doesn't require tagset, but may be incomplete and/or
* contain internal info.
* @returns Stream written to.
*/
std::ostream& write_to(std::ostream& ostream) const;
};
} /* end ns Wccl */
#endif // LIBWCCL_OPS_FUNCTIONS_STRSET_PROPVAL_H
......@@ -55,6 +55,7 @@ header {
#include <libwccl/ops/functions/strset/tolower.h>
#include <libwccl/ops/functions/strset/getlemmas.h>
#include <libwccl/ops/functions/strset/lextranslator.h>
#include <libwccl/ops/functions/strset/propval.h>
#include <libwccl/ops/functions/strset/anninter.h>
#include <libwccl/ops/functions/tset/agrfilter.h>
......@@ -1046,6 +1047,7 @@ strset_operator [ParsingScope& scope]
returns [boost::shared_ptr<Function<StrSet> > ret]
: ret = strset_orth [scope]
| ret = strset_base [scope]
| ret = strset_prop [scope]
| ret = strset_lower [scope]
| ret = strset_upper [scope]
| ret = strset_affix [scope]
......@@ -1120,6 +1122,23 @@ strset_base
}
;
// ----------------------------------------------------------------------------
// Token-level property value operator.
// Syntax: prop(<position operator>, <string set operator>)
// The first operand selects the token, the second one gives the property
// keys; the rule wraps both parsed operands in a PropVal function object.
strset_prop
[ParsingScope& scope]
returns [boost::shared_ptr<Function<StrSet> > ret]
{
boost::shared_ptr<Function<Position> > pos;
boost::shared_ptr<Function<StrSet> > keys;
}
: "prop" LPAREN
pos = position_operator [scope] COMMA
keys = strset_operator [scope]
RPAREN {
ret.reset(new PropVal(pos, keys));
}
;
// ----------------------------------------------------------------------------
// Lower operator.
strset_lower
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment