From 3244f737dde06289ccb6d893e7046dd6c949fc27 Mon Sep 17 00:00:00 2001
From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl>
Date: Thu, 14 Feb 2013 11:17:40 +0100
Subject: [PATCH] add property/token metadata getter function named prop

---
 CMakeLists.txt                           |  2 +-
 libwccl/CMakeLists.txt                   |  1 +
 libwccl/ops/functions/strset/propval.cpp | 73 ++++++++++++++++++++
 libwccl/ops/functions/strset/propval.h   | 86 ++++++++++++++++++++++++
 libwccl/parser/grammar.g                 | 19 ++++++
 5 files changed, 180 insertions(+), 1 deletion(-)
 create mode 100644 libwccl/ops/functions/strset/propval.cpp
 create mode 100644 libwccl/ops/functions/strset/propval.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 62edb94..028b4e2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8.0)
 
 set(wccl_ver_major "0")
 set(wccl_ver_minor "3")
-set(wccl_ver_patch "0")
+set(wccl_ver_patch "1")
 
 set(LIBWCCL_VERSION "${wccl_ver_major}.${wccl_ver_minor}.${wccl_ver_patch}")
 
diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 4160351..0eb4448 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -70,6 +70,7 @@ SET(libwccl_STAT_SRC
 	ops/functions/strset/tolower.cpp
 	ops/functions/strset/toupper.cpp
 	ops/functions/strset/anninter.cpp
+	ops/functions/strset/propval.cpp
 	ops/functions/tset/agrfilter.cpp
 	ops/functions/tset/catfilter.cpp
 	ops/functions/tset/getsymbols.cpp
diff --git a/libwccl/ops/functions/strset/propval.cpp b/libwccl/ops/functions/strset/propval.cpp
new file mode 100644
index 0000000..c0a4302
--- /dev/null
+++ b/libwccl/ops/functions/strset/propval.cpp
@@ -0,0 +1,73 @@
+/*
+    Copyright (C) 2011 Adam Wardyński, Tomasz Śniatowski, Paweł Kędzia,
+    Adam Radziszewski, Bartosz Broda
+    Part of the WCCL project
+
+    This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+    This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. 
+
+    See the LICENSE, COPYING.LESSER and COPYING files for more details.
+*/
+
+#include <libwccl/ops/functions/strset/propval.h>
+#include <boost/foreach.hpp>
+#include <libwccl/ops/functions/constant.h>
+
+namespace Wccl {
+
+std::string PropVal::to_string(const Corpus2::Tagset& tagset) const
+{
+	std::stringstream str; // assembles "<name>(<position expr>, <key-set expr>)"
+	str << name(tagset) << "("
+		<< pos_expr_->to_string(tagset) << ", " // tagset-aware, same as the key set below
+		<< strset_expr_->to_string(tagset)
+		<< ")";
+	return str.str();
+}
+
+std::ostream& PropVal::write_to(std::ostream& os) const
+{
+	return os // tagset-free form: both arguments are streamed via operator<<
+			<< raw_name()
+			<< "(" << *pos_expr_ << ", " << *strset_expr_ << ")";
+}
+
+PropVal::BaseRetValPtr PropVal::apply_internal(const FunExecContext& context) const
+{
+	// evaluate the position expression and check it against the sentence boundaries
+	const boost::shared_ptr<const Position>& pos = pos_expr_->apply(context);
+	const SentenceContext& sc = context.sentence_context();
+	if (sc.is_outside(*pos)) {
+		// outside the sentence: return the default (empty) string set
+		return detail::DefaultFunction<StrSet>()->apply(context);
+	}
+	// the position is inside the sentence;
+	// fetch the token's metadata pointer, which may be null
+	boost::shared_ptr<Corpus2::TokenMetaData> md = sc.at(*pos)->get_metadata();
+	if (!md) {
+		// no metadata assigned: return the default (empty) string set
+		return detail::DefaultFunction<StrSet>()->apply(context);
+	}
+	// a metadata object exists, though it may still lack the requested keys;
+	// create an empty output StrSet and evaluate the set of wanted keys
+	boost::shared_ptr<StrSet> ret_set =
+			boost::shared_ptr<StrSet>(new StrSet());
+	const boost::shared_ptr<const StrSet>& keyset =
+			strset_expr_->apply(context);
+	// gather values of the keys that are present; absent keys are skipped silently
+	BOOST_FOREACH (const UnicodeString& u_key, keyset->contents()) {
+		const std::string str_key(PwrNlp::to_utf8(u_key)); // attributes are looked up by UTF-8 key
+		if (md->has_attribute(str_key)) {
+			ret_set->insert_utf8(md->get_attribute(str_key));
+		}
+	}
+	return ret_set;
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/functions/strset/propval.h b/libwccl/ops/functions/strset/propval.h
new file mode 100644
index 0000000..d40a771
--- /dev/null
+++ b/libwccl/ops/functions/strset/propval.h
@@ -0,0 +1,86 @@
+/*
+    Copyright (C) 2011 Adam Wardyński, Tomasz Śniatowski, Paweł Kędzia,
+    Adam Radziszewski, Bartosz Broda
+    Part of the WCCL project
+
+    This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+    This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. 
+
+    See the LICENSE, COPYING.LESSER and COPYING files for more details.
+*/
+
+#ifndef LIBWCCL_OPS_FUNCTIONS_STRSET_PROPVAL_H
+#define LIBWCCL_OPS_FUNCTIONS_STRSET_PROPVAL_H
+
+#include <libwccl/values/position.h>
+#include <libwccl/values/strset.h>
+#include <libwccl/ops/function.h>
+
+namespace Wccl {
+
+/**
+ * Operator that takes a position and a set of strings denoting property keys
+ * and returns the union of property values obtained via the given keys from
+ * the metadata assigned to the token occupying the given position.
+ * If the position is outside sentence boundaries, an empty set is returned.
+ * Also, if some of the given keys are not present in the metadata (or there is
+ * no metadata at all at the given position), no error is raised and such keys
+ * are silently ignored. This may result in an empty value set being returned.
+ */
+class PropVal : public Function<StrSet>
+{
+public:
+	typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
+	typedef boost::shared_ptr<Function<StrSet> > StrSetFunctionPtr;
+	
+	PropVal(const PosFunctionPtr& pos_expr,
+			const StrSetFunctionPtr& strset_expr)
+		: pos_expr_(pos_expr),
+		  strset_expr_(strset_expr)
+	{
+		BOOST_ASSERT(pos_expr_);
+		BOOST_ASSERT(strset_expr_);
+	}
+
+	/**
+	 * String representation of the operator in form of:
+	 * "prop(pos_expr_string, strset_expr_string)"
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+	/**
+	 * @returns Name of the function: "prop"
+	 */
+	std::string raw_name() const {
+		return "prop";
+	}
+
+protected:
+	const PosFunctionPtr pos_expr_;
+	const StrSetFunctionPtr strset_expr_;
+
+	/**
+	 * Get a string set that aggregates all property values retrieved from
+	 * the given position using the given key set.
+	 */
+	BaseRetValPtr apply_internal(const FunExecContext& context) const;
+	
+	/**
+	 * Writes raw string representation of the operator in form of:
+	 * "prop(pos_expr_raw_string, strset_expr_raw_string)"
+	 * @note This version doesn't require tagset, but may be incomplete and/or
+	 * contain internal info.
+	 * @returns Stream written to.
+	 */
+	std::ostream& write_to(std::ostream& ostream) const;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_FUNCTIONS_STRSET_PROPVAL_H
diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g
index c1dd8f3..52d560d 100644
--- a/libwccl/parser/grammar.g
+++ b/libwccl/parser/grammar.g
@@ -55,6 +55,7 @@ header {
 	#include <libwccl/ops/functions/strset/tolower.h>
 	#include <libwccl/ops/functions/strset/getlemmas.h>
 	#include <libwccl/ops/functions/strset/lextranslator.h>
+	#include <libwccl/ops/functions/strset/propval.h>
 	#include <libwccl/ops/functions/strset/anninter.h>
 	
 	#include <libwccl/ops/functions/tset/agrfilter.h>
@@ -1046,6 +1047,7 @@ strset_operator [ParsingScope& scope]
 	returns [boost::shared_ptr<Function<StrSet> > ret]
 	: ret = strset_orth      [scope] 
 	| ret = strset_base      [scope]
+	| ret = strset_prop      [scope]
 	| ret = strset_lower     [scope] 
 	| ret = strset_upper     [scope]
 	| ret = strset_affix     [scope] 
@@ -1120,6 +1122,23 @@ strset_base
 	}
 ;
 
+// ----------------------------------------------------------------------------
+// Token property (metadata) value operator: prop(position, key string set).
+strset_prop
+	[ParsingScope& scope]
+	returns [boost::shared_ptr<Function<StrSet> > ret]
+{
+	boost::shared_ptr<Function<Position> > pos;
+	boost::shared_ptr<Function<StrSet> > keys;
+}
+	: "prop" LPAREN 
+			pos = position_operator [scope] COMMA
+			keys = strset_operator [scope]
+	RPAREN {
+		ret.reset(new PropVal(pos, keys));
+	}
+;
+
 // ----------------------------------------------------------------------------
 // Lower operator.
 strset_lower 
-- 
GitLab