From b7d34d6187985ea72d9acae38674b6cac8e53e1e Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Tue, 30 Nov 2010 14:38:53 +0100
Subject: [PATCH] Add the tagset-symbol-getter operator (GetSymbols), bump
 required Corpus2 version to 1.0.2 since new features are needed.

---
 libwccl/CMakeLists.txt                    |  3 +-
 libwccl/ops/functions/tset/getsymbols.cpp | 42 +++++++++++++++++
 libwccl/ops/functions/tset/getsymbols.h   | 56 +++++++++++++++++++++++
 libwccl/values/tset.h                     |  9 ++++
 4 files changed, 109 insertions(+), 1 deletion(-)
 create mode 100644 libwccl/ops/functions/tset/getsymbols.cpp
 create mode 100644 libwccl/ops/functions/tset/getsymbols.h

diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 70729be..c8159dd 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -4,7 +4,7 @@ PROJECT(wccl)
 
 include_directories( ${CMAKE_CURRENT_BINARY_DIR}/include/ )
 
-find_package(Corpus2 0.1.2 REQUIRED)
+find_package(Corpus2 1.0.2 REQUIRED)
 set(LIBS ${LIBS} ${Corpus2_LIBRARY})
 
 find_package(PwrUtils 0.0.3 REQUIRED)
@@ -39,6 +39,7 @@ SET(libwccl_STAT_SRC
 	ops/functions/strset/getorth.cpp
 	ops/functions/strset/tolower.cpp
 	ops/functions/strset/toupper.cpp
+	ops/functions/tset/getsymbols.cpp
 	parser/grammar.g
 	parser/Parser.cpp
 	parser/ParserException.cpp
diff --git a/libwccl/ops/functions/tset/getsymbols.cpp b/libwccl/ops/functions/tset/getsymbols.cpp
new file mode 100644
index 0000000..fe5d1fa
--- /dev/null
+++ b/libwccl/ops/functions/tset/getsymbols.cpp
@@ -0,0 +1,42 @@
+#include <libwccl/ops/functions/tset/getsymbols.h>
+#include <libwccl/ops/formatters.h>
+#include <libwccl/ops/functions/constant.h>
+
+namespace Wccl {
+
+// Tagset-aware rendering; delegates to UnaryFunctionFormatter with "[" and "]" as delimiters.
+std::string GetSymbols::to_string(const Corpus2::Tagset& tagset) const {
+	return UnaryFunctionFormatter::to_string(tagset, *this, *pos_expr_, "[", "]");
+}
+
+std::string GetSymbols::to_raw_string() const { // tagset-free counterpart of to_string()
+	return UnaryFunctionFormatter::to_raw_string(*this, *pos_expr_, "[", "]");
+}
+
+// Operator name as seen by a tagset: the attribute name it reports for the mask's values.
+std::string GetSymbols::name(const Corpus2::Tagset &tagset) const {
+	return tagset.get_attribute_name(mask_.get_values());
+}
+
+// Tagset-independent operator name: the raw dump of the mask.
+std::string GetSymbols::raw_name() const {
+	return mask_.raw_dump();
+}
+
+// Returns the mask_-selected symbols of the token at pos_expr_'s position (default TSet if outside).
+GetSymbols::BaseRetValPtr GetSymbols::apply_internal(const FunExecContext& context) const {
+	const boost::shared_ptr<const Position>& pos = pos_expr_->apply(context);
+	const SentenceContext& sc = context.sentence_context();
+	if(pos->is_outside(sc) || !sc.is_current_inside()) {
+		return detail::DefaultFunction<TSet>()->apply(context);
+	}
+	boost::shared_ptr<TSet> tset = boost::make_shared<TSet>();
+	const Corpus2::Token* token = sc.at(*pos);
+	foreach (const Corpus2::Lexeme& lexeme, token->lexemes()) {
+		// Restrict to mask_ first: combining the raw tag would return every symbol of the token.
+		tset->combine_with(lexeme.tag().get_masked(mask_));
+	}
+	return tset;
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/functions/tset/getsymbols.h b/libwccl/ops/functions/tset/getsymbols.h
new file mode 100644
index 0000000..2c49201
--- /dev/null
+++ b/libwccl/ops/functions/tset/getsymbols.h
@@ -0,0 +1,56 @@
+#ifndef LIBWCCL_OPS_FUNCTIONS_TSET_GETSYMBOLS_H
+#define LIBWCCL_OPS_FUNCTIONS_TSET_GETSYMBOLS_H
+
+#include <libwccl/values/tset.h>
+#include <libwccl/values/position.h>
+#include <libwccl/ops/function.h>
+
+namespace Wccl {
+
+class GetSymbols : public Function<TSet> {
+public:
+	typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
+	/** Stores the position expression (checked with BOOST_ASSERT) and the tag mask. */
+	GetSymbols(const PosFunctionPtr& pos_expr, const Corpus2::Tag& mask)
+		: pos_expr_(pos_expr), mask_(mask)
+	{
+		BOOST_ASSERT(pos_expr_);
+	}
+
+	/**
+	 * @returns String representation of the function in the form of:
+	 * "attribute_name[pos_expr_string]"
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+	/**
+	 * @returns String representation of the function in the form of:
+	 * "attribute_name[pos_expr_string]"
+	 * @note This version does not require a tagset, but will be incomplete
+	 * and/or contain internal info.
+	 */
+	std::string to_raw_string() const;
+	/** @returns Raw dump of the mask; does not require a tagset. */
+	std::string raw_name() const;
+	/** @returns Attribute name the given tagset reports for the mask's values. */
+	std::string name(const Corpus2::Tagset& tagset) const;
+
+protected:
+	const PosFunctionPtr pos_expr_; ///< Expression evaluated to obtain the position.
+
+	Corpus2::Tag mask_; ///< Mask passed to the constructor.
+
+	/**
+	 * Gets a position from the argument expression, then gets the
+	 * word at that position from the Sentence in the SentenceContext,
+	 * then gets the tagset symbols matching the mask of the word.
+	 *
+	 * @returns A tagset symbol set of the word if position pointed to
+	 * lies within boundaries of the Sentence. Empty TSet otherwise.
+	 */
+	BaseRetValPtr apply_internal(const FunExecContext& context) const;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_FUNCTIONS_TSET_GETSYMBOLS_H
diff --git a/libwccl/values/tset.h b/libwccl/values/tset.h
index 0378628..47533f1 100644
--- a/libwccl/values/tset.h
+++ b/libwccl/values/tset.h
@@ -61,6 +61,15 @@ public:
 		return tag_ == other.tag_;
 	}
 
+	/** Merges the given tag into the wrapped tag in place (Corpus2::Tag::combine_with). */
+	void combine_with(const Corpus2::Tag& other) {
+		tag_.combine_with(other);
+	}
+	/** Merges the value of another TSet into the wrapped tag in place. */
+	void combine_with(const TSet& other) {
+		tag_.combine_with(other.get_value());
+	}
+
 	std::string to_string(const Corpus2::Tagset &) const;
 
 	std::string to_raw_string() const;
-- 
GitLab