From 9be298d086e7438e58488aaa0090fc01a5e28ce6 Mon Sep 17 00:00:00 2001
From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl>
Date: Fri, 16 Sep 2011 11:32:25 +0200
Subject: [PATCH] =?UTF-8?q?iter=20operator=20=E2=80=98skip=E2=80=99?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CMakeLists.txt                                |  2 +-
 libwccl/CMakeLists.txt                        |  1 +
 .../ops/functions/bool/iterations/skip.cpp    | 61 ++++++++++++++
 libwccl/ops/functions/bool/iterations/skip.h  | 56 +++++++++++++
 libwccl/parser/grammar.g                      | 10 +++
 tests/data/interp.xml                         | 77 +++++++++++++++++
 tests/data/skip-iter.ccl                      | 84 +++++++++++++++++++
 7 files changed, 290 insertions(+), 1 deletion(-)
 create mode 100644 libwccl/ops/functions/bool/iterations/skip.cpp
 create mode 100644 libwccl/ops/functions/bool/iterations/skip.h
 create mode 100644 tests/data/interp.xml
 create mode 100644 tests/data/skip-iter.ccl

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 020717a..7b7652e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8.0)
 
 set(wccl_ver_major "0")
 set(wccl_ver_minor "1")
-set(wccl_ver_patch "1")
+set(wccl_ver_patch "2")
 
 set(LIBWCCL_VERSION "${wccl_ver_major}.${wccl_ver_minor}.${wccl_ver_patch}")
 
diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index bae27de..1a0736a 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -39,6 +39,7 @@ SET(libwccl_STAT_SRC
 	ops/functions/bool/iterations/leftlook.cpp
 	ops/functions/bool/iterations/only.cpp
 	ops/functions/bool/iterations/rightlook.cpp
+	ops/functions/bool/iterations/skip.cpp
 	ops/functions/bool/predicate.cpp
 	ops/functions/bool/predicates/ambiguous.cpp
 	ops/functions/bool/predicates/and.cpp
diff --git a/libwccl/ops/functions/bool/iterations/skip.cpp b/libwccl/ops/functions/bool/iterations/skip.cpp
new file mode 100644
index 0000000..de6ca5d
--- /dev/null
+++ b/libwccl/ops/functions/bool/iterations/skip.cpp
@@ -0,0 +1,61 @@
+#include <libwccl/ops/functions/bool/iterations/skip.h>
+#include <libwccl/ops/functions/bool/predicate.h>
+
+namespace Wccl {
+
+// Tagset-aware text: name(central position, iteration var, predicate, offset).
+std::string Skip::to_string(const Corpus2::Tagset& tagset) const
+{
+	std::ostringstream out;
+	out << name(tagset) << "(";
+	out << central_pos_expr_->to_string(tagset) << ", ";
+	out << Position::var_repr(iter_var_acc_.get_name()) << ", ";
+	out << evaluating_expr_->to_string(tagset) << ", " << offset_ << ")";
+	return out.str();
+}
+
+// Writes the raw form (no tagset needed) to os and returns os for chaining.
+std::ostream& Skip::write_to(std::ostream& os) const
+{
+	// Every argument is separated by ", ", the same as in to_string().
+	return os << raw_name() << "(" << *central_pos_expr_ << ", "
+			<< Position::var_repr(iter_var_acc_.get_name()) << ", "
+			<< *evaluating_expr_ << ", " << offset_ << ")";
+}
+
+Skip::BaseRetValPtr Skip::apply_internal(const FunExecContext& context) const
+{ // Moves the iteration variable offset_ non-skipped positions away from the central position.
+	const boost::shared_ptr<Position>& iter_var =
+		context.variables()->get_fast(iter_var_acc_); // variable that receives the result
+	const boost::shared_ptr<const Position>& central_pos =
+										central_pos_expr_->apply(context); // starting point
+
+	if (context.sentence_context().is_inside(*central_pos)) {
+		// Start at central_pos; offset_now is the signed count of steps still to make.
+		iter_var->set_value(central_pos->get_value()); // NOTE(review): raw value is copied and later incremented; confirm this is valid for any special Position values that pass is_inside()
+		int offset_now = offset_;
+
+		while (offset_now != 0) {
+			// Move one position in the direction given by the sign of the remaining offset.
+			int direction = (offset_now > 0) ? +1 : -1;
+			iter_var->set_value(iter_var->get_value() + direction);
+			// Left the sentence before the offset was used up: reset the variable and fail.
+			if (context.sentence_context().is_outside(*iter_var)) {
+				iter_var->set_value(Position::Nowhere);
+				return Predicate::False(context);
+			}
+			// Only a position where the skip predicate is false counts as a step; others are passed over.
+			if(!evaluating_expr_->apply(context)->get_value()) {
+				offset_now -= direction;
+			}
+		}
+		// Offset used up (immediately when offset_ is 0, without evaluating the predicate); iter_var holds the result.
+		return Predicate::True(context);
+
+	}
+	// The central position is not inside the sentence: set the variable to Nowhere and return False.
+	iter_var->set_value(Position::Nowhere);
+	return Predicate::False(context);
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/functions/bool/iterations/skip.h b/libwccl/ops/functions/bool/iterations/skip.h
new file mode 100644
index 0000000..5017ad4
--- /dev/null
+++ b/libwccl/ops/functions/bool/iterations/skip.h
@@ -0,0 +1,56 @@
+#ifndef LIBWCCL_OPS_FUNCTIONS_BOOL_ITERATIONS_SKIP_H
+#define LIBWCCL_OPS_FUNCTIONS_BOOL_ITERATIONS_SKIP_H
+
+#include <libwccl/ops/function.h>
+#include <libwccl/values/bool.h>
+#include <libwccl/values/position.h>
+
+namespace Wccl {
+
+/**
+ * Iterative operator "skip": starting at a given position, moves the
+ * position variable by the given offset, not counting positions where the
+ * given predicate is satisfied. Yields False if that leaves the sentence.
+ */
+class Skip : public Function<Bool>
+{
+public:
+	typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
+	typedef boost::shared_ptr<Function<Bool> > BoolFunctionPtr;
+	Skip(
+		const PosFunctionPtr& central_pos_expr, // starting position; asserted non-null
+		const VariableAccessor<Position>& iter_var_acc, // accessor of the variable to set
+		const BoolFunctionPtr& evaluating_expr, // skip predicate; asserted non-null
+		int offset) // signed number of non-skipped positions to move
+		: central_pos_expr_(central_pos_expr),
+		  iter_var_acc_(iter_var_acc),
+		  evaluating_expr_(evaluating_expr),
+		  offset_(offset)
+	{
+		BOOST_ASSERT(central_pos_expr_);
+		BOOST_ASSERT(evaluating_expr_);
+	}
+
+	/**
+	 * @returns Raw name of the function, as used in textual output: "skip".
+	 */
+	std::string raw_name() const {
+		return "skip";
+	}
+
+	std::string to_string(const Corpus2::Tagset& tagset) const; // text form built with the given tagset
+
+protected:
+	std::ostream& write_to(std::ostream& ostream) const; // raw text form, no tagset involved
+	BaseRetValPtr apply_internal(const FunExecContext& context) const; // performs the iteration
+
+private:
+	const PosFunctionPtr central_pos_expr_; // expression yielding the starting position
+	const VariableAccessor<Position> iter_var_acc_; // accessor of the iteration variable
+	const BoolFunctionPtr evaluating_expr_; // predicate marking positions that do not count
+	const int offset_; // signed distance in non-skipped positions
+
+};
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_FUNCTIONS_BOOL_ITERATIONS_SKIP_H
diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g
index 7aa71ed..b787629 100644
--- a/libwccl/parser/grammar.g
+++ b/libwccl/parser/grammar.g
@@ -69,6 +69,7 @@ header {
 	#include <libwccl/ops/functions/bool/iterations/atleast.h>
 	#include <libwccl/ops/functions/bool/iterations/leftlook.h>
 	#include <libwccl/ops/functions/bool/iterations/rightlook.h>
+	#include <libwccl/ops/functions/bool/iterations/skip.h>
 	
 	// Rules, actions
 	#include <libwccl/ops/tagrule.h>
@@ -1683,6 +1684,7 @@ bool_iteration
 	returns [boost::shared_ptr<Function<Bool> > ret]
 {
 	int min_match = 0;
+	int offset = 0;
 	boost::shared_ptr<Function<Bool> > expr;
 	boost::shared_ptr<Function<Position> > lpos, rpos;
 	boost::shared_ptr<VariableAccessor<Position> > pacc;
@@ -1721,6 +1723,14 @@ bool_iteration
 		RPAREN {
 			ret.reset(new RightLook(lpos, rpos, *pacc, expr));
 		}
+	| "skip" LPAREN
+			lpos = position_operator [scope] COMMA 
+			pacc = position_variable_acc [scope.variables()]     COMMA
+			expr = bool_operator     [scope] COMMA
+			offset = number
+		RPAREN {
+			ret.reset(new Skip(lpos, *pacc, expr, offset));
+		}
 ;
 
 // ----------------------------------------------------------------------------
diff --git a/tests/data/interp.xml b/tests/data/interp.xml
new file mode 100644
index 0000000..782a60c
--- /dev/null
+++ b/tests/data/interp.xml
@@ -0,0 +1,77 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd">
+<cesAna xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" type="lex disamb">
+<chunkList>
+ <chunk>
+  <chunk type="s">
+   <tok>
+    <orth>Idę</orth>
+    <lex><base>iść</base><ctag>verb</ctag></lex>
+   </tok>
+   <tok>
+    <orth>na</orth>
+    <lex><base>na</base><ctag>other</ctag></lex>
+   </tok>
+   <tok>
+    <orth>„</orth>
+    <lex><base>„</base><ctag>interp</ctag></lex>
+   </tok>
+   <ns/>
+   <tok>
+    <orth>Czarnego</orth>
+    <lex><base>czarny</base><ctag>adj</ctag></lex>
+   </tok>
+   <tok>
+    <orth>Kota</orth>
+    <lex><base>kot</base><ctag>noun</ctag></lex>
+    <lex><base>kota</base><ctag>noun</ctag></lex>
+   </tok>
+   <tok>
+    <orth>(</orth>
+    <lex><base>(</base><ctag>interp</ctag></lex>
+   </tok>
+   <ns/>
+   <tok>
+    <orth>1998</orth>
+    <lex><base>1998</base><ctag>unk</ctag></lex>
+   </tok>
+   <ns/>
+   <tok>
+    <orth>)</orth>
+    <lex><base>)</base><ctag>interp</ctag></lex>
+   </tok>
+   <ns/>
+   <tok>
+    <orth>”</orth>
+    <lex><base>”</base><ctag>interp</ctag></lex>
+   </tok>
+   <tok>
+    <orth>do</orth>
+    <lex><base>do</base><ctag>other</ctag></lex>
+   </tok>
+   <tok>
+    <orth>kina</orth>
+    <lex><base>kino</base><ctag>noun</ctag></lex>
+   </tok>
+   <tok>
+    <orth>—</orth>
+    <lex><base>—</base><ctag>interp</ctag></lex>
+   </tok>
+   <tok>
+    <orth>już</orth>
+    <lex><base>już</base><ctag>qub</ctag></lex>
+   </tok>
+   <tok>
+    <orth>czas</orth>
+    <lex><base>czas</base><ctag>noun</ctag></lex>
+    <lex><base>czas</base><ctag>verb</ctag></lex>
+   </tok>
+   <ns/>
+   <tok>
+    <orth>.</orth>
+    <lex><base>.</base><ctag>interp</ctag></lex>
+   </tok>
+  </chunk>
+ </chunk>
+</chunkList>
+</cesAna>
diff --git a/tests/data/skip-iter.ccl b/tests/data/skip-iter.ccl
new file mode 100644
index 0000000..5aad869
--- /dev/null
+++ b/tests/data/skip-iter.ccl
@@ -0,0 +1,84 @@
+tagset=simple
+sentence=interp.xml
+---
+skip(0,$P, inter(class[$P],interp), 0)
+
+True
+P=0
+---
+skip(0,$P, inter(class[$P],interp), 1)
+
+True
+P=1
+---
+skip(0,$P, inter(class[$P],interp), 8)
+
+True
+P=13
+---
+skip(-1,$P, inter(class[$P],interp), 0)
+
+False
+P=nowhere
+---
+skip(0,$P, inter(class[$P],interp), 1)
+
+True
+P=1
+---
+skip(0,$P, inter(class[$P],interp), 2)
+
+True
+P=3
+---
+skip(0,$P, inter(class[$P],interp), 3)
+
+True
+P=4
+---
+
+skip(0,$P, inter(class[$P],interp), 5)
+
+True
+P=9
+---
+skip(0,$P, inter(class[$P],interp), 9) // OOS
+
+False
+P=nowhere
+---
+skip(2,$P, inter(class[$P],interp), 0) // at skipped
+
+True
+P=2
+---
+skip(2,$P, inter(class[$P],interp), 1) // at skipped
+
+True
+P=3
+---
+skip(2,$P, inter(class[$P],interp), -1) // at skipped
+
+True
+P=1
+---
+skip(11,$P, inter(class[$P],interp), -1) // — -> kina
+
+True
+P=10
+---
+skip(11,$P, inter(class[$P],interp), -2) // — -> do
+
+True
+P=9
+---
+skip(11,$P, inter(class[$P],interp), -6) // — -> na
+
+True
+P=1
+---
+skip(11,$P, inter(class[$P],interp), -8) // — -> OOS
+
+False
+P=nowhere
+---
-- 
GitLab