From 9be298d086e7438e58488aaa0090fc01a5e28ce6 Mon Sep 17 00:00:00 2001 From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl> Date: Fri, 16 Sep 2011 11:32:25 +0200 Subject: [PATCH] =?UTF-8?q?iter=20operator=20=E2=80=98skip=E2=80=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 +- libwccl/CMakeLists.txt | 1 + .../ops/functions/bool/iterations/skip.cpp | 61 ++++++++++++++ libwccl/ops/functions/bool/iterations/skip.h | 56 +++++++++++++ libwccl/parser/grammar.g | 10 +++ tests/data/interp.xml | 77 +++++++++++++++++ tests/data/skip-iter.ccl | 84 +++++++++++++++++++ 7 files changed, 290 insertions(+), 1 deletion(-) create mode 100644 libwccl/ops/functions/bool/iterations/skip.cpp create mode 100644 libwccl/ops/functions/bool/iterations/skip.h create mode 100644 tests/data/interp.xml create mode 100644 tests/data/skip-iter.ccl diff --git a/CMakeLists.txt b/CMakeLists.txt index 020717a..7b7652e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8.0) set(wccl_ver_major "0") set(wccl_ver_minor "1") -set(wccl_ver_patch "1") +set(wccl_ver_patch "2") set(LIBWCCL_VERSION "${wccl_ver_major}.${wccl_ver_minor}.${wccl_ver_patch}") diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt index bae27de..1a0736a 100644 --- a/libwccl/CMakeLists.txt +++ b/libwccl/CMakeLists.txt @@ -39,6 +39,7 @@ SET(libwccl_STAT_SRC ops/functions/bool/iterations/leftlook.cpp ops/functions/bool/iterations/only.cpp ops/functions/bool/iterations/rightlook.cpp + ops/functions/bool/iterations/skip.cpp ops/functions/bool/predicate.cpp ops/functions/bool/predicates/ambiguous.cpp ops/functions/bool/predicates/and.cpp diff --git a/libwccl/ops/functions/bool/iterations/skip.cpp b/libwccl/ops/functions/bool/iterations/skip.cpp new file mode 100644 index 0000000..de6ca5d --- /dev/null +++ b/libwccl/ops/functions/bool/iterations/skip.cpp @@ -0,0 +1,61 @@ +#include 
<libwccl/ops/functions/bool/iterations/skip.h> +#include <libwccl/ops/functions/bool/predicate.h> + +namespace Wccl { + +std::string Skip::to_string(const Corpus2::Tagset& tagset) const +{ + std::ostringstream ss; + ss << name(tagset) << "(" + << central_pos_expr_->to_string(tagset) << ", " + << Position::var_repr(iter_var_acc_.get_name()) << ", " + << evaluating_expr_->to_string(tagset) << ", " + << offset_ << ")"; + return ss.str(); +} + +std::ostream& Skip::write_to(std::ostream& os) const +{ + return os << raw_name() << "(" + << *central_pos_expr_ << ", " + << Position::var_repr(iter_var_acc_.get_name()) << ", " + << *evaluating_expr_ << ", " + << offset_ << ")"; +} + +Skip::BaseRetValPtr Skip::apply_internal(const FunExecContext& context) const +{ + const boost::shared_ptr<Position>& iter_var = + context.variables()->get_fast(iter_var_acc_); + const boost::shared_ptr<const Position>& central_pos = + central_pos_expr_->apply(context); + + if (context.sentence_context().is_inside(*central_pos)) { + // start the walk at central_pos; iter_var is the variable that gets moved + iter_var->set_value(central_pos->get_value()); + int offset_now = offset_; + + while (offset_now != 0) { + // move one position in the direction given by the sign of the remaining offset + int direction = (offset_now > 0) ? +1 : -1; + iter_var->set_value(iter_var->get_value() + direction); + // stepped out of the sentence before the offset was used up: fail + if (context.sentence_context().is_outside(*iter_var)) { + iter_var->set_value(Position::Nowhere); + return Predicate::False(context); + } + // only positions where the skip predicate does not hold use up a step + if(!evaluating_expr_->apply(context)->get_value()) { + offset_now -= direction; + } + } + // offset used up: iter_var stays at the position reached + return Predicate::True(context); + + } + // central_pos is not inside the sentence: set iteration variable to Nowhere and return False. 
+ iter_var->set_value(Position::Nowhere); + return Predicate::False(context); +} + +} /* end ns Wccl */ diff --git a/libwccl/ops/functions/bool/iterations/skip.h b/libwccl/ops/functions/bool/iterations/skip.h new file mode 100644 index 0000000..5017ad4 --- /dev/null +++ b/libwccl/ops/functions/bool/iterations/skip.h @@ -0,0 +1,56 @@ +#ifndef LIBWCCL_OPS_FUNCTIONS_BOOL_ITERATIONS_SKIP_H +#define LIBWCCL_OPS_FUNCTIONS_BOOL_ITERATIONS_SKIP_H + +#include <libwccl/ops/function.h> +#include <libwccl/values/bool.h> +#include <libwccl/values/position.h> + +namespace Wccl { + +/** + * Iterative operator "skip": starting from the given position, moves the + * position variable by offset steps (towards lower positions for negative offsets, no move for offset 0), not counting positions where the given predicate is satisfied. + * Evaluates to True with the variable left at the position reached; evaluates to False with the variable set to Nowhere when the start is not inside the sentence or the walk leaves the sentence. + */ +class Skip : public Function<Bool> +{ +public: + typedef boost::shared_ptr<Function<Position> > PosFunctionPtr; + typedef boost::shared_ptr<Function<Bool> > BoolFunctionPtr; + Skip( + const PosFunctionPtr& central_pos_expr, + const VariableAccessor<Position>& iter_var_acc, + const BoolFunctionPtr& evaluating_expr, + int offset) + : central_pos_expr_(central_pos_expr), + iter_var_acc_(iter_var_acc), + evaluating_expr_(evaluating_expr), + offset_(offset) + { + BOOST_ASSERT(central_pos_expr_); + BOOST_ASSERT(evaluating_expr_); + } + + /** + * @returns Name of the function: "skip". 
+ */ + std::string raw_name() const { + return "skip"; + } + + std::string to_string(const Corpus2::Tagset& tagset) const; + +protected: + std::ostream& write_to(std::ostream& ostream) const; + BaseRetValPtr apply_internal(const FunExecContext& context) const; + +private: + const PosFunctionPtr central_pos_expr_; + const VariableAccessor<Position> iter_var_acc_; + const BoolFunctionPtr evaluating_expr_; + const int offset_; + +}; +} /* end ns Wccl */ + +#endif // LIBWCCL_OPS_FUNCTIONS_BOOL_ITERATIONS_SKIP_H diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index 7aa71ed..b787629 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -69,6 +69,7 @@ header { #include <libwccl/ops/functions/bool/iterations/atleast.h> #include <libwccl/ops/functions/bool/iterations/leftlook.h> #include <libwccl/ops/functions/bool/iterations/rightlook.h> + #include <libwccl/ops/functions/bool/iterations/skip.h> // Rules, actions #include <libwccl/ops/tagrule.h> @@ -1683,6 +1684,7 @@ bool_iteration returns [boost::shared_ptr<Function<Bool> > ret] { int min_match = 0; + int offset = 0; boost::shared_ptr<Function<Bool> > expr; boost::shared_ptr<Function<Position> > lpos, rpos; boost::shared_ptr<VariableAccessor<Position> > pacc; @@ -1721,6 +1723,14 @@ bool_iteration RPAREN { ret.reset(new RightLook(lpos, rpos, *pacc, expr)); } + | "skip" LPAREN + lpos = position_operator [scope] COMMA + pacc = position_variable_acc [scope.variables()] COMMA + expr = bool_operator [scope] COMMA + offset = number + RPAREN { + ret.reset(new Skip(lpos, *pacc, expr, offset)); + } ; // ---------------------------------------------------------------------------- diff --git a/tests/data/interp.xml b/tests/data/interp.xml new file mode 100644 index 0000000..782a60c --- /dev/null +++ b/tests/data/interp.xml @@ -0,0 +1,77 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd"> +<cesAna xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" type="lex 
disamb"> +<chunkList> + <chunk> + <chunk type="s"> + <tok> + <orth>Idę</orth> + <lex><base>iść</base><ctag>verb</ctag></lex> + </tok> + <tok> + <orth>na</orth> + <lex><base>na</base><ctag>other</ctag></lex> + </tok> + <tok> + <orth>„</orth> + <lex><base>„</base><ctag>interp</ctag></lex> + </tok> + <ns/> + <tok> + <orth>Czarnego</orth> + <lex><base>czarny</base><ctag>adj</ctag></lex> + </tok> + <tok> + <orth>Kota</orth> + <lex><base>kot</base><ctag>noun</ctag></lex> + <lex><base>kota</base><ctag>noun</ctag></lex> + </tok> + <tok> + <orth>(</orth> + <lex><base>(</base><ctag>interp</ctag></lex> + </tok> + <ns/> + <tok> + <orth>1998</orth> + <lex><base>1998</base><ctag>unk</ctag></lex> + </tok> + <ns/> + <tok> + <orth>)</orth> + <lex><base>)</base><ctag>interp</ctag></lex> + </tok> + <ns/> + <tok> + <orth>”</orth> + <lex><base>”</base><ctag>interp</ctag></lex> + </tok> + <tok> + <orth>do</orth> + <lex><base>do</base><ctag>other</ctag></lex> + </tok> + <tok> + <orth>kina</orth> + <lex><base>kino</base><ctag>noun</ctag></lex> + </tok> + <tok> + <orth>—</orth> + <lex><base>—</base><ctag>interp</ctag></lex> + </tok> + <tok> + <orth>już</orth> + <lex><base>już</base><ctag>qub</ctag></lex> + </tok> + <tok> + <orth>czas</orth> + <lex><base>czas</base><ctag>noun</ctag></lex> + <lex><base>czas</base><ctag>verb</ctag></lex> + </tok> + <ns/> + <tok> + <orth>.</orth> + <lex><base>.</base><ctag>interp</ctag></lex> + </tok> + </chunk> + </chunk> +</chunkList> +</cesAna> diff --git a/tests/data/skip-iter.ccl b/tests/data/skip-iter.ccl new file mode 100644 index 0000000..5aad869 --- /dev/null +++ b/tests/data/skip-iter.ccl @@ -0,0 +1,84 @@ +tagset=simple +sentence=interp.xml +--- +skip(0,$P, inter(class[$P],interp), 0) + +True +P=0 +--- +skip(0,$P, inter(class[$P],interp), 1) + +True +P=1 +--- +skip(0,$P, inter(class[$P],interp), 8) + +True +P=13 +--- +skip(-1,$P, inter(class[$P],interp), 0) + +False +P=nowhere +--- +skip(0,$P, inter(class[$P],interp), 1) + +True +P=1 +--- 
+skip(0,$P, inter(class[$P],interp), 2) + +True +P=3 +--- +skip(0,$P, inter(class[$P],interp), 3) + +True +P=4 +--- + +skip(0,$P, inter(class[$P],interp), 5) + +True +P=9 +--- +skip(0,$P, inter(class[$P],interp), 9) // out of sentence (OOS) + +False +P=nowhere +--- +skip(2,$P, inter(class[$P],interp), 0) // at skipped + +True +P=2 +--- +skip(2,$P, inter(class[$P],interp), 1) // at skipped + +True +P=3 +--- +skip(2,$P, inter(class[$P],interp), -1) // at skipped + +True +P=1 +--- +skip(11,$P, inter(class[$P],interp), -1) // — (at skipped) -> kina + +True +P=10 +--- +skip(11,$P, inter(class[$P],interp), -2) // — (at skipped) -> do + +True +P=9 +--- +skip(11,$P, inter(class[$P],interp), -6) // — (at skipped) -> na + +True +P=1 +--- +skip(11,$P, inter(class[$P],interp), -8) // — (at skipped) -> OOS + +False +P=nowhere +--- -- GitLab