From 54f4cd4241bcddff3290284df57fbc5de1d75f6b Mon Sep 17 00:00:00 2001 From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl> Date: Tue, 5 Jun 2012 15:23:12 +0200 Subject: [PATCH] new token ann checking preds: isannbeg, isannend, isannhead; advance ver --- CMakeLists.txt | 4 ++-- .../functions/bool/predicates/anntoken.cpp | 24 ++++++++++++++++--- .../ops/functions/bool/predicates/anntoken.h | 20 +++++++++++++--- libwccl/parser/grammar.g | 8 +++---- 4 files changed, 44 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aedc2ca..258b43d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,8 +2,8 @@ PROJECT(WCCL) cmake_minimum_required(VERSION 2.8.0) set(wccl_ver_major "0") -set(wccl_ver_minor "1") -set(wccl_ver_patch "6") +set(wccl_ver_minor "2") +set(wccl_ver_patch "0") set(LIBWCCL_VERSION "${wccl_ver_major}.${wccl_ver_minor}.${wccl_ver_patch}") diff --git a/libwccl/ops/functions/bool/predicates/anntoken.cpp b/libwccl/ops/functions/bool/predicates/anntoken.cpp index 9f2adc9..83cc101 100644 --- a/libwccl/ops/functions/bool/predicates/anntoken.cpp +++ b/libwccl/ops/functions/bool/predicates/anntoken.cpp @@ -42,15 +42,33 @@ AnnToken::BaseRetValPtr AnnToken::apply_internal(const FunExecContext& context) // channel exists, position in range, check if any annot at pos int abs_idx = sc.get_abs_position(*pos); - if (as->get_channel(chan_name_).get_segment_at(abs_idx) == 0) { + const Corpus2::AnnotationChannel& chan = as->get_channel(chan_name_); + + if (chan.get_segment_at(abs_idx) == 0) { return Predicate::False(context); } else { - return Predicate::True(context); + if (tok_constr_ == O_ANY) { + return Predicate::True(context); + } + if (tok_constr_ == O_HEAD) { + return chan.is_head_at(abs_idx) ? + Predicate::True(context) : Predicate::False(context); + } + if (tok_constr_ == O_FIRST) { + return (chan.get_segment_at(abs_idx) != + chan.get_segment_at(abs_idx - 1)) ? + Predicate::True(context) : Predicate::False(context); + } + if (tok_constr_ == O_LAST) { + return (chan.get_segment_at(abs_idx) != + chan.get_segment_at(abs_idx + 1)) ? + Predicate::True(context) : Predicate::False(context); + } } } -std::string AnnToken::to_string(const Corpus2::Tagset& tagset) const +std::string AnnToken::to_string(const Corpus2::Tagset& /* tagset */) const { std::ostringstream ostream; ostream << raw_name() << "(" << *pos_expr_ << ", " diff --git a/libwccl/ops/functions/bool/predicates/anntoken.h b/libwccl/ops/functions/bool/predicates/anntoken.h index b162764..179eda4 100644 --- a/libwccl/ops/functions/bool/predicates/anntoken.h +++ b/libwccl/ops/functions/bool/predicates/anntoken.h @@ -24,17 +24,30 @@ namespace Wccl { /** * An annotation-checking predicate: checks whether the given position belongs - * to any annotation in the given channel. + * to any annotation in the given channel. If ann constraint provided, will + * return True only if the additional constraint holds: token must be +* annotation's head, must be annotation-first or annotation-last. */ class AnnToken : public Predicate { public: typedef boost::shared_ptr<Function<Position> > PosFunctionPtr; + + enum TokenConstraint + { + O_ANY = 0, // no constraints + O_HEAD = 1, // token must be ann head + O_FIRST = 2, // must be ann-first + O_LAST = 3, // must be ann-last + }; + AnnToken( const PosFunctionPtr& pos_expr, - const std::string& annotation_name) + const std::string& annotation_name, + const TokenConstraint tok_constr = O_ANY) : pos_expr_(pos_expr), - chan_name_(annotation_name) + chan_name_(annotation_name), + tok_constr_(tok_constr) { BOOST_ASSERT(pos_expr_); BOOST_ASSERT(!annotation_name.empty()); @@ -55,6 +68,7 @@ public: protected: const PosFunctionPtr pos_expr_; const std::string chan_name_; + const TokenConstraint tok_constr_; /** * Returns whether the given position belongs to an annotation in diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index 2e146ab..167e390 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -1786,22 +1786,22 @@ bool_phrase_annotation : "isannpart" LPAREN lpos = position_operator [scope] COMMA n1: STRING RPAREN { - ret.reset(new AnnToken(lpos, token_ref_to_std_string(n1))); + ret.reset(new AnnToken(lpos, token_ref_to_std_string(n1), AnnToken::O_ANY)); } | "isannbeg" LPAREN lpos = position_operator [scope] COMMA n2: STRING RPAREN { - // TODO + ret.reset(new AnnToken(lpos, token_ref_to_std_string(n2), AnnToken::O_FIRST)); } | "isannend" LPAREN lpos = position_operator [scope] COMMA n3: STRING RPAREN { - // TODO + ret.reset(new AnnToken(lpos, token_ref_to_std_string(n3), AnnToken::O_LAST)); } | "isannhead" LPAREN lpos = position_operator [scope] COMMA n4: STRING RPAREN { - // TODO + ret.reset(new AnnToken(lpos, token_ref_to_std_string(n4), AnnToken::O_HEAD)); } | "isannwhole" LPAREN lpos = position_operator [scope] COMMA -- GitLab