Skip to content
Snippets Groups Projects
Commit 54f4cd42 authored by Adam Radziszewski
Browse files

new token ann checking preds: isannbeg, isannend, isannhead; advance ver

parent 7838cf57
Branches
No related tags found
No related merge requests found
...@@ -2,8 +2,8 @@ PROJECT(WCCL) ...@@ -2,8 +2,8 @@ PROJECT(WCCL)
cmake_minimum_required(VERSION 2.8.0) cmake_minimum_required(VERSION 2.8.0)
set(wccl_ver_major "0") set(wccl_ver_major "0")
set(wccl_ver_minor "1") set(wccl_ver_minor "2")
set(wccl_ver_patch "6") set(wccl_ver_patch "0")
set(LIBWCCL_VERSION "${wccl_ver_major}.${wccl_ver_minor}.${wccl_ver_patch}") set(LIBWCCL_VERSION "${wccl_ver_major}.${wccl_ver_minor}.${wccl_ver_patch}")
......
...@@ -42,15 +42,33 @@ AnnToken::BaseRetValPtr AnnToken::apply_internal(const FunExecContext& context) ...@@ -42,15 +42,33 @@ AnnToken::BaseRetValPtr AnnToken::apply_internal(const FunExecContext& context)
// channel exists, position in range, check if any annot at pos // channel exists, position in range, check if any annot at pos
int abs_idx = sc.get_abs_position(*pos); int abs_idx = sc.get_abs_position(*pos);
if (as->get_channel(chan_name_).get_segment_at(abs_idx) == 0) { const Corpus2::AnnotationChannel& chan = as->get_channel(chan_name_);
if (chan.get_segment_at(abs_idx) == 0) {
return Predicate::False(context); return Predicate::False(context);
} }
else { else {
if (tok_constr_ == O_ANY) {
return Predicate::True(context); return Predicate::True(context);
} }
if (tok_constr_ == O_HEAD) {
return chan.is_head_at(abs_idx) ?
Predicate::True(context) : Predicate::False(context);
}
if (tok_constr_ == O_FIRST) {
return (chan.get_segment_at(abs_idx) !=
chan.get_segment_at(abs_idx - 1)) ?
Predicate::True(context) : Predicate::False(context);
}
if (tok_constr_ == O_LAST) {
return (chan.get_segment_at(abs_idx) !=
chan.get_segment_at(abs_idx + 1)) ?
Predicate::True(context) : Predicate::False(context);
}
}
} }
std::string AnnToken::to_string(const Corpus2::Tagset& tagset) const std::string AnnToken::to_string(const Corpus2::Tagset& /* tagset */) const
{ {
std::ostringstream ostream; std::ostringstream ostream;
ostream << raw_name() << "(" << *pos_expr_ << ", " ostream << raw_name() << "(" << *pos_expr_ << ", "
......
...@@ -24,17 +24,30 @@ namespace Wccl { ...@@ -24,17 +24,30 @@ namespace Wccl {
/** /**
* An annotation-checking predicate: checks whether the given position belongs * An annotation-checking predicate: checks whether the given position belongs
* to any annotation in the given channel. * to any annotation in the given channel. If ann constraint provided, will
* return True only if the additional constraint also holds: the token must be
* the annotation's head, its first token or its last token, as requested.
*/ */
class AnnToken : public Predicate class AnnToken : public Predicate
{ {
public: public:
typedef boost::shared_ptr<Function<Position> > PosFunctionPtr; typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
/**
 * Optional extra condition that the token at the checked position must
 * satisfy, in addition to belonging to an annotation in the given channel.
 */
enum TokenConstraint
{
O_ANY = 0, // no extra constraint: belonging to an annotation is enough
O_HEAD = 1, // token must be the head of its annotation
O_FIRST = 2, // token must be the first token of its annotation
O_LAST = 3, // token must be the last token of its annotation
};
AnnToken( AnnToken(
const PosFunctionPtr& pos_expr, const PosFunctionPtr& pos_expr,
const std::string& annotation_name) const std::string& annotation_name,
const TokenConstraint tok_constr = O_ANY)
: pos_expr_(pos_expr), : pos_expr_(pos_expr),
chan_name_(annotation_name) chan_name_(annotation_name),
tok_constr_(tok_constr)
{ {
BOOST_ASSERT(pos_expr_); BOOST_ASSERT(pos_expr_);
BOOST_ASSERT(!annotation_name.empty()); BOOST_ASSERT(!annotation_name.empty());
...@@ -55,6 +68,7 @@ public: ...@@ -55,6 +68,7 @@ public:
protected: protected:
const PosFunctionPtr pos_expr_; const PosFunctionPtr pos_expr_;
const std::string chan_name_; const std::string chan_name_;
const TokenConstraint tok_constr_;
/** /**
* Returns whether the given position belongs to an annotation in * Returns whether the given position belongs to an annotation in
......
...@@ -1786,22 +1786,22 @@ bool_phrase_annotation ...@@ -1786,22 +1786,22 @@ bool_phrase_annotation
: "isannpart" LPAREN : "isannpart" LPAREN
lpos = position_operator [scope] COMMA n1: STRING lpos = position_operator [scope] COMMA n1: STRING
RPAREN { RPAREN {
ret.reset(new AnnToken(lpos, token_ref_to_std_string(n1))); ret.reset(new AnnToken(lpos, token_ref_to_std_string(n1), AnnToken::O_ANY));
} }
| "isannbeg" LPAREN | "isannbeg" LPAREN
lpos = position_operator [scope] COMMA n2: STRING lpos = position_operator [scope] COMMA n2: STRING
RPAREN { RPAREN {
// TODO ret.reset(new AnnToken(lpos, token_ref_to_std_string(n2), AnnToken::O_FIRST));
} }
| "isannend" LPAREN | "isannend" LPAREN
lpos = position_operator [scope] COMMA n3: STRING lpos = position_operator [scope] COMMA n3: STRING
RPAREN { RPAREN {
// TODO ret.reset(new AnnToken(lpos, token_ref_to_std_string(n3), AnnToken::O_LAST));
} }
| "isannhead" LPAREN | "isannhead" LPAREN
lpos = position_operator [scope] COMMA n4: STRING lpos = position_operator [scope] COMMA n4: STRING
RPAREN { RPAREN {
// TODO ret.reset(new AnnToken(lpos, token_ref_to_std_string(n4), AnnToken::O_HEAD));
} }
| "isannwhole" LPAREN | "isannwhole" LPAREN
lpos = position_operator [scope] COMMA lpos = position_operator [scope] COMMA
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment