diff --git a/libwccl/ops/functions/bool/predicates/ann.cpp b/libwccl/ops/functions/bool/predicates/ann.cpp index c2faa04cbbc6d4f45de1c480f757a51036614066..dc07e957a133b2bb9d5a044d244adf949dec8a41 100644 --- a/libwccl/ops/functions/bool/predicates/ann.cpp +++ b/libwccl/ops/functions/bool/predicates/ann.cpp @@ -16,11 +16,11 @@ Ann::BaseRetValPtr Ann::apply_internal(const FunExecContext& context) const boost::shared_ptr<const Match> check_from = check_from_->apply(context); boost::shared_ptr<const Match> check_to = (check_from_ == check_to_) ? check_from : check_to_->apply(context); - int abs_left = check_from->first_token(as).get_value(); + int abs_left = check_from->first_token(as); if (abs_left < 0) { throw WcclError("Received starting match that points outside sentence."); } - int abs_right = check_to->last_token(as).get_value(); + int abs_right = check_to->last_token(as); if (abs_right >= context.sentence_context().size()) { throw WcclError("Received ending match that points outside sentence."); } diff --git a/libwccl/ops/functions/bool/predicates/annsub.cpp b/libwccl/ops/functions/bool/predicates/annsub.cpp index bcaefc254bd9afbbc3c650f1be2788e2da39abdc..cb03321eac6d43b3df7273deb7ad862af8fe0c42 100644 --- a/libwccl/ops/functions/bool/predicates/annsub.cpp +++ b/libwccl/ops/functions/bool/predicates/annsub.cpp @@ -16,11 +16,11 @@ AnnSub::BaseRetValPtr AnnSub::apply_internal(const FunExecContext& context) cons boost::shared_ptr<const Match> check_from = check_from_->apply(context); boost::shared_ptr<const Match> check_to = (check_from_ == check_to_) ? check_from : check_to_->apply(context); - int abs_left = check_from->first_token(as).get_value(); + int abs_left = check_from->first_token(as); if (abs_left < 0) { throw WcclError("Received starting match that points outside sentence."); } - int abs_right = check_to->last_token(as).get_value(); + int abs_right = check_to->last_token(as); if (abs_right >= context.sentence_context().size()) { throw WcclError("Received ending match that points outside sentence."); } diff --git a/libwccl/ops/functions/position/firsttoken.cpp b/libwccl/ops/functions/position/firsttoken.cpp index 8c900756afc3c6029c85b8a75c73d4b3bdb3bf63..b92b66d608574bf2488495d942219e5ca014f3e8 100644 --- a/libwccl/ops/functions/position/firsttoken.cpp +++ b/libwccl/ops/functions/position/firsttoken.cpp @@ -21,7 +21,9 @@ FirstToken::BaseRetValPtr FirstToken::apply_internal( if(match->empty()) { return detail::DefaultFunction<Position>()->apply(context); } - return boost::make_shared<Position>(match->first_token(s)); + int abs_pos = match->first_token(s); + int rel_pos = abs_pos - context.sentence_context().get_position(); + return boost::make_shared<Position>(rel_pos); } std::string FirstToken::to_string(const Corpus2::Tagset &tagset) const diff --git a/libwccl/ops/functions/position/lasttoken.cpp b/libwccl/ops/functions/position/lasttoken.cpp index 6d4ca00193da63f17cd35f717054eb21d93ffbd0..6b4f43ecd83251faf7a4cdadd6792b5bec953e0d 100644 --- a/libwccl/ops/functions/position/lasttoken.cpp +++ b/libwccl/ops/functions/position/lasttoken.cpp @@ -14,13 +14,15 @@ LastToken::BaseRetValPtr LastToken::apply_internal( if (!s) { throw InvalidArgument( "context", - "Supplied context does not have valid Corpus2::AnnotatedSentence."); + "Supplied context does not have a valid Corpus2::AnnotatedSentence."); } const Function<Match>::RetValPtr match = match_expr_->apply(context); if(match->empty()) { return detail::DefaultFunction<Position>()->apply(context); } - return boost::make_shared<Position>(match->last_token(s)); + int abs_pos = match->last_token(s); + int rel_pos = abs_pos - context.sentence_context().get_position(); + return boost::make_shared<Position>(rel_pos); } std::string LastToken::to_string(const Corpus2::Tagset &tagset) const diff --git a/libwccl/ops/match/actions/markmatch.cpp b/libwccl/ops/match/actions/markmatch.cpp index 7970e710e49b84dad031566963f410992f40fde9..e80515179823c1f1e7633b9a54020c6d146ad3af 100644 --- a/libwccl/ops/match/actions/markmatch.cpp +++ b/libwccl/ops/match/actions/markmatch.cpp @@ -21,12 +21,12 @@ void MarkMatch::execute(const ActionExecContext& context) const boost::shared_ptr<const Match> head_match = (match_from_ == head_match_) ? match_from : head_match_->apply(context); - int abs_left = match_from->first_token(as).get_value(); + int abs_left = match_from->first_token(as); if (abs_left < 0) { throw WcclError("Received starting match that points outside sentence."); } - int abs_right = match_to->last_token(as).get_value(); + int abs_right = match_to->last_token(as); if (abs_right >= sc.size()) { throw WcclError("Received ending match that points outside sentence."); } @@ -34,7 +34,7 @@ void MarkMatch::execute(const ActionExecContext& context) const throw WcclError("Received starting match points after the received ending match."); } - int abs_head = head_match->first_token(as).get_value(); + int abs_head = head_match->first_token(as); if (abs_head < abs_left || abs_head > abs_right) { throw WcclError("Received head match points outside range defined by start and end matches."); } diff --git a/libwccl/ops/match/actions/unmarkmatch.cpp b/libwccl/ops/match/actions/unmarkmatch.cpp index 863bb0e67b7b1b68c4fa376a28ad517cf9fc2a28..3f0a8efe704c8e69aa42b1cee9ab95fed4c86ac8 100644 --- a/libwccl/ops/match/actions/unmarkmatch.cpp +++ b/libwccl/ops/match/actions/unmarkmatch.cpp @@ -18,7 +18,7 @@ void UnmarkMatch::execute(const ActionExecContext& context) const throw InvalidArgument("context", "Sentence does not have annotation channel \"" + chan_name_ + "\"."); } - int abs_pos = match_->apply(context)->first_token(as).get_value(); + int abs_pos = match_->apply(context)->first_token(as); if(sc.is_outside(abs_pos)) { throw WcclError("Received starting match that points outside sentence."); } diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index ca58421fa7cc28114830fd675af1ecda091e1f0a..eb9a9c74499399cfc9406a617a3d832bd15d0ecc 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -490,10 +490,9 @@ match_data_literal token_match_literal returns [boost::shared_ptr<TokenMatch> val] { - boost::shared_ptr<Position> p; } - : "TOK" LBRACKET p = position_literal RBRACKET { - val.reset(new TokenMatch(*p)); + : "TOK" LBRACKET u: UNSIGNED_INT RBRACKET { + val.reset(new TokenMatch(token_ref_to_int(u))); } ; @@ -502,10 +501,9 @@ token_match_literal ann_match_literal returns [boost::shared_ptr<AnnotationMatch> val] { - boost::shared_ptr<Position> p; } - : "ANN" LBRACKET p = position_literal COMMA channel : STRING RBRACKET { - val.reset(new AnnotationMatch(*p, token_ref_to_std_string(channel))); + : "ANN" LBRACKET u : UNSIGNED_INT COMMA channel : STRING RBRACKET { + val.reset(new AnnotationMatch(token_ref_to_int(u), token_ref_to_std_string(channel))); } ; diff --git a/libwccl/values/annotationmatch.cpp b/libwccl/values/annotationmatch.cpp index 54fb777c7d07969024e423371b5b009cb055ff7c..d13db4e26a12611f4a488d41262499d42c361f47 100644 --- a/libwccl/values/annotationmatch.cpp +++ b/libwccl/values/annotationmatch.cpp @@ -1,43 +1,43 @@ #include <libwccl/values/annotationmatch.h> +#include <libwccl/values/position.h> +#include <boost/lexical_cast.hpp> namespace Wccl { std::string AnnotationMatch::to_raw_string() const { - return "ANN[" + position_.to_raw_string() + "," + channel_ + "]"; + return "ANN[" + boost::lexical_cast<std::string>(abs_pos_) + "," + channel_ + "]"; } -Position AnnotationMatch::first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const +int AnnotationMatch::first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { - size_t fpos = position_.get_value(); const Corpus2::AnnotationChannel& chan = s->get_channel(channel_); - int seg = chan.get_segment_at(fpos); + int seg = chan.get_segment_at(abs_pos_); if (seg > 0) { - for (size_t i = 0; i < fpos; ++i) { + for (int i = 0; i < abs_pos_; ++i) { if (chan.get_segment_at(i) == seg) { - return Position(i); + return i; } } - return position_; + return abs_pos_; } else { - return Position(); + return Position::Nowhere; } } -Position AnnotationMatch::last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const +int AnnotationMatch::last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { - size_t fpos = position_.get_value(); const Corpus2::AnnotationChannel& chan = s->get_channel(channel_); - int seg = chan.get_segment_at(fpos); + int seg = chan.get_segment_at(abs_pos_); if (seg > 0) { - for (size_t i = s->size() - 1; i > fpos; --i) { + for (int i = s->size() - 1; i > abs_pos_; --i) { if (chan.get_segment_at(i) == seg) { - return Position(i); + return i; } } - return position_; + return abs_pos_; } else { - return Position(); + return Position::Nowhere; } } diff --git a/libwccl/values/annotationmatch.h b/libwccl/values/annotationmatch.h index 90015fed0bd966c7b0db53ecc4cdd854f7b5113f..7abc957e766fc8b98d65ef1fff9883a73b74794e 100644 --- a/libwccl/values/annotationmatch.h +++ b/libwccl/values/annotationmatch.h @@ -9,16 +9,10 @@ class AnnotationMatch : public MatchData { public: - explicit AnnotationMatch(Position position, const std::string channel) - : position_(position), channel_(channel) + explicit AnnotationMatch(int abs_pos, const std::string& channel) + : abs_pos_(abs_pos), channel_(channel) { - BOOST_ASSERT(position_.get_value() != Position::Nowhere); - } - - explicit AnnotationMatch(int pos,const std::string channel) - : position_(pos), channel_(channel) - { - BOOST_ASSERT(position_.get_value() != Position::Nowhere); + BOOST_ASSERT(abs_pos_ >= 0); } /// MatchData override. @@ -27,10 +21,10 @@ public: } /// MatchData override. - Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; + int first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; /// MatchData override. - Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; + int last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; /// MatchData override std::string to_raw_string() const; @@ -42,7 +36,7 @@ protected: } private: - Position position_; + int abs_pos_; std::string channel_; }; diff --git a/libwccl/values/match.h b/libwccl/values/match.h index 5be080f617e9459f8da069c2441619584ef10c82..ee6a588efeec10cb94dfeef56bea381c9b88b004 100644 --- a/libwccl/values/match.h +++ b/libwccl/values/match.h @@ -81,9 +81,9 @@ public: /** * Getter for the first token matched. If the match is empty, must return - * Nowhere. + * Position::Nowhere. */ - Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { + int first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { return match_->first_token(s); } @@ -91,7 +91,7 @@ public: * Getter for the last token matched. If the match is empty, must return * Nowhere. */ - Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { + int last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { return match_->last_token(s); } diff --git a/libwccl/values/matchdata.h b/libwccl/values/matchdata.h index f113256ade77789626b0df9c249797f6b5a40317..d415f9f6b93bb11b07b8892651d495487133af55 100644 --- a/libwccl/values/matchdata.h +++ b/libwccl/values/matchdata.h @@ -1,7 +1,6 @@ #ifndef LIBWCCL_VALUES_MATCHDATA_H #define LIBWCCL_VALUES_MATCHDATA_H -#include <libwccl/values/position.h> #include <libwccl/exception.h> #include <libcorpus2/ann/annotatedsentence.h> @@ -24,15 +23,15 @@ public: virtual bool empty() const = 0; /** * Getter for the first token matched. If the match is empty, must return - * Nowhere. + * Position::Nowhere. */ - virtual Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const = 0; + virtual int first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const = 0; /** * Getter for the last token matched. If the match is empty, must return - * Nowhere. + * Position::Nowhere. */ - virtual Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const = 0; + virtual int last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const = 0; /** * Getter for a submatch at given index (indexing starts from 1). diff --git a/libwccl/values/matchvector.cpp b/libwccl/values/matchvector.cpp index 6415ed0d4ac4bd2a7b1d19b928868ffa3c9ce62b..7fb0ab019bb209ed32f1e0c2b8155c34250bee40 100644 --- a/libwccl/values/matchvector.cpp +++ b/libwccl/values/matchvector.cpp @@ -22,44 +22,44 @@ std::string MatchVector::to_raw_string() const return ss.str(); } -Position MatchVector::first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const +int MatchVector::first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { if (matches_.empty()) { - return Position(Position::Nowhere); + return Position::Nowhere; } else { // Negative positions are invalid, including specials like Nowhere, // so we can't just find minimum value but minimum *non-negative* value. // Note: yes, the code assumes the special values like Nowhere are indeed negative. - Position p = matches_.front()->first_token(s); + int p = matches_.front()->first_token(s); size_t i = 1; - while ((p.get_value() < 0) && (i < matches_.size())) { + while ((p < 0) && (i < matches_.size())) { p = matches_[i]->first_token(s); ++i; } while (i < matches_.size()) { - Position c = matches_[i]->first_token(s); - if ((c.get_value() >= 0) && (c.get_value() < p.get_value())) { + int c = matches_[i]->first_token(s); + if ((c >= 0) && (c < p)) { p = c; } ++i; } - return p; + return p >= 0 ? p : Position::Nowhere; } } -Position MatchVector::last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const +int MatchVector::last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const { if (matches_.empty()) { - return Position(Position::Nowhere); + return Position::Nowhere; } else { - Position p = matches_.front()->last_token(s); + int p = matches_.front()->last_token(s); for (size_t i = 1; i < matches_.size(); ++i) { - Position c = matches_[i]->last_token(s); - if (c.get_value() > p.get_value()) { + int c = matches_[i]->last_token(s); + if (c > p) { p = c; } } - return p; + return p >= 0 ? p : Position::Nowhere; } } diff --git a/libwccl/values/matchvector.h b/libwccl/values/matchvector.h index 2519ae6804bac4a4d091926919035c3607f978b3..c97f7d9cd2b76b79de04ed804e604cff7223140a 100644 --- a/libwccl/values/matchvector.h +++ b/libwccl/values/matchvector.h @@ -26,10 +26,10 @@ public: bool empty() const; /// MatchData override. - Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; + int first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; /// MatchData override. - Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; + int last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const; /// MatchData override std::string to_raw_string() const; diff --git a/libwccl/values/tokenmatch.cpp b/libwccl/values/tokenmatch.cpp index 0852ad96367538980e54698af4d4dbeb87acd4d9..7cd31cf0fd9a7962ee37549366d70e5674a2fee2 100644 --- a/libwccl/values/tokenmatch.cpp +++ b/libwccl/values/tokenmatch.cpp @@ -1,10 +1,11 @@ #include <libwccl/values/tokenmatch.h> +#include <boost/lexical_cast.hpp> namespace Wccl { std::string TokenMatch::to_raw_string() const { - return "TOK[" + position_.to_raw_string() + "]"; + return "TOK[" + boost::lexical_cast<std::string>(abs_pos_) + "]"; } } /* end ns Wccl */ diff --git a/libwccl/values/tokenmatch.h b/libwccl/values/tokenmatch.h index 46e085cdd39f2e2624a13c233c515c0694d5c7f3..99382f70c861e86ec5660ca6553207791e59f209 100644 --- a/libwccl/values/tokenmatch.h +++ b/libwccl/values/tokenmatch.h @@ -9,16 +9,10 @@ class TokenMatch : public MatchData { public: - explicit TokenMatch(Position position) - : position_(position) + explicit TokenMatch(int abs_pos) + : abs_pos_(abs_pos) { - BOOST_ASSERT(position_.get_value() != Position::Nowhere); - } - - explicit TokenMatch(int pos) - : position_(pos) - { - BOOST_ASSERT(position_.get_value() != Position::Nowhere); + BOOST_ASSERT(abs_pos_ >= 0); } /// MatchData override. @@ -27,13 +21,13 @@ public: } /// MatchData override. - Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const { - return position_; + int first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const { + return abs_pos_; } /// MatchData override. - Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const { - return position_; + int last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>&) const { + return abs_pos_; } /// MatchData override @@ -46,7 +40,7 @@ protected: } private: - Position position_; + int abs_pos_; }; } /* end ns Wccl */ diff --git a/tests/ann_op.cpp b/tests/ann_op.cpp index 3b24a010fcd54a6d59c87be9e9c2a06a3e8da6ad..5fcd11ec75b5ac0127899a6230b88ec4d6fdc34f 100644 --- a/tests/ann_op.cpp +++ b/tests/ann_op.cpp @@ -42,8 +42,8 @@ struct AnnSubFix : public Wccl::PositionFixture BOOST_FIXTURE_TEST_CASE(ann_not, AnnSubFix) { boost::shared_ptr< Constant<Match> > m0, m1; - m0.reset(new Constant<Match>(Match(TokenMatch(pos_value(0))))); - m1.reset(new Constant<Match>(Match(TokenMatch(pos_value(1))))); + m0.reset(new Constant<Match>(Match(TokenMatch(pos_value(0).get_value())))); + m1.reset(new Constant<Match>(Match(TokenMatch(pos_value(1).get_value())))); Ann a(m0, m1, "ch1"); boost::shared_ptr<const Bool> rv = a.apply(cx); BOOST_REQUIRE(rv); @@ -57,8 +57,8 @@ BOOST_FIXTURE_TEST_CASE(ann_not, AnnSubFix) BOOST_FIXTURE_TEST_CASE(ann_yes, AnnSubFix) { boost::shared_ptr< Constant<Match> > m0, m1; - m0.reset(new Constant<Match>(Match(TokenMatch(pos_value(2))))); - m1.reset(new Constant<Match>(Match(TokenMatch(pos_value(3))))); + m0.reset(new Constant<Match>(Match(TokenMatch(pos_value(2).get_value())))); + m1.reset(new Constant<Match>(Match(TokenMatch(pos_value(3).get_value())))); Ann a(m0, m1, "ch1"); boost::shared_ptr<const Bool> rv = a.apply(cx); BOOST_REQUIRE(rv); @@ -72,8 +72,8 @@ BOOST_FIXTURE_TEST_CASE(ann_yes, AnnSubFix) BOOST_FIXTURE_TEST_CASE(ann_sub, AnnSubFix) { boost::shared_ptr< Constant<Match> > m0, m1; - m0.reset(new Constant<Match>(Match(TokenMatch(pos_value(2))))); - m1.reset(new Constant<Match>(Match(TokenMatch(pos_value(2))))); + m0.reset(new Constant<Match>(Match(TokenMatch(pos_value(2).get_value())))); + m1.reset(new Constant<Match>(Match(TokenMatch(pos_value(2).get_value())))); Ann a(m0, m1, "ch1"); boost::shared_ptr<const Bool> rv = a.apply(cx); BOOST_REQUIRE(rv); @@ -90,8 +90,8 @@ BOOST_FIXTURE_TEST_CASE(ann_sub, AnnSubFix) BOOST_FIXTURE_TEST_CASE(ann_to_string, AnnSubFix) { boost::shared_ptr< Constant<Match> > m0, m1; - m0.reset(new Constant<Match>(Match(TokenMatch(pos_value(0))))); - m1.reset(new Constant<Match>(Match(TokenMatch(pos_value(1))))); + m0.reset(new Constant<Match>(Match(TokenMatch(pos_value(0).get_value())))); + m1.reset(new Constant<Match>(Match(TokenMatch(pos_value(1).get_value())))); Ann ann(m0, m1, "ch"); BOOST_CHECK_EQUAL("ann(TOK[0], TOK[1], \"ch\")", ann.to_string(tagset)); } @@ -99,8 +99,8 @@ BOOST_FIXTURE_TEST_CASE(ann_to_string, AnnSubFix) BOOST_FIXTURE_TEST_CASE(ann_to_string_one, AnnSubFix) { boost::shared_ptr< Constant<Match> > m0, m1; - m0.reset(new Constant<Match>(Match(TokenMatch(pos_value(0))))); - m1.reset(new Constant<Match>(Match(TokenMatch(pos_value(1))))); + m0.reset(new Constant<Match>(Match(TokenMatch(pos_value(0).get_value())))); + m1.reset(new Constant<Match>(Match(TokenMatch(pos_value(1).get_value())))); Ann ann(m0, "ch"); BOOST_CHECK_EQUAL("ann(TOK[0], \"ch\")", ann.to_string(tagset)); } @@ -108,8 +108,8 @@ BOOST_FIXTURE_TEST_CASE(ann_to_string_one, AnnSubFix) BOOST_FIXTURE_TEST_CASE(annsub_to_string, AnnSubFix) { boost::shared_ptr< Constant<Match> > m0, m1; - m0.reset(new Constant<Match>(Match(TokenMatch(pos_value(0))))); - m1.reset(new Constant<Match>(Match(TokenMatch(pos_value(1))))); + m0.reset(new Constant<Match>(Match(TokenMatch(pos_value(0).get_value())))); + m1.reset(new Constant<Match>(Match(TokenMatch(pos_value(1).get_value())))); AnnSub ann(m0, m1, "ch"); BOOST_CHECK_EQUAL("annsub(TOK[0], TOK[1], \"ch\")", ann.to_string(tagset)); } @@ -117,8 +117,8 @@ BOOST_FIXTURE_TEST_CASE(annsub_to_string, AnnSubFix) BOOST_FIXTURE_TEST_CASE(annsub_to_string_one, AnnSubFix) { boost::shared_ptr< Constant<Match> > m0, m1; - m0.reset(new Constant<Match>(Match(TokenMatch(pos_value(0))))); - m1.reset(new Constant<Match>(Match(TokenMatch(pos_value(1))))); + m0.reset(new Constant<Match>(Match(TokenMatch(pos_value(0).get_value())))); + m1.reset(new Constant<Match>(Match(TokenMatch(pos_value(1).get_value())))); AnnSub ann(m0, "ch"); BOOST_CHECK_EQUAL("annsub(TOK[0], \"ch\")", ann.to_string(tagset)); } diff --git a/tests/isempty.cpp b/tests/isempty.cpp index a55593a3b92d03b955d7e1ccfdb8e9b1baf21c02..0256bd570281e21390915f64419a81886bf52cb5 100644 --- a/tests/isempty.cpp +++ b/tests/isempty.cpp @@ -70,7 +70,7 @@ BOOST_FIXTURE_TEST_CASE(default_match, IsEmptyFix) BOOST_FIXTURE_TEST_CASE(token_match, IsEmptyFix) { - Match token_match(TokenMatch(Position(0))); + Match token_match(TokenMatch(0)); boost::shared_ptr<Function<Match> > match_expr(new Constant<Match>(token_match)); IsEmpty<Match> e(match_expr); BOOST_CHECK(!e.apply(cx)->get_value()); diff --git a/tests/match.cpp b/tests/match.cpp index 9b3ab09ee9b8da47a745fc1f0dbb23bfde605e2d..9a7008bdcf30719664e661b6a47a547ff47571a8 100644 --- a/tests/match.cpp +++ b/tests/match.cpp @@ -20,8 +20,8 @@ BOOST_AUTO_TEST_CASE(empty) boost::shared_ptr<Corpus2::AnnotatedSentence> ptr; Wccl::Match m; BOOST_CHECK(m.empty()); - BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.first_token(ptr), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.last_token(ptr), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH()"); } @@ -30,8 +30,8 @@ BOOST_AUTO_TEST_CASE(token) boost::shared_ptr<Corpus2::AnnotatedSentence> ptr; Wccl::TokenMatch m(1); BOOST_CHECK(!m.empty()); - BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), 1); - BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), 1); + BOOST_CHECK_EQUAL(m.first_token(ptr), 1); + BOOST_CHECK_EQUAL(m.last_token(ptr), 1); BOOST_CHECK_EQUAL(m.to_raw_string(), "TOK[1]"); } @@ -40,29 +40,29 @@ BOOST_AUTO_TEST_CASE(vector1) boost::shared_ptr<Corpus2::AnnotatedSentence> ptr; Wccl::MatchVector m; BOOST_CHECK(m.empty()); - BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.first_token(ptr), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.last_token(ptr), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH()"); m.append(boost::make_shared<Wccl::MatchVector>()); BOOST_CHECK(m.empty()); - BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.first_token(ptr), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.last_token(ptr), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(MATCH())"); m.append(boost::make_shared<Wccl::MatchVector>()); BOOST_CHECK(m.empty()); - BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.first_token(ptr), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m.last_token(ptr), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(MATCH(),MATCH())"); Wccl::MatchVector m2; m2.append(boost::make_shared<Wccl::MatchVector>(m)); BOOST_CHECK(m2.empty()); - BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m2.first_token(ptr), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m2.last_token(ptr), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m2.to_raw_string(), "MATCH(MATCH(MATCH(),MATCH()))"); m2.append(boost::make_shared<Wccl::MatchVector>()); BOOST_CHECK(m2.empty()); - BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m2.first_token(ptr), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m2.last_token(ptr), Wccl::Position::Nowhere); BOOST_CHECK_EQUAL(m2.to_raw_string(), "MATCH(MATCH(MATCH(),MATCH()),MATCH())"); } @@ -72,44 +72,44 @@ BOOST_AUTO_TEST_CASE(matchvector_first_last) Wccl::MatchVector m; m.append(boost::make_shared<TokenMatch>(5)); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(TOK[5])"); - BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), 5); - BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), 5); + BOOST_CHECK_EQUAL(m.first_token(ptr), 5); + BOOST_CHECK_EQUAL(m.last_token(ptr), 5); m.append(boost::make_shared<TokenMatch>(6)); - BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), 5); - BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), 6); + BOOST_CHECK_EQUAL(m.first_token(ptr), 5); + BOOST_CHECK_EQUAL(m.last_token(ptr), 6); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(TOK[5],TOK[6])"); m.append(boost::make_shared<TokenMatch>(4)); - BOOST_CHECK_EQUAL(m.first_token(ptr).get_value(), 4); - BOOST_CHECK_EQUAL(m.last_token(ptr).get_value(), 6); + BOOST_CHECK_EQUAL(m.first_token(ptr), 4); + BOOST_CHECK_EQUAL(m.last_token(ptr), 6); BOOST_CHECK_EQUAL(m.to_raw_string(), "MATCH(TOK[5],TOK[6],TOK[4])"); Wccl::MatchVector m2; m2.append(boost::make_shared<TokenMatch>(5)); boost::shared_ptr<Wccl::MatchVector> m1 = boost::make_shared<Wccl::MatchVector>(m); m2.append(m1); - BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), 4); - BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), 6); + BOOST_CHECK_EQUAL(m2.first_token(ptr), 4); + BOOST_CHECK_EQUAL(m2.last_token(ptr), 6); BOOST_CHECK_EQUAL(m2.to_raw_string(), "MATCH(TOK[5],MATCH(TOK[5],TOK[6],TOK[4]))"); m2.append(boost::make_shared<TokenMatch>(2)); - BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), 2); - BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), 6); + BOOST_CHECK_EQUAL(m2.first_token(ptr), 2); + BOOST_CHECK_EQUAL(m2.last_token(ptr), 6); m2.append(boost::make_shared<TokenMatch>(7)); - BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), 2); - BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), 7); + BOOST_CHECK_EQUAL(m2.first_token(ptr), 2); + BOOST_CHECK_EQUAL(m2.last_token(ptr), 7); m1->append(boost::make_shared<TokenMatch>(1)); - BOOST_CHECK_EQUAL(m2.first_token(ptr).get_value(), 1); - BOOST_CHECK_EQUAL(m2.last_token(ptr).get_value(), 7); + BOOST_CHECK_EQUAL(m2.first_token(ptr), 1); + BOOST_CHECK_EQUAL(m2.last_token(ptr), 7); Wccl::MatchVector m3; BOOST_CHECK_EQUAL(m3.to_raw_string(), "MATCH()"); - BOOST_CHECK_EQUAL(m3.first_token(ptr).get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m3.last_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m3.first_token(ptr), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m3.last_token(ptr), Wccl::Position::Nowhere); m3.append(boost::make_shared<MatchVector>()); BOOST_CHECK_EQUAL(m3.to_raw_string(), "MATCH(MATCH())"); - BOOST_CHECK_EQUAL(m3.first_token(ptr).get_value(), Wccl::Position::Nowhere); - BOOST_CHECK_EQUAL(m3.last_token(ptr).get_value(), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m3.first_token(ptr), Wccl::Position::Nowhere); + BOOST_CHECK_EQUAL(m3.last_token(ptr), Wccl::Position::Nowhere); m3.append(boost::make_shared<TokenMatch>(1)); BOOST_CHECK_EQUAL(m3.to_raw_string(), "MATCH(MATCH(),TOK[1])"); - BOOST_CHECK_EQUAL(m3.first_token(ptr).get_value(), 1); - BOOST_CHECK_EQUAL(m3.last_token(ptr).get_value(), 1); + BOOST_CHECK_EQUAL(m3.first_token(ptr), 1); + BOOST_CHECK_EQUAL(m3.last_token(ptr), 1); } BOOST_AUTO_TEST_CASE(varmatch) @@ -117,9 +117,9 @@ BOOST_AUTO_TEST_CASE(varmatch) boost::shared_ptr<Corpus2::AnnotatedSentence> ptr; Wccl::Variables v; v.put<Wccl::Match>("a", new Wccl::Match(boost::shared_ptr<MatchData>(new Wccl::TokenMatch(1)))); - BOOST_CHECK_EQUAL(v.get<Wccl::Match>("a")->first_token(ptr).get_value(), 1); + BOOST_CHECK_EQUAL(v.get<Wccl::Match>("a")->first_token(ptr), 1); BOOST_CHECK(v.get_put<Wccl::Match>("b")->empty()); - BOOST_CHECK_EQUAL(v.get_put<Wccl::Match>("b")->first_token(ptr).get_value(), + BOOST_CHECK_EQUAL(v.get_put<Wccl::Match>("b")->first_token(ptr), Wccl::Position::Nowhere); } diff --git a/tests/rules-data/match/michal/cclmatch.xml b/tests/rules-data/match/michal/cclmatch.xml new file mode 100644 index 0000000000000000000000000000000000000000..c23ba8b14745c9ffc1adbbe17d67cb872e5c4fd8 --- /dev/null +++ b/tests/rules-data/match/michal/cclmatch.xml @@ -0,0 +1,124 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd"> +<cesAna xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" type="lex disamb"> +<chunkList> + <chunk> + <sentence> + <tok> + <orth>To</orth> + <lex><base>ten</base><ctag>adj:sg:nom:n:pos</ctag></lex> + <lex><base>ten</base><ctag>adj:sg:acc:n:pos</ctag></lex> + <lex><base>to</base><ctag>conj</ctag></lex> + <lex><base>to</base><ctag>pred</ctag></lex> + <lex><base>to</base><ctag>qub</ctag></lex> + <lex><base>to</base><ctag>subst:sg:nom:n</ctag></lex> + <lex><base>to</base><ctag>subst:sg:acc:n</ctag></lex> + <lex><base>to</base><ctag>subst:sg:voc:n</ctag></lex> + <ann chan="capitalized_noun" head="1">1</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + <tok> + <orth>Wyżyna</orth> + <lex><base>wyżyna</base><ctag>subst:sg:nom:f</ctag></lex> + <ann chan="capitalized_noun" head="1">2</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + <tok> + <orth>Płaska</orth> + <lex><base>płaski</base><ctag>adj:sg:nom:f:pos</ctag></lex> + <ann chan="capitalized_noun" head="1">3</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based" head="1">1</ann> + </tok> + <ns/> + <tok> + <orth>,</orth> + <lex><base>,</base><ctag>interp</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + <tok> + <orth>Wyżyna</orth> + <lex><base>wyżyna</base><ctag>subst:sg:nom:f</ctag></lex> + <ann chan="capitalized_noun" head="1">4</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + <tok> + <orth>Wypukła</orth> + <lex><base>wypukły</base><ctag>adj:sg:nom:f:pos</ctag></lex> + <ann chan="capitalized_noun" head="1">5</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based" head="1">2</ann> + </tok> + <ns/> + <tok> + <orth>,</orth> + <lex><base>,</base><ctag>interp</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + <tok> + <orth>wyżyna</orth> + <lex><base>wyżyna</base><ctag>subst:sg:nom:f</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + <tok> + <orth>Kot</orth> + <lex><base>kot</base><ctag>subst:sg:nom:m2</ctag></lex> + <ann chan="capitalized_noun" head="1">7</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word" head="1">1</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + <tok> + <orth>i</orth> + <lex><base>i</base><ctag>conj</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + <tok> + <orth>miasto</orth> + <lex><base>miasto</base><ctag>subst:sg:nom:n</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger" head="1">1</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + <tok> + <orth>Gdańsk</orth> + <lex><base>gdańsk</base><ctag>subst:sg:nom:m3</ctag></lex> + <ann chan="capitalized_noun" head="1">8</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word" head="1">2</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + <ns/> + <tok> + <orth>.</orth> + <lex><base>.</base><ctag>interp</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + </sentence> + </chunk> +</chunkList> +</cesAna> diff --git a/tests/rules-data/match/michal/cclmatch1.ccl b/tests/rules-data/match/michal/cclmatch1.ccl new file mode 100644 index 0000000000000000000000000000000000000000..d2ea62b80385cf4e2095adaae4d42b88fabbaca2 --- /dev/null +++ b/tests/rules-data/match/michal/cclmatch1.ccl @@ -0,0 +1,16 @@ +apply( + match( + regex( base[0], 'wyżyna' ), + is( 'reladj_gaz_based' ) + ), + cond( + ann(:1, 'capitalized_noun' ), + equal( nmb[first(:1)], nmb[first(:2)] ), + equal( cas[first(:1)], cas[first(:2)] ), + equal( gnd[first(:1)], gnd[first(:2)] ) + ), + actions( + mark(M, 'HIGHLAND_NAM') + ) +) + diff --git a/tests/rules-data/match/michal/cclmatch1.out.xml b/tests/rules-data/match/michal/cclmatch1.out.xml new file mode 100644 index 0000000000000000000000000000000000000000..3ec16111de4bfeee240c6982e19fd2d40f1eb108 --- /dev/null +++ b/tests/rules-data/match/michal/cclmatch1.out.xml @@ -0,0 +1,133 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd"> +<cesAna xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" type="lex disamb"> +<chunkList> + <chunk> + <sentence> + <tok> + <orth>To</orth> + <lex><base>ten</base><ctag>adj:sg:nom:n:pos</ctag></lex> + <lex><base>ten</base><ctag>adj:sg:acc:n:pos</ctag></lex> + <lex><base>to</base><ctag>conj</ctag></lex> + <lex><base>to</base><ctag>pred</ctag></lex> + <lex><base>to</base><ctag>qub</ctag></lex> + <lex><base>to</base><ctag>subst:sg:nom:n</ctag></lex> + <lex><base>to</base><ctag>subst:sg:acc:n</ctag></lex> + <lex><base>to</base><ctag>subst:sg:voc:n</ctag></lex> + <ann chan="capitalized_noun" head="1">1</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="HIGHLAND_NAM">0</ann> + </tok> + <tok> + <orth>Wyżyna</orth> + <lex><base>wyżyna</base><ctag>subst:sg:nom:f</ctag></lex> + <ann chan="capitalized_noun" head="1">2</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="HIGHLAND_NAM" head="1">1</ann> + </tok> + <tok> + <orth>Płaska</orth> + <lex><base>płaski</base><ctag>adj:sg:nom:f:pos</ctag></lex> + <ann chan="capitalized_noun" head="1">3</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based" head="1">1</ann> + <ann chan="HIGHLAND_NAM">1</ann> + </tok> + <ns/> + <tok> + <orth>,</orth> + <lex><base>,</base><ctag>interp</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="HIGHLAND_NAM">0</ann> + </tok> + <tok> + <orth>Wyżyna</orth> + <lex><base>wyżyna</base><ctag>subst:sg:nom:f</ctag></lex> + <ann chan="capitalized_noun" head="1">4</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="HIGHLAND_NAM" head="1">2</ann> + </tok> + <tok> + <orth>Wypukła</orth> + <lex><base>wypukły</base><ctag>adj:sg:nom:f:pos</ctag></lex> + <ann chan="capitalized_noun" head="1">5</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based" head="1">2</ann> + <ann chan="HIGHLAND_NAM">2</ann> + </tok> + <ns/> + <tok> + <orth>,</orth> + <lex><base>,</base><ctag>interp</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="HIGHLAND_NAM">0</ann> + </tok> + <tok> + <orth>wyżyna</orth> + <lex><base>wyżyna</base><ctag>subst:sg:nom:f</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="HIGHLAND_NAM">0</ann> + </tok> + <tok> + <orth>Kot</orth> + <lex><base>kot</base><ctag>subst:sg:nom:m2</ctag></lex> + <ann chan="capitalized_noun" head="1">7</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word" head="1">1</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="HIGHLAND_NAM">0</ann> + </tok> + <tok> + <orth>i</orth> + <lex><base>i</base><ctag>conj</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + <tok> + <orth>miasto</orth> + <lex><base>miasto</base><ctag>subst:sg:nom:n</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger" head="1">1</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + <tok> + <orth>Gdańsk</orth> + <lex><base>gdańsk</base><ctag>subst:sg:nom:m3</ctag></lex> + <ann chan="capitalized_noun" head="1">8</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word" head="1">2</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + <ns/> + <tok> + <orth>.</orth> + <lex><base>.</base><ctag>interp</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + </sentence> + </chunk> +</chunkList> +</cesAna> diff --git a/tests/rules-data/match/michal/cclmatch2.ccl b/tests/rules-data/match/michal/cclmatch2.ccl new file mode 100644 index 0000000000000000000000000000000000000000..b02d73ff33475a200388c331fbf74a0d1fc4e5f8 --- /dev/null +++ b/tests/rules-data/match/michal/cclmatch2.ccl @@ -0,0 +1,15 @@ + + +apply( + match( + regex( base[0], 'wyżyna'), + and( inter(class[0], {subst}), inter(cas[0], {nom}) ) + ), + cond( + not( ann(:1, 'capitalized_noun' ) ) + ), + actions( + mark(:2, 'HIGHLAND_NAM') + ) +) + diff --git a/tests/rules-data/match/michal/cclmatch2.out.xml b/tests/rules-data/match/michal/cclmatch2.out.xml new file mode 100644 index 0000000000000000000000000000000000000000..de4ddd61344224cac98f3038da2314f54578359e --- /dev/null +++ b/tests/rules-data/match/michal/cclmatch2.out.xml @@ -0,0 +1,133 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd"> +<cesAna xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" type="lex disamb"> +<chunkList> + <chunk> + <sentence> + <tok> + <orth>To</orth> + <lex><base>ten</base><ctag>adj:sg:nom:n:pos</ctag></lex> + <lex><base>ten</base><ctag>adj:sg:acc:n:pos</ctag></lex> + <lex><base>to</base><ctag>conj</ctag></lex> + <lex><base>to</base><ctag>pred</ctag></lex> + <lex><base>to</base><ctag>qub</ctag></lex> + <lex><base>to</base><ctag>subst:sg:nom:n</ctag></lex> + <lex><base>to</base><ctag>subst:sg:acc:n</ctag></lex> + <lex><base>to</base><ctag>subst:sg:voc:n</ctag></lex> + <ann chan="capitalized_noun" head="1">1</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="HIGHLAND_NAM">0</ann> + </tok> + <tok> + <orth>Wyżyna</orth> + <lex><base>wyżyna</base><ctag>subst:sg:nom:f</ctag></lex> + <ann chan="capitalized_noun" head="1">2</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="HIGHLAND_NAM">0</ann> + </tok> + <tok> + <orth>Płaska</orth> + <lex><base>płaski</base><ctag>adj:sg:nom:f:pos</ctag></lex> + <ann chan="capitalized_noun" head="1">3</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based" head="1">1</ann> + <ann chan="HIGHLAND_NAM">0</ann> + </tok> + <ns/> + <tok> + <orth>,</orth> + <lex><base>,</base><ctag>interp</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="HIGHLAND_NAM">0</ann> + </tok> + <tok> + <orth>Wyżyna</orth> + <lex><base>wyżyna</base><ctag>subst:sg:nom:f</ctag></lex> + <ann chan="capitalized_noun" head="1">4</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="HIGHLAND_NAM">0</ann> + </tok> + <tok> + <orth>Wypukła</orth> + <lex><base>wypukły</base><ctag>adj:sg:nom:f:pos</ctag></lex> + <ann chan="capitalized_noun" head="1">5</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based" head="1">2</ann> + <ann chan="HIGHLAND_NAM">0</ann> + </tok> + <ns/> + <tok> + <orth>,</orth> + <lex><base>,</base><ctag>interp</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="HIGHLAND_NAM">0</ann> + </tok> + <tok> + <orth>wyżyna</orth> + <lex><base>wyżyna</base><ctag>subst:sg:nom:f</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="HIGHLAND_NAM">0</ann> + </tok> + <tok> + <orth>Kot</orth> + <lex><base>kot</base><ctag>subst:sg:nom:m2</ctag></lex> + <ann chan="capitalized_noun" head="1">7</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word" head="1">1</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="HIGHLAND_NAM" head="1">1</ann> + </tok> + <tok> + <orth>i</orth> + <lex><base>i</base><ctag>conj</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + <tok> + <orth>miasto</orth> + <lex><base>miasto</base><ctag>subst:sg:nom:n</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger" head="1">1</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + <tok> + <orth>Gdańsk</orth> + <lex><base>gdańsk</base><ctag>subst:sg:nom:m3</ctag></lex> + <ann chan="capitalized_noun" head="1">8</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word" head="1">2</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + <ns/> + <tok> + <orth>.</orth> + <lex><base>.</base><ctag>interp</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + </tok> + </sentence> + </chunk> +</chunkList> +</cesAna> diff --git a/tests/rules-data/match/michal/cclmatch3.ccl b/tests/rules-data/match/michal/cclmatch3.ccl new file mode 100644 index 0000000000000000000000000000000000000000..817f1b54e5077e8e8844e1bdc1267afef2e185ed --- /dev/null +++ b/tests/rules-data/match/michal/cclmatch3.ccl @@ -0,0 +1,12 @@ +apply( + match( + is( 'city_trigger' ), + is( 'first_capital_word' ) + ), + cond( + not( annsub(:2, 'city_nam_gaz') ) + ), + actions( + mark(:2, 'CITY_NAM') + ) +) diff --git a/tests/rules-data/match/michal/cclmatch3.out.xml b/tests/rules-data/match/michal/cclmatch3.out.xml new file mode 100644 index 0000000000000000000000000000000000000000..5d348dc0919b3710230a8e6b9e843c816e7e1d93 --- /dev/null +++ b/tests/rules-data/match/michal/cclmatch3.out.xml @@ -0,0 +1,137 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE cesAna SYSTEM "xcesAnaIPI.dtd"> +<cesAna xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" type="lex disamb"> +<chunkList> + <chunk> + <sentence> + <tok> + <orth>To</orth> + <lex><base>ten</base><ctag>adj:sg:nom:n:pos</ctag></lex> + <lex><base>ten</base><ctag>adj:sg:acc:n:pos</ctag></lex> + <lex><base>to</base><ctag>conj</ctag></lex> + <lex><base>to</base><ctag>pred</ctag></lex> + <lex><base>to</base><ctag>qub</ctag></lex> + <lex><base>to</base><ctag>subst:sg:nom:n</ctag></lex> + <lex><base>to</base><ctag>subst:sg:acc:n</ctag></lex> + <lex><base>to</base><ctag>subst:sg:voc:n</ctag></lex> + <ann chan="capitalized_noun" head="1">1</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="CITY_NAM">0</ann> + </tok> + <tok> + <orth>Wyżyna</orth> + <lex><base>wyżyna</base><ctag>subst:sg:nom:f</ctag></lex> + <ann chan="capitalized_noun" head="1">2</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="CITY_NAM">0</ann> + </tok> + <tok> + <orth>Płaska</orth> + <lex><base>płaski</base><ctag>adj:sg:nom:f:pos</ctag></lex> + <ann chan="capitalized_noun" head="1">3</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based" head="1">1</ann> + <ann chan="CITY_NAM">0</ann> + </tok> + <ns/> + <tok> + <orth>,</orth> + <lex><base>,</base><ctag>interp</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="CITY_NAM">0</ann> + </tok> + <tok> + <orth>Wyżyna</orth> + <lex><base>wyżyna</base><ctag>subst:sg:nom:f</ctag></lex> + <ann chan="capitalized_noun" head="1">4</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="CITY_NAM">0</ann> + </tok> + <tok> + <orth>Wypukła</orth> + <lex><base>wypukły</base><ctag>adj:sg:nom:f:pos</ctag></lex> + <ann chan="capitalized_noun" head="1">5</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based" head="1">2</ann> + <ann chan="CITY_NAM">0</ann> + </tok> + <ns/> + <tok> + <orth>,</orth> + <lex><base>,</base><ctag>interp</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="CITY_NAM">0</ann> + </tok> + <tok> + <orth>wyżyna</orth> + <lex><base>wyżyna</base><ctag>subst:sg:nom:f</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="CITY_NAM">0</ann> + </tok> + <tok> + <orth>Kot</orth> + <lex><base>kot</base><ctag>subst:sg:nom:m2</ctag></lex> + <ann chan="capitalized_noun" head="1">7</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word" head="1">1</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="CITY_NAM">0</ann> + </tok> + <tok> + <orth>i</orth> + <lex><base>i</base><ctag>conj</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="CITY_NAM">0</ann> + </tok> + <tok> + <orth>miasto</orth> + <lex><base>miasto</base><ctag>subst:sg:nom:n</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger" head="1">1</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="CITY_NAM">0</ann> + </tok> + <tok> + <orth>Gdańsk</orth> + <lex><base>gdańsk</base><ctag>subst:sg:nom:m3</ctag></lex> + <ann chan="capitalized_noun" head="1">8</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word" head="1">2</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="CITY_NAM" head="1">1</ann> + </tok> + <ns/> + <tok> + <orth>.</orth> + <lex><base>.</base><ctag>interp</ctag></lex> + <ann chan="capitalized_noun">0</ann> + <ann chan="city_trigger">0</ann> + <ann chan="first_capital_word">0</ann> + <ann chan="reladj_gaz_based">0</ann> + <ann chan="CITY_NAM">0</ann> + </tok> + </sentence> + </chunk> +</chunkList> +</cesAna>