Skip to content
Snippets Groups Projects
Commit b1b976c6 authored by ilor's avatar ilor
Browse files

Merge branch 'master' of nlp.pwr.wroc.pl:wccl

parents 9c7dabf1 c5dd1dfd
No related branches found
No related tags found
No related merge requests found
......@@ -57,13 +57,14 @@ SET(libwccl_STAT_SRC
ops/functions/tset/catfilter.cpp
ops/functions/tset/getsymbols.cpp
ops/functions/tset/getsymbolsinrange.cpp
ops/match/actions/markmatch.cpp
ops/match/applyoperator.cpp
ops/match/conditions/conjconditions.cpp
ops/match/conditions/longest.cpp
ops/match/conditions/oneof.cpp
ops/match/conditions/optionalmatch.cpp
ops/match/conditions/repeatedmatch.cpp
ops/match/conditions/tokencondition.cpp
ops/match/applyoperator.cpp
ops/match/matchoperator.cpp
ops/rulesequence.cpp
ops/tagaction.cpp
......
#include <libwccl/values/match.h>
#include <libwccl/ops/match/actions/markmatch.h>
#include <sstream>
namespace Wccl {
void MarkMatch::execute(const ActionExecContext& context) const
{
SentenceContext& sc = context.sentence_context();
boost::shared_ptr<Corpus2::AnnotatedSentence> as;
as = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(sc.get_sentence_ptr());
if (!as) {
throw InvalidArgument("context", "Operator needs an annotated sentence.");
}
boost::shared_ptr<const Match> match_from = match_from_->apply(context);
boost::shared_ptr<const Match> match_to =
(match_from_.get() == match_to_.get()) ? match_from : match_to_->apply(context);
int abs_left = match_from->first_token(as).get_value();
int abs_right = match_to->last_token(as).get_value();
if (abs_left < 0) {
throw WcclError("Received starting match that points outside sentence.");
}
if (abs_right >= sc.size()) {
throw WcclError("Received ending match that points outside sentence.");
}
if (abs_left > abs_right) {
throw WcclError("Received starting match points after the received ending match.");
}
// TODO: what about head in this mark from match actions? Mark from tag actions does have it.
int abs_head = abs_left;
if (!as->has_channel(chan_name_)) {
as->create_channel(chan_name_);
}
Corpus2::AnnotationChannel& channel = as->get_channel(chan_name_);
int segment_idx = channel.get_new_segment_index();
for (int i = abs_left; i <= abs_right; ++i) {
if (channel.get_segment_at(i) > 0) {
throw WcclError("Mark action would overwrite existing annotation");
}
}
for (int i = abs_left; i <= abs_right; ++i) {
channel.set_segment_at(i, segment_idx);
channel.set_head_at(i, false);
}
channel.set_head_at(abs_head, true);
}
/**
 * Builds the textual form of the action using the given tagset:
 * name(from, "channel") when both ends are the same function object,
 * name(from, to, "channel") otherwise.
 */
std::string MarkMatch::to_string(const Corpus2::Tagset& tagset) const
{
    const bool distinct_ends = (match_from_.get() != match_to_.get());

    std::ostringstream out;
    out << name();
    out << "(";
    out << match_from_->to_string(tagset);
    out << ", ";
    if (distinct_ends) {
        // The "to" argument is only printed when it differs from "from".
        out << match_to_->to_string(tagset);
        out << ", ";
    }
    out << "\"";
    out << chan_name_;
    out << "\")";
    return out.str();
}
/**
 * Streams a tagset-independent representation of the action to os:
 * name(from, "channel") when both ends are the same function object,
 * name(from, to, "channel") otherwise.
 * @returns The stream written to.
 */
std::ostream& MarkMatch::write_to(std::ostream& os) const
{
    const bool distinct_ends = (match_from_.get() != match_to_.get());

    os << name();
    os << "(";
    os << *match_from_;
    os << ", ";
    if (distinct_ends) {
        // The "to" argument is only printed when it differs from "from".
        os << *match_to_;
        os << ", ";
    }
    os << "\"";
    os << chan_name_;
    os << "\")";
    return os;
}
} /* end ns Wccl */
#ifndef LIBWCCL_OPS_MATCH_ACTIONS_MARKMATCH_H
#define LIBWCCL_OPS_MATCH_ACTIONS_MARKMATCH_H
#include <libwccl/ops/match/matchaction.h>
#include <libwccl/ops/function.h>
namespace Wccl {
/**
 * Match action named "mark". It holds two Match-returning functions that
 * designate the start and the end of a span, plus the name of the
 * annotation channel to mark the span in.
 *
 * The function pointers are stored BY VALUE (shared ownership). Storing
 * them as references to the constructor arguments would leave the members
 * dangling as soon as the caller's shared_ptr objects went out of scope.
 */
class MarkMatch : public MatchAction
{
public:
    /**
     * Creates an action marking a span delimited by two matches.
     * @param match_from function yielding the match that starts the span; must not be null
     * @param match_to function yielding the match that ends the span; must not be null
     * @param annotation_name name of the annotation channel to mark in
     */
    MarkMatch(
            const boost::shared_ptr<Function<Match> >& match_from,
            const boost::shared_ptr<Function<Match> >& match_to,
            const std::string& annotation_name)
        : match_from_(match_from),
          match_to_(match_to),
          chan_name_(annotation_name)
    {
        BOOST_ASSERT(match_from_);
        BOOST_ASSERT(match_to_);
    }

    /**
     * Creates an action where a single match delimits both ends of the span.
     * Both members share the same function object, which is how the
     * single-argument form is recognised later (pointer equality).
     * @param match_from_to function yielding the match to mark; must not be null
     * @param annotation_name name of the annotation channel to mark in
     */
    MarkMatch(
            const boost::shared_ptr<Function<Match> >& match_from_to,
            const std::string& annotation_name)
        : match_from_(match_from_to),
          match_to_(match_from_to),
          chan_name_(annotation_name)
    {
        BOOST_ASSERT(match_from_);
        BOOST_ASSERT(match_to_);
    }

    /**
     * @returns Name of the action.
     */
    std::string name() const {
        return "mark";
    }

    /**
     * Executes the action for the given execution context.
     */
    void execute(const ActionExecContext& context) const;

    /**
     * @returns String representation of the expression.
     */
    std::string to_string(const Corpus2::Tagset& tagset) const;

protected:
    /**
     * Writes string representation of the MatchAction to
     * an output stream.
     * @returns Stream written to.
     * @note May be incomplete and/or contain internal info.
     */
    virtual std::ostream& write_to(std::ostream& ostream) const;

private:
    // Owned copies of the shared pointers (not references) so the functions
    // stay alive for as long as this action does.
    const boost::shared_ptr<Function<Match> > match_from_;
    const boost::shared_ptr<Function<Match> > match_to_;
    const std::string chan_name_;
};
} /* end ns Wccl */
#endif // LIBWCCL_OPS_MATCH_ACTIONS_MARKMATCH_H
#ifndef LIBWCCL_OPS_MATCH_MATCHACTION_H
#define LIBWCCL_OPS_MATCH_MATCHACTION_H
#include <libwccl/ops/expression.h>
#include <libwccl/ops/actionexeccontext.h>
namespace Wccl {
/**
......
......@@ -78,6 +78,7 @@ header {
#include <libwccl/ops/match/conditions/optionalmatch.h>
#include <libwccl/ops/match/conditions/repeatedmatch.h>
#include <libwccl/ops/match/conditions/conjconditions.h>
#include <libwccl/ops/match/actions/markmatch.h>
// Unicode String
#include <unicode/uniset.h>
......@@ -407,29 +408,41 @@ position_value
// ----------------------------------------------------------------------------
// Value used in match operators, such as TOK[position] and ANN[position, name]
// Returns boost::shared_ptr<Match>
match_value
match_literal
returns [boost::shared_ptr<Match> val]
{
boost::shared_ptr<MatchData> m;
}
: m = match_data_value {
: m = match_data_literal {
val.reset(new Match(m));
}
;
// Constant match value
// Returns boost::shared_ptr<Constant<Match> >
match_value_const
returns [boost::shared_ptr<Constant<Match> > val]
{
boost::shared_ptr<Match> m;
}
: m = match_literal {
val.reset(new Constant<Match>(*m));
}
;
// ----------------------------------------------------------------------------
// Value used in match operators, such as TOK[position] and ANN[position, name]
// Returns boost::shared_ptr<MatchData>
match_data_value
match_data_literal
returns [boost::shared_ptr<MatchData> val]
: val = token_match_value
| val = ann_match_value
| val = match_vector_value
: val = token_match_literal
| val = ann_match_literal
| val = match_vector_literal
;
// token match value
// token match literal - TOK[position]
// Returns boost::shared_ptr<TokenMatch>
token_match_value
token_match_literal
returns [boost::shared_ptr<TokenMatch> val]
{
boost::shared_ptr<Position> p;
......@@ -439,9 +452,9 @@ token_match_value
}
;
// annotation match value
// annotation match literal - ANN[position, name]
// Returns boost::shared_ptr<AnnotationMatch>
ann_match_value
ann_match_literal
returns [boost::shared_ptr<AnnotationMatch> val]
{
boost::shared_ptr<Position> p;
......@@ -451,28 +464,28 @@ ann_match_value
}
;
// annotation match vector: MATCH() or MATCH(token, ann, MATCH())
// annotation match vector literal: MATCH() or MATCH(token, ann, MATCH())
// Returns boost::shared_ptr<MatchVector>
match_vector_value
match_vector_literal
returns [boost::shared_ptr<MatchVector> val]
{
val.reset(new MatchVector());
}
: "MATCH" LPAREN (match_vector_value_item[val])? RPAREN
: "MATCH" LPAREN (match_vector_literal_item[val])? RPAREN
;
// Body of the MATCH value. It only adds vector items to the MatchVector
// Item may be single or multiple
match_vector_value_item [boost::shared_ptr<MatchVector>& mvector]
match_vector_literal_item [boost::shared_ptr<MatchVector>& mvector]
{
boost::shared_ptr<Match> m_val;
}
: m_val = match_value {
: m_val = match_literal {
mvector->append(m_val);
}
(
COMMA
m_val = match_value {
m_val = match_literal {
mvector->append(m_val);
}
)*
......@@ -1902,16 +1915,16 @@ match_cond_optional
}
;
// Match condition - repleace
// Returns boost::shared_ptr<OptionalMatch>
// Match condition - repeat
// Returns boost::shared_ptr<RepeatedMatch>
match_cond_repeate
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<OptionalMatch> mtch]
returns [boost::shared_ptr<RepeatedMatch> mtch]
{
boost::shared_ptr<ConjConditions> m_cond;
}
: "repeate" LPAREN m_cond = match_condition [tagset, vars] RPAREN {
mtch.reset(new OptionalMatch(m_cond));
: "repeat" LPAREN m_cond = match_condition [tagset, vars] RPAREN {
mtch.reset(new RepeatedMatch(m_cond));
}
;
......@@ -1927,11 +1940,32 @@ match_action
;
// Match mark action
// Returns ???
// Returns boost::shared_ptr<MarkMatch>
match_mark_action
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<MatchAction> m_act]
: "mark" LPAREN /* TODO */ RPAREN
returns [boost::shared_ptr<MarkMatch> m_act]
{
boost::shared_ptr<Function<Match> > match_to;
boost::shared_ptr<Function<Match> > match_from;
}
: "mark" LPAREN
match_from = match_fit[tagset, vars] COMMA
(match_to = match_fit[tagset, vars] COMMA) ?
annotation_name : STRING
RPAREN {
if (!match_to) {
m_act.reset(
new MarkMatch(
match_from,
((antlr::Token*)annotation_name)->getText()));
} else {
m_act.reset(
new MarkMatch(
match_from,
match_to,
((antlr::Token*)annotation_name)->getText()));
}
}
;
// Match unmark action
......@@ -1964,6 +1998,19 @@ match_action_comma_sep
)*
;
// match_fit accepts either a match vector variable or a constant match literal; both are returned as Function<Match>
// Returns boost::shared_ptr<Function<Match> >
match_fit
[const Corpus2::Tagset& tagset, Variables& vars]
returns [boost::shared_ptr<Function<Match> > ret]
{
//
}
: ret = match_vector_variable [vars]
| ret = match_value_const
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ANTLR LEXER
......
......@@ -49,6 +49,16 @@ public:
{
}
/**
 * Copy constructor. Initializes match_ with the result of calling clone()
 * on the other Match's underlying data, instead of copying the match_
 * member directly.
 * NOTE(review): presumably this gives each Match its own independent
 * MatchData copy -- confirm against MatchData::clone().
 */
Match(const Match& match)
: match_(match.match_->clone())
{
}
/**
 * Copy assignment. Replaces match_ with the result of calling clone() on
 * the other Match's underlying data.
 * NOTE(review): the clone is taken before match_ is overwritten, so
 * self-assignment looks safe -- confirm against the type of match_.
 * @returns Reference to this object.
 */
Match& operator=(const Match& match) {
match_ = match.match_->clone();
return *this;
}
const MatchData& get_value() const {
return *match_;
}
......@@ -61,7 +71,7 @@ public:
* Check if the match is empty (matches nothing). Match objects themselves
* are by definition empty, child classes are sometimes or always non-empty.
*/
virtual bool empty() const {
bool empty() const {
return match_->empty();
}
......@@ -69,7 +79,7 @@ public:
* Getter for the first token matched. If the match is empty, must return
* Nowhere.
*/
virtual Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const {
Position first_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const {
return match_->first_token(s);
}
......@@ -77,7 +87,7 @@ public:
* Getter for the last token matched. If the match is empty, must return
* Nowhere.
*/
virtual Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const {
Position last_token(const boost::shared_ptr<Corpus2::AnnotatedSentence>& s) const {
return match_->last_token(s);
}
......
......@@ -11,7 +11,7 @@ namespace Wccl {
* - VectorMatch, TokenMatch or AnnotationMatch.
* (empty VectorMatch should be default option)
*/
class MatchData// : boost::noncopyable
class MatchData
{
public:
......@@ -38,6 +38,8 @@ public:
virtual std::string to_raw_string() const = 0;
virtual ~MatchData() {}
protected:
virtual MatchData* clone_internal() const = 0;
};
......
......@@ -90,7 +90,7 @@ BOOST_FIXTURE_TEST_CASE(empty_matchvector, IsEmptyFix)
Match v_match(v);
boost::shared_ptr<Function<Match> > match_expr(new Constant<Match>(v_match));
IsEmpty<Match> e(match_expr);
BOOST_CHECK(!e.apply(cx)->get_value());
BOOST_CHECK(e.apply(cx)->get_value());
}
BOOST_FIXTURE_TEST_CASE(full_matchvector, IsEmptyFix)
......@@ -100,7 +100,7 @@ BOOST_FIXTURE_TEST_CASE(full_matchvector, IsEmptyFix)
Match v_match(v);
boost::shared_ptr<Function<Match> > match_expr(new Constant<Match>(v_match));
IsEmpty<Match> e(match_expr);
BOOST_CHECK(e.apply(cx)->get_value());
BOOST_CHECK(!e.apply(cx)->get_value());
}
//------------ To string ----------
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment