diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index f09edd862fd8df77623bee3ce52b52a94b83769e..3d265a8d8f64bfecff990cd66c1f99d29a7d6ef9 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -29,6 +29,7 @@ SET(libwccl_STAT_SRC
 	exception.cpp
 	ops/action.cpp
 	ops/actions/delete.cpp
+	ops/actions/relabel.cpp
 	ops/actions/select.cpp
 	ops/formatters.cpp
 	ops/functions/bool/iteration.cpp
diff --git a/libwccl/ops/actions/relabel.cpp b/libwccl/ops/actions/relabel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..46a7ed4ad3edb3dff639e38476bb18bab1529d99
--- /dev/null
+++ b/libwccl/ops/actions/relabel.cpp
@@ -0,0 +1,45 @@
+#include <libwccl/ops/actions/relabel.h>
+#include <libpwrutils/foreach.h>
+#include <sstream>
+
+namespace Wccl {
+
+Bool Relabel::execute(const ActionExecContext& context) const
+{
+	Bool changed(false); // becomes true as soon as the condition holds for any lexeme
+	Corpus2::mask_t replace_wclass = replace_with_->apply(context)->get_value().get_pos(); // part-of-speech mask taken from the replacement TSet
+	int abs_pos = context.sentence_context().get_abs_position(*pos_->apply(context)); // absolute position, as computed by the sentence context
+	if (context.sentence_context().is_inside(abs_pos)) { // positions outside the sentence are silently ignored
+		Corpus2::Token& token = *context.sentence_context().at(abs_pos);
+		std::vector<Corpus2::Lexeme> original(token.lexemes()); // working copy; its entries are relabelled in place below
+		token.lexemes().clear(); // emptied so that the condition below sees one lexeme at a time
+		foreach (Corpus2::Lexeme& lexeme, original) {
+			token.add_lexeme(lexeme); // token temporarily holds just this lexeme
+			if (condition_->apply(context)->get_value()) { // NOTE(review): if this throws, the restore below is skipped and the token keeps only this lexeme
+				changed.set_value(true); // set even when the new tag equals the old one
+				lexeme.set_tag(Corpus2::Tag(replace_wclass, lexeme.tag().get_values())); // new part of speech, existing values kept
+			}
+			token.lexemes().pop_back(); // remove the temporary lexeme again
+		}
+		token.lexemes() = original; // install the (possibly relabelled) lexemes
+	}
+	return changed;
+}
+
+std::string Relabel::to_string(const Corpus2::Tagset& tagset) const
+{
+	std::ostringstream os;
+	os << name() << "(" << pos_->to_string(tagset) << ", " // argument order: pos, replace_with, condition
+			<< replace_with_->to_string(tagset) << ", "
+			<< condition_->to_string(tagset) << ")";
+	return os.str();
+}
+
+std::ostream& Relabel::write_to(std::ostream& os) const
+{
+	os << name() << "(" << *pos_ << ", " << *replace_with_ << ", " << *condition_ << ")"; // same layout as to_string(), but streams the operands directly (no tagset)
+	return os;
+}
+
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/actions/relabel.h b/libwccl/ops/actions/relabel.h
new file mode 100644
index 0000000000000000000000000000000000000000..79baa932293f2a41ccb2c2bc793ddab71b5aef81
--- /dev/null
+++ b/libwccl/ops/actions/relabel.h
@@ -0,0 +1,72 @@
+#ifndef LIBWCCL_OPS_RELABEL_DELETE_H
+#define LIBWCCL_OPS_RELABEL_DELETE_H
+
+#include <libwccl/ops/action.h>
+#include <libwccl/values/position.h>
+#include <libwccl/values/bool.h>
+#include <libwccl/ops/function.h>
+
+namespace Wccl {
+
+/**
+ * Action to change part of speech for lexemes that meet a condition.
+ */
+class Relabel : public Action
+{
+public:
+	typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
+	typedef boost::shared_ptr<Function<Bool> > BoolFunctionPtr;
+	typedef boost::shared_ptr<Function<TSet> > TSetFunctionPtr;
+
+	Relabel(
+			const TSetFunctionPtr& replace_with, // yields the TSet whose part of speech is assigned to matching lexemes
+			const BoolFunctionPtr& condition, // evaluated once per lexeme to decide whether it is relabelled
+			const PosFunctionPtr& pos = detail::CurrentPos()) // yields the position of the token to work on
+		: pos_(pos),
+		  condition_(condition),
+		  replace_with_(replace_with)
+	{
+		BOOST_ASSERT(pos_); // all three functions are required to be non-null
+		BOOST_ASSERT(condition_);
+		BOOST_ASSERT(replace_with_);
+	}
+
+	/**
+	 * @returns Name of the Action: "relabel".
+	 */
+	std::string name() const {
+		return "relabel";
+	}
+
+	/**
+	 * @returns String representation of the Action: "relabel(pos, replace_with, condition)"
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+protected:
+	/**
+	 * Writes string representation of the Action to
+	 * an output stream.
+	 * @returns Stream written to.
+	 * @note May be incomplete and/or contain internal info.
+	 */
+	std::ostream& write_to(std::ostream& ostream) const;
+
+	/**
+	 * Executes the Action on given context: for all lexemes
+	 * that meet given condition, change their part of speech
+	 * to the part of speech given by provided TSet function.
+	 * No action is done if position points outside sentence.
+	 * @returns True if the condition held for at least one lexeme; False otherwise
+	 */
+	Bool execute(const ActionExecContext &context) const;
+
+private:
+	const PosFunctionPtr pos_;
+	const BoolFunctionPtr condition_;
+	const TSetFunctionPtr replace_with_;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_RELABEL_DELETE_H