From 710ff253caec4dce83e888a499958e7ff643f9dc Mon Sep 17 00:00:00 2001
From: Adam Wardynski <award@.(win7-laptop)>
Date: Fri, 29 Apr 2011 18:41:49 +0200
Subject: [PATCH] lex() operator, to translate strings via a lexicon.

---
 libwccl/CMakeLists.txt                        |  1 +
 .../ops/functions/strset/lextranslator.cpp    | 41 ++++++++++++++
 libwccl/ops/functions/strset/lextranslator.h  | 55 +++++++++++++++++++
 3 files changed, 97 insertions(+)
 create mode 100644 libwccl/ops/functions/strset/lextranslator.cpp
 create mode 100644 libwccl/ops/functions/strset/lextranslator.h

diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 7d27222..165786b 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -56,6 +56,7 @@ SET(libwccl_STAT_SRC
 	ops/functions/strset/affix.cpp
 	ops/functions/strset/getlemmas.cpp
 	ops/functions/strset/getorth.cpp
+	ops/functions/strset/lextranslator.cpp
 	ops/functions/strset/tolower.cpp
 	ops/functions/strset/toupper.cpp
 	ops/functions/tset/agrfilter.cpp
diff --git a/libwccl/ops/functions/strset/lextranslator.cpp b/libwccl/ops/functions/strset/lextranslator.cpp
new file mode 100644
index 0000000..424520b
--- /dev/null
+++ b/libwccl/ops/functions/strset/lextranslator.cpp
@@ -0,0 +1,41 @@
+#include <libwccl/ops/functions/strset/lextranslator.h>
+#include <sstream>
+
+namespace Wccl {
+
+/**
+ * Builds the textual form: name(tagset), followed in parentheses by the
+ * argument expression rendered with the tagset and the quoted lexicon name.
+ */
+std::string LexTranslator::to_string(const Corpus2::Tagset& tagset) const
+{
+	std::ostringstream buffer;
+	buffer << name(tagset) << "(";
+	buffer << strset_expr_->to_string(tagset) << ", \"";
+	buffer << lexicon_->name() << "\")";
+	return buffer.str();
+}
+
+/**
+ * Streams raw_name(), followed in parentheses by the argument expression
+ * and the quoted lexicon name; hands back the stream it was given.
+ */
+std::ostream& LexTranslator::write_to(std::ostream& os) const
+{
+	os << raw_name() << "(";
+	os << *strset_expr_ << ", \"" << lexicon_->name() << "\")";
+	return os;
+}
+
+/**
+ * Evaluates the argument expression in the given context and passes the
+ * resulting string set to the lexicon's translate().
+ */
+LexTranslator::BaseRetValPtr LexTranslator::apply_internal(
+		const FunExecContext& context) const
+{
+	const boost::shared_ptr<const StrSet> input = strset_expr_->apply(context);
+	return lexicon_->translate(*input);
+}
+
+} /* end ns Wccl */
diff --git 
a/libwccl/ops/functions/strset/lextranslator.h b/libwccl/ops/functions/strset/lextranslator.h
new file mode 100644
index 0000000..4cfab9d
--- /dev/null
+++ b/libwccl/ops/functions/strset/lextranslator.h
@@ -0,0 +1,68 @@
+#ifndef LIBWCCL_OPS_FUNCTIONS_STRSET_LEXTRANSLATOR_H
+#define LIBWCCL_OPS_FUNCTIONS_STRSET_LEXTRANSLATOR_H
+
+#include <libwccl/values/strset.h>
+#include <libwccl/ops/function.h>
+#include <libwccl/lexicon/lexicon.h>
+
+namespace Wccl {
+
+/**
+ * WCCL functional operator "lex", translates strings via a lexicon.
+ *
+ * Keeps the argument expression and the lexicon it was constructed with.
+ */
+class LexTranslator : public Function<StrSet>
+{
+public:
+	/**
+	 * @param strset_expr expression producing the string set to translate;
+	 *        asserted to be non-null
+	 * @param lexicon lexicon to translate with; asserted to be non-null
+	 */
+	LexTranslator(
+		const boost::shared_ptr<Function<StrSet> >& strset_expr,
+		const boost::shared_ptr<const Lexicon>& lexicon)
+		: strset_expr_(strset_expr),
+		  lexicon_(lexicon)
+	{
+		BOOST_ASSERT(strset_expr_);
+		BOOST_ASSERT(lexicon_);
+	}
+
+	/**
+	 * @returns String representation of the function in the form of
+	 * lex(strset_expr, "lexicon_name")
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+	/**
+	 * @returns Name of the function, "lex"
+	 */
+	std::string raw_name() const {
+		return "lex";
+	}
+protected:
+	/**
+	 * Writes the raw string representation of the function, in the form of
+	 * lex(strset_expr, "lexicon_name"), to the given stream.
+	 * @returns The stream that was passed in
+	 */
+	std::ostream& write_to(std::ostream& os) const;
+
+	/**
+	 * Get a string set from the argument expression and return a copy,
+	 * translated using the lexicon given at construction. Strings that don't
+	 * exist in the lexicon are removed from output, whereas strings that
+	 * exist as keys are replaced with corresponding values.
+	 */
+	BaseRetValPtr apply_internal(const FunExecContext& context) const;
+
+private:
+	const boost::shared_ptr<Function<StrSet> > strset_expr_;
+	const boost::shared_ptr<const Lexicon> lexicon_;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_FUNCTIONS_STRSET_LEXTRANSLATOR_H
-- 
GitLab