diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 7d272225ef745d87af65480f8fa3498a77c59247..165786b645d69ff7417ef1a01a0aff1e9a0959ea 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -56,6 +56,7 @@ SET(libwccl_STAT_SRC
 	ops/functions/strset/affix.cpp
 	ops/functions/strset/getlemmas.cpp
 	ops/functions/strset/getorth.cpp
+	ops/functions/strset/lextranslator.cpp
 	ops/functions/strset/tolower.cpp
 	ops/functions/strset/toupper.cpp
 	ops/functions/tset/agrfilter.cpp
diff --git a/libwccl/ops/functions/strset/lextranslator.cpp b/libwccl/ops/functions/strset/lextranslator.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..424520b4b69e2c6e4ccc44569fa30eb1623d05bd
--- /dev/null
+++ b/libwccl/ops/functions/strset/lextranslator.cpp
@@ -0,0 +1,42 @@
+#include <libwccl/ops/functions/strset/lextranslator.h>
+#include <sstream>
+
+namespace Wccl {
+
+/**
+ * Renders the operator as: name(argument expression, "lexicon name"),
+ * using the tagset-aware string forms of the name and of the argument.
+ */
+std::string LexTranslator::to_string(const Corpus2::Tagset& tagset) const
+{
+	std::ostringstream out;
+	out << name(tagset) << "(";
+	out << strset_expr_->to_string(tagset);
+	out << ", \"" << lexicon_->name() << "\")";
+	return out.str();
+}
+
+/**
+ * Streams raw_name(), the argument expression itself and the quoted
+ * lexicon name into os, in the same layout as to_string produces.
+ */
+std::ostream& LexTranslator::write_to(std::ostream& os) const
+{
+	os << raw_name() << "(";
+	os << *strset_expr_;
+	os << ", \"" << lexicon_->name() << "\")";
+	return os;
+}
+
+/**
+ * Evaluates the argument expression in the given context and hands the
+ * resulting string set to the lexicon for translation.
+ */
+LexTranslator::BaseRetValPtr LexTranslator::apply_internal(
+	const FunExecContext& context) const
+{
+	const boost::shared_ptr<const StrSet> input = strset_expr_->apply(context);
+	return lexicon_->translate(*input);
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/ops/functions/strset/lextranslator.h b/libwccl/ops/functions/strset/lextranslator.h
new file mode 100644
index 0000000000000000000000000000000000000000..4cfab9d8d8c3f4cc13b8566ede5ce3415d4a3288
--- /dev/null
+++ b/libwccl/ops/functions/strset/lextranslator.h
@@ -0,0 +1,55 @@
+#ifndef LIBWCCL_OPS_FUNCTIONS_STRSET_LEXTRANSLATOR_H
+#define LIBWCCL_OPS_FUNCTIONS_STRSET_LEXTRANSLATOR_H
+
+#include <libwccl/values/strset.h>
+#include 
<libwccl/ops/function.h>
+#include <libwccl/lexicon/lexicon.h>
+
+namespace Wccl {
+
+/**
+ * WCCL functional operator "lex", translates strings via a lexicon.
+ */
+class LexTranslator : public Function<StrSet>
+{
+public:
+	/** Stores the argument expression and the lexicon; both are asserted non-null. */
+	LexTranslator(
+		const boost::shared_ptr<Function<StrSet> >& strset_expr,
+		const boost::shared_ptr<const Lexicon>& lexicon)
+		: strset_expr_(strset_expr), lexicon_(lexicon)
+	{
+		BOOST_ASSERT(strset_expr_);
+		BOOST_ASSERT(lexicon_);
+	}
+
+	/**
+	 * @returns String representation of the function in the form of:
+	 * operator_name(strset_expr_string, "lexicon_name")
+	 */
+	std::string to_string(const Corpus2::Tagset& tagset) const;
+
+	/** @returns Raw name of the operator: "lex", as given in the class description. */
+	std::string raw_name() const {
+		return "lex";
+	}
+protected:
+	/** Writes raw_name(), the argument expression and the quoted lexicon name to os. */
+	std::ostream& write_to(std::ostream& os) const;
+
+	/**
+	 * Get a string set from the argument expression and return a copy,
+	 * translated using the lexicon given at construction. Strings that don't
+	 * exist in the lexicon are removed from output, whereas strings that
+	 * exist as keys are replaced with corresponding values.
+	 */
+	BaseRetValPtr apply_internal(const FunExecContext& context) const;
+
+private:
+	const boost::shared_ptr<Function<StrSet> > strset_expr_;
+	const boost::shared_ptr<const Lexicon> lexicon_;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_OPS_FUNCTIONS_STRSET_LEXTRANSLATOR_H