From 380efc1ecd0321ef549b6dba31d652f370ff6c7e Mon Sep 17 00:00:00 2001
From: Adam Wardynski <award@.(win7-laptop)>
Date: Fri, 29 Apr 2011 18:37:19 +0200
Subject: [PATCH] Class to hold lexicon data.

---
Reviewer notes (ignored by git am):
 * lexicon.cpp: Lexicon::translate() took icu_44::UnicodeString, which only
   compiles against ICU 4.4; it now uses UnicodeString as declared in the
   header.
 * lexicon.h: include <string>, <boost/assert.hpp> and <boost/shared_ptr.hpp>
   directly instead of relying on transitive includes; document the API.
 * Line breaks and indentation were restored by hand, so whitespace in
   context lines and the blob ids on the index lines may not match the
   original commit.

 libwccl/CMakeLists.txt      |  1 +
 libwccl/lexicon/lexicon.cpp | 48 ++++++++++++++++++++++++
 libwccl/lexicon/lexicon.h   | 85 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 134 insertions(+)
 create mode 100644 libwccl/lexicon/lexicon.cpp
 create mode 100644 libwccl/lexicon/lexicon.h

diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 33cece2..fb2d134 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -27,6 +27,7 @@ endif(WIN32)
 
 SET(libwccl_STAT_SRC
 	exception.cpp
+	lexicon/lexicon.cpp
 	ops/formatters.cpp
 	ops/functions/bool/iteration.cpp
 	ops/functions/bool/iterations/atleast.cpp
diff --git a/libwccl/lexicon/lexicon.cpp b/libwccl/lexicon/lexicon.cpp
new file mode 100644
index 0000000..e985d19
--- /dev/null
+++ b/libwccl/lexicon/lexicon.cpp
@@ -0,0 +1,48 @@
+#include <libwccl/lexicon/lexicon.h>
+#include <libpwrutils/util.h>
+#include <libwccl/exception.h>
+#include <libpwrutils/foreach.h>
+
+#include <boost/make_shared.hpp>
+
+namespace Wccl {
+
+const UnicodeString& Lexicon::translate(const UnicodeString& key) const
+{
+	// Shared empty result returned for keys that are not in the lexicon.
+	static const UnicodeString empty;
+	map_t::const_iterator i = map_.find(key);
+	if (i == map_.end()) {
+		return empty;
+	}
+	return i->second;
+}
+
+boost::shared_ptr<StrSet> Lexicon::translate(const StrSet& set) const
+{
+	boost::shared_ptr<StrSet> ret_set = boost::make_shared<StrSet>();
+	foreach(const UnicodeString& s, set.get_value()) {
+		const UnicodeString& v = translate(s);
+		// An empty result means the key was not found; leave it out.
+		if (!v.isEmpty()) {
+			ret_set->insert(v);
+		}
+	}
+	return ret_set;
+}
+
+void Lexicon::insert(const UnicodeString& key, const UnicodeString& value)
+{
+	if (has_key(key)) {
+		throw InvalidArgument("key", PwrNlp::to_utf8(key) + " - entry already added.");
+	}
+	if (key.isEmpty()) {
+		throw InvalidArgument("key", "Cannot add an empty string.");
+	}
+	if (value.isEmpty()) {
+		throw InvalidArgument("value", "Cannot add an empty string.");
+	}
+	map_[key] = value;
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/lexicon/lexicon.h b/libwccl/lexicon/lexicon.h
new file mode 100644
index 0000000..c02f628
--- /dev/null
+++ b/libwccl/lexicon/lexicon.h
@@ -0,0 +1,85 @@
+#ifndef LIBWCCL_LEXICON_LEXICON_H
+#define LIBWCCL_LEXICON_LEXICON_H
+
+#include <string>
+
+#include <boost/assert.hpp>
+#include <boost/noncopyable.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/unordered_map.hpp>
+#include <libcorpus2/lexeme.h> // for unicodestring hash
+
+#include <libwccl/values/strset.h>
+
+namespace Wccl {
+
+/**
+ * A named mapping from UnicodeString keys to UnicodeString values.
+ * Entries can be added but not replaced; empty keys and empty values
+ * are rejected.
+ */
+class Lexicon : boost::noncopyable
+{
+public:
+	typedef boost::unordered_map<UnicodeString, UnicodeString> map_t;
+
+	/**
+	 * Create an empty lexicon.
+	 * @param name Name of the lexicon; must not be empty (asserted).
+	 */
+	Lexicon(const std::string& name)
+		: name_(name)
+	{
+		BOOST_ASSERT(!name_.empty());
+	}
+
+	/**
+	 * Translate given key to a value held in this lexicon.
+	 * @returns Value assigned to the given key, if present.
+	 * Empty UnicodeString if the key was not present.
+	 */
+	const UnicodeString& translate(const UnicodeString& key) const;
+
+	/**
+	 * Translate given set of strings to corresponding values
+	 * from the lexicon.
+	 * Nonexisting keys will translate to nothing (will be removed
+	 * from output).
+	 */
+	boost::shared_ptr<StrSet> translate(const StrSet& set) const;
+
+	/** @returns Name given to this lexicon at construction. */
+	std::string name() const {
+		return name_;
+	}
+
+	/** @returns true if an entry for the given key has been added. */
+	bool has_key(const UnicodeString& key) const {
+		return map_.find(key) != map_.end();
+	}
+
+	/**
+	 * Add a new entry.
+	 * @throws InvalidArgument if the key has already been added,
+	 * or if the key or the value is empty.
+	 */
+	void insert(const UnicodeString& key, const UnicodeString& value);
+
+	/** Add an entry whose value is the same as its key. */
+	void insert(const UnicodeString& key) {
+		insert(key, key);
+	}
+
+	/** @returns Read-only access to the underlying key-value map. */
+	const map_t& map() const {
+		return map_;
+	}
+
+private:
+	map_t map_;
+	const std::string name_;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_LEXICON_LEXICON_H
-- 
GitLab