diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 33cece25448824303fc8bc0132c19f0a1cf416bd..fb2d134db1d2d533a0ff7f94ee8d51133ffb7b54 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -27,6 +27,7 @@ endif(WIN32)
 
 SET(libwccl_STAT_SRC
 	exception.cpp
+	lexicon/lexicon.cpp
 	ops/formatters.cpp
 	ops/functions/bool/iteration.cpp
 	ops/functions/bool/iterations/atleast.cpp
diff --git a/libwccl/lexicon/lexicon.cpp b/libwccl/lexicon/lexicon.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e985d194e8eec26d3c6b319b4f78b92a193f74c4
--- /dev/null
+++ b/libwccl/lexicon/lexicon.cpp
@@ -0,0 +1,66 @@
+#include <libwccl/lexicon/lexicon.h>
+#include <libpwrutils/util.h>
+#include <libwccl/exception.h>
+#include <libpwrutils/foreach.h>
+
+#include <boost/make_shared.hpp>
+
+namespace Wccl {
+
+/**
+ * Look up a single key.
+ * @returns the value stored for the key, or a reference to a shared
+ *          empty UnicodeString when the key is not in the lexicon.
+ */
+const UnicodeString& Lexicon::translate(const UnicodeString& key) const
+{
+	// The parameter is spelled UnicodeString, exactly as in the header;
+	// naming a versioned ICU namespace here would tie the build to one
+	// ICU release.
+	static const UnicodeString empty;
+	map_t::const_iterator i = map_.find(key);
+	if (i == map_.end()) {
+		return empty;
+	}
+	return i->second;
+}
+
+/**
+ * Translate each element of the set, skipping elements for which the
+ * single-key translate() yields an empty string.
+ * @returns newly allocated set holding the translated values.
+ */
+boost::shared_ptr<StrSet> Lexicon::translate(const StrSet& set) const
+{
+	boost::shared_ptr<StrSet> ret_set = boost::make_shared<StrSet>();
+	foreach (const UnicodeString& s, set.get_value()) {
+		const UnicodeString& v = translate(s);
+		if (!v.isEmpty()) {
+			ret_set->insert(v);
+		}
+	}
+	return ret_set;
+}
+
+/**
+ * Add a key -> value entry.
+ * @throws InvalidArgument if the key is already present, or if the key
+ *         or the value is empty.
+ */
+void Lexicon::insert(const UnicodeString& key, const UnicodeString& value)
+{
+	if (has_key(key)) {
+		throw InvalidArgument("key", PwrNlp::to_utf8(key) + " - entry already added.");
+	}
+	if (key.isEmpty()) {
+		throw InvalidArgument("key", "Cannot add an empty string.");
+	}
+	if (value.isEmpty()) {
+		throw InvalidArgument("value", "Cannot add an empty string.");
+	}
+	// The key is known to be absent, so insert the pair directly rather
+	// than default-constructing a value through operator[] and assigning.
+	map_.insert(map_t::value_type(key, value));
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/lexicon/lexicon.h b/libwccl/lexicon/lexicon.h
new file mode 100644
index 0000000000000000000000000000000000000000..c02f6284805c7901c2baa10cc4d5b5954e785d22
--- /dev/null
+++ 
b/libwccl/lexicon/lexicon.h
@@ -0,0 +1,85 @@
+#ifndef LIBWCCL_LEXICON_LEXICON_H
+#define LIBWCCL_LEXICON_LEXICON_H
+
+#include <string>
+
+#include <boost/assert.hpp>
+#include <boost/noncopyable.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/unordered_map.hpp>
+#include <libcorpus2/lexeme.h> // for unicodestring hash
+
+#include <libwccl/values/strset.h>
+
+namespace Wccl {
+
+/**
+ * Named dictionary mapping UnicodeString keys to UnicodeString values.
+ * Entries are added through insert(), which rejects empty strings and
+ * duplicate keys. The class is non-copyable.
+ */
+class Lexicon : boost::noncopyable
+{
+public:
+	typedef boost::unordered_map<UnicodeString, UnicodeString> map_t;
+
+	/**
+	 * Create an empty lexicon.
+	 * @param name lexicon name; must not be empty (BOOST_ASSERT).
+	 */
+	Lexicon(const std::string& name)
+		: name_(name)
+	{
+		BOOST_ASSERT(!name_.empty());
+	}
+
+	/**
+	 * Translate given key to a value held in this lexicon.
+	 * @returns Value assigned to the given key, if present.
+	 *          Empty UnicodeString if the key was not present.
+	 */
+	const UnicodeString& translate(const UnicodeString& key) const;
+
+	/**
+	 * Translate given set of strings to corresponding values
+	 * from the lexicon.
+	 * Nonexisting keys will translate to nothing (will be removed
+	 * from output).
+	 */
+	boost::shared_ptr<StrSet> translate(const StrSet& set) const;
+
+	/// @returns name given to this lexicon at construction.
+	std::string name() const {
+		return name_;
+	}
+
+	/// @returns true if an entry for the given key has been inserted.
+	bool has_key(const UnicodeString& key) const {
+		return map_.find(key) != map_.end();
+	}
+
+	/**
+	 * Add a key -> value entry.
+	 * @throws InvalidArgument if the key is already present, or if
+	 *         either the key or the value is an empty string.
+	 */
+	void insert(const UnicodeString& key, const UnicodeString& value);
+
+	/// Add an entry that maps the key to itself.
+	void insert(const UnicodeString& key) {
+		insert(key, key);
+	}
+
+	/// @returns read-only view of the underlying key -> value map.
+	const map_t& map() const {
+		return map_;
+	}
+
+private:
+	map_t map_;
+	const std::string name_;
+};
+
+} /* end ns Wccl */
+
+#endif // LIBWCCL_LEXICON_LEXICON_H