From 5d9c21990ec119da2f8fe8dacca359b2b94556b0 Mon Sep 17 00:00:00 2001 From: ilor <kailoran@gmail.com> Date: Wed, 8 Dec 2010 08:08:40 +0100 Subject: [PATCH] Values cleanup and doxy: remove some redundancies, make the Begin/End constants in Position an enum, change tag_ref to contents --- libwccl/ops/functions/tset/agrfilter.cpp | 2 +- libwccl/ops/functions/tset/catfilter.cpp | 2 +- libwccl/ops/functions/tset/getsymbols.cpp | 2 +- .../ops/functions/tset/getsymbolsinrange.cpp | 2 +- libwccl/values/bool.h | 5 +++ libwccl/values/position.cpp | 10 ----- libwccl/values/position.h | 18 ++++++-- libwccl/values/strset.cpp | 6 +-- libwccl/values/strset.h | 41 ++++++++++++------- libwccl/values/tset.h | 23 ++++++++++- libwccl/values/value.h | 10 +++++ 11 files changed, 85 insertions(+), 36 deletions(-) diff --git a/libwccl/ops/functions/tset/agrfilter.cpp b/libwccl/ops/functions/tset/agrfilter.cpp index 2ce6440..ce6911d 100644 --- a/libwccl/ops/functions/tset/agrfilter.cpp +++ b/libwccl/ops/functions/tset/agrfilter.cpp @@ -59,7 +59,7 @@ AgrFilter::BaseRetValPtr AgrFilter::apply_internal(const FunExecContext& context // //@ todo: implement // - tset->tag_ref().mask_with(mask); + tset->contents().mask_with(mask); return tset; } diff --git a/libwccl/ops/functions/tset/catfilter.cpp b/libwccl/ops/functions/tset/catfilter.cpp index 705e28a..d16faef 100644 --- a/libwccl/ops/functions/tset/catfilter.cpp +++ b/libwccl/ops/functions/tset/catfilter.cpp @@ -41,7 +41,7 @@ CatFilter::BaseRetValPtr CatFilter::apply_internal(const FunExecContext& context tset->combine_with(lexeme.tag()); } } - tset->tag_ref().mask_with(mask); + tset->contents().mask_with(mask); return tset; } diff --git a/libwccl/ops/functions/tset/getsymbols.cpp b/libwccl/ops/functions/tset/getsymbols.cpp index 2fa0ccd..5137ca2 100644 --- a/libwccl/ops/functions/tset/getsymbols.cpp +++ b/libwccl/ops/functions/tset/getsymbols.cpp @@ -37,7 +37,7 @@ GetSymbols::BaseRetValPtr GetSymbols::apply_internal(const FunExecContext& conte foreach (const Corpus2::Lexeme& lexeme, token->lexemes()) { tset->combine_with(lexeme.tag()); } - tset->tag_ref().mask_with(mask_); + tset->contents().mask_with(mask_); return tset; } diff --git a/libwccl/ops/functions/tset/getsymbolsinrange.cpp b/libwccl/ops/functions/tset/getsymbolsinrange.cpp index fd49f56..13cfce9 100644 --- a/libwccl/ops/functions/tset/getsymbolsinrange.cpp +++ b/libwccl/ops/functions/tset/getsymbolsinrange.cpp @@ -52,7 +52,7 @@ GetSymbolsInRange::BaseRetValPtr GetSymbolsInRange::apply_internal(const FunExec tset->combine_with(lexeme.tag()); } } - tset->tag_ref().mask_with(mask_); + tset->contents().mask_with(mask_); return tset; } diff --git a/libwccl/values/bool.h b/libwccl/values/bool.h index fd8a92f..53393fb 100644 --- a/libwccl/values/bool.h +++ b/libwccl/values/bool.h @@ -5,6 +5,11 @@ namespace Wccl { +/** + * A Value subtype which is a thin wrapper around a Bool. + * + * The default value of a Bool is false. + */ class Bool : public Value { public: diff --git a/libwccl/values/position.cpp b/libwccl/values/position.cpp index f1cadf2..8b816c0 100644 --- a/libwccl/values/position.cpp +++ b/libwccl/values/position.cpp @@ -6,16 +6,6 @@ namespace Wccl { const char* Position::type_name = "Position"; -#ifndef _MSC_VER -//If you're strict about standards, if you want to use -//integral or enum static consts in a program, you still -//have to define them even if the declaration has an initializer. -//Well, at least GCC is strict about it. -const int Position::Nowhere; -const int Position::End; -const int Position::Begin; -#endif - std::string Position::to_raw_string() const { switch (val_) { diff --git a/libwccl/values/position.h b/libwccl/values/position.h index ca3f7a1..43984fe 100644 --- a/libwccl/values/position.h +++ b/libwccl/values/position.h @@ -9,6 +9,16 @@ namespace Wccl { class SentenceContext; +/** + * A Position Value, which is essentially an integer offset with three special + * values: Begin, End and Nowhere, corresponding to the beginning of the + * processed sentence, its end and a general "outside" position. + * + * Note that means evaluating a position generally makes sense only with + * the context of some sentence. + * + * The default value is Nowhere. + */ class Position : public Value { public: @@ -16,9 +26,11 @@ public: typedef int value_type; - static const int Nowhere = boost::integer_traits<int>::const_min; - static const int Begin = boost::integer_traits<int>::const_min + 1; - static const int End = boost::integer_traits<int>::const_max; + enum Enum { + Nowhere = boost::integer_traits<int>::const_min, + Begin = boost::integer_traits<int>::const_min + 1, + End = boost::integer_traits<int>::const_max + }; explicit Position(int v = Nowhere) : val_(v) diff --git a/libwccl/values/strset.cpp b/libwccl/values/strset.cpp index e8fd607..18f1946 100644 --- a/libwccl/values/strset.cpp +++ b/libwccl/values/strset.cpp @@ -12,7 +12,7 @@ std::string StrSet::to_raw_string() const { std::stringstream ss; ss << "["; - set_t::const_iterator it = set_.begin(); + value_type::const_iterator it = set_.begin(); while(it != set_.end()) { ss << '\"' << PwrNlp::to_utf8(*it) << '\"'; //TODO escaping if(++it != set_.end()) { @@ -33,8 +33,8 @@ bool StrSet::intersects(const StrSet &other) const { //the sets and using set_intersection. //It's faster to iterate through the smaller set and check in //the larger than it is to do the opposite, hence the &?: below. - const set_t& smaller = size() < other.size() ? set_ : other.set_; - const set_t& bigger = size() < other.size() ? other.set_ : set_; + const value_type& smaller = size() < other.size() ? set_ : other.set_; + const value_type& bigger = size() < other.size() ? other.set_ : set_; foreach (const UnicodeString& u, smaller) { if (bigger.find(u) != bigger.end()) { return true; diff --git a/libwccl/values/strset.h b/libwccl/values/strset.h index 911a28d..5b95467 100644 --- a/libwccl/values/strset.h +++ b/libwccl/values/strset.h @@ -7,61 +7,72 @@ namespace Wccl { +/** + * A Value subtype representing a set of strings. + * + * No guarantees for the order of elements are given at this time. + * + * By default the set is empty. + */ class StrSet : public Value { public: WCCL_VALUE_PREAMBLE - typedef boost::unordered_set<UnicodeString> set_t; - - typedef set_t value_type; + typedef boost::unordered_set<UnicodeString> value_type; StrSet() : set_() { } - explicit StrSet(const set_t& s) + explicit StrSet(const value_type& s) : set_(s) { } - const set_t& contents() const { + const value_type& get_value() const { return set_; } - set_t& contents() { - return set_; - } - - void set_contents(const set_t& set) { + void set_value(const value_type& set) { set_ = set; } - const set_t& get_value() const { - return contents(); + /** + * get_value() alias. + */ + const value_type& contents() const { + return set_; } - void set_value(const set_t& set) { - set_contents(set); + /** + * Nonconst variant of get_value() + */ + value_type& contents() { + return set_; } void swap(StrSet& ss) { ss.set_.swap(set_); } + /// Convenience function to add a new UnicodeString to the set void insert(const UnicodeString& u) { set_.insert(u); } + /// Convenience function to add a new string to the set, treated as UTF-8 void insert_utf8(const std::string& u) { insert(UnicodeString::fromUTF8(u)); } + /// Convenience size accesor int size() const { return set_.size(); } + /// Convenience empty checker bool empty() const { return set_.empty(); } @@ -86,7 +97,7 @@ public: std::string to_raw_string() const; private: - set_t set_; + value_type set_; }; } /* end ns Wccl */ diff --git a/libwccl/values/tset.h b/libwccl/values/tset.h index 47533f1..228023d5 100644 --- a/libwccl/values/tset.h +++ b/libwccl/values/tset.h @@ -6,6 +6,12 @@ namespace Wccl { +/** + * A Value subtype representing a set of tagset symbols, essentially + * a wrapper on a Corpus2::Tag. + * + * Default-constructed TSets are empty. + */ class TSet : public Value { public: @@ -31,10 +37,25 @@ public: tag_ = tag; } - Corpus2::Tag& tag_ref() { + /** + * Alias of get_value() + */ + Corpus2::Tag& contents() { return tag_; } + /** + * Nonconst variant of contents() + */ + Corpus2::Tag& contents() { + return tag_; + } + + /** + * Convenience function to add a symbol from a tagste by name. + * + * Note: slow. Avoid in code that gets repeatedly executed. + */ void insert_symbol(const Corpus2::Tagset& tagset, const std::string& s); bool empty() const { diff --git a/libwccl/values/value.h b/libwccl/values/value.h index e7cee0f..efd92cb 100644 --- a/libwccl/values/value.h +++ b/libwccl/values/value.h @@ -13,6 +13,16 @@ namespace Wccl { /** * Abstract base class for WCCL value types + * + * Value subtypes should use the WCCL_VALUE_PREAMBLE macro in the beginning + * of their public: section and define the relevant static data members + * and functions in the respective cpp file. + * + * Value subtypes, but not Value itself, should have a value_type typedef + * and a const value_type& get_value() const member function defined + * for interoperability with templates and convenience functions. + * + * Values should be default-constructible with a documented default state. */ class Value { -- GitLab