Skip to content
Snippets Groups Projects
Commit e73283fa authored by Adam Radziszewski
Browse files

compact_string repr for Value

parent 45e923f3
Branches
No related merge requests found
...@@ -48,6 +48,58 @@ UnicodeString StrSet::to_raw_string_u() const ...@@ -48,6 +48,58 @@ UnicodeString StrSet::to_raw_string_u() const
return u; return u;
} }
/**
 * Compact representation of the string set as a UTF-8 std::string.
 *
 * An empty set is rendered as a lone "-". Otherwise every element is
 * wrapped in double quotes and the elements are joined with '-'.
 * Inside an element the characters '-', '.', ' ' and TAB are written
 * as the literal escapes \u002d, \u002e, \u0020 and \u0009.
 *
 * The tagset argument is not used by this implementation.
 */
std::string StrSet::to_compact_string(const Corpus2::Tagset& /* tagset */)
const
{
	if (set_.empty()) {
		return "-";
	}
	std::stringstream out;
	for (value_type::const_iterator cur = set_.begin(); cur != set_.end(); ++cur) {
		// The hyphen doubles as the element separator, so it is emitted
		// before every element except the first one.
		if (cur != set_.begin()) {
			out << "-";
		}
		std::string escaped = PwrNlp::to_utf8(*cur);
		boost::algorithm::replace_all(escaped, "-", "\\u002d");
		boost::algorithm::replace_all(escaped, ".", "\\u002e");
		boost::algorithm::replace_all(escaped, " ", "\\u0020");
		boost::algorithm::replace_all(escaped, "\t", "\\u0009");
		out << '\"' << escaped << '\"';
	}
	return out.str();
}
/**
 * Compact representation of the string set as a UnicodeString.
 *
 * Produces the same layout as to_compact_string(): a lone "-" for an
 * empty set, otherwise double-quoted elements joined with '-', with
 * '-', '.', ' ' and TAB inside elements written as the literal escapes
 * \u002d, \u002e, \u0020 and \u0009.
 *
 * The tagset argument is not used by this implementation.
 */
UnicodeString StrSet::to_compact_string_u(const Corpus2::Tagset& /* tagset */)
const
{
	UnicodeString result;
	if (set_.empty()) {
		result.append(UNICODE_STRING("-", 1));
		return result;
	}
	for (value_type::const_iterator cur = set_.begin(); cur != set_.end(); ++cur) {
		// Separator goes in front of every element but the first.
		if (cur != set_.begin()) {
			result.append(UNICODE_STRING("-", 1));
		}
		// Work on a copy so the stored element is left untouched.
		UnicodeString escaped = *cur;
		escaped.findAndReplace(UNICODE_STRING("-", 1), UNICODE_STRING("\\u002d", 6));
		escaped.findAndReplace(UNICODE_STRING(".", 1), UNICODE_STRING("\\u002e", 6));
		escaped.findAndReplace(UNICODE_STRING(" ", 1), UNICODE_STRING("\\u0020", 6));
		escaped.findAndReplace(UNICODE_STRING("\t", 1), UNICODE_STRING("\\u0009", 6));
		result.append(UNICODE_STRING("\"", 1));
		result.append(escaped);
		result.append(UNICODE_STRING("\"", 1));
	}
	return result;
}
bool StrSet::intersects(const StrSet &other) const { bool StrSet::intersects(const StrSet &other) const {
if (empty() || other.empty()) { if (empty() || other.empty()) {
return false; return false;
......
...@@ -99,6 +99,12 @@ public: ...@@ -99,6 +99,12 @@ public:
/// Value override /// Value override
UnicodeString to_raw_string_u() const; UnicodeString to_raw_string_u() const;
/// Value override: double-quoted elements joined with '-', or a lone "-"
/// for an empty set; '-', '.', ' ' and TAB inside elements are escaped
/// as \uXXXX sequences.
std::string to_compact_string(const Corpus2::Tagset& tagset) const;
/// Value override: UnicodeString counterpart of to_compact_string(),
/// producing the same layout.
UnicodeString to_compact_string_u(const Corpus2::Tagset& tagset) const;
private: private:
value_type set_; value_type set_;
}; };
......
#include <libwccl/values/tset.h> #include <libwccl/values/tset.h>
#include <libpwrutils/foreach.h> #include <libpwrutils/foreach.h>
#include <libpwrutils/bitset.h> #include <libpwrutils/bitset.h>
#include <boost/algorithm/string.hpp>
#include <sstream> #include <sstream>
namespace Wccl { namespace Wccl {
...@@ -46,4 +48,16 @@ void TSet::insert_symbol(const Corpus2::Tagset& tagset, const std::string& s) ...@@ -46,4 +48,16 @@ void TSet::insert_symbol(const Corpus2::Tagset& tagset, const std::string& s)
tag_.combine_with(tagset.parse_symbol(s)); tag_.combine_with(tagset.parse_symbol(s));
} }
/**
 * Compact representation of the tag set.
 *
 * A null tag is rendered as a lone "-". Otherwise the symbol string
 * obtained from the tagset for this tag is returned with every comma
 * replaced by a hyphen.
 */
std::string TSet::to_compact_string(const Corpus2::Tagset& tagset)
const
{
	if (!tag_.is_null()) {
		std::string symbols = tagset.tag_to_symbol_string(tag_);
		boost::algorithm::replace_all(symbols, ",", "-");
		return symbols;
	}
	return "-";
}
} /* end ns Wccl */ } /* end ns Wccl */
...@@ -115,6 +115,9 @@ public: ...@@ -115,6 +115,9 @@ public:
std::string to_raw_string() const; std::string to_raw_string() const;
/// Value override: the tag's symbol string with commas replaced by
/// hyphens, or a lone "-" when the tag is null.
std::string to_compact_string(const Corpus2::Tagset& tagset) const;
private: private:
Corpus2::Tag tag_; Corpus2::Tag tag_;
}; };
......
...@@ -71,6 +71,22 @@ public: ...@@ -71,6 +71,22 @@ public:
return UnicodeString::fromUTF8(to_raw_string()); return UnicodeString::fromUTF8(to_raw_string());
} }
/**
 * Compact string representation of the value.
 *
 * Sets are rendered as sorted, hyphen-separated strings without
 * brackets. The result is meant for compact output where a degree of
 * ambiguity is acceptable: the type of the value cannot be
 * unambiguously recovered from such a string.
 *
 * This default implementation ignores the tagset and falls back to
 * the raw string representation.
 */
virtual std::string to_compact_string(const Corpus2::Tagset& /* tagset */)
const {
	return this->to_raw_string();
}
/**
 * Compact string representation as a UnicodeString.
 *
 * This default implementation converts the UTF-8 output of
 * to_compact_string() for the given tagset.
 */
virtual UnicodeString to_compact_string_u(const Corpus2::Tagset& tagset)
const {
	const std::string compact_utf8 = to_compact_string(tagset);
	return UnicodeString::fromUTF8(compact_utf8);
}
protected: protected:
Value() {} Value() {}
}; };
......
...@@ -29,6 +29,7 @@ namespace Wccl { ...@@ -29,6 +29,7 @@ namespace Wccl {
virtual std::string to_string(const Corpus2::Tagset& /*tagset*/) const; virtual std::string to_string(const Corpus2::Tagset& /*tagset*/) const;
virtual std::string to_raw_string() const = 0; virtual std::string to_raw_string() const = 0;
/// Compact string representation; presumably follows the same compact
/// format as Value::to_compact_string — TODO confirm against the definition.
virtual std::string to_compact_string(const Corpus2::Tagset& /*tagset*/) const;
}; };
} }
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment