Skip to content
Snippets Groups Projects
Commit e73283fa authored by Adam Radziszewski's avatar Adam Radziszewski
Browse files

compact_string repr for Value

parent 45e923f3
No related merge requests found
......@@ -48,6 +48,58 @@ UnicodeString StrSet::to_raw_string_u() const
return u;
}
/**
 * Compact UTF-8 representation of the string set.
 *
 * An empty set yields "-". Otherwise every element is converted to UTF-8,
 * has its hyphens, dots, spaces and tabs rewritten as literal \uXXXX
 * sequences, is wrapped in double quotes, and the quoted elements are
 * joined with '-' in the set's iteration order. The tagset argument is
 * not used.
 */
std::string StrSet::to_compact_string(const Corpus2::Tagset& /* tagset */)
const
{
	if (set_.empty()) {
		return "-";
	}
	std::stringstream out;
	// Empty before the first element, "-" before every subsequent one.
	const char* separator = "";
	for (value_type::const_iterator elem = set_.begin(); elem != set_.end(); ++elem) {
		std::string escaped = PwrNlp::to_utf8(*elem);
		// '-' is the element separator, so it must not appear verbatim
		// inside an element; the other characters are escaped likewise.
		boost::algorithm::replace_all(escaped, "-", "\\u002d");
		boost::algorithm::replace_all(escaped, ".", "\\u002e");
		boost::algorithm::replace_all(escaped, " ", "\\u0020");
		boost::algorithm::replace_all(escaped, "\t", "\\u0009");
		out << separator << '\"' << escaped << '\"';
		separator = "-";
	}
	return out.str();
}
/**
 * Compact representation of the string set as a UnicodeString.
 *
 * An empty set yields "-". Otherwise every element has its hyphens, dots,
 * spaces and tabs rewritten as literal \uXXXX sequences, is wrapped in
 * double quotes, and the quoted elements are joined with '-' in the set's
 * iteration order. The tagset argument is not used.
 */
UnicodeString StrSet::to_compact_string_u(const Corpus2::Tagset& /* tagset */)
const
{
	if (set_.empty()) {
		UnicodeString dash;
		dash.append(UNICODE_STRING("-", 1));
		return dash;
	}
	UnicodeString out;
	bool first = true;
	for (value_type::const_iterator elem = set_.begin(); elem != set_.end(); ++elem) {
		if (!first) {
			out.append(UNICODE_STRING("-", 1));
		}
		first = false;
		UnicodeString escaped = *elem;
		// '-' is the element separator, so it must not appear verbatim
		// inside an element; the other characters are escaped likewise.
		escaped.findAndReplace(UNICODE_STRING("-", 1), UNICODE_STRING("\\u002d", 6))
			.findAndReplace(UNICODE_STRING(".", 1), UNICODE_STRING("\\u002e", 6))
			.findAndReplace(UNICODE_STRING(" ", 1), UNICODE_STRING("\\u0020", 6))
			.findAndReplace(UNICODE_STRING("\t", 1), UNICODE_STRING("\\u0009", 6));
		out.append(UNICODE_STRING("\"", 1));
		out.append(escaped);
		out.append(UNICODE_STRING("\"", 1));
	}
	return out;
}
bool StrSet::intersects(const StrSet &other) const {
if (empty() || other.empty()) {
return false;
......
......@@ -99,6 +99,12 @@ public:
/// Value override
UnicodeString to_raw_string_u() const;
/// Value override: compact representation of the set -- "-" when the set
/// is empty, otherwise each element escaped (hyphen, dot, space, tab),
/// double-quoted and joined with '-'. The tagset argument is unused.
std::string to_compact_string(const Corpus2::Tagset& tagset) const;
/// Value override: same representation as to_compact_string, built
/// directly as a UnicodeString. The tagset argument is unused.
UnicodeString to_compact_string_u(const Corpus2::Tagset& tagset) const;
private:
value_type set_;
};
......
#include <libwccl/values/tset.h>
#include <libpwrutils/foreach.h>
#include <libpwrutils/bitset.h>
#include <boost/algorithm/string.hpp>
#include <sstream>
namespace Wccl {
......@@ -46,4 +48,16 @@ void TSet::insert_symbol(const Corpus2::Tagset& tagset, const std::string& s)
tag_.combine_with(tagset.parse_symbol(s));
}
/**
 * Compact representation of the tag set.
 *
 * A null tag yields "-". Otherwise the tagset's symbol string for the tag
 * is taken and every ',' in it is replaced with '-'.
 */
std::string TSet::to_compact_string(const Corpus2::Tagset& tagset)
const
{
	if (!tag_.is_null()) {
		std::string symbols = tagset.tag_to_symbol_string(tag_);
		// Switch to the hyphen separator used by the compact format.
		boost::algorithm::replace_all(symbols, ",", "-");
		return symbols;
	}
	return "-";
}
} /* end ns Wccl */
......@@ -115,6 +115,9 @@ public:
std::string to_raw_string() const;
/// Value override: compact representation -- "-" for a null tag, otherwise
/// the tagset's symbol string for the tag with every ',' replaced by '-'.
std::string to_compact_string(const Corpus2::Tagset& tagset) const;
private:
Corpus2::Tag tag_;
};
......
......@@ -71,6 +71,22 @@ public:
return UnicodeString::fromUTF8(to_raw_string());
}
/**
 * Compact string representation of the value.
 *
 * Sets are rendered as hyphen-separated (sorted) strings without brackets.
 * This form is meant for compact output where some ambiguity is acceptable;
 * in particular, the value's type cannot be unambiguously recovered from it.
 *
 * This default implementation ignores the tagset and returns the raw
 * string representation.
 */
virtual std::string to_compact_string(const Corpus2::Tagset& /* tagset */)
const {
	std::string raw = to_raw_string();
	return raw;
}
/**
 * Compact string representation as a UnicodeString. This default
 * implementation converts the UTF-8 result of to_compact_string().
 */
virtual UnicodeString to_compact_string_u(const Corpus2::Tagset& tagset)
const {
	const std::string utf8 = to_compact_string(tagset);
	return UnicodeString::fromUTF8(utf8);
}
protected:
Value() {}
};
......
......@@ -29,6 +29,7 @@ namespace Wccl {
/// String representation; takes the tagset as an argument.
virtual std::string to_string(const Corpus2::Tagset& /*tagset*/) const;
/// String representation that takes no tagset; pure virtual, so every
/// concrete subclass has to provide it.
virtual std::string to_raw_string() const = 0;
/// Compact string representation; takes the tagset as an argument.
/// NOTE(review): the definition is not visible here -- presumably it
/// defaults to to_raw_string(); confirm against the implementation.
virtual std::string to_compact_string(const Corpus2::Tagset& /*tagset*/) const;
};
}
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment