From 368fb5f02bf26a1d4b2a15a204cacd3a15bfaa88 Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Tue, 24 May 2011 12:24:30 +0200
Subject: [PATCH] simple swig icu UnicodeString wrapper with __str__,
 __unicode__ and length()

---
 swig/corpus2.i       |  1 +
 swig/unicodestring.i | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 swig/unicodestring.i

diff --git a/swig/corpus2.i b/swig/corpus2.i
index a805a32..087727c 100644
--- a/swig/corpus2.i
+++ b/swig/corpus2.i
@@ -4,6 +4,7 @@
 %module corpus2
 
 %include "boost_shared_ptr.i"
+%include "unicodestring.i"
 %include "libcorpus2exception.i"
 %include "libcorpusannotatedsentence.i"
 %include "libcorpusannotationchannel.i"
diff --git a/swig/unicodestring.i b/swig/unicodestring.i
new file mode 100644
index 0000000..30997d7
--- /dev/null
+++ b/swig/unicodestring.i
@@ -0,0 +1,42 @@
+#ifndef SWIG_UNICODESTRING_I
+#define SWIG_UNICODESTRING_I
+
+%module unicodestring
+%{
+  #include <unicode/unistr.h>
+%} 
+
+%include "std_string.i"
+%include "std_except.i"
+
+
+class UnicodeString {  // SWIG-visible subset of the ICU UnicodeString API
+  public:
+    int length();
+    UChar* getBuffer();
+    UChar* getTerminatedBuffer();
+%pythoncode %{
+	def __unicode__(self):
+		# as_utf16() is BOM-less native-endian data; plain 'utf16' would consume a leading U+FEFF/U+FFFE as a BOM
+		import sys
+		return self.as_utf16().decode('utf-16-le' if sys.byteorder == 'little' else 'utf-16-be')
+
+	def __str__(self):
+		return self.as_utf8()
+%}
+};
+
+%extend UnicodeString {
+    std::string as_utf16() {  // UTF-16 code units copied as raw bytes: native byte order, no BOM
+        return std::string(reinterpret_cast<const char*>(self->getTerminatedBuffer()), self->length() * sizeof(UChar));
+    }
+
+    std::string as_utf8() {  // UTF-8 bytes as filled in by toUTF8String()
+        std::string r;
+        self->toUTF8String(r);
+        return r;
+    }
+}
+
+
+#endif /* SWIG_UNICODESTRING_I */
-- 
GitLab