diff --git a/swig/corpus2.i b/swig/corpus2.i
index a805a328edbf38ce4cf86f21915a120b9e2d5b60..087727cda465a2a2dd8cf328bc2db988843ec93c 100644
--- a/swig/corpus2.i
+++ b/swig/corpus2.i
@@ -4,6 +4,7 @@
 %module corpus2
 
 %include "boost_shared_ptr.i"
+%include "unicodestring.i"
 %include "libcorpus2exception.i"
 %include "libcorpusannotatedsentence.i"
 %include "libcorpusannotationchannel.i"
diff --git a/swig/unicodestring.i b/swig/unicodestring.i
new file mode 100644
index 0000000000000000000000000000000000000000..30997d7d1a91929648fea39967dc7a85107e2984
--- /dev/null
+++ b/swig/unicodestring.i
@@ -0,0 +1,69 @@
+#ifndef SWIG_UNICODESTRING_I
+#define SWIG_UNICODESTRING_I
+
+/* SWIG interface exposing a minimal view of ICU's UnicodeString, plus
+ * helpers that convert it to native Python strings. */
+
+%module unicodestring
+%{
+  #include <unicode/unistr.h>
+%}
+
+%include "std_string.i"
+%include "std_except.i"
+
+
+/* Only the members used by the conversion helpers below are declared;
+ * the rest of the ICU class is not wrapped here. */
+class UnicodeString {
+  public:
+    /* Number of UTF-16 code units (not code points, not bytes). */
+    int length();
+    /* Pointers into the string's own UTF-16 storage. */
+    UChar* getBuffer();
+    UChar* getTerminatedBuffer();
+
+%pythoncode %{
+  def __unicode__(self):
+    """Return the text as a unicode object.
+
+    as_utf16() yields BOM-less code units in the machine's byte order, so
+    the byte order is named explicitly: the generic 'utf16' codec would
+    treat a leading U+FEFF / U+FFFE as a byte-order mark and drop it or
+    byte-swap everything after it.
+    """
+    import sys
+    codec = 'utf-16-le' if sys.byteorder == 'little' else 'utf-16-be'
+    return self.as_utf16().decode(codec)
+
+  def __str__(self):
+    """Return the text as a UTF-8 encoded string."""
+    return self.as_utf8()
+%}
+
+};
+
+%extend UnicodeString {
+  /* Raw UTF-16 code units in native byte order, without a BOM, packed
+   * into a byte string (sizeof(UChar) bytes per code unit). */
+  std::string as_utf16() {
+    const char* bytes =
+        reinterpret_cast<const char*>(self->getTerminatedBuffer());
+    /* ICU may return a null buffer (e.g. for a bogus string); never
+     * build a std::string from a null pointer. */
+    if (!bytes) {
+      return std::string();
+    }
+    return std::string(bytes, self->length() * sizeof(UChar));
+  }
+
+  /* The text re-encoded as UTF-8. */
+  std::string as_utf8() {
+    std::string utf8;
+    self->toUTF8String(utf8);
+    return utf8;
+  }
+}
+
+
+#endif /* SWIG_UNICODESTRING_I */