From 76bd95c73ef2f7d5cfac5639e07ad2057ef899df Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Wed, 4 May 2011 13:34:05 +0200
Subject: [PATCH] add helper functions for swig

Add std::string (UTF-8) variants of the Lexeme/Token creation functions
and of the lemma/orth setters, and enable their declarations in the SWIG
interface files. Token::create_utf8 is a static factory, so it is
declared static in the SWIG interface as well, matching token.h.

---
 libcorpus2/lexeme.cpp  | 6 ++++++
 libcorpus2/lexeme.h    | 8 ++++++++
 libcorpus2/token.cpp   | 6 ++++++
 libcorpus2/token.h     | 9 +++++++++
 swig/libcorpuslexeme.i | 4 ++--
 swig/libcorpustoken.i  | 4 +++-
 6 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/libcorpus2/lexeme.cpp b/libcorpus2/lexeme.cpp
index be0f8ae..d5370ed 100644
--- a/libcorpus2/lexeme.cpp
+++ b/libcorpus2/lexeme.cpp
@@ -33,6 +33,12 @@ Lexeme Lexeme::create(const UnicodeString& lemma, const Tag& tag)
 	return Lexeme(lemma, tag);
 }
 
+Lexeme Lexeme::create_utf8(const std::string& lemma_utf8, const Tag& tag)
+{
+	return Lexeme(UnicodeString::fromUTF8(lemma_utf8), tag);
+}
+
+
 bool Lexeme::is_null() const
 {
 	return lemma().length() == 0 || tag().is_null();
diff --git a/libcorpus2/lexeme.h b/libcorpus2/lexeme.h
index fbf82a0..4c0119a 100644
--- a/libcorpus2/lexeme.h
+++ b/libcorpus2/lexeme.h
@@ -53,6 +53,9 @@ public:
 	/// Helper creation function
 	static Lexeme create(const UnicodeString& lemma, const Tag& tag);
 
+	/// Helper creation function taking the lemma as a UTF-8 encoded string
+	static Lexeme create_utf8(const std::string& lemma_utf8, const Tag& tag);
+
 	/// Lemma accessor
 	const UnicodeString& lemma() const {
 		return lemma_;
@@ -63,6 +66,11 @@ public:
 		lemma_ = l;
 	}
 
+	/// Lemma setter taking a UTF-8 encoded string
+	void set_lemma_utf8(const std::string& lemma_utf8) {
+		lemma_ = UnicodeString::fromUTF8(lemma_utf8);
+	}
+
 	/// UTF-8 lemma convenience accessor
 	const std::string lemma_utf8() const {
 		return PwrNlp::to_utf8(lemma_);
diff --git a/libcorpus2/token.cpp b/libcorpus2/token.cpp
index dba0322..38f3bdb 100644
--- a/libcorpus2/token.cpp
+++ b/libcorpus2/token.cpp
@@ -32,6 +32,12 @@ Token::Token(const UnicodeString &orth, PwrNlp::Whitespace::Enum wa)
 {
 }
 
+Token* Token::create_utf8(const std::string& orth_utf8,
+		PwrNlp::Whitespace::Enum wa /*= PwrNlp::Whitespace::Space*/)
+{
+	return new Token(UnicodeString::fromUTF8(orth_utf8), wa);
+}
+
 Token* Token::clone() const
 {
 	Token* t = new Token();
diff --git a/libcorpus2/token.h b/libcorpus2/token.h
index 13c623f..ab09922 100644
--- a/libcorpus2/token.h
+++ b/libcorpus2/token.h
@@ -54,6 +54,10 @@ public:
 	/// Create a Token with the given orth and whitespace amount
 	Token(const UnicodeString& orth, PwrNlp::Whitespace::Enum wa);
 
+	/// Create a Token (allocated with new) from a UTF-8 encoded orth
+	static Token* create_utf8(const std::string& orth_utf8,
+		PwrNlp::Whitespace::Enum wa = PwrNlp::Whitespace::Space);
+
 	/// Create a duplicate Token
 	Token* clone() const;
 
@@ -72,6 +76,11 @@ public:
 		orth_ = orth;
 	}
 
+	/// Orth setter taking a UTF-8 encoded string
+	void set_orth_utf8(const std::string& orth_utf8) {
+		orth_ = UnicodeString::fromUTF8(orth_utf8);
+	}
+
 	/// WA getter
 	const PwrNlp::Whitespace::Enum& wa() const {
 		return wa_;
diff --git a/swig/libcorpuslexeme.i b/swig/libcorpuslexeme.i
index 058e8b7..e962845 100644
--- a/swig/libcorpuslexeme.i
+++ b/swig/libcorpuslexeme.i
@@ -24,13 +24,13 @@ namespace Corpus2 {
     Lexeme(const UnicodeString& lemma, const Tag& tag);
 
     static Lexeme create(const UnicodeString& lemma, const Tag& tag);
-    // static Lexeme create_utf8(const std::string& lemma, const Tag& tag);
+    static Lexeme create_utf8(const std::string& lemma, const Tag& tag);
 
     const UnicodeString& lemma() const;
     const std::string lemma_utf8() const;
 
     void set_lemma(const UnicodeString& l);
-    // void set_lemma_utf8(const std::string& l);
+    void set_lemma_utf8(const std::string& l);
 
     const Tag& tag() const;
     void set_tag(const Tag& tag);
diff --git a/swig/libcorpustoken.i b/swig/libcorpustoken.i
index 079063b..5d81bba 100644
--- a/swig/libcorpustoken.i
+++ b/swig/libcorpustoken.i
@@ -26,12 +26,14 @@ namespace Corpus2 {
     Token();
     Token(const UnicodeString& orth, PwrNlp::Whitespace::Enum wa);
     Token* clone() const;
+
+    static Token* create_utf8(const std::string& orth_utf8, PwrNlp::Whitespace::Enum wa = PwrNlp::Whitespace::Space);
 
     const UnicodeString& orth() const;
     std::string orth_utf8() const;
 
     void set_orth(const UnicodeString& orth);
-    // void set_orth_utf8(const std::string& orth);
+    void set_orth_utf8(const std::string& orth);
 
     const PwrNlp::Whitespace::Enum& wa() const;
     void set_wa(const PwrNlp::Whitespace::Enum& wa);
-- 
GitLab