diff --git a/libcorpus2/chunk.cpp b/libcorpus2/chunk.cpp index 7e2f97eda1d47e277c18d70c3dae3c5ca602a3ae..5994145d48c8300a4a5fa0951c2afbe4356d9cd1 100644 --- a/libcorpus2/chunk.cpp +++ b/libcorpus2/chunk.cpp @@ -16,6 +16,7 @@ or FITNESS FOR A PARTICULAR PURPOSE. #include <libcorpus2/chunk.h> #include <libpwrutils/foreach.h> +#include <boost/make_shared.hpp> namespace Corpus2 { @@ -25,16 +26,13 @@ Chunk::Chunk() Chunk::~Chunk() { - foreach (Sentence* s, sentences_) { - delete s; - } } -Chunk* Chunk::clone() const +boost::shared_ptr<Chunk> Chunk::clone_shared() const { - Chunk* copy = new Chunk; - foreach (Sentence* s, sentences_) { - copy->append(s->clone()); + boost::shared_ptr<Chunk> copy = boost::make_shared<Chunk>(); + foreach (const boost::shared_ptr<Sentence>& s, sentences_) { + copy->append(s->clone_shared()); } copy->attributes_ = attributes_; return copy; diff --git a/libcorpus2/chunk.h b/libcorpus2/chunk.h index 5f667bcf4962c421941e96008d6d533bf2a93c89..09e32b9d793d940df7aa4e2dd082641282b84ecc 100644 --- a/libcorpus2/chunk.h +++ b/libcorpus2/chunk.h @@ -36,7 +36,7 @@ public: ~Chunk(); - Chunk* clone() const; + boost::shared_ptr<Chunk> clone_shared() const; bool has_attribute(const std::string& name) const; @@ -44,15 +44,15 @@ public: void set_attribute(const std::string& name, const std::string& value); - std::vector<Sentence*>& sentences() { + std::vector< boost::shared_ptr<Sentence> >& sentences() { return sentences_; } - const std::vector<Sentence*>& sentences() const { + const std::vector< boost::shared_ptr<Sentence> >& sentences() const { return sentences_; } - void append(Sentence* s) { + void append(const boost::shared_ptr<Sentence>& s) { sentences_.push_back(s); } @@ -63,7 +63,7 @@ public: } private: - std::vector<Sentence*> sentences_; + std::vector< boost::shared_ptr<Sentence> > sentences_; attr_map_t attributes_; }; diff --git a/libcorpus2/io/orthwriter.cpp b/libcorpus2/io/orthwriter.cpp index f2a766b55d362f52052f0a8250ef014f08ccd344..11393a8dc866f399d992aa76eb1b7292a0d58530 100644 --- a/libcorpus2/io/orthwriter.cpp +++ b/libcorpus2/io/orthwriter.cpp @@ -67,7 +67,7 @@ void OrthWriter::write_sentence(const Sentence &s) void OrthWriter::write_chunk(const Chunk &c) { - foreach (const Sentence* s, c.sentences()) { + foreach (const Sentence::Ptr& s, c.sentences()) { write_sentence(*s); if (!actual_ws_) { os() << "\n"; diff --git a/libcorpus2/io/plainwriter.cpp b/libcorpus2/io/plainwriter.cpp index 561a0461a35fe723b1f2740b1d3976ba8c5e61d0..45b156bc166c7105cd3645a4e023a1710523b31d 100644 --- a/libcorpus2/io/plainwriter.cpp +++ b/libcorpus2/io/plainwriter.cpp @@ -48,7 +48,7 @@ void PlainWriter::write_sentence(const Sentence &s) void PlainWriter::write_chunk(const Chunk& c) { os() << "[[[<<<\n\n"; - foreach (const Sentence* s, c.sentences()) { + foreach (const boost::shared_ptr<Sentence>& s, c.sentences()) { write_sentence(*s); } os() << ">>>]]]\n\n"; diff --git a/libcorpus2/io/premorphwriter.cpp b/libcorpus2/io/premorphwriter.cpp index 29b22f364978e4fda4965782c304469343fd337a..b1d39a43bc99614aacc3d0515935e0396c1a4778 100644 --- a/libcorpus2/io/premorphwriter.cpp +++ b/libcorpus2/io/premorphwriter.cpp @@ -60,7 +60,7 @@ void PremorphWriter::write_sentence(const Sentence &s) void PremorphWriter::write_chunk(const Chunk &c) { paragraph_head(c); - foreach (const Sentence* s, c.sentences()) { + foreach (const Sentence::ConstPtr& s, c.sentences()) { write_sentence(*s); } os() << "</chunk>\n"; diff --git a/libcorpus2/io/reader.cpp b/libcorpus2/io/reader.cpp index 4dbfc106a36f0a51c8035e18f9b1e117e9c3519e..d0f1223ac06b84af863fe0aa1d723392f8c55572 100644 --- a/libcorpus2/io/reader.cpp +++ b/libcorpus2/io/reader.cpp @@ -15,6 +15,7 @@ or FITNESS FOR A PARTICULAR PURPOSE. */ #include <libcorpus2/io/reader.h> +#include <boost/make_shared.hpp> namespace Corpus2 { @@ -37,12 +38,6 @@ BufferedChunkReader::~BufferedChunkReader() foreach (Token* t, token_buf_) { delete t; } - foreach (Sentence* s, sentence_buf_) { - delete s; - } - foreach (Chunk* c, chunk_buf_) { - delete c; - } } Token* BufferedChunkReader::get_next_token() @@ -50,12 +45,11 @@ Token* BufferedChunkReader::get_next_token() bool more = true; while (token_buf_.empty() && more) { ensure_more(); - Sentence* s = get_next_sentence(); - if (s != NULL) { + Sentence::Ptr s = get_next_sentence(); + if (s) { std::copy(s->tokens().begin(), s->tokens().end(), std::back_inserter(token_buf_)); - s->tokens().clear(); - delete s; + s->release_tokens(); } else { more = false; } @@ -69,37 +63,35 @@ Token* BufferedChunkReader::get_next_token() } } -Sentence* BufferedChunkReader::get_next_sentence() +Sentence::Ptr BufferedChunkReader::get_next_sentence() { bool more = true; while (sentence_buf_.empty() && more) { ensure_more(); - Chunk* c = get_next_chunk(); - if (c != NULL) { + boost::shared_ptr<Chunk> c = get_next_chunk(); + if (c) { std::copy(c->sentences().begin(), c->sentences().end(), std::back_inserter(sentence_buf_)); - c->sentences().clear(); - delete c; } else { more = false; } } if (sentence_buf_.empty()) { - return NULL; + return Sentence::Ptr(); } else { - Sentence* s = sentence_buf_.front(); + Sentence::Ptr s = sentence_buf_.front(); sentence_buf_.pop_front(); return s; } } -Chunk* BufferedChunkReader::get_next_chunk() +boost::shared_ptr<Chunk> BufferedChunkReader::get_next_chunk() { ensure_more(); if (chunk_buf_.empty()) { - return NULL; + return boost::shared_ptr<Chunk>(); } else { - Chunk* t = chunk_buf_.front(); + boost::shared_ptr<Chunk> t = chunk_buf_.front(); chunk_buf_.pop_front(); return t; } @@ -107,7 +99,7 @@ Chunk* BufferedChunkReader::get_next_chunk() BufferedSentenceReader::BufferedSentenceReader(const Tagset& tagset) : TokenReader(tagset), chunkify_(true) - , sentence_buf_(NULL), token_buf_() + , sentence_buf_(), token_buf_() { } @@ -115,8 +107,8 @@ Token* BufferedSentenceReader::get_next_token() { bool more = true; while (token_buf_.empty() && more) { - Sentence* s = get_next_sentence(); - if (s != NULL) { + Sentence::Ptr s = get_next_sentence(); + if (s) { std::copy(s->tokens().begin(), s->tokens().end(), std::back_inserter(token_buf_)); } else { @@ -132,32 +124,32 @@ Token* BufferedSentenceReader::get_next_token() } } -Sentence* BufferedSentenceReader::get_next_sentence() +Sentence::Ptr BufferedSentenceReader::get_next_sentence() { if (sentence_buf_ != NULL) { - Sentence* s = sentence_buf_; - sentence_buf_ = NULL; + Sentence::Ptr s = sentence_buf_; + sentence_buf_.reset(); return s; } else { return actual_next_sentence(); } } -Chunk* BufferedSentenceReader::get_next_chunk() +boost::shared_ptr<Chunk> BufferedSentenceReader::get_next_chunk() { - Sentence* s = get_next_sentence(); - if (s == NULL) { - return NULL; + Sentence::Ptr s = get_next_sentence(); + if (!s) { + return boost::shared_ptr<Chunk>(); } else { - Chunk* c = new Chunk; + boost::shared_ptr<Chunk> c = boost::make_shared<Chunk>(); c->append(s); s = get_next_sentence(); - while (s != NULL && (!chunkify_ || s->first_token()->wa() != + while (s && (!chunkify_ || s->first_token()->wa() != PwrNlp::Whitespace::ManyNewlines)) { c->append(s); s = get_next_sentence(); } - if (s != NULL) { + if (s) { sentence_buf_ = s; } return c; diff --git a/libcorpus2/io/reader.h b/libcorpus2/io/reader.h index 37accdbc1306e7718ac8b1ba7e4e8f8c56d5b80f..15adbbb62999eb523d74fe4cc7f9c9ce49a13e2c 100644 --- a/libcorpus2/io/reader.h +++ b/libcorpus2/io/reader.h @@ -42,9 +42,9 @@ public: virtual Token* get_next_token() = 0; - virtual Sentence* get_next_sentence() = 0; + virtual Sentence::Ptr get_next_sentence() = 0; - virtual Chunk* get_next_chunk() = 0; + virtual boost::shared_ptr<Chunk> get_next_chunk() = 0; const Tagset& tagset() { return tagset_; @@ -70,15 +70,15 @@ public: Token* get_next_token(); - Sentence* get_next_sentence(); + Sentence::Ptr get_next_sentence(); - Chunk* get_next_chunk(); + boost::shared_ptr<Chunk> get_next_chunk(); protected: virtual void ensure_more() = 0; - std::deque<Chunk*> chunk_buf_; - std::deque<Sentence*> sentence_buf_; + std::deque< boost::shared_ptr<Chunk> > chunk_buf_; + std::deque< Sentence::Ptr > sentence_buf_; std::deque<Token*> token_buf_; }; @@ -99,16 +99,16 @@ public: Token* get_next_token(); - Sentence* get_next_sentence(); + Sentence::Ptr get_next_sentence(); - Chunk* get_next_chunk(); + boost::shared_ptr<Chunk> get_next_chunk(); protected: - virtual Sentence* actual_next_sentence() = 0; + virtual Sentence::Ptr actual_next_sentence() = 0; bool chunkify_; - Sentence* sentence_buf_; + Sentence::Ptr sentence_buf_; std::deque<Token*> token_buf_; }; diff --git a/libcorpus2/io/rft.cpp b/libcorpus2/io/rft.cpp index d12fe28c621ce646e9ee9ccfbc2db2608da053f0..1d82498197216db3f1b29aa77d4708d4f6b59a79 100644 --- a/libcorpus2/io/rft.cpp +++ b/libcorpus2/io/rft.cpp @@ -19,6 +19,7 @@ or FITNESS FOR A PARTICULAR PURPOSE. #include <libpwrutils/foreach.h> #include <boost/algorithm/string.hpp> +#include <boost/make_shared.hpp> namespace Corpus2 { @@ -62,7 +63,7 @@ void RftWriter::write_sentence(const Sentence& s) void RftWriter::write_chunk(const Chunk& c) { - foreach (Sentence* s, c.sentences()) { + foreach (const Sentence::ConstPtr& s, c.sentences()) { write_sentence(*s); } } @@ -72,10 +73,10 @@ RftReader::RftReader(const Tagset& tagset, std::istream& is, bool disamb) { } -Sentence* RftReader::actual_next_sentence() +Sentence::Ptr RftReader::actual_next_sentence() { std::string line; - Sentence* s = NULL; + Sentence::Ptr s; while (is().good()) { std::getline(is(), line); if (line.empty()) { @@ -92,8 +93,8 @@ Sentence* RftReader::actual_next_sentence() Token* t = new Token(); t->set_orth(UnicodeString::fromUTF8(orth)); t->set_wa(PwrNlp::Whitespace::Space); - if (s == NULL) { - s = new Sentence(); + if (!s) { + s = boost::make_shared<Sentence>(); t->set_wa(PwrNlp::Whitespace::Newline); } t->add_lexeme(Lexeme(t->orth(), tag)); diff --git a/libcorpus2/io/rft.h b/libcorpus2/io/rft.h index 8bb3de2d7d96285094ac151ec8a428e2502617ac..fcb0d6b6f406f40cbe5b676aa33e6610cd1d8b98 100644 --- a/libcorpus2/io/rft.h +++ b/libcorpus2/io/rft.h @@ -61,7 +61,7 @@ public: protected: /// BufferedSentenceReader override - Sentence* actual_next_sentence(); + Sentence::Ptr actual_next_sentence(); std::istream& is_; diff --git a/libcorpus2/io/xcesreader.cpp b/libcorpus2/io/xcesreader.cpp index ae295d3bdba424e1dde84914043cb4dea73e664a..d0e18afa133b06ca82352cfcba848fd64da6c620 100644 --- a/libcorpus2/io/xcesreader.cpp +++ b/libcorpus2/io/xcesreader.cpp @@ -18,7 +18,7 @@ or FITNESS FOR A PARTICULAR PURPOSE. #include <libcorpus2/io/sax.h> #include <libpwrutils/foreach.h> #include <libxml++/libxml++.h> - +#include <boost/make_shared.hpp> #include <fstream> namespace Corpus2 { @@ -26,8 +26,9 @@ namespace Corpus2 { class XcesReaderImpl : public BasicSaxParser { public: - XcesReaderImpl(const Tagset& tagset, std::deque<Chunk*>& obuf, - bool disamb_only, bool disamb_sh); + XcesReaderImpl(const Tagset& tagset, + std::deque< boost::shared_ptr<Chunk> >& obuf, + bool disamb_only, bool disamb_sh); ~XcesReaderImpl(); @@ -50,11 +51,11 @@ protected: Token* tok_; - Sentence* sent_; + Sentence::Ptr sent_; - Chunk* chunk_; + boost::shared_ptr<Chunk> chunk_; - std::deque<Chunk*>& obuf_; + std::deque< boost::shared_ptr<Chunk> >& obuf_; bool disamb_only_; @@ -101,11 +102,12 @@ void XcesReader::ensure_more() } XcesReaderImpl::XcesReaderImpl(const Tagset& tagset, - std::deque<Chunk*>& obuf, bool disamb_only, bool disamb_sh) + std::deque< boost::shared_ptr<Chunk> >& obuf, + bool disamb_only, bool disamb_sh) : BasicSaxParser() , tagset_(tagset), state_(XS_NONE), chunkless_(false) , wa_(PwrNlp::Whitespace::Newline) - , sbuf_(), tok_(NULL), sent_(NULL), chunk_(NULL), obuf_(obuf) + , sbuf_(), tok_(NULL), sent_(), chunk_(), obuf_(obuf) , disamb_only_(disamb_only), disamb_sh_(disamb_sh) { } @@ -113,8 +115,6 @@ XcesReaderImpl::XcesReaderImpl(const Tagset& tagset, XcesReaderImpl::~XcesReaderImpl() { delete tok_; - delete sent_; - delete chunk_; } void XcesReaderImpl::on_start_element(const Glib::ustring &name, @@ -132,10 +132,10 @@ void XcesReaderImpl::on_start_element(const Glib::ustring &name, //throw XcesError("Top level <chunk> is type=\"s\""); state_ = XS_SENTENCE; chunkless_ = true; - chunk_ = new Chunk; - sent_ = new Sentence; + chunk_ = boost::make_shared<Chunk>(); + sent_ = boost::make_shared<Sentence>(); } else { - chunk_ = new Chunk; + chunk_ = boost::make_shared<Chunk>(); state_ = XS_CHUNK; foreach (const Attribute& a, attributes) { chunk_->set_attribute(a.name, a.value); @@ -146,7 +146,7 @@ void XcesReaderImpl::on_start_element(const Glib::ustring &name, throw XcesError("Sub level <chunk> not type=\"s\""); } state_ = XS_SENTENCE; - sent_ = new Sentence; + sent_ = boost::make_shared<Sentence>(); } else { throw XcesError("Unexpected <chunk>"); } @@ -217,10 +217,10 @@ void XcesReaderImpl::on_end_element(const Glib::ustring &name) state_ = XS_SENTENCE; } else if (state_ == XS_SENTENCE && name == "chunk") { chunk_->append(sent_); - sent_ = NULL; + sent_.reset(); if (chunkless_) { obuf_.push_back(chunk_); - chunk_ = NULL; + chunk_.reset(); state_ = XS_NONE; chunkless_ = false; } else { @@ -228,7 +228,7 @@ void XcesReaderImpl::on_end_element(const Glib::ustring &name) } } else if (state_ == XS_CHUNK && name == "chunk") { obuf_.push_back(chunk_); - chunk_ = NULL; + chunk_.reset(); state_ = XS_NONE; } } diff --git a/libcorpus2/io/xceswriter.cpp b/libcorpus2/io/xceswriter.cpp index 8c2e9c8bf00606fc38de8087fc4677cece98b874..e389d687dc30d6960b9ac3bdbe4f355aff482baa 100644 --- a/libcorpus2/io/xceswriter.cpp +++ b/libcorpus2/io/xceswriter.cpp @@ -76,7 +76,7 @@ void XcesWriter::write_sentence(const Sentence& s) void XcesWriter::write_chunk(const Chunk &c) { bool new_chunk = true; - foreach (const Sentence* s, c.sentences()) { + foreach (const Sentence::ConstPtr& s, c.sentences()) { if (split_chunks_on_newlines_ && !s->tokens().empty()) { const Token* first = s->first_token(); if (first->wa() == PwrNlp::Whitespace::ManyNewlines) { diff --git a/libcorpus2/sentence.cpp b/libcorpus2/sentence.cpp index 3794fe07b4065d7d934b25ab2b78df5d6802db79..44fa4f2849e19fdf49fdb61c77d89ebaafb3173c 100644 --- a/libcorpus2/sentence.cpp +++ b/libcorpus2/sentence.cpp @@ -15,7 +15,30 @@ or FITNESS FOR A PARTICULAR PURPOSE. */ #include <libcorpus2/sentence.h> +#include <boost/make_shared.hpp> namespace Corpus2 { +Sentence::~Sentence() +{ + foreach (const Token* t, tokens_) { + delete t; + } +} + +Sentence::Ptr Sentence::clone_shared() const +{ + Sentence::Ptr s = boost::make_shared<Sentence>(); + foreach (const Token* t, tokens_) { + s->append(t->clone()); + } + return s; +} + +void Sentence::release_tokens() +{ + tokens_.clear(); +} + + } /* end ns Corpus2 */ diff --git a/libcorpus2/sentence.h b/libcorpus2/sentence.h index 567e5745f37061c89fbb305dacf0c9219ed2e413..1cc97928d3a62f55caa874f54d3f00e8576c620c 100644 --- a/libcorpus2/sentence.h +++ b/libcorpus2/sentence.h @@ -18,14 +18,74 @@ or FITNESS FOR A PARTICULAR PURPOSE. #define LIBCORPUS2_SENTENCE_H #include <libcorpus2/token.h> - #include <libpwrutils/sentence.h> +#include <boost/shared_ptr.hpp> namespace Corpus2 { -/// typedef for a sentence of Corpus2 tokens based on the PwrNlp sentence -/// template. -typedef PwrNlp::SentenceTemplate<Token> Sentence; +class Sentence : private boost::noncopyable +{ +public: + typedef boost::shared_ptr<Sentence> Ptr; + typedef boost::shared_ptr<const Sentence> ConstPtr; + + /// Empty constructor + Sentence() + : tokens_() + { + } + + Ptr clone_shared() const; + + /// Destructor + ~Sentence(); + + void release_tokens(); + + bool empty() const { + return tokens_.empty(); + } + + /// Size accessor + size_t size() const { + return tokens_.size(); + } + + /// Token accessor + Token* operator[](size_t idx) { + return tokens_[idx]; + } + + /// Token accessor, const + const Token* operator[](size_t idx) const { + return tokens_[idx]; + } + + /// Underlying vector accessor, const + const std::vector<Token*>& tokens() const { + return tokens_; + } + + /// Underlying vector accessor + std::vector<Token*>& tokens() { + return tokens_; + } + + /// Helper function for appending tokens + void append(Token* t) { + tokens_.push_back(t); + } + + /// convenience first token accessor + const Token* first_token() const { + assert(!tokens_.empty()); + return tokens_[0]; + } + +private: + /// The tokens this sentence contains and owns + std::vector<Token*> tokens_; +}; } /* end ns Corpus2 */