From a7afe1cf5e01dbfe402cbd7a7e35710013e69112 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Koco=C5=84?= <jan.kocon@pwr.edu.pl>
Date: Wed, 31 Oct 2018 16:43:09 +0100
Subject: [PATCH] added option to write Corpus2::Document to string (also in SWIG)

---
NOTE(review): line breaks, blank context lines and indentation in this patch
were reconstructed from a whitespace-collapsed copy; verify the context lines
against the tree (or apply with whitespace-tolerant options) before merging.
Changes relative to the collapsed copy: the writers return ss.str() instead of
re-reading the stream through std::istreambuf_iterator, the paragraph loop no
longer copies each shared_ptr, the new declarations in helpers.h are
documented, and the SWIG declaration takes the document by const reference to
match the C++ header.

 libcorpus2/io/helpers.cpp | 36 ++++++++++++++++++++++++++++++++++++
 libcorpus2/io/helpers.h   | 33 +++++++++++++++++++++++++++++++++
 swig/tokenreader.i        | 11 +++++++++++
 3 files changed, 80 insertions(+)

diff --git a/libcorpus2/io/helpers.cpp b/libcorpus2/io/helpers.cpp
index 8ebdf85..c148d83 100644
--- a/libcorpus2/io/helpers.cpp
+++ b/libcorpus2/io/helpers.cpp
@@ -16,6 +16,8 @@ or FITNESS FOR A PARTICULAR PURPOSE.
 
 #include <libcorpus2/io/helpers.h>
 #include <libcorpus2/io/reader.h>
+#include <libcorpus2/io/writer.h>
+#include <boost/foreach.hpp>
 #include <sstream>
 
 namespace Corpus2 {
@@ -35,4 +37,38 @@ std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string(
 	return chunks;
 }
 
+std::string write_chunk_to_utf8_string(
+	boost::shared_ptr<Chunk> c,
+	const Tagset& tagset,
+	const std::string& format)
+{
+	std::stringstream ss;
+	boost::shared_ptr<TokenWriter> writer = TokenWriter::create_stream_writer(
+		format, ss, tagset);
+	writer->write_chunk(*c);
+	writer->finish();
+	// Hand back exactly what the writer put into the stream buffer.
+	return ss.str();
+}
+
+namespace whole {
+std::string write_document_to_utf8_string(
+	const Document& d,
+	const Tagset& tagset,
+	const std::string& format)
+{
+	std::stringstream ss;
+	boost::shared_ptr<TokenWriter> writer = TokenWriter::create_stream_writer(
+		format, ss, tagset);
+	BOOST_FOREACH(const boost::shared_ptr<Chunk>& c, d.paragraphs()) {
+		writer->write_chunk(*c);
+	}
+	writer->finish();
+	// Hand back exactly what the writer put into the stream buffer.
+	return ss.str();
+}
+
+} /* end ns whole */
+
+
 } /* end ns Corpus2 */
diff --git a/libcorpus2/io/helpers.h b/libcorpus2/io/helpers.h
index c61a904..34fe76f 100644
--- a/libcorpus2/io/helpers.h
+++ b/libcorpus2/io/helpers.h
@@ -19,6 +19,7 @@ or FITNESS FOR A PARTICULAR PURPOSE.
 
 #include <libcorpus2/chunk.h>
 #include <libcorpus2/tagset.h>
+#include <libcorpus2_whole/document.h>
 
 namespace Corpus2 {
 
@@ -27,6 +28,38 @@ std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string(
 	const Tagset& tagset,
 	const std::string& format);
 
+/**
+ * Write a single chunk to a string using a TokenWriter created for the
+ * given format and tagset.
+ *
+ * @param c      chunk to serialize; it is dereferenced, so it must not be null
+ * @param tagset tagset passed to TokenWriter::create_stream_writer
+ * @param format writer format name passed to TokenWriter::create_stream_writer
+ * @return the text the writer produced
+ */
+std::string write_chunk_to_utf8_string(
+	boost::shared_ptr<Chunk> c,
+	const Tagset& tagset,
+	const std::string& format);
+
+namespace whole {
+
+/**
+ * Write all paragraphs of a document, in order, to a string using a
+ * TokenWriter created for the given format and tagset.
+ *
+ * @param d      document whose paragraphs() are serialized
+ * @param tagset tagset passed to TokenWriter::create_stream_writer
+ * @param format writer format name passed to TokenWriter::create_stream_writer
+ * @return the text the writer produced
+ */
+std::string write_document_to_utf8_string(
+	const Document& d,
+	const Tagset& tagset,
+	const std::string& format);
+}
+
+
 } /* end ns Corpus2 */
 
 #endif // LIBCORPUS2_IO_HELPERS_H
diff --git a/swig/tokenreader.i b/swig/tokenreader.i
index 522c628..011f954 100644
--- a/swig/tokenreader.i
+++ b/swig/tokenreader.i
@@ -108,9 +108,20 @@ namespace Corpus2 {
   std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string(
     const std::string& data, const Tagset& tagset, const std::string& format);
 
+%feature("autodoc", "1");
+  std::string write_chunk_to_utf8_string(
+    boost::shared_ptr<Corpus2::Chunk> c, const Tagset& tagset, const std::string& format);
+namespace whole {
+%feature("autodoc", "1");
+  std::string write_document_to_utf8_string(
+    const Corpus2::whole::Document& d, const Tagset& tagset, const std::string& format);
+}
+
+
 }
 
 using namespace std;
 using namespace Corpus2;
+using namespace whole;
 
 #endif /* SWIG_LIBCORPUS2_TOKENREADER_I */
-- 
GitLab