diff --git a/libcorpus2/io/helpers.cpp b/libcorpus2/io/helpers.cpp
index 8ebdf857586b1844295e21e3f9b78d4b5d93b4ca..c148d833149a38a6fc830b8a1c476e94fdaee50c 100644
--- a/libcorpus2/io/helpers.cpp
+++ b/libcorpus2/io/helpers.cpp
@@ -16,6 +16,8 @@ or FITNESS FOR A PARTICULAR PURPOSE.
 
 #include <libcorpus2/io/helpers.h>
 #include <libcorpus2/io/reader.h>
+#include <libcorpus2/io/writer.h>
+#include <boost/foreach.hpp>
 #include <sstream>
 
 namespace Corpus2 {
@@ -35,4 +37,58 @@ std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string(
 	return chunks;
 }
 
+/**
+ * Writes one chunk through a TokenWriter created for the given format and
+ * tagset, and returns the text the writer produced.
+ *
+ * @param c chunk to write; dereferenced unconditionally, so must not be null
+ * @param tagset tagset passed to TokenWriter::create_stream_writer
+ * @param format format name passed to TokenWriter::create_stream_writer
+ * @return contents of the in-memory stream once the writer has finished
+ */
+std::string write_chunk_to_utf8_string(
+	boost::shared_ptr<Chunk> c,
+	const Tagset& tagset,
+	const std::string& format)
+{
+	std::stringstream ss;
+	boost::shared_ptr<TokenWriter> writer = TokenWriter::create_stream_writer(
+		format, ss, tagset);
+	writer->write_chunk(*c);
+	writer->finish();
+	// str() hands back the whole buffer in one step; draining the stream
+	// through istreambuf_iterator with a `{}` end iterator needed C++11.
+	return ss.str();
+}
+
+namespace whole {
+/**
+ * Writes every chunk of d.paragraphs(), in iteration order, through a single
+ * TokenWriter and returns the text the writer produced.
+ *
+ * @param d document to write; each paragraph pointer is dereferenced
+ *          unconditionally, so none may be null
+ * @param tagset tagset passed to TokenWriter::create_stream_writer
+ * @param format format name passed to TokenWriter::create_stream_writer
+ * @return contents of the in-memory stream once the writer has finished
+ */
+std::string write_document_to_utf8_string(
+	const Document& d,
+	const Tagset& tagset,
+	const std::string& format)
+{
+	std::stringstream ss;
+	boost::shared_ptr<TokenWriter> writer = TokenWriter::create_stream_writer(
+		format, ss, tagset);
+	// Bind by const reference so no shared_ptr is copied per paragraph.
+	BOOST_FOREACH(const boost::shared_ptr<Chunk>& c, d.paragraphs()) {
+		writer->write_chunk(*c);
+	}
+	writer->finish();
+	return ss.str();
+}
+
+} /* end ns whole */
+
+
 } /* end ns Corpus2 */
diff --git a/libcorpus2/io/helpers.h b/libcorpus2/io/helpers.h
index c61a90413dc46badd9f978f19fa9ba3e3407888a..34fe76fbb007ed9b3c6947e2a096d5ade4c20848 100644
--- a/libcorpus2/io/helpers.h
+++ b/libcorpus2/io/helpers.h
@@ -19,6 +19,7 @@ or FITNESS FOR A PARTICULAR PURPOSE.
#include <libcorpus2/chunk.h> #include <libcorpus2/tagset.h> +#include <libcorpus2_whole/document.h> /* NOTE(review): presumably supplies whole::Document for the declaration added below; confirm this header is meant to depend on libcorpus2_whole */ namespace Corpus2 { @@ -27,6 +28,20 @@ std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string( const Tagset& tagset, const std::string& format); +/** Writes chunk c with a writer created for the given format and tagset and returns the text it produced; c is dereferenced by the implementation, so it must not be null. */ std::string write_chunk_to_utf8_string( + boost::shared_ptr<Chunk> c, + const Tagset& tagset, + const std::string& format); + +namespace whole { + +/** Writes every paragraph of document d with one writer created for the given format and tagset and returns the text it produced. */ std::string write_document_to_utf8_string( + const Document& d, + const Tagset& tagset, + const std::string& format); +} + + } /* end ns Corpus2 */ #endif // LIBCORPUS2_IO_HELPERS_H diff --git a/swig/tokenreader.i b/swig/tokenreader.i index 522c6287c720c5f7d326f5a90cf841b3b7972461..011f954e740636fdd32adc5cee89a7d346f1438d 100644 --- a/swig/tokenreader.i +++ b/swig/tokenreader.i @@ -108,9 +108,20 @@ namespace Corpus2 { std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string( const std::string& data, const Tagset& tagset, const std::string& format); +%feature("autodoc", "1"); + std::string write_chunk_to_utf8_string( + boost::shared_ptr<Corpus2::Chunk> c, const Tagset& tagset, const std::string& format); +namespace whole { +%feature("autodoc", "1"); + std::string write_document_to_utf8_string( + /* NOTE(review): the C++ header declares this parameter as const Document&; the non-const reference here looks unintentional - confirm */ Corpus2::whole::Document& d, const Tagset& tagset, const std::string& format); +} + + } using namespace std; using namespace Corpus2; +using namespace whole; #endif /* SWIG_LIBCORPUS2_TOKENREADER_I */