Skip to content
Snippets Groups Projects
Commit a7afe1cf authored by Jan Kocoń
Browse files

added option to write Corpus2::Document to string (also in SWIG)

parent ad1df37e
Branches
No related merge requests found
......@@ -16,6 +16,8 @@ or FITNESS FOR A PARTICULAR PURPOSE.
#include <libcorpus2/io/helpers.h>
#include <libcorpus2/io/reader.h>
#include <libcorpus2/io/writer.h>
#include <boost/foreach.hpp>
#include <sstream>
namespace Corpus2 {
......@@ -35,4 +37,38 @@ std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string(
return chunks;
}
/**
 * Write a single chunk into an in-memory stream using a TokenWriter created
 * for the requested format, and return the stream contents as a string.
 *
 * @param c       chunk to write; it is dereferenced unconditionally, so the
 *                pointer must not be null
 * @param tagset  tagset handed to TokenWriter::create_stream_writer
 * @param format  writer format name handed to
 *                TokenWriter::create_stream_writer
 * @return the text present in the stream after write_chunk() and finish()
 */
std::string write_chunk_to_utf8_string(
	boost::shared_ptr<Chunk> c,
	const Tagset& tagset,
	const std::string& format)
{
	std::stringstream ss;
	boost::shared_ptr<TokenWriter> writer = TokenWriter::create_stream_writer(
		format, ss, tagset);
	writer->write_chunk(*c);
	// finish() is called before the buffer is read so the writer can
	// complete its output first.
	writer->finish();
	// Nothing has been read from ss, so str() yields the same bytes as
	// draining it through istreambuf_iterator, without needing <iterator>
	// or C++11 braced initialisation.
	return ss.str();
}
namespace whole {
std::string write_document_to_utf8_string(
const Document& d,
const Tagset& tagset,
const std::string& format)
{
std::stringstream ss;
boost::shared_ptr<TokenWriter> writer = TokenWriter::create_stream_writer(
format, ss, tagset);
BOOST_FOREACH(boost::shared_ptr<Chunk> c, d.paragraphs()) {
writer->write_chunk(*c);
}
writer->finish();
std::string data(std::istreambuf_iterator<char>(ss), {});
return data;
}
}
} /* end ns Corpus2 */
......@@ -19,6 +19,7 @@ or FITNESS FOR A PARTICULAR PURPOSE.
#include <libcorpus2/chunk.h>
#include <libcorpus2/tagset.h>
#include <libcorpus2_whole/document.h>
namespace Corpus2 {
......@@ -27,6 +28,20 @@ std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string(
const Tagset& tagset,
const std::string& format);
/**
 * Write a single chunk to a string using a writer for the given format.
 *
 * @param c       chunk to write (must not be null)
 * @param tagset  tagset used by the writer
 * @param format  name of the output format passed to the writer factory
 * @return the writer's output as a string
 */
std::string write_chunk_to_utf8_string(
boost::shared_ptr<Chunk> c,
const Tagset& tagset,
const std::string& format);
namespace whole {
/**
 * Write all paragraphs of a document to a string using a writer for the
 * given format.
 *
 * @param d       document whose paragraphs are written
 * @param tagset  tagset used by the writer
 * @param format  name of the output format passed to the writer factory
 * @return the writer's output as a string
 */
std::string write_document_to_utf8_string(
const Document& d,
const Tagset& tagset,
const std::string& format);
}
} /* end ns Corpus2 */
#endif // LIBCORPUS2_IO_HELPERS_H
......@@ -108,9 +108,20 @@ namespace Corpus2 {
std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string(
const std::string& data, const Tagset& tagset, const std::string& format);
// Wrapper declaration for Corpus2::write_chunk_to_utf8_string: writes one
// chunk to a string in the given format.
%feature("autodoc", "1");
std::string write_chunk_to_utf8_string(
boost::shared_ptr<Corpus2::Chunk> c, const Tagset& tagset, const std::string& format);
namespace whole {
// Wrapper declaration for Corpus2::whole::write_document_to_utf8_string:
// writes a whole document to a string in the given format.
// NOTE(review): the C++ header declares the first parameter as
// `const Document&`, while this declaration uses a non-const reference --
// confirm the difference is intentional.
%feature("autodoc", "1");
std::string write_document_to_utf8_string(
Corpus2::whole::Document& d, const Tagset& tagset, const std::string& format);
}
}
using namespace std;
using namespace Corpus2;
using namespace whole;
#endif /* SWIG_LIBCORPUS2_TOKENREADER_I */
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment