From bca77ee2605e8cc3bb4dedb1c7d718a349bf4eaf Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Tue, 17 May 2011 14:20:43 +0200
Subject: [PATCH] corpus2 helper to read a string

---
 libcorpus2/CMakeLists.txt   |  1 +
 libcorpus2/io/helpers.cpp   | 22 ++++++++++++++++++++++
 libcorpus2/io/helpers.h     | 16 ++++++++++++++++
 swig/libcorpuschunk.i       |  1 +
 swig/libcorpustokenreader.i |  5 +++++
 5 files changed, 45 insertions(+)
 create mode 100644 libcorpus2/io/helpers.cpp
 create mode 100644 libcorpus2/io/helpers.h

diff --git a/libcorpus2/CMakeLists.txt b/libcorpus2/CMakeLists.txt
index 2af29e1..279ef42 100644
--- a/libcorpus2/CMakeLists.txt
+++ b/libcorpus2/CMakeLists.txt
@@ -57,6 +57,7 @@ SET(libcorpus2_STAT_SRC
 	tokenmetadata.cpp
 	io/cclreader.cpp
 	io/cclwriter.cpp
+	io/helpers.cpp
 	io/fastxces.cpp
 	io/nonewriter.cpp
 	io/orthwriter.cpp
diff --git a/libcorpus2/io/helpers.cpp b/libcorpus2/io/helpers.cpp
new file mode 100644
index 0000000..9aac878
--- /dev/null
+++ b/libcorpus2/io/helpers.cpp
@@ -0,0 +1,22 @@
+#include <libcorpus2/io/helpers.h>
+#include <libcorpus2/io/reader.h>
+#include <sstream>
+namespace Corpus2 {
+
+// Collects, in order, every chunk the `format` reader yields from `data`.
+std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string(
+        const std::string& data, const Tagset& tagset,
+        const std::string& format)
+{
+	std::stringstream input(data);
+	boost::shared_ptr<TokenReader> reader =
+		TokenReader::create_stream_reader(format, tagset, input);
+	std::vector<boost::shared_ptr<Chunk> > result;
+	// Keep pulling chunks until the reader hands back a null pointer.
+	for (boost::shared_ptr<Chunk> next; (next = reader->get_next_chunk()); ) {
+		result.push_back(next);
+	}
+	return result;
+}
+
+} /* end ns Corpus2 */
diff --git a/libcorpus2/io/helpers.h b/libcorpus2/io/helpers.h
new file mode 100644
index 0000000..92d8a89
--- /dev/null
+++ b/libcorpus2/io/helpers.h
@@ -0,0 +1,16 @@
+#ifndef LIBCORPUS2_IO_HELPERS_H
+#define LIBCORPUS2_IO_HELPERS_H
+
+#include <libcorpus2/chunk.h>
+#include <libcorpus2/tagset.h>
+
+namespace Corpus2 {
+
+/// Reads all chunks from the in-memory string `data` using the stream reader
+/// selected by `format` with `tagset`; chunks are returned in reading order.
+std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string(
+	const std::string& data, const Tagset& tagset, const std::string& format);
+
+} /* end ns Corpus2 */
+
+#endif // LIBCORPUS2_IO_HELPERS_H
diff --git a/swig/libcorpuschunk.i b/swig/libcorpuschunk.i
index baa3fc1..df79864 100644
--- a/swig/libcorpuschunk.i
+++ b/swig/libcorpuschunk.i
@@ -46,6 +46,7 @@ namespace Corpus2 {
   };
 }
 
+%template(ChunkPtrVector) std::vector<boost::shared_ptr<Chunk> >;
 using namespace std;
 using namespace Corpus2;
 
diff --git a/swig/libcorpustokenreader.i b/swig/libcorpustokenreader.i
index 93043ff..0a367d5 100644
--- a/swig/libcorpustokenreader.i
+++ b/swig/libcorpustokenreader.i
@@ -4,6 +4,7 @@
 %module libcorpustokenreader
 %{
   #include <libcorpus2/io/reader.h>
+  #include <libcorpus2/io/helpers.h>
 %}
 
 %include "libcorpustag.i"
@@ -73,6 +74,10 @@ namespace Corpus2 {
     static std::string reader_help(const std::string& class_id);
     static std::vector<std::string> available_reader_types_help();
   };
+
+  std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string(
+    const std::string& data, const Tagset& tagset, const std::string& format);
+ 
 }
 
 using namespace std;
-- 
GitLab