From bca77ee2605e8cc3bb4dedb1c7d718a349bf4eaf Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Tue, 17 May 2011 14:20:43 +0200
Subject: [PATCH] corpus2 helper to read a string

---
 libcorpus2/CMakeLists.txt   |  1 +
 libcorpus2/io/helpers.cpp   | 22 ++++++++++++++++++++++
 libcorpus2/io/helpers.h     | 28 ++++++++++++++++++++++++++++
 swig/libcorpuschunk.i       |  1 +
 swig/libcorpustokenreader.i |  5 +++++
 5 files changed, 57 insertions(+)
 create mode 100644 libcorpus2/io/helpers.cpp
 create mode 100644 libcorpus2/io/helpers.h

diff --git a/libcorpus2/CMakeLists.txt b/libcorpus2/CMakeLists.txt
index 2af29e1..279ef42 100644
--- a/libcorpus2/CMakeLists.txt
+++ b/libcorpus2/CMakeLists.txt
@@ -57,6 +57,7 @@ SET(libcorpus2_STAT_SRC
 	tokenmetadata.cpp
 	io/cclreader.cpp
 	io/cclwriter.cpp
+	io/helpers.cpp
 	io/fastxces.cpp
 	io/nonewriter.cpp
 	io/orthwriter.cpp
diff --git a/libcorpus2/io/helpers.cpp b/libcorpus2/io/helpers.cpp
new file mode 100644
index 0000000..9aac878
--- /dev/null
+++ b/libcorpus2/io/helpers.cpp
@@ -0,0 +1,22 @@
+#include <libcorpus2/io/helpers.h>
+#include <libcorpus2/io/reader.h>
+#include <sstream>
+namespace Corpus2 {
+
+std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string(
+	const std::string& data,
+	const Tagset& tagset,
+	const std::string& format)
+{
+	std::stringstream ss;
+	ss << data;
+	boost::shared_ptr<TokenReader> reader = TokenReader::create_stream_reader(
+		format, tagset, ss);
+	std::vector<boost::shared_ptr<Chunk> > chunks;
+	while (boost::shared_ptr<Chunk> c = reader->get_next_chunk()) {
+		chunks.push_back(c);
+	}
+	return chunks;
+}
+
+} /* end ns Corpus2 */
diff --git a/libcorpus2/io/helpers.h b/libcorpus2/io/helpers.h
new file mode 100644
index 0000000..92d8a89
--- /dev/null
+++ b/libcorpus2/io/helpers.h
@@ -0,0 +1,28 @@
+#ifndef LIBCORPUS2_IO_HELPERS_H
+#define LIBCORPUS2_IO_HELPERS_H
+
+#include <libcorpus2/chunk.h>
+#include <libcorpus2/tagset.h>
+
+namespace Corpus2 {
+
+/**
+ * Read every chunk contained in an in-memory string.
+ *
+ * The data is pushed through a stream reader obtained from
+ * TokenReader::create_stream_reader and chunks are collected until the
+ * reader returns a null chunk.
+ *
+ * @param data   text to parse (UTF-8, judging by the function name)
+ * @param tagset tagset handed to the reader
+ * @param format reader type identifier handed to create_stream_reader
+ * @return chunks in the order the reader produced them
+ */
+std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string(
+	const std::string& data,
+	const Tagset& tagset,
+	const std::string& format);
+
+} /* end ns Corpus2 */
+
+#endif // LIBCORPUS2_IO_HELPERS_H
diff --git a/swig/libcorpuschunk.i b/swig/libcorpuschunk.i
index baa3fc1..df79864 100644
--- a/swig/libcorpuschunk.i
+++ b/swig/libcorpuschunk.i
@@ -46,6 +46,7 @@ namespace Corpus2 {
   };
 }
 
+%template(ChunkPtrVector) std::vector<boost::shared_ptr<Chunk> >;
 using namespace std;
 using namespace Corpus2;
 
diff --git a/swig/libcorpustokenreader.i b/swig/libcorpustokenreader.i
index 93043ff..0a367d5 100644
--- a/swig/libcorpustokenreader.i
+++ b/swig/libcorpustokenreader.i
@@ -4,6 +4,7 @@
 %module libcorpustokenreader
 %{
   #include <libcorpus2/io/reader.h>
+  #include <libcorpus2/io/helpers.h>
 %}
 
 %include "libcorpustag.i"
@@ -73,6 +74,10 @@ namespace Corpus2 {
     static std::string reader_help(const std::string& class_id);
     static std::vector<std::string> available_reader_types_help();
   };
+
+  std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string(
+    const std::string& data, const Tagset& tagset, const std::string& format);
+
 }
 
 using namespace std;
-- 
GitLab