Skip to content
Snippets Groups Projects
Commit bca77ee2 authored by ilor's avatar ilor
Browse files

corpus2 helper to read a string

parent f268d517
Branches
No related merge requests found
...@@ -57,6 +57,7 @@ SET(libcorpus2_STAT_SRC ...@@ -57,6 +57,7 @@ SET(libcorpus2_STAT_SRC
tokenmetadata.cpp tokenmetadata.cpp
io/cclreader.cpp io/cclreader.cpp
io/cclwriter.cpp io/cclwriter.cpp
io/helpers.cpp
io/fastxces.cpp io/fastxces.cpp
io/nonewriter.cpp io/nonewriter.cpp
io/orthwriter.cpp io/orthwriter.cpp
......
#include <libcorpus2/io/helpers.h>
#include <libcorpus2/io/reader.h>
#include <sstream>
namespace Corpus2 {
std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string(
const std::string& data,
const Tagset& tagset,
const std::string& format)
{
std::stringstream ss;
ss << data;
boost::shared_ptr<TokenReader> reader = TokenReader::create_stream_reader(
format, tagset, ss);
std::vector<boost::shared_ptr<Chunk> > chunks;
while (boost::shared_ptr<Chunk> c = reader->get_next_chunk()) {
chunks.push_back(c);
}
return chunks;
}
} /* end ns Corpus2 */
#ifndef LIBCORPUS2_IO_HELPERS_H
#define LIBCORPUS2_IO_HELPERS_H
#include <libcorpus2/chunk.h>
#include <libcorpus2/tagset.h>
#include <boost/shared_ptr.hpp>
#include <string>
#include <vector>
namespace Corpus2 {
/**
 * Read all chunks contained in an in-memory string.
 *
 * @param data    text to parse (expected to be UTF-8, going by the name)
 * @param tagset  tagset used by the reader
 * @param format  reader format identifier, as understood by
 *                TokenReader::create_stream_reader
 * @return the chunks, in the order the reader produced them
 */
std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string(
	const std::string& data,
	const Tagset& tagset,
	const std::string& format);
} /* end ns Corpus2 */
#endif // LIBCORPUS2_IO_HELPERS_H
...@@ -46,6 +46,7 @@ namespace Corpus2 { ...@@ -46,6 +46,7 @@ namespace Corpus2 {
}; };
} }
// Instantiate the std::vector wrapper for shared Chunk pointers under the
// target-language name ChunkPtrVector.
%template(ChunkPtrVector) std::vector<boost::shared_ptr<Chunk> >;
using namespace std; using namespace std;
using namespace Corpus2; using namespace Corpus2;
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
%module libcorpustokenreader %module libcorpustokenreader
%{ %{
#include <libcorpus2/io/reader.h> #include <libcorpus2/io/reader.h>
#include <libcorpus2/io/helpers.h>
%} %}
%include "libcorpustag.i" %include "libcorpustag.i"
...@@ -73,6 +74,10 @@ namespace Corpus2 { ...@@ -73,6 +74,10 @@ namespace Corpus2 {
static std::string reader_help(const std::string& class_id); static std::string reader_help(const std::string& class_id);
static std::vector<std::string> available_reader_types_help(); static std::vector<std::string> available_reader_types_help();
}; };
// Free function declared in libcorpus2/io/helpers.h; repeated here so that
// SWIG generates a wrapper for it. Reads chunks from the string `data` using
// the given tagset and reader format.
std::vector<boost::shared_ptr<Chunk> > read_chunks_from_utf8_string(
const std::string& data, const Tagset& tagset, const std::string& format);
} }
using namespace std; using namespace std;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment