diff --git a/libcorpus2/CMakeLists.txt b/libcorpus2/CMakeLists.txt index 96f80e90a2eef8edfc1575fd763d5ba914dff277..b225155d07cfd1de3456a6305e98df3a85f43973 100644 --- a/libcorpus2/CMakeLists.txt +++ b/libcorpus2/CMakeLists.txt @@ -3,7 +3,7 @@ PROJECT(corpus2) set(corpus2_ver_major "1") set(corpus2_ver_minor "0") -set(corpus2_ver_patch "6") +set(corpus2_ver_patch "8") if(NOT LIBCORPUS2_SRC_DATA_DIR) @@ -60,6 +60,7 @@ SET(libcorpus2_STAT_SRC io/fastxces.cpp io/nonewriter.cpp io/orthwriter.cpp + io/pathwriter.cpp io/plainwriter.cpp io/premorphwriter.cpp io/reader.cpp diff --git a/libcorpus2/io/pathwriter.cpp b/libcorpus2/io/pathwriter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a1abcdad990fb08648af46b5088bd29606783bb9 --- /dev/null +++ b/libcorpus2/io/pathwriter.cpp @@ -0,0 +1,32 @@ +#include <libcorpus2/io/pathwriter.h> + +namespace Corpus2 { + +PathWriter::PathWriter(const boost::shared_ptr<TokenWriter>& underlying, + const boost::shared_ptr<std::ostream>& os) + : TokenWriter(*os, underlying->tagset(), string_range_vector()), + os_(os), underlying_(underlying) +{ + assert(&underlying_->os() == os_.get()); +} + +PathWriter::~PathWriter() +{ +} + +void PathWriter::write_token(const Token& t) +{ + underlying_->write_token(t); +} + +void PathWriter::write_sentence(const Sentence& s) +{ + underlying_->write_sentence(s); +} + +void PathWriter::write_chunk(const Chunk& p) +{ + underlying_->write_chunk(p); +} + +} /* end ns Corpus2 */ diff --git a/libcorpus2/io/pathwriter.h b/libcorpus2/io/pathwriter.h new file mode 100644 index 0000000000000000000000000000000000000000..1f9bcc88105ba73b3beb5cd7c0fc1f27b6c17b18 --- /dev/null +++ b/libcorpus2/io/pathwriter.h @@ -0,0 +1,49 @@ +#ifndef LIBCORPUS2_IO_PATHWRITER_H +#define LIBCORPUS2_IO_PATHWRITER_H + +#include <libcorpus2/io/writer.h> +#include <boost/scoped_ptr.hpp> + +namespace Corpus2 { + +/** + * A wrapper class for the output stream Writers that holds an open stream + * and closes it on 
destruction. All write calls are passed to the + * wrapped Writer. + */ +class PathWriter : public TokenWriter +{ +public: + /** + * The constructor. It is assumed that the underlying writer operates + * on the same ostream as the pointer passed. + */ + PathWriter(const boost::shared_ptr<TokenWriter>& underlying, + const boost::shared_ptr<std::ostream>& os); + + ~PathWriter(); + + void write_token(const Token& t); + + virtual void write_sentence(const Sentence& s); + + virtual void write_chunk(const Chunk& p); + + boost::shared_ptr<TokenWriter> get_underlying() { + return underlying_; + } + + boost::shared_ptr<std::ostream> get_ostream_pointer() { + return os_; + } + +protected: + /// The owned ostream pointer + boost::shared_ptr<std::ostream> os_; + /// The wrapped Writer (Note: must come after the ostream ptr in the class!) + boost::shared_ptr<TokenWriter> underlying_; +}; + +} /* end ns Corpus2 */ + +#endif // LIBCORPUS2_IO_PATHWRITER_H diff --git a/libcorpus2/io/reader.cpp b/libcorpus2/io/reader.cpp index 51777bf976ea06db7495bdf5512f3f4d6c67b93f..46a158a4eceaf78eee265785497ca1a4c4811b76 100644 --- a/libcorpus2/io/reader.cpp +++ b/libcorpus2/io/reader.cpp @@ -90,7 +90,7 @@ boost::shared_ptr<TokenReader> TokenReader::create_path_reader( return boost::shared_ptr<TokenReader>( detail::TokenReaderFactorySingleton::Instance().path_factory.CreateObject( class_id, tagset, path, params)); - } catch (detail::TokenReaderFactoryException& e) { + } catch (detail::TokenReaderFactoryException&) { throw Corpus2Error("Reader class not found: " + class_id); } } diff --git a/libcorpus2/io/reader.h b/libcorpus2/io/reader.h index 4f52aac410b116fdee0ceec9b1c49ffc46d1eab0..cc89b4b0e2e01cc6f20d66d6435871bf2ff4c05b 100644 --- a/libcorpus2/io/reader.h +++ b/libcorpus2/io/reader.h @@ -248,7 +248,6 @@ TokenReaderFactoryException; } /* end ns detail */ - template <typename T> bool TokenReader::register_reader(const std::string& class_id, const std::string& help) diff --git 
a/libcorpus2/io/writer.cpp b/libcorpus2/io/writer.cpp index 73c3a4c03e8ef9cb12c1d923853abfb54b45b791..4315343b11418e047e278976c8d940a210cac580 100644 --- a/libcorpus2/io/writer.cpp +++ b/libcorpus2/io/writer.cpp @@ -17,8 +17,11 @@ or FITNESS FOR A PARTICULAR PURPOSE. #include <libcorpus2/io/writer.h> #include <libpwrutils/foreach.h> #include <boost/algorithm/string.hpp> - +#include <libcorpus2/exception.h> +#include <libcorpus2/io/pathwriter.h> +#include <boost/make_shared.hpp> #include <sstream> +#include <fstream> namespace Corpus2 { @@ -65,16 +68,22 @@ void TokenWriter::write_token_dispose(Token* t) } -TokenWriter* TokenWriter::create(const std::string class_id, +boost::shared_ptr<TokenWriter> TokenWriter::create_stream_writer(const std::string& class_id, std::ostream& os, const Tagset& tagset, const string_range_vector& params) { - return TokenWriterFactorySingleton::Instance().factory.CreateObject( - class_id, os, tagset, params); + try { + return boost::shared_ptr<TokenWriter>( + detail::TokenWriterFactorySingleton::Instance().factory.CreateObject( + class_id, os, tagset, params)); + } catch (detail::TokenWriterFactoryException&) { + throw Corpus2Error("Writer class not found: " + class_id); + } } -TokenWriter* TokenWriter::create(const std::string class_id_params, +boost::shared_ptr<TokenWriter> TokenWriter::create_stream_writer( + const std::string& class_id_params, std::ostream& os, const Tagset& tagset) { @@ -83,20 +92,49 @@ TokenWriter* TokenWriter::create(const std::string class_id_params, boost::is_any_of(std::string(","))); std::string class_id = boost::copy_range<std::string>(params[0]); params.erase(params.begin(), params.begin() + 1); - return TokenWriterFactorySingleton::Instance().factory.CreateObject( - class_id, os, tagset, params); + return create_stream_writer(class_id, os, tagset, params); +} + +boost::shared_ptr<TokenWriter> TokenWriter::create_path_writer(const std::string& class_id, + const std::string& path, + const Tagset& tagset, + 
const string_range_vector& params) +{ + boost::shared_ptr<std::ofstream> ofs = boost::make_shared<std::ofstream>(path.c_str()); + if (!ofs->good()) { + throw Corpus2::FileNotFound(path, "", "writer creation"); + } + boost::shared_ptr<TokenWriter> underlying = create_stream_writer( + class_id, *ofs, tagset, params); + assert(underlying); + boost::shared_ptr<TokenWriter> wrapped = boost::make_shared<PathWriter>( + underlying, ofs); + return wrapped; +} + +boost::shared_ptr<TokenWriter> TokenWriter::create_path_writer( + const std::string& class_id_params, + const std::string& path, + const Tagset& tagset) +{ + string_range_vector params; + boost::algorithm::split(params, class_id_params, + boost::is_any_of(std::string(","))); + std::string class_id = boost::copy_range<std::string>(params[0]); + params.erase(params.begin(), params.begin() + 1); + return create_path_writer(class_id, path, tagset, params); } std::vector<std::string> TokenWriter::available_writer_types() { - return TokenWriterFactorySingleton::Instance().factory.RegisteredIds(); + return detail::TokenWriterFactorySingleton::Instance().factory.RegisteredIds(); } std::string TokenWriter::writer_help(const std::string& class_id) { std::map<std::string, std::string>::const_iterator c; - c = TokenWriterFactorySingleton::Instance().help.find(class_id); - if (c != TokenWriterFactorySingleton::Instance().help.end()) { + c = detail::TokenWriterFactorySingleton::Instance().help.find(class_id); + if (c != detail::TokenWriterFactorySingleton::Instance().help.end()) { return c->second; } else { return ""; @@ -109,8 +147,8 @@ std::vector<std::string> TokenWriter::available_writer_types_help() foreach (std::string& id, v) { std::stringstream ss; std::map<std::string, std::string>::const_iterator c; - c = TokenWriterFactorySingleton::Instance().help.find(id); - if (c != TokenWriterFactorySingleton::Instance().help.end()) { + c = detail::TokenWriterFactorySingleton::Instance().help.find(id); + if (c != 
detail::TokenWriterFactorySingleton::Instance().help.end()) { ss << id << "["; ss << c->second; ss << "]"; diff --git a/libcorpus2/io/writer.h b/libcorpus2/io/writer.h index a17e80335ace34e8e5f9087e914b9c2b3f756025..09cd0a45deb91bdc7833174b26389633a3c00a3d 100644 --- a/libcorpus2/io/writer.h +++ b/libcorpus2/io/writer.h @@ -69,7 +69,7 @@ public: * @param os the output stream to pass to the writer's constructor * @param tagset the tagset to pass to the writer's constructor */ - static TokenWriter* create(const std::string class_id, + static boost::shared_ptr<TokenWriter> create_stream_writer(const std::string& class_id, std::ostream& os, const Tagset& tagset, const string_range_vector& params); @@ -81,10 +81,41 @@ public: * is called. Parameters are expected to be comma-separated from the * class id. */ - static TokenWriter* create(const std::string class_id_params, + static boost::shared_ptr<TokenWriter> create_stream_writer(const std::string& class_id_params, std::ostream& os, const Tagset& tagset); + /** + * Factory interface for creating writers from string identifiers + * + * Mostly a convenience function to avoid having client code refer + * directly to the factory instance. + * + * This is the file path (as opposed to output stream) version. + * + * @param class_id the unique class identifier + * @param path file to write to + * @param tagset the tagset to pass to the writer's constructor + * @param params extra parameters to pass to the writer's constructor + */ + static boost::shared_ptr<TokenWriter> create_path_writer(const std::string& class_id, + const std::string& path, + const Tagset& tagset, + const string_range_vector& params); + + /** + * Factory interface for creating writers from string identifiers. + * + * Params are split from the class id and then the more general version + * is called. Parameters are expected to be comma-separated from the + * class id. + * + * This is the file path (as opposed to output stream) version. 
+ */ + static boost::shared_ptr<TokenWriter> create_path_writer(const std::string& class_id_params, + const std::string& path, + const Tagset& tagset); + /** * Function to get a vector of available writer type strings. */ @@ -132,6 +163,8 @@ private: int indent_; }; +namespace detail { + typedef Loki::Factory< TokenWriter, // The base class for objects created in the factory std::string, // Identifier type @@ -179,14 +212,17 @@ T* writer_creator(std::ostream& os, const Tagset& tagset, return new T(os, tagset, params); } +} /* end ns detail */ + + template <typename T> bool TokenWriter::register_writer(const std::string& class_id, const std::string& help) { - bool ret = TokenWriterFactorySingleton::Instance().factory.Register( - class_id, writer_creator<T>); + bool ret = detail::TokenWriterFactorySingleton::Instance().factory.Register( + class_id, detail::writer_creator<T>); if (ret) { - TokenWriterFactorySingleton::Instance().help[class_id] = help; + detail::TokenWriterFactorySingleton::Instance().help[class_id] = help; } return ret; } diff --git a/tests/io.cpp b/tests/io.cpp index 1048a4f7a99e9e6f58f110e8b3af75b33963832e..f302a6f50e175b720189a8a8786aae5225dd8504 100644 --- a/tests/io.cpp +++ b/tests/io.cpp @@ -139,7 +139,7 @@ BOOST_AUTO_TEST_CASE( iobase ) Corpus2::XcesReader xr(tagset, ssin); boost::shared_ptr<Corpus2::Chunk> chunk = xr.get_next_chunk(); std::stringstream ss; - boost::shared_ptr<Corpus2::TokenWriter> w(Corpus2::TokenWriter::create("xces,flat", ss, tagset)); + boost::shared_ptr<Corpus2::TokenWriter> w(Corpus2::TokenWriter::create_stream_writer("xces,flat", ss, tagset)); w->write_chunk(*chunk); w->finish(); BOOST_CHECK_EQUAL(ss.str(), swiatopoglad); @@ -154,7 +154,7 @@ BOOST_AUTO_TEST_CASE( fast ) Corpus2::FastXcesReader xr(tagset, ssin); boost::shared_ptr<Corpus2::Chunk> chunk = xr.get_next_chunk(); std::stringstream ss; - boost::shared_ptr<Corpus2::TokenWriter> w(Corpus2::TokenWriter::create("xces,flat", ss, tagset)); + 
boost::shared_ptr<Corpus2::TokenWriter> w(Corpus2::TokenWriter::create_stream_writer("xces,flat", ss, tagset)); w->write_chunk(*chunk); w->finish(); BOOST_CHECK_EQUAL(ss.str(), swiatopoglad_noid); @@ -169,7 +169,7 @@ BOOST_AUTO_TEST_CASE( io_oo ) xr.set_option("no_warn_inconsistent"); boost::shared_ptr<Corpus2::Chunk> chunk = xr.get_next_chunk(); std::stringstream ss; - boost::shared_ptr<Corpus2::TokenWriter> w(Corpus2::TokenWriter::create("xces,flat", ss, tagset)); + boost::shared_ptr<Corpus2::TokenWriter> w(Corpus2::TokenWriter::create_stream_writer("xces,flat", ss, tagset)); w->write_chunk(*chunk); w->finish(); BOOST_CHECK_EQUAL(ss.str(), swiatopoglad); diff --git a/tests/ioann.cpp b/tests/ioann.cpp index 8f9d3804a6b097536d99684f3c2d6e065f75134d..963bfe05aace469f6e439ae2dbc9c27f41e5e728 100644 --- a/tests/ioann.cpp +++ b/tests/ioann.cpp @@ -101,7 +101,7 @@ BOOST_AUTO_TEST_CASE( iobase ) boost::shared_ptr<Corpus2::Chunk> chunk = xr.get_next_chunk(); BOOST_REQUIRE(chunk); std::stringstream ss; - boost::shared_ptr<Corpus2::TokenWriter> w(Corpus2::TokenWriter::create("xces,flat", ss, tagset)); + boost::shared_ptr<Corpus2::TokenWriter> w(Corpus2::TokenWriter::create_stream_writer("xces,flat", ss, tagset)); //boost::shared_ptr<Corpus2::TokenWriter> wann(Corpus2::TokenWriter::create("ccl", std::cerr, tagset)); //wann->write_chunk(*chunk); //wann->finish();