diff --git a/libcorpus2_whole/io/poliqarpdocumentreader.cpp b/libcorpus2_whole/io/poliqarpdocumentreader.cpp index a302340b593a656ef573431a78420d82c17caf3a..62953d721789726b176dbfc6b5b471b9e5cf5327 100644 --- a/libcorpus2_whole/io/poliqarpdocumentreader.cpp +++ b/libcorpus2_whole/io/poliqarpdocumentreader.cpp @@ -1,21 +1,22 @@ #include <libcorpus2_whole/io/poliqarpdocumentreader.h> +#include <boost/scoped_ptr.hpp> namespace Corpus2 { namespace whole { PoliqarpDocumentReader::PoliqarpDocumentReader(const Tagset& tagset, const std::string& corpus_path) - : DocumentReaderI("poliqarp"), corpus_path_(corpus_path) + : DocumentReaderI("poliqarp") { - this->pqr_ = boost::shared_ptr<PoliqarpReader> (new PoliqarpReader(tagset, corpus_path)); + this->pqr_ = boost::shared_ptr<PoliqarpReader>(new PoliqarpReader(tagset, corpus_path)); } boost::shared_ptr<Document> PoliqarpDocumentReader::read() { boost::shared_ptr<Document> document = boost::make_shared<Document>(); - // boost::shared_ptr<Chunk> chunk = this->pqr_->pq_->get_next_sentence(); -// if (chunk) { -// document->add_paragraph(chunk); -// } + boost::shared_ptr<Chunk> chunk = this->pqr_->get_next_chunk(); + if (chunk) { + document->add_paragraph(chunk); + } return document; } diff --git a/libcorpus2_whole/io/poliqarpdocumentreader.h b/libcorpus2_whole/io/poliqarpdocumentreader.h index f59ac67b730faf5402eaf5e32bdd125dbf5ecd5f..58d33183feedf9763aeda12be1ef218550f50899 100644 --- a/libcorpus2_whole/io/poliqarpdocumentreader.h +++ b/libcorpus2_whole/io/poliqarpdocumentreader.h @@ -2,22 +2,39 @@ #define LIBCORPUS2_WHOLE_POLIQARPDOCUMENTREADER_H #include <poliqarp/pqreader.h> -#include <libcorpus2_whole/io/docreaderi.h> #include <libcorpus2_whole/document.h> +#include <libcorpus2_whole/io/reader_i.h> namespace Corpus2 { namespace whole { +/** + * Wrapper for PoliqarpReader. + * Can be used as a "document" reader for a Poliqarp corpus. Method read() returns + * a document from the given corpus path. 
Behavior of this method is similar to + * get_next_document() from Poliqarp Client. First call of read() gives first + * document in corpus, nth call of read() gives nth document from corpus... + */ class PoliqarpDocumentReader : public DocumentReaderI { public: PoliqarpDocumentReader(const Tagset& tagset, const std::string& corpus_path); - /// semantic of this methd is like get_next_document + /** + * The semantics of this method are similar to get_next_document from Poliqarp Client + * @return the nth document read + */ boost::shared_ptr<Document> read(); + /** + * Sets options for readers (relation reader and/or ccl reader). + * Available options: + * - autogen_sent_id -- automatically generates sentence identifiers + */ void set_option(const std::string& option); + private: - const std::string corpus_path_; + /// Poliqarp reader used for reading Poliqarp corpus boost::shared_ptr<PoliqarpReader> pqr_; };