From 6e7affbee509ce24e606e751a476a3064ea4fb4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20K=C4=99dzia?= <Pawel.Kedzia@pwr.wroc.pl> Date: Thu, 29 Dec 2011 12:21:38 +0100 Subject: [PATCH] Added PoliqarpDocumentReader --- .../io/poliqarpdocumentreader.cpp | 13 ++++++----- libcorpus2_whole/io/poliqarpdocumentreader.h | 23 ++++++++++++++++--- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/libcorpus2_whole/io/poliqarpdocumentreader.cpp b/libcorpus2_whole/io/poliqarpdocumentreader.cpp index a302340..62953d7 100644 --- a/libcorpus2_whole/io/poliqarpdocumentreader.cpp +++ b/libcorpus2_whole/io/poliqarpdocumentreader.cpp @@ -1,21 +1,22 @@ #include <libcorpus2_whole/io/poliqarpdocumentreader.h> +#include <boost/scoped_ptr.hpp> namespace Corpus2 { namespace whole { PoliqarpDocumentReader::PoliqarpDocumentReader(const Tagset& tagset, const std::string& corpus_path) - : DocumentReaderI("poliqarp"), corpus_path_(corpus_path) + : DocumentReaderI("poliqarp") { - this->pqr_ = boost::shared_ptr<PoliqarpReader> (new PoliqarpReader(tagset, corpus_path)); + this->pqr_ = boost::shared_ptr<PoliqarpReader>(new PoliqarpReader(tagset, corpus_path)); } boost::shared_ptr<Document> PoliqarpDocumentReader::read() { boost::shared_ptr<Document> document = boost::make_shared<Document>(); - // boost::shared_ptr<Chunk> chunk = this->pqr_->pq_->get_next_sentence(); -// if (chunk) { -// document->add_paragraph(chunk); -// } + boost::shared_ptr<Chunk> chunk = this->pqr_->get_next_chunk(); + if (chunk) { + document->add_paragraph(chunk); + } return document; } diff --git a/libcorpus2_whole/io/poliqarpdocumentreader.h b/libcorpus2_whole/io/poliqarpdocumentreader.h index f59ac67..58d3318 100644 --- a/libcorpus2_whole/io/poliqarpdocumentreader.h +++ b/libcorpus2_whole/io/poliqarpdocumentreader.h @@ -2,22 +2,39 @@ #define LIBCORPUS2_WHOLE_POLIQARPDOCUMENTREADER_H #include <poliqarp/pqreader.h> -#include <libcorpus2_whole/io/docreaderi.h> #include <libcorpus2_whole/document.h> 
+#include <libcorpus2_whole/io/reader_i.h> namespace Corpus2 { namespace whole { +/** + * Wrapper for PoliqarpReader. + * Can be used as "document" reader for Poliqarp corpus. Method read() returns + * document from given corpus path. Behavior of this method is similar to + * get_next_document() from Poliqarp Client. First call of read() gives first + * document in corpus, nth call of read() gives nth document from corpus... + */ class PoliqarpDocumentReader : public DocumentReaderI { public: PoliqarpDocumentReader(const Tagset& tagset, const std::string& corpus_path); - /// semantic of this methd is like get_next_document + /** + * Semantics of this method are similar to get_next_document from Poliqarp Client + * @return the nth document read + */ boost::shared_ptr<Document> read(); + /** + * Sets options for readers (relation reader and/or ccl reader). + * Available options: + * - autogen_sent_id -- for automatic generation of sentence identifiers + */ + void set_option(const std::string& option); + private: - const std::string corpus_path_; + /// Poliqarp reader used for reading Poliqarp corpus boost::shared_ptr<PoliqarpReader> pqr_; }; -- GitLab