Skip to content
Snippets Groups Projects
Commit 6e7affbe authored by Paweł Kędzia
Browse files

Added PoliqarpDocumentReader

parent ed0135f0
No related branches found
No related tags found
No related merge requests found
#include <libcorpus2_whole/io/poliqarpdocumentreader.h>
#include <boost/scoped_ptr.hpp>
namespace Corpus2 {
namespace whole {
/**
 * Creates a document reader over the Poliqarp corpus at the given path.
 *
 * The corpus path is kept in corpus_path_ and a PoliqarpReader is created
 * for the same tagset and path; read() pulls its chunks from that reader.
 *
 * @param tagset      tagset handed to the underlying PoliqarpReader
 * @param corpus_path path of the Poliqarp corpus to read
 */
PoliqarpDocumentReader::PoliqarpDocumentReader(const Tagset& tagset, const std::string& corpus_path)
	: DocumentReaderI("poliqarp"),
	  corpus_path_(corpus_path),
	  pqr_(boost::make_shared<PoliqarpReader>(tagset, corpus_path))
{
	// A constructor may carry only one member-initializer list; the two
	// lists that were present here have been merged into the one above.
}
......@@ -12,10 +13,10 @@ PoliqarpDocumentReader::PoliqarpDocumentReader(const Tagset& tagset, const std::
/**
 * Builds a new Document from the next chunk supplied by the wrapped
 * PoliqarpReader.
 *
 * When the reader yields a non-null chunk it is added to the document as a
 * paragraph; otherwise the document is returned without that paragraph.
 *
 * @return the newly created document (never a null pointer)
 */
boost::shared_ptr<Document> PoliqarpDocumentReader::read()
{
	boost::shared_ptr<Document> result = boost::make_shared<Document>();

	// Only attach a paragraph when the reader actually produced a chunk.
	if (boost::shared_ptr<Chunk> next_chunk = pqr_->get_next_chunk()) {
		result->add_paragraph(next_chunk);
	}

	return result;
}
......
......@@ -2,22 +2,39 @@
#define LIBCORPUS2_WHOLE_POLIQARPDOCUMENTREADER_H
#include <poliqarp/pqreader.h>
#include <libcorpus2_whole/io/docreaderi.h>
#include <libcorpus2_whole/document.h>
#include <libcorpus2_whole/io/reader_i.h>
namespace Corpus2 {
namespace whole {
/**
* Wrapper for PoliqarpReader.
* Can be used as a "document" reader for a Poliqarp corpus. Method read() returns
* a document from the given corpus path. The behavior of this method is similar to
* get_next_document() from the Poliqarp Client: the first call of read() gives the
* first document in the corpus, the nth call of read() gives the nth document.
*/
class PoliqarpDocumentReader : public DocumentReaderI
{
public:
/**
* Creates a reader for the Poliqarp corpus located at corpus_path.
* @param tagset tagset passed on to the underlying PoliqarpReader
* @param corpus_path path of the Poliqarp corpus to read
*/
PoliqarpDocumentReader(const Tagset& tagset, const std::string& corpus_path);
/**
* The semantics of this method are similar to get_next_document from the
* Poliqarp Client.
* @return the document read by this call (the nth call gives the nth document)
*/
boost::shared_ptr<Document> read();
/**
* Sets options for readers (relation reader and/or ccl reader).
* Available options:
* - autogen_sent_id -- automatically generate sentence identifiers
*
* NOTE(review): this description mentions relation/ccl readers rather than
* the Poliqarp reader -- confirm that it applies to this class.
*/
void set_option(const std::string& option);
private:
/// Path of the corpus -- NOTE(review): confirm the constructor initializes it
const std::string corpus_path_;
/// Poliqarp reader used for reading the Poliqarp corpus
boost::shared_ptr<PoliqarpReader> pqr_;
};
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment