Skip to content
Snippets Groups Projects
Commit 7bb4fa18 authored by Paweł Kędzia's avatar Paweł Kędzia
Browse files

DocumentReader implementation

parent 3add93fc
No related branches found
No related tags found
No related merge requests found
......@@ -14,9 +14,40 @@ or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE and COPYING files for more details.
*/
#include <libcorpus2/io/docreader.h>
#include <boost/make_shared.hpp>
#include <libcorpus2/io/docreader.h>
namespace Corpus2 {
/**
 * Constructs a DocumentReader; creation of the underlying readers is
 * delegated entirely to make_readers().
 * @param tagset Tagset forwarded to make_readers()
 * @param annot_path Path forwarded to make_readers() as the annotation file
 * @param rela_path Path forwarded to make_readers() as the relation file
 */
DocumentReader::DocumentReader(
        const Tagset& tagset,
        const std::string &annot_path,
        const std::string &rela_path)
{
    this->make_readers(tagset, annot_path, rela_path);
}
void DocumentReader::make_readers(const Tagset& tagset,
const std::string &annot_path, const std::string &rela_path)
{
ccl_reader_ = boost::make_shared<CclReader>(tagset, annot_path);
rel_reader_ = boost::make_shared<RelationReader>(rela_path);
}
/**
 * Builds a new Document from the chunks delivered by the CclReader.
 * Chunks are fetched one by one with get_next_chunk() and appended as
 * paragraphs; reading stops at the first null chunk.
 * NOTE(review): rel_reader_ is not consulted in this function, so no
 * relations end up in the returned Document -- confirm this is intended.
 * @return Pointer to the newly created Document
 */
boost::shared_ptr<Document> DocumentReader::read()
{
    boost::shared_ptr<Document> result = boost::make_shared<Document>();

    // The loop condition is the fetched chunk itself: a null pointer ends it.
    while (boost::shared_ptr<Chunk> paragraph = ccl_reader_->get_next_chunk()) {
        result->add_paragraph(paragraph);
    }

    return result;
}
} /* end ns Corpus2 */
......@@ -17,7 +17,11 @@ or FITNESS FOR A PARTICULAR PURPOSE.
#ifndef LIBCORPUS2_DOCREADER_H
#define LIBCORPUS2_DOCREADER_H
#include <libcorpus2/io/reader.h>
#include <libcorpus2/document.h>
#include <libcorpus2/io/cclreader.h>
#include <libcorpus2/io/relreader.h>
#include <boost/shared_ptr.hpp>
namespace Corpus2 {
......@@ -32,11 +36,39 @@ public:
* chunk-style annotations are read from annot_path, while relations
* between chunk-style annotations are read from rela_path.
* Both paths may in particular point to the same file.
* TODO!
* @param tagset Tagset to use
* @param annot_path Path to file with morphosyntax and chunk-style annotations
* @param rela_path path to file with relations
*/
// NOTE(review): the implementation visible for this class defines only the
// Tagset-taking constructor below; confirm whether this rdr_class_id
// overload is still meant to be declared.
DocumentReader(const std::string &annot_path,
const std::string &rela_path,
const std::string &rdr_class_id = "ccl");
/// Constructor taking the tagset, the annotation file path and the relation file path.
DocumentReader(const Tagset& tagset,
const std::string &annot_path,
const std::string &rela_path);
/**
* Reads the document stored in the given file(s): the file with morphosyntax
* and chunk-style annotations and the file with relations.
* @return Pointer to the Document that was read
*/
boost::shared_ptr<Document> read();
private:
/**
* Makes CclReader and RelationReader for given paths to files.
* Parameters are documented in the order in which they are declared.
* @param tagset Tagset to use in CclReader
* @param annot_path Path to file with morphosyntax and chunk-style annotations
* @param rela_path Path to file with relations
*/
void make_readers(
const Tagset& tagset,
const std::string &annot_path,
const std::string &rela_path);
// -------------------------------------------------------------------------
/// Pointer to the CclReader (reader for the annotation file); set in make_readers()
boost::shared_ptr<CclReader> ccl_reader_;
/// Pointer to the RelationReader (reader for the relation file); set in make_readers()
boost::shared_ptr<RelationReader> rel_reader_;
};
} /* end ns Corpus2 */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment