Skip to content
Snippets Groups Projects
Commit 7bb4fa18 authored by Paweł Kędzia's avatar Paweł Kędzia
Browse files

DocumentReader implementation

parent 3add93fc
Branches
No related merge requests found
......@@ -14,9 +14,40 @@ or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE and COPYING files for more details.
*/
#include <libcorpus2/io/docreader.h>
#include <boost/make_shared.hpp>
#include <libcorpus2/io/docreader.h>
namespace Corpus2 {
/**
 * Constructs a DocumentReader; all reader set-up is delegated to
 * make_readers().
 * @param tagset Tagset handed on to make_readers()
 * @param annot_path Annotation file path handed on to make_readers()
 * @param rela_path Relation file path handed on to make_readers()
 */
DocumentReader::DocumentReader(
	const Tagset& tagset,
	const std::string &annot_path,
	const std::string &rela_path)
{
	this->make_readers(tagset, annot_path, rela_path);
}
void DocumentReader::make_readers(const Tagset& tagset,
const std::string &annot_path, const std::string &rela_path)
{
ccl_reader_ = boost::make_shared<CclReader>(tagset, annot_path);
rel_reader_ = boost::make_shared<RelationReader>(rela_path);
}
/**
 * Assembles a Document from the chunks produced by the CCL reader.
 * Chunks are requested one at a time until the reader returns a null
 * pointer; every non-null chunk is added to the document as a paragraph.
 * @return Pointer to the newly created Document
 */
boost::shared_ptr<Document> DocumentReader::read()
{
	boost::shared_ptr<Document> result = boost::make_shared<Document>();

	// A null chunk signals that the reader has nothing more to deliver.
	while (boost::shared_ptr<Chunk> paragraph = ccl_reader_->get_next_chunk()) {
		result->add_paragraph(paragraph);
	}

	return result;
}
} /* end ns Corpus2 */
......@@ -17,7 +17,11 @@ or FITNESS FOR A PARTICULAR PURPOSE.
#ifndef LIBCORPUS2_DOCREADER_H
#define LIBCORPUS2_DOCREADER_H
#include <libcorpus2/io/reader.h>
#include <libcorpus2/document.h>
#include <libcorpus2/io/cclreader.h>
#include <libcorpus2/io/relreader.h>
#include <boost/shared_ptr.hpp>
namespace Corpus2 {
......@@ -32,11 +36,39 @@ public:
* chunk-style annotations are read from annot_path, while relations
* between chunk-style annotations are read from rela_path.
* Both paths may in particular point to the same file.
* TODO!
* @param tagset Tagset to use
* @param annot_path Path to file with morphosyntax and chunk-style annotations
* @param rela_path path to file with relations
*/
DocumentReader(const std::string &annot_path,
const std::string &rela_path,
const std::string &rdr_class_id = "ccl");
DocumentReader(const Tagset& tagset,
const std::string &annot_path,
const std::string &rela_path);
/**
* Reads the document stored in the given file(s): the file with morphosyntax
* and chunk-style annotations and the file with relations.
* NOTE(review): the read() definition visible in this commit only adds the
* chunks obtained from the CCL reader as paragraphs; relations do not appear
* to be loaded there yet -- confirm whether that part is still pending.
* @return Pointer to the Document that was read
*/
boost::shared_ptr<Document> read();
private:
/**
* Creates the CclReader and the RelationReader for the given file paths and
* stores them in ccl_reader_ and rel_reader_.
* @param tagset Tagset to use in CclReader
* @param annot_path Path to file with morphosyntax and chunk-style annotations
* @param rela_path Path to file with relations
*/
void make_readers(
const Tagset& tagset,
const std::string &annot_path,
const std::string &rela_path);
// -------------------------------------------------------------------------
/// Pointer to CclReader
boost::shared_ptr<CclReader> ccl_reader_;
/// Pointer to RelationReader
boost::shared_ptr<RelationReader> rel_reader_;
};
} /* end ns Corpus2 */
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment