From 7bb4fa18b05368e490c5639871b99a1603d48cc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20K=C4=99dzia?= <Pawel.Kedzia@pwr.wroc.pl> Date: Tue, 15 Nov 2011 15:59:27 +0100 Subject: [PATCH] DocumentReader implementation --- libcorpus2/io/docreader.cpp | 33 ++++++++++++++++++++++++++++- libcorpus2/io/docreader.h | 42 ++++++++++++++++++++++++++++++++----- 2 files changed, 69 insertions(+), 6 deletions(-) diff --git a/libcorpus2/io/docreader.cpp b/libcorpus2/io/docreader.cpp index 597190c..463ae4f 100644 --- a/libcorpus2/io/docreader.cpp +++ b/libcorpus2/io/docreader.cpp @@ -14,9 +14,40 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE and COPYING files for more details. */ -#include <libcorpus2/io/docreader.h> #include <boost/make_shared.hpp> +#include <libcorpus2/io/docreader.h> namespace Corpus2 { + DocumentReader::DocumentReader(const Tagset& tagset, + const std::string &annot_path, const std::string &rela_path) + { + make_readers(tagset, annot_path, rela_path); + } + + void DocumentReader::make_readers(const Tagset& tagset, + const std::string &annot_path, const std::string &rela_path) + { + ccl_reader_ = boost::make_shared<CclReader>(tagset, annot_path); + rel_reader_ = boost::make_shared<RelationReader>(rela_path); + } + + boost::shared_ptr<Document> DocumentReader::read() + { + boost::shared_ptr<Chunk> chunk; + boost::shared_ptr<Document> document = boost::make_shared<Document>(); + + // Append each chunk returned by the CCL reader to the document as a paragraph, stopping at the first null chunk. NOTE(review): rel_reader_ is not used in this function. + while (1) { + chunk = ccl_reader_->get_next_chunk(); + if (!chunk) { + break; + } + else { + document->add_paragraph(chunk); + } + } + + return document; + } } /* end ns Corpus2 */ diff --git a/libcorpus2/io/docreader.h b/libcorpus2/io/docreader.h index 632def6..b769c56 100644 --- a/libcorpus2/io/docreader.h +++ b/libcorpus2/io/docreader.h @@ -17,7 +17,11 @@ or FITNESS FOR A PARTICULAR PURPOSE. 
#ifndef LIBCORPUS2_DOCREADER_H #define LIBCORPUS2_DOCREADER_H -#include <libcorpus2/io/reader.h> +#include <libcorpus2/document.h> +#include <libcorpus2/io/cclreader.h> +#include <libcorpus2/io/relreader.h> + +#include <boost/shared_ptr.hpp> namespace Corpus2 { @@ -32,11 +36,39 @@ public: * chunk-style annotations are read from annot_path, while relations * between chunk-style annotations are read from rela_path. * Both path may in particular point to the same path. - * TODO! + * @param tagset Tagset to use + * @param annot_path Path to file with morphosyntax and chunk-style annotations + * @param rela_path path to file with relations */ - DocumentReader(const std::string &annot_path, - const std::string &rela_path, - const std::string &rdr_class_id = "ccl"); + DocumentReader(const Tagset& tagset, + const std::string &annot_path, + const std::string &rela_path); + + /** + * Reads the document stored in the given file(s): the file with morphosyntax and + * chunk-style annotations and the file with relations (NOTE(review): read() in docreader.cpp does not use rel_reader_ yet - confirm). + * @return Pointer to the Document that was read + */ + boost::shared_ptr<Document> read(); + +private: + /** + * Creates the CclReader and the RelationReader for the given file paths. + * @param tagset Tagset to use in CclReader + * @param annot_path Path to the file with morphosyntax and chunk-style annotations + * @param rela_path Path to the file with relations + */ + void make_readers( + const Tagset& tagset, + const std::string &annot_path, + const std::string &rela_path); + + // ------------------------------------------------------------------------- + /// Reader created for annot_path (morphosyntax and chunk-style annotations) + boost::shared_ptr<CclReader> ccl_reader_; + + /// Reader created for rela_path (relations) + boost::shared_ptr<RelationReader> rel_reader_; }; } /* end ns Corpus2 */ -- GitLab