diff --git a/libcorpus2/io/docreader.cpp b/libcorpus2/io/docreader.cpp index 597190c760b465e9f07fb12b8e5772c2c20f73a0..463ae4fcfead3fc63baef91ea1b2320f4c859ef8 100644 --- a/libcorpus2/io/docreader.cpp +++ b/libcorpus2/io/docreader.cpp @@ -14,9 +14,40 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE and COPYING files for more details. */ -#include <libcorpus2/io/docreader.h> #include <boost/make_shared.hpp> +#include <libcorpus2/io/docreader.h> namespace Corpus2 { + DocumentReader::DocumentReader(const Tagset& tagset, + const std::string &annot_path, const std::string &rela_path) + { + make_readers(tagset, annot_path, rela_path); + } + + void DocumentReader::make_readers(const Tagset& tagset, + const std::string &annot_path, const std::string &rela_path) + { + ccl_reader_ = boost::make_shared<CclReader>(tagset, annot_path); + rel_reader_ = boost::make_shared<RelationReader>(rela_path); + } + + boost::shared_ptr<Document> DocumentReader::read() + { + boost::shared_ptr<Chunk> chunk; + boost::shared_ptr<Document> document = boost::make_shared<Document>(); + + // Add each chunk from ccl_reader_ as a paragraph until a null chunk is returned; rel_reader_ is not used here + while (1) { + chunk = ccl_reader_->get_next_chunk(); + if (!chunk) { + break; + } + else { + document->add_paragraph(chunk); + } + } + + return document; + } } /* end ns Corpus2 */ diff --git a/libcorpus2/io/docreader.h b/libcorpus2/io/docreader.h index 632def65ca2d630301f04ffab5aa5c07b618d9ef..b769c567ebf0070482385fce1c63cec01ddf88ad 100644 --- a/libcorpus2/io/docreader.h +++ b/libcorpus2/io/docreader.h @@ -17,7 +17,11 @@ or FITNESS FOR A PARTICULAR PURPOSE. 
#ifndef LIBCORPUS2_DOCREADER_H #define LIBCORPUS2_DOCREADER_H -#include <libcorpus2/io/reader.h> +#include <libcorpus2/document.h> +#include <libcorpus2/io/cclreader.h> +#include <libcorpus2/io/relreader.h> + +#include <boost/shared_ptr.hpp> namespace Corpus2 { @@ -32,11 +36,39 @@ public: * chunk-style annotations are read from annot_path, while relations * between chunk-style annotations are read from rela_path. * Both path may in particular point to the same path. - * TODO! + * @param tagset Tagset to use + * @param annot_path Path to file with morphosyntax and chunk-style annotations + * @param rela_path path to file with relations */ - DocumentReader(const std::string &annot_path, - const std::string &rela_path, - const std::string &rdr_class_id = "ccl"); + DocumentReader(const Tagset& tagset, + const std::string &annot_path, + const std::string &rela_path); + + /** + * Reads the document: every chunk of the file with morphosyntax and chunk-style + * annotations becomes a paragraph (the current implementation does not consult the relation reader). + * @return Pointer to the Document that was read + */ + boost::shared_ptr<Document> read(); + +private: + /** + * Creates the CclReader and the RelationReader for the given file paths. + * @param tagset Tagset to use in CclReader + * @param annot_path Path to file with morphosyntax and chunk-style annotations + * @param rela_path Path to file with relations + */ + void make_readers( + const Tagset& tagset, + const std::string &annot_path, + const std::string &rela_path); + + // ------------------------------------------------------------------------- + /// Reader of morphosyntax and chunk-style annotations (built from tagset and annot_path) + boost::shared_ptr<CclReader> ccl_reader_; + + /// Reader of relations (built from rela_path) + boost::shared_ptr<RelationReader> rel_reader_; }; } /* end ns Corpus2 */