From 844c9363bc44bd1fcb102d4f1f51f85ba571eb23 Mon Sep 17 00:00:00 2001
From: michal <michal@kalafior.(none)>
Date: Mon, 27 May 2013 10:12:01 +0200
Subject: [PATCH] fixed bug with reading documents without id - temporary solution, whole DocumentReader class should be rebuilt

---
 libcorpus2_whole/io/documentreader.cpp | 36 +++++++++++++++++++++++++++++++++++-
 libcorpus2_whole/io/documentreader.h   |  4 +++-
 swig/documentreader.i                  |  1 +
 3 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/libcorpus2_whole/io/documentreader.cpp b/libcorpus2_whole/io/documentreader.cpp
index dcdf4f0..c65249d 100644
--- a/libcorpus2_whole/io/documentreader.cpp
+++ b/libcorpus2_whole/io/documentreader.cpp
@@ -73,8 +73,42 @@ boost::shared_ptr<Document> DocumentReader::read()
 	throw Corpus2Error(corpus_type_ + " is an unknown reader type!");
 }
 
+/**
+ * Reads the next document with automatic id generation enabled.
+ *
+ * For the "document" corpus type, one line is taken from the corpus file per
+ * call and handed to a CclRelReader with "autogen_sent_id" and
+ * "autogen_chunk_id" set. When no further line can be read, a Document
+ * constructed with "End" is returned. When built WITH_POLIQARP, the
+ * "poliqarp" corpus type is delegated to the underlying reader unchanged.
+ *
+ * @throws Corpus2Error if the corpus type is not recognised.
+ */
+boost::shared_ptr<Document> DocumentReader::read_with_auto_id()
+{
+	std::string line;
+#ifdef WITH_POLIQARP
+	if (corpus_type_ == "poliqarp") {
+		return this->reader->read();
+	}
+#endif
+	if (corpus_type_ == "document") {
+		if (std::getline(corpus_file, line)) {
+			boost::shared_ptr<CclRelReader> cclrel_reader = get_cclrel_reader(line);
+			// Enable both id-autogeneration options before reading; this is
+			// what lets documents that carry no ids be read.
+			cclrel_reader->set_option("autogen_sent_id");
+			cclrel_reader->set_option("autogen_chunk_id");
+			return cclrel_reader->read();
+		} else {
+			return boost::make_shared<Document>("End");
+		}
+	}
+	throw Corpus2Error(corpus_type_ + " is an unknown reader type!");
+}
+
 
-boost::shared_ptr<DocumentReaderI> DocumentReader::get_cclrel_reader(std::string& line)
+boost::shared_ptr<CclRelReader> DocumentReader::get_cclrel_reader(std::string& line)
 {
 	std::string ann_path, rel_path;
 
diff --git a/libcorpus2_whole/io/documentreader.h b/libcorpus2_whole/io/documentreader.h
index 9dda867..b23a574 100644
--- a/libcorpus2_whole/io/documentreader.h
+++ b/libcorpus2_whole/io/documentreader.h
@@ -20,6 +20,7 @@ or FITNESS FOR A PARTICULAR PURPOSE.
 #include <string>
 #include <libcorpus2_whole/corpus.h>
 #include <libcorpus2_whole/io/reader_i.h>
+#include <libcorpus2_whole/io/cclrelreader.h>
 
 namespace Corpus2 {
 namespace whole {
@@ -30,10 +31,11 @@ public:
 	DocumentReader(const Tagset& tagset, const std::string& corpus_type, const std::string& corpus_file_path, const std::string& corpus_reader);
 	DocumentReader(const Tagset& tagset, const std::string& corpus_type, const std::string& corpus_file_path);
 	boost::shared_ptr<Document> read();
+	boost::shared_ptr<Document> read_with_auto_id();
 
 
 private:
-	boost::shared_ptr<DocumentReaderI> get_cclrel_reader(std::string& line);
+	boost::shared_ptr<CclRelReader> get_cclrel_reader(std::string& line);
 	boost::shared_ptr<DocumentReaderI> reader;
 	std::ifstream corpus_file;
 private:
diff --git a/swig/documentreader.i b/swig/documentreader.i
index a360315..2290e46 100644
--- a/swig/documentreader.i
+++ b/swig/documentreader.i
@@ -22,6 +22,7 @@ namespace whole {
 	DocumentReader(const Tagset& tagset, const std::string& corpus_type,const std::string& corpus_file_path, const std::string& corpus_reader);
 	DocumentReader(const Tagset& tagset, const std::string& corpus_type,const std::string& corpus_file_path);
 	boost::shared_ptr<Document> read();
+	boost::shared_ptr<Document> read_with_auto_id();
 };
 } // whole ns
 } // Corpus2 ns
-- 
GitLab