diff --git a/libcorpus2_whole/io/documentreader.cpp b/libcorpus2_whole/io/documentreader.cpp
index dcdf4f0b629c18cd8aff5647029cfd0b10b0bde6..c65249dbc056c11e3f05cb650bdc2fee700e6488 100644
--- a/libcorpus2_whole/io/documentreader.cpp
+++ b/libcorpus2_whole/io/documentreader.cpp
@@ -73,8 +73,44 @@ boost::shared_ptr<Document> DocumentReader::read()
 	throw Corpus2Error(corpus_type_ + " is an unknown reader type!");
 }
 
+/**
+ * Reads the next document with the "autogen_sent_id" and
+ * "autogen_chunk_id" reader options set.
+ *
+ * With the "poliqarp" corpus type (only when built with WITH_POLIQARP) the
+ * call is forwarded to the stored reader as-is. With the "document" corpus
+ * type one line is taken from the corpus file and passed to
+ * get_cclrel_reader(); both options are set on the reader it returns
+ * before reading.
+ *
+ * @return the document read, or a Document constructed with "End" when no
+ *         further line can be read from the corpus file
+ * @throws Corpus2Error for any other corpus type
+ */
+boost::shared_ptr<Document> DocumentReader::read_with_auto_id()
+{
+	std::string line;
+#ifdef WITH_POLIQARP
+	if (corpus_type_ == "poliqarp") {
+		return this->reader->read();
+	}
+#endif
+	if (corpus_type_ == "document") {
+		if (std::getline(corpus_file, line)) {
+			boost::shared_ptr<CclRelReader> cclrel_reader = get_cclrel_reader(line);
+			// Both autogen_* options have to be set (not queried) before reading.
+			cclrel_reader->set_option("autogen_sent_id");
+			cclrel_reader->set_option("autogen_chunk_id");
+			return cclrel_reader->read();
+		} else {
+			return boost::make_shared<Document>("End");
+		}
+	}
+	throw Corpus2Error(corpus_type_ + " is an unknown reader type!");
+}
+
 
-boost::shared_ptr<DocumentReaderI> DocumentReader::get_cclrel_reader(std::string& line)
+boost::shared_ptr<CclRelReader> DocumentReader::get_cclrel_reader(std::string& line)
 {
 	std::string ann_path, rel_path;
 
diff --git a/libcorpus2_whole/io/documentreader.h b/libcorpus2_whole/io/documentreader.h
index 9dda86768a7155c7c685f4ee764b020d41601ad6..b23a5746b0cb9931ebb5ee4c510472e7c4f814e2 100644
--- a/libcorpus2_whole/io/documentreader.h
+++ b/libcorpus2_whole/io/documentreader.h
@@ -20,6 +20,7 @@ or FITNESS FOR A PARTICULAR PURPOSE.
#include <string> #include <libcorpus2_whole/corpus.h> #include <libcorpus2_whole/io/reader_i.h> +#include <libcorpus2_whole/io/cclrelreader.h> namespace Corpus2 { namespace whole { @@ -30,10 +31,11 @@ public: DocumentReader(const Tagset& tagset, const std::string& corpus_type, const std::string& corpus_file_path, const std::string& corpus_reader); DocumentReader(const Tagset& tagset, const std::string& corpus_type, const std::string& corpus_file_path); boost::shared_ptr<Document> read(); + boost::shared_ptr<Document> read_with_auto_id(); private: - boost::shared_ptr<DocumentReaderI> get_cclrel_reader(std::string& line); + boost::shared_ptr<CclRelReader> get_cclrel_reader(std::string& line); boost::shared_ptr<DocumentReaderI> reader; std::ifstream corpus_file; private: diff --git a/swig/documentreader.i b/swig/documentreader.i index a36031549f465b57a16b057f214ab394993cd929..2290e4621edbe065765ecb6483525c4022318ffe 100644 --- a/swig/documentreader.i +++ b/swig/documentreader.i @@ -22,6 +22,7 @@ namespace whole { DocumentReader(const Tagset& tagset, const std::string& corpus_type,const std::string& corpus_file_path, const std::string& corpus_reader); DocumentReader(const Tagset& tagset, const std::string& corpus_type,const std::string& corpus_file_path); boost::shared_ptr<Document> read(); + boost::shared_ptr<Document> read_with_auto_id(); }; } // whole ns } // Corpus2 ns