From ed82f26b8d49018d8700b8285fb1358da9af34e7 Mon Sep 17 00:00:00 2001 From: Lukasz Bilenkij <lukasz.bilenkij@gmail.com> Date: Mon, 20 Feb 2012 00:46:22 +0100 Subject: [PATCH] handling poliqarp readers --- libcorpus2_whole/io/documentreader.cpp | 17 ++++++++++++++++- libcorpus2_whole/io/documentreader.h | 2 +- libcorpus2_whole/io/poliqarpdocumentreader.cpp | 10 +++++++++- libcorpus2_whole/io/poliqarpdocumentreader.h | 4 ++-- swig/documentreader.i | 3 ++- 5 files changed, 30 insertions(+), 6 deletions(-) diff --git a/libcorpus2_whole/io/documentreader.cpp b/libcorpus2_whole/io/documentreader.cpp index e335a8d..73db0d3 100644 --- a/libcorpus2_whole/io/documentreader.cpp +++ b/libcorpus2_whole/io/documentreader.cpp @@ -25,6 +25,21 @@ or FITNESS FOR A PARTICULAR PURPOSE. namespace Corpus2 { namespace whole{ +DocumentReader::DocumentReader(const Tagset& tagset, const std::string& corpus_type, const std::string& corpus_file_path, const std::string& corpus_reader) + : corpus_type_(corpus_type), tagset_(tagset), corpus_path_(corpus_file_path) +{ + if (corpus_type_ == "document") { + corpus_file.open(corpus_file_path.c_str()); +#ifdef WITH_POLIQARP + } else if (corpus_type_ == "poliqarp") { + reader = boost::shared_ptr<PoliqarpDocumentReader>( + new PoliqarpDocumentReader(tagset_, corpus_path_, corpus_reader)); +#endif + } else { + throw Corpus2Error(corpus_type_ + " is an unknown reader type!"); + } +} + DocumentReader::DocumentReader(const Tagset& tagset, const std::string& corpus_type, const std::string& corpus_file_path) : corpus_type_(corpus_type), tagset_(tagset), corpus_path_(corpus_file_path) { @@ -33,7 +48,7 @@ DocumentReader::DocumentReader(const Tagset& tagset, const std::string& corpus_t #ifdef WITH_POLIQARP } else if (corpus_type_ == "poliqarp") { reader = boost::shared_ptr<PoliqarpDocumentReader>( - new PoliqarpDocumentReader(tagset_, corpus_path_)); + new PoliqarpDocumentReader(tagset_, corpus_path_, "poliqarp")); #endif } else { throw Corpus2Error(corpus_type_ + " is an unknown reader type!"); diff --git a/libcorpus2_whole/io/documentreader.h b/libcorpus2_whole/io/documentreader.h index 9bfe439..760fd67 100644 --- a/libcorpus2_whole/io/documentreader.h +++ b/libcorpus2_whole/io/documentreader.h @@ -11,8 +11,8 @@ namespace whole { class DocumentReader { public: + DocumentReader(const Tagset& tagset, const std::string& corpus_type, const std::string& corpus_file_path, const std::string& corpus_reader); DocumentReader(const Tagset& tagset, const std::string& corpus_type, const std::string& corpus_file_path); - boost::shared_ptr<Document> read(); private: diff --git a/libcorpus2_whole/io/poliqarpdocumentreader.cpp b/libcorpus2_whole/io/poliqarpdocumentreader.cpp index b52ddac..a2d0644 100644 --- a/libcorpus2_whole/io/poliqarpdocumentreader.cpp +++ b/libcorpus2_whole/io/poliqarpdocumentreader.cpp @@ -4,10 +4,17 @@ namespace Corpus2 { namespace whole { +PoliqarpDocumentReader::PoliqarpDocumentReader(const Tagset& tagset, const std::string& corpus_path, const std::string& corpus_reader) + : DocumentReaderI("poliqarp") +{ + this->pqr_ = Corpus2::TokenReader::create_path_reader(corpus_reader, tagset, corpus_path); +} + PoliqarpDocumentReader::PoliqarpDocumentReader(const Tagset& tagset, const std::string& corpus_path) : DocumentReaderI("poliqarp") { - this->pqr_ = boost::shared_ptr<PoliqarpReader>(new PoliqarpReader(tagset, corpus_path)); + + this->pqr_ = Corpus2::TokenReader::create_path_reader("poliqarp", tagset, corpus_path); } boost::shared_ptr<Document> PoliqarpDocumentReader::read() @@ -26,5 +33,6 @@ boost::shared_ptr<Document> PoliqarpDocumentReader::read() return document; } + } // whole ns } // Corpus2 ns diff --git a/libcorpus2_whole/io/poliqarpdocumentreader.h b/libcorpus2_whole/io/poliqarpdocumentreader.h index 58d3318..70cb046 100644 --- a/libcorpus2_whole/io/poliqarpdocumentreader.h +++ b/libcorpus2_whole/io/poliqarpdocumentreader.h @@ -18,8 +18,8 @@ namespace whole { class PoliqarpDocumentReader : public DocumentReaderI { public: + PoliqarpDocumentReader(const Tagset& tagset, const std::string& corpus_path, const std::string& corpus_reader); PoliqarpDocumentReader(const Tagset& tagset, const std::string& corpus_path); - /** * Semantic of this methd is similar to get_next_document from Poliqarp Client * @return nth readed document @@ -35,7 +35,7 @@ public: private: /// Poliqarp reader used for reading Poliqarp corp - boost::shared_ptr<PoliqarpReader> pqr_; + Corpus2::TokenReader::TokenReaderPtr pqr_; }; } // whole ns diff --git a/swig/documentreader.i b/swig/documentreader.i index bafc3ea..a360315 100644 --- a/swig/documentreader.i +++ b/swig/documentreader.i @@ -19,7 +19,8 @@ namespace Corpus2 { namespace whole { class DocumentReader { public: - DocumentReader(const Tagset& tagset, const std::string& corpus_type,const std::string& corpus_file); + DocumentReader(const Tagset& tagset, const std::string& corpus_type,const std::string& corpus_file_path, const std::string& corpus_reader); + DocumentReader(const Tagset& tagset, const std::string& corpus_type,const std::string& corpus_file_path); boost::shared_ptr<Document> read(); }; } // whole ns -- GitLab