diff --git a/libcorpus2_whole/CMakeLists.txt b/libcorpus2_whole/CMakeLists.txt index 004e1a12d19ed1ee1a5c1347e13494d6dd067c16..e4f6ca987c7e19c00b239ed53ac4f8f4f859f433 100644 --- a/libcorpus2_whole/CMakeLists.txt +++ b/libcorpus2_whole/CMakeLists.txt @@ -9,11 +9,13 @@ SET(libcorpus2_whole_SRC corpus.cpp document.cpp relation.cpp - io/corpusreader.cpp - io/docreader.cpp - io/docreaderi.h - io/poliqarpdocumentreader.cpp + io/reader_i.h io/relreader.cpp + io/documentreader.cpp + io/documentcorpusreader.cpp + io/poliqarpdocumentreader.cpp + io/poliqarpcorpusreader.cpp + io/corpusreader.cpp ) file(GLOB_RECURSE INCS "*.h") diff --git a/libcorpus2_whole/corpus.cpp b/libcorpus2_whole/corpus.cpp index 06df2f9a0f768e609747b1edc2b0d3dd0f7826c6..00fb0ffe2e979a485945ed2ea23eb04d8da3f715 100644 --- a/libcorpus2_whole/corpus.cpp +++ b/libcorpus2_whole/corpus.cpp @@ -21,7 +21,7 @@ namespace whole{ Corpus::Corpus(const std::string name) : name_(name), documents_() { - this->current_document_ = documents_.begin(); + set_ = false; } } // whole ns diff --git a/libcorpus2_whole/corpus.h b/libcorpus2_whole/corpus.h index c37c60ec65647ed06bb3bad69af8e2dd9e8ba849..225018ca1f3412388f61159a8650c61266c60a56 100644 --- a/libcorpus2_whole/corpus.h +++ b/libcorpus2_whole/corpus.h @@ -49,12 +49,29 @@ public: } /// Next document in corpus - /// @todo I don't know if it'll be working... It should be tested! boost::shared_ptr<Document> next_document() { - return *(current_document_++); + static boost::shared_ptr<Document> _empty_doc_ptr; + if (!this->set_) { + if (this->documents_.size() == 0) { + return _empty_doc_ptr; + } + this->set_ = true; + this->current_document_ = documents_.begin(); + } + else { + current_document_++; + } + + if (current_document_ == documents_.end()) { + return _empty_doc_ptr; + } + + return *current_document_; } private: + bool set_; + /// Corpus name const std::string name_; diff --git a/libcorpus2_whole/document.cpp b/libcorpus2_whole/document.cpp index 889fbf2ffbb0dde38a2bae92f12cfbb25ba9439c..9b391b84fd0101f544a79884efdcd13593e53c9f 100644 --- a/libcorpus2_whole/document.cpp +++ b/libcorpus2_whole/document.cpp @@ -18,9 +18,10 @@ or FITNESS FOR A PARTICULAR PURPOSE. #include <boost/make_shared.hpp> namespace Corpus2 { +namespace whole { -Document::Document() - : paragraphs_(), relations_() +Document::Document(const std::string& path) + : paragraphs_(), relations_(), path_(path) { } @@ -28,4 +29,5 @@ Document::~Document() { } -} /* end ns Corpus2 */ +} // whole ns +} // Corpus2 ns diff --git a/libcorpus2_whole/document.h b/libcorpus2_whole/document.h index 0401386dfc451cf8f07ed8fe31bfb5f278dc81ec..d02a043cd39c2e57fecc16064236cb440160662e 100644 --- a/libcorpus2_whole/document.h +++ b/libcorpus2_whole/document.h @@ -22,6 +22,7 @@ or FITNESS FOR A PARTICULAR PURPOSE. #include <boost/shared_ptr.hpp> namespace Corpus2 { +namespace whole { /** * A whole document, consisting of consecutive paragraphs ("chunks"), being @@ -32,7 +33,10 @@ namespace Corpus2 { class Document { public: - Document(); + /** + * Path to file, if not set, then default is empty + */ + Document(const std::string& path = ""); ~Document(); /// Adds paragraphs to document @@ -55,14 +59,23 @@ public: return relations_; } + /// Returns path to the document + const std::string& path() const { + return path_; + } + protected: /// Paragraphs in document - std::vector< boost::shared_ptr<Chunk> > paragraphs_; + std::vector<boost::shared_ptr<Chunk> > paragraphs_; /// Relations in document - std::vector< boost::shared_ptr<Relation> > relations_; + std::vector<boost::shared_ptr<Relation> > relations_; + + /// Path to the file (if it's not a file, then is empty) + const std::string path_; }; -} /* end ns Corpus2 */ +} // whole ns +} // Corpus2 ns #endif // LIBCORPUS2_WHOLE_DOCUMENT_H diff --git a/libcorpus2_whole/io/corpusreader.cpp b/libcorpus2_whole/io/corpusreader.cpp index 05485563f62a7aa9db96a6d4bfb6139ad2fd6a03..56538ad5115a7ce386243bb1214378502f634a99 100644 --- a/libcorpus2_whole/io/corpusreader.cpp +++ b/libcorpus2_whole/io/corpusreader.cpp @@ -1,71 +1,37 @@ -#include <fstream> -#include <boost/algorithm/string.hpp> - -#include <libcorpus2/exception.h> #include <libcorpus2_whole/io/corpusreader.h> +#include <libcorpus2_whole/io/poliqarpcorpusreader.h> +#include <libcorpus2_whole/io/documentcorpusreader.h> namespace Corpus2 { -namespace whole { +namespace whole{ CorpusReader::CorpusReader(const Tagset& tagset, const std::string& corpus_type) - : tagset_(tagset), corpus_type_(corpus_type) + : corpus_type_(corpus_type), tagset_(tagset) { // } boost::shared_ptr<Corpus> CorpusReader::read(const std::string& corpus_file_path) { - std::string line; - std::ifstream corpus_file(corpus_file_path.c_str()); - if (!corpus_file) { - throw Corpus2Error(corpus_file_path + " file not found!"); + try { + boost::shared_ptr<CorpusReaderI> reader = this->get_corpus_reader_by_type(); + return reader->read(corpus_file_path); + } catch(...) { + throw; } - - boost::shared_ptr<Corpus> corpus = boost::make_shared<Corpus>(corpus_file_path); - - while(getline(corpus_file, line)) { - boost::shared_ptr<DocumentReaderI> reader; - std::string ann_path, rel_path; - // split line by semicolon - std::vector<std::string> splitted_line; - boost::split(splitted_line, line, boost::is_any_of(";")); - if (splitted_line.empty()) { - continue; - } - else if (splitted_line.size() == 1) { - ann_path = splitted_line[0]; - rel_path = ""; - } - else { - ann_path = splitted_line[0]; - rel_path = splitted_line[1]; - } - reader = this->get_reader_by_type(this->corpus_type_, ann_path, rel_path); - boost::shared_ptr<Document> document = reader->read(); - corpus->add_document(document); - } - - return corpus; } // -boost::shared_ptr<DocumentReaderI> CorpusReader::get_reader_by_type( - const std::string &type, - const std::string &ann_path, - const std::string &rel_path) +boost::shared_ptr<CorpusReaderI> CorpusReader::get_corpus_reader_by_type() { - /*if (type == "poliqarp") { - static boost::shared_ptr<PoliqarpDocumentReader> pq_doc_reader; - if (!pq_doc_reader) { - pq_doc_reader = boost::shared_ptr<PoliqarpDocumentReader>( - new PoliqarpDocumentReader(this->tagset_, ann_path)); - } - return pq_doc_reader; - } else */if (type == "document") { - return boost::shared_ptr<DocumentReader>( - new DocumentReader(this->tagset_, ann_path, rel_path)); + if (corpus_type_ == "poliqarp") { + return boost::shared_ptr<PoliqarpCorpusReader>( + new PoliqarpCorpusReader(tagset_)); + } else if (corpus_type_ == "document") { + return boost::shared_ptr<DocumentCorpusReader>( + new DocumentCorpusReader(tagset_)); } - throw Corpus2Error(type + " is unknown reader type!"); + throw Corpus2Error(corpus_type_ + " is unknown reader type!"); } } // whole ns diff --git a/libcorpus2_whole/io/corpusreader.h b/libcorpus2_whole/io/corpusreader.h index fcf0779910a1feedbbb0c581396b04e8e142d1c5..4308b3591f640d35d50ab08a9d24f303e908745e 100644 --- a/libcorpus2_whole/io/corpusreader.h +++ b/libcorpus2_whole/io/corpusreader.h @@ -2,22 +2,18 @@ #define LIBCORPUS2_WHOLE_CORPUSREADER_H #include <string> -#include <boost/shared_ptr.hpp> - -#include <libcorpus2_whole/io/docreaderi.h> -#include <libcorpus2_whole/io/docreader.h> #include <libcorpus2_whole/corpus.h> +#include <libcorpus2_whole/io/reader_i.h> namespace Corpus2 { namespace whole { -/** - * CorpusReader is a corpus-like reader - */ class CorpusReader { public: /** + * Constructor of corpus reader. Here must be set type of the corpus, which + * will be reading. * @arg corpus_type may be: * - document (contains relations) * - poliqarp @@ -35,19 +31,16 @@ public: */ boost::shared_ptr<Corpus> read(const std::string& corpus_file); -protected: - /// Tagset to use, sets only onece in constructor - const Tagset& tagset_; +private: + /// Returns reader based on corpus type (poliqarp/document) + boost::shared_ptr<CorpusReaderI> get_corpus_reader_by_type(); +private: /// Type of corpus, sets only once in constructor - const std::string& corpus_type_; + const std::string corpus_type_; -private: - /// Returns reader based on corpus type (poliqarp/document) - boost::shared_ptr<DocumentReaderI> get_reader_by_type( - const std::string &type, - const std::string &ann_path, - const std::string &rel_path = ""); + /// Tagset to use, sets only onece in constructor + const Tagset& tagset_; }; } // whole ns diff --git a/libcorpus2_whole/io/documentcorpusreader.cpp b/libcorpus2_whole/io/documentcorpusreader.cpp new file mode 100644 index 0000000000000000000000000000000000000000..12d035904f1e8113c28b1fff413df38bd6264872 --- /dev/null +++ b/libcorpus2_whole/io/documentcorpusreader.cpp @@ -0,0 +1,55 @@ +#include <fstream> +#include <boost/algorithm/string.hpp> + +#include <libcorpus2/exception.h> +#include <libcorpus2_whole/io/documentcorpusreader.h> +#include <libcorpus2_whole/io/documentreader.h> + +namespace Corpus2 { +namespace whole { + +DocumentCorpusReader::DocumentCorpusReader(const Tagset& tagset) : tagset_(tagset) +{ + // +} + +boost::shared_ptr<Corpus> DocumentCorpusReader::read(const std::string& corpus_file_path) +{ + std::string line; + std::string ann_path, rel_path; + boost::shared_ptr<DocumentReader> doc_reader; + + std::ifstream corpus_file(corpus_file_path.c_str()); + if (!corpus_file) { + throw Corpus2Error(corpus_file_path + " file not found!"); + } + + boost::shared_ptr<Corpus> corpus = boost::make_shared<Corpus>(corpus_file_path); + while(getline(corpus_file, line)) { + // split line by semicolon + std::vector<std::string> splitted_line; + boost::split(splitted_line, line, boost::is_any_of(";")); + + if (splitted_line.empty()) { + // maybe exception? + continue; + } + else if (splitted_line.size() == 1) { + // maybe exception? + continue; + } + + ann_path = splitted_line[0]; + rel_path = splitted_line[1]; + + doc_reader = boost::shared_ptr<DocumentReader>( + new DocumentReader(this->tagset_, ann_path, rel_path)); + + corpus->add_document(doc_reader->read()); + } + + return corpus; +} + +} // whole ns +} // Corpus2 ns diff --git a/libcorpus2_whole/io/documentcorpusreader.h b/libcorpus2_whole/io/documentcorpusreader.h new file mode 100644 index 0000000000000000000000000000000000000000..642639aeca71dcba211f34a0672429bb78fd2f9e --- /dev/null +++ b/libcorpus2_whole/io/documentcorpusreader.h @@ -0,0 +1,46 @@ +#ifndef LIBCORPUS2_WHOLE_DOCUMENTCORPUSREADER_H +#define LIBCORPUS2_WHOLE_DOCUMENTCORPUSREADER_H + +#include <string> +#include <boost/shared_ptr.hpp> +#include <libcorpus2_whole/corpus.h> +#include <libcorpus2_whole/io/reader_i.h> + +namespace Corpus2 { +namespace whole { + +/** + * Reads "document-like cropus" + * Reads all documents in given file and returns their Corpus representation + */ +class DocumentCorpusReader : public CorpusReaderI +{ +public: + DocumentCorpusReader(const Tagset& tagset); + + /** + * Reads corpus from given path + * @arg corpus_file Path to file contains paths to corpus files. + * Each lines in given corpus file contains two paths, to relations + * and annotatons (in one line, first is path to annotations + * and second are relations -- these paths, should be separated + * by semicolon) + * @return Readed corpus + */ + boost::shared_ptr<Corpus> read(const std::string& corpus_file); + + /** + * Sets options for readers (relation reader and/or ccl reader). + * Available options: + * - autogen_sent_id -- for automatically generation identifiers of sentences + */ + void set_option(const std::string& option); + +private: + const Tagset& tagset_; +}; + +} // whole ns +} // Corpus2 ns + +#endif // LIBCORPUS2_WHOLE_DOCUMENTCORPUSREADER_H diff --git a/libcorpus2_whole/io/docreader.cpp b/libcorpus2_whole/io/documentreader.cpp similarity index 88% rename from libcorpus2_whole/io/docreader.cpp rename to libcorpus2_whole/io/documentreader.cpp index 4fba578d24e019da34d39b129e6c521400859a18..8aa1a79a421c4e08add2b75b48040fed6ad974af 100644 --- a/libcorpus2_whole/io/docreader.cpp +++ b/libcorpus2_whole/io/documentreader.cpp @@ -15,14 +15,16 @@ or FITNESS FOR A PARTICULAR PURPOSE. */ #include <boost/make_shared.hpp> -#include <libcorpus2_whole/io/docreader.h> +#include <libcorpus2_whole/io/documentreader.h> namespace Corpus2 { +namespace whole { DocumentReader::DocumentReader(const Tagset& tagset, const std::string &annot_path, const std::string &rela_path) : DocumentReaderI("document") { make_readers(tagset, annot_path, rela_path); + make_id_doc(annot_path, rela_path); } void DocumentReader::make_readers(const Tagset& tagset, @@ -32,10 +34,16 @@ namespace Corpus2 { rel_reader_ = boost::make_shared<RelationReader>(rela_path); } + void DocumentReader::make_id_doc(const std::string &annot_path, + const std::string &rela_path) + { + id_ = (annot_path + ";" + rela_path); + } + boost::shared_ptr<Document> DocumentReader::read() { boost::shared_ptr<Chunk> chunk; - boost::shared_ptr<Document> document = boost::make_shared<Document>(); + boost::shared_ptr<Document> document = boost::make_shared<Document>(id_); // Read ccl document and makes document while (1) { @@ -77,4 +85,5 @@ namespace Corpus2 { return ""; } -} /* end ns Corpus2 */ +} // whole ns +} // Corpus2 ns diff --git a/libcorpus2_whole/io/docreader.h b/libcorpus2_whole/io/documentreader.h similarity index 86% rename from libcorpus2_whole/io/docreader.h rename to libcorpus2_whole/io/documentreader.h index acbb595f84ab0d872c8345ea3b82aaf106aea677..8b092de21caf029a4e706fd23e606585d041512c 100644 --- a/libcorpus2_whole/io/docreader.h +++ b/libcorpus2_whole/io/documentreader.h @@ -14,18 +14,19 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE and COPYING files for more details. */ -#ifndef LIBCORPUS2_WHOLE__DOCREADER_H -#define LIBCORPUS2_WHOLE__DOCREADER_H +#ifndef LIBCORPUS2_WHOLE_DOCREADER_H +#define LIBCORPUS2_WHOLE_DOCREADER_H +#include <libcorpus2/io/cclreader.h> #include <libcorpus2_whole/relation.h> #include <libcorpus2_whole/document.h> -#include <libcorpus2_whole/io/docreaderi.h> -#include <libcorpus2/io/cclreader.h> #include <libcorpus2_whole/io/relreader.h> +#include <libcorpus2_whole/io/reader_i.h> #include <boost/shared_ptr.hpp> namespace Corpus2 { +namespace whole { /** * A reader for whole documents. Note that a whole document is read into memory @@ -77,13 +78,25 @@ private: const std::string &annot_path, const std::string &rela_path); + /** + * Based on given paths (annotations and relations) makes document identifier + * Document identifier is set to id_ class-state + */ + void make_id_doc(const std::string &annot_path, + const std::string &rela_path); + // ------------------------------------------------------------------------- /// Pointer to CclReader boost::shared_ptr<CclReader> ccl_reader_; /// Pointer to RelationReader boost::shared_ptr<RelationReader> rel_reader_; + + /// Future document identifier + std::string id_; }; -} /* end ns Corpus2 */ + +} // whole ns +} // Corpus2 ns #endif // LIBCORPUS2_WHOLE_DOCREADER_H diff --git a/libcorpus2_whole/io/poliqarpcorpusreader.cpp b/libcorpus2_whole/io/poliqarpcorpusreader.cpp new file mode 100644 index 0000000000000000000000000000000000000000..acc6e36288486fed014b25c1ef412c090d8eb776 --- /dev/null +++ b/libcorpus2_whole/io/poliqarpcorpusreader.cpp @@ -0,0 +1,31 @@ +#include <libcorpus2_whole/io/poliqarpcorpusreader.h> + +namespace Corpus2 { +namespace whole{ + +PoliqarpCorpusReader::PoliqarpCorpusReader(const Tagset& tagset) + : tagset_(tagset) +{ + // +} + +boost::shared_ptr<Corpus> PoliqarpCorpusReader::read(const std::string& corpus_file) +{ + boost::shared_ptr<Document> doc; + boost::shared_ptr<Corpus> corpus = boost::make_shared<Corpus>(corpus_file); + + this->pq_doc_reader_ = boost::shared_ptr<PoliqarpDocumentReader>( + new PoliqarpDocumentReader(tagset_, corpus_file)); + + while (1) { + if (!(doc = this->pq_doc_reader_->read())) { + break; + } + corpus->add_document(doc); + } + + return corpus; +} + +} // whole ns +} // Corpus2 ns diff --git a/libcorpus2_whole/io/poliqarpcorpusreader.h b/libcorpus2_whole/io/poliqarpcorpusreader.h new file mode 100644 index 0000000000000000000000000000000000000000..c5134b3fa839daae449e6bc0d09dda23b72e89da --- /dev/null +++ b/libcorpus2_whole/io/poliqarpcorpusreader.h @@ -0,0 +1,28 @@ +#ifndef LIBCORPUS2_WHOLE_POLIQARPCORPUSREADER_H +#define LIBCORPUS2_WHOLE_POLIQARPCORPUSREADER_H + +#include <string> +#include <boost/shared_ptr.hpp> +#include <libcorpus2_whole/corpus.h> +#include <libcorpus2_whole/io/poliqarpdocumentreader.h> +#include <libcorpus2_whole/io/reader_i.h> + +namespace Corpus2 { +namespace whole { + +class PoliqarpCorpusReader : public CorpusReaderI +{ +public: + PoliqarpCorpusReader(const Tagset& tagset); + + boost::shared_ptr<Corpus> read(const std::string& corpus_file); + +private: + const Tagset& tagset_; + boost::shared_ptr<PoliqarpDocumentReader> pq_doc_reader_; +}; + +} // whole ns +} // Corpus2 ns + +#endif // LIBCORPUS2_WHOLE_POLIQARPCORPUSREADER_H diff --git a/libcorpus2_whole/io/poliqarpdocumentreader.cpp b/libcorpus2_whole/io/poliqarpdocumentreader.cpp index a302340b593a656ef573431a78420d82c17caf3a..8e41832d758893389129d36b011aebaba4e28173 100644 --- a/libcorpus2_whole/io/poliqarpdocumentreader.cpp +++ b/libcorpus2_whole/io/poliqarpdocumentreader.cpp @@ -1,21 +1,24 @@ #include <libcorpus2_whole/io/poliqarpdocumentreader.h> +#include <boost/scoped_ptr.hpp> namespace Corpus2 { namespace whole { PoliqarpDocumentReader::PoliqarpDocumentReader(const Tagset& tagset, const std::string& corpus_path) - : DocumentReaderI("poliqarp"), corpus_path_(corpus_path) + : DocumentReaderI("poliqarp") { - this->pqr_ = boost::shared_ptr<PoliqarpReader> (new PoliqarpReader(tagset, corpus_path)); + this->pqr_ = boost::shared_ptr<PoliqarpReader>(new PoliqarpReader(tagset, corpus_path)); } boost::shared_ptr<Document> PoliqarpDocumentReader::read() { - boost::shared_ptr<Document> document = boost::make_shared<Document>(); - // boost::shared_ptr<Chunk> chunk = this->pqr_->pq_->get_next_sentence(); -// if (chunk) { -// document->add_paragraph(chunk); -// } + boost::shared_ptr<Document> document; + boost::shared_ptr<Chunk> chunk = this->pqr_->get_next_chunk(); + + if (chunk) { + document = boost::make_shared<Document>(); + document->add_paragraph(chunk); + } return document; } diff --git a/libcorpus2_whole/io/poliqarpdocumentreader.h b/libcorpus2_whole/io/poliqarpdocumentreader.h index f59ac67b730faf5402eaf5e32bdd125dbf5ecd5f..58d33183feedf9763aeda12be1ef218550f50899 100644 --- a/libcorpus2_whole/io/poliqarpdocumentreader.h +++ b/libcorpus2_whole/io/poliqarpdocumentreader.h @@ -2,22 +2,39 @@ #define LIBCORPUS2_WHOLE_POLIQARPDOCUMENTREADER_H #include <poliqarp/pqreader.h> -#include <libcorpus2_whole/io/docreaderi.h> #include <libcorpus2_whole/document.h> +#include <libcorpus2_whole/io/reader_i.h> namespace Corpus2 { namespace whole { +/** + * Wrapper for PoliqarpReader. + * Can be used as "document" reader for Poliqarp corpus. Method read() retuns + * document from given corpus path. Behavior of this method is similar to + * get_next_document() from Poliqarp Client. First call of read() gives first + * document in corpus, nth call of read() gives nth document from corpus... + */ class PoliqarpDocumentReader : public DocumentReaderI { public: PoliqarpDocumentReader(const Tagset& tagset, const std::string& corpus_path); - /// semantic of this methd is like get_next_document + /** + * Semantic of this methd is similar to get_next_document from Poliqarp Client + * @return nth readed document + */ boost::shared_ptr<Document> read(); + /** + * Sets options for readers (relation reader and/or ccl reader). + * Available options: + * - autogen_sent_id -- for automatically generation identifiers of sentences + */ + void set_option(const std::string& option); + private: - const std::string corpus_path_; + /// Poliqarp reader used for reading Poliqarp corp boost::shared_ptr<PoliqarpReader> pqr_; }; diff --git a/libcorpus2_whole/io/docreaderi.h b/libcorpus2_whole/io/reader_i.h similarity index 73% rename from libcorpus2_whole/io/docreaderi.h rename to libcorpus2_whole/io/reader_i.h index b7a8afc8ebdf7312caf2bdc7f73f3a8d13760784..e3ba688aa0d0f0aad7703cd54d8016f838a55377 100644 --- a/libcorpus2_whole/io/docreaderi.h +++ b/libcorpus2_whole/io/reader_i.h @@ -2,11 +2,13 @@ #define READERI_H #include <string> +#include <boost/shared_ptr.hpp> #include <libcorpus2_whole/document.h> -#include <boost/shared_ptr.hpp> +#include <libcorpus2_whole/corpus.h> namespace Corpus2 { +namespace whole { /** * Reader interface, @@ -26,6 +28,13 @@ protected: const std::string type_; }; -} +class CorpusReaderI +{ +public: + virtual boost::shared_ptr<Corpus> read(const std::string& corpus_file) = 0; +}; + +} // whole ns +} // Corpus2 ns #endif // READERI_H diff --git a/libcorpus2_whole/io/relreader.cpp b/libcorpus2_whole/io/relreader.cpp index f683046072c16d4ea15c52d03f7fe6450cfa9f87..c94ac6eea32d1bb7a5a07825ea7d6cb0cae706dc 100644 --- a/libcorpus2_whole/io/relreader.cpp +++ b/libcorpus2_whole/io/relreader.cpp @@ -22,6 +22,8 @@ or FITNESS FOR A PARTICULAR PURPOSE. #include <boost/make_shared.hpp> namespace Corpus2 { +namespace whole { + RelationReader::RelationReader(const std::string &rela_path) : rela_path_(rela_path) { @@ -197,4 +199,5 @@ std::string RelationReader::get_attribute_value( return ""; } -} /* end ns Corpus2 */ +} // whole ns +} // Corpus2 ns diff --git a/libcorpus2_whole/io/relreader.h b/libcorpus2_whole/io/relreader.h index 225c0b40ae9011621af704957ab47aa111040abf..bb01e19dab5c9fce7e4eeb68b1b1cfe78a29e1f7 100644 --- a/libcorpus2_whole/io/relreader.h +++ b/libcorpus2_whole/io/relreader.h @@ -27,6 +27,8 @@ or FITNESS FOR A PARTICULAR PURPOSE. #include <iostream> namespace Corpus2 { +namespace whole { + const static std::string RELATION_TAG = "rel"; const static std::string RELATIONS_TAG = "relations"; const static std::string RELATION_DIRECT_FROM = "from"; @@ -118,6 +120,8 @@ private: boost::shared_ptr<DirectionPoint> rel_from_; boost::shared_ptr<DirectionPoint> rel_to_; }; -} /* end ns Corpus2 */ + +} // whole ns +} // Corpus2 ns #endif // LIBCORPUS2_WHOLE_RELREADER_H diff --git a/libcorpus2_whole/relation.cpp b/libcorpus2_whole/relation.cpp index 163a1a6f5338c3025701470e5ee69c108c4624ca..d1df54f9f529ece1a6bd4446d23f64bec7785c47 100644 --- a/libcorpus2_whole/relation.cpp +++ b/libcorpus2_whole/relation.cpp @@ -18,6 +18,7 @@ or FITNESS FOR A PARTICULAR PURPOSE. #include <libcorpus2_whole/relation.h> namespace Corpus2 { +namespace whole { Relation::Relation(const std::string& name, const boost::shared_ptr<const DirectionPoint> from, @@ -39,4 +40,5 @@ Relation::~Relation() { } -} /* end ns Corpus2 */ +} // whole ns +} // Corpus2 ns diff --git a/libcorpus2_whole/relation.h b/libcorpus2_whole/relation.h index aea36809f85fd17ccf41bc7116aedb0c10ee7e80..8e8ed1d0d49baaa3d1c97312cf8711e912587691 100644 --- a/libcorpus2_whole/relation.h +++ b/libcorpus2_whole/relation.h @@ -21,6 +21,7 @@ or FITNESS FOR A PARTICULAR PURPOSE. #include <boost/shared_ptr.hpp> namespace Corpus2 { +namespace whole { /** * Helper class to represent one of two point of direction in any relation. @@ -117,6 +118,7 @@ private: const boost::shared_ptr<const DirectionPoint> to_; }; -} /* end ns Corpus2 */ +} // whole ns +} // Corpus2 ns #endif // LIBCORPUS2_RELATIONT_H diff --git a/swig/corpus.i b/swig/corpus.i new file mode 100644 index 0000000000000000000000000000000000000000..4d3f8df0cfcfcb59afc32ec46f94753beaaedc48 --- /dev/null +++ b/swig/corpus.i @@ -0,0 +1,33 @@ +#ifndef SWIG_LIBCORPUS2_CORPUS_I +#define SWIG_LIBCORPUS2_CORPUS_I + +%module libcorpuscorpus +%{ + #include <libcorpus2_whole/corpus.h> +%} + +%include "std_defs.i" +%include "document.i" + +%template(CorpusPtr) boost::shared_ptr<Corpus2::whole::Corpus>; +%template(ConstCorpusPtr) boost::shared_ptr<const Corpus2::whole::Corpus>; + +namespace Corpus2 { +namespace whole { + class Corpus { + public: + Corpus(const std::string name = ""); + + void add_document(boost::shared_ptr<Document> document); + + boost::shared_ptr<Document> next_document(); + const std::vector<boost::shared_ptr<Document> > documents() const; + }; +} // whole ns +} // Corpus2 ns + +using namespace std; +using namespace Corpus2; +using namespace Corpus2::whole; + +#endif /* SWIG_LIBCORPUS2_CORPUS_I */ diff --git a/swig/corpus2.i b/swig/corpus2.i index b925343a152e6c2e05c8aac41b7588b225e4c181..40b5f856e1c06c82cd648a45e0fb30cd2b54105f 100644 --- a/swig/corpus2.i +++ b/swig/corpus2.i @@ -10,13 +10,9 @@ %include "annotationchannel.i" %include "annotationview.i" %include "chunk.i" -%include "document.i" -%include "documentreader.i" %include "iob.i" %include "lexeme.i" %include "libpwrnlperror.i" -%include "relation.i" -%include "relationreader.i" %include "sentence.i" %include "tag.i" %include "tagging.i" @@ -27,6 +23,13 @@ %include "tokenreader.i" %include "tokenwriter.i" +%include "relation.i" +%include "document.i" +%include "corpus.i" +%include "relationreader.i" +%include "documentreader.i" +%include "corpusreader.i" + %{ #include <libcorpus2/util/settings.h> static void set_verbose(bool v) { diff --git a/swig/corpusreader.i b/swig/corpusreader.i new file mode 100644 index 0000000000000000000000000000000000000000..7f5bddf0ee419c713eb9119c580408a2bdb81417 --- /dev/null +++ b/swig/corpusreader.i @@ -0,0 +1,32 @@ +#ifndef SWIG_LIBCORPUS2_CORPUS_READER_I +#define SWIG_LIBCORPUS2_CORPUS_READER_I + +%module libcorpusdocument +%{ + #include <libcorpus2_whole/io/corpusreader.h> +%} + +%include "std_defs.i" +%include "tagset.i" +%include "corpus.i" + +%template(CorpusReaderPtr) boost::shared_ptr<Corpus2::whole::CorpusReader>; +%template(ConstCorpusReaderPtr) boost::shared_ptr<const Corpus2::whole::CorpusReader>; + +%template(CorpusReaderPtrVector) std::vector<boost::shared_ptr<Corpus2::whole::CorpusReader> >; + +namespace Corpus2 { +namespace whole { + class CorpusReader { + public: + CorpusReader(const Tagset& tagset, const std::string& corpus_type); + boost::shared_ptr<Corpus> read(const std::string& corpus_file); + }; +} // whole ns +} // Corpus2 ns + +using namespace std; +using namespace Corpus2; +using namespace Corpus2::whole; + +#endif /* SWIG_LIBCORPUS2_CORPUS_READER_I */ diff --git a/swig/document.i b/swig/document.i index 5d62a33c6215b4e47a69de36a8f2132994aac214..0e8ea3c7122466c8427eca1dbdaa7bc2f57c5596 100644 --- a/swig/document.i +++ b/swig/document.i @@ -10,13 +10,16 @@ %include "chunk.i" %include "relation.i" -%template(DocumentPtr) boost::shared_ptr<Corpus2::Document>; -%template(ConstDocumentPtr) boost::shared_ptr<const Corpus2::Document>; +%template(DocumentPtr) boost::shared_ptr<Corpus2::whole::Document>; +%template(ConstDocumentPtr) boost::shared_ptr<const Corpus2::whole::Document>; + +%template(DocumentPtrVector) std::vector<boost::shared_ptr<Corpus2::whole::Document> >; namespace Corpus2 { +namespace whole { class Document { public: - Document(); + Document(const std::string& path = ""); ~Document(); void add_paragraph(const boost::shared_ptr<Chunk> para); @@ -24,10 +27,14 @@ namespace Corpus2 { const std::vector< boost::shared_ptr<Chunk> >& paragraphs() const; const std::vector< boost::shared_ptr<Relation> >& relations() const; + + const std::string& path() const; }; -} +} // whole ns +} // Corpus2 ns using namespace std; using namespace Corpus2; +using namespace Corpus2::whole; #endif /* SWIG_LIBCORPUS2_DOCUMENT_I */ diff --git a/swig/documentreader.i b/swig/documentreader.i index efdbf0cf73c694a5be51e1ae87b6bb845e5a7bfd..2d1258b736a0b6df031e78b0e3941f071ca1b7ab 100644 --- a/swig/documentreader.i +++ b/swig/documentreader.i @@ -3,7 +3,7 @@ %module libcorpusdocumentreader %{ - #include <libcorpus2_whole/io/docreader.h> + #include <libcorpus2_whole/io/documentreader.h> %} %include "exception.i" @@ -11,6 +11,7 @@ %include "boost_shared_ptr.i" namespace Corpus2 { +namespace whole { class DocumentReader { public: %exception { @@ -39,9 +40,11 @@ namespace Corpus2 { /* --------------------------------------------------------------------- */ ~DocumentReader(); }; -} +} // whole ns +} // Corpus2 ns using namespace std; using namespace Corpus2; +using namespace Corpus2::whole; #endif /* SWIG_LIBCORPUS2_DOCUMENTREADER_I */ diff --git a/swig/relation.i b/swig/relation.i index 7ebe41a656c379135760dddc69826355be47f06a..507e77ed8a30a984fd3ff8509bc9900940c5e145 100644 --- a/swig/relation.i +++ b/swig/relation.i @@ -10,14 +10,15 @@ %include "std_string.i" %include "boost_shared_ptr.i" -%template(DirectionPointPtr) boost::shared_ptr<Corpus2::DirectionPoint>; -%template(ConstDirectionPointPtr) boost::shared_ptr<const Corpus2::DirectionPoint>; +%template(DirectionPointPtr) boost::shared_ptr<Corpus2::whole::DirectionPoint>; +%template(ConstDirectionPointPtr) boost::shared_ptr<const Corpus2::whole::DirectionPoint>; -%template(RelationPtr) boost::shared_ptr<Corpus2::Relation>; -%template(ConstRelationPtr) boost::shared_ptr<const Corpus2::Relation>; -%template(RelationPtrVector) std::vector< boost::shared_ptr<Corpus2::Relation> >; +%template(RelationPtr) boost::shared_ptr<Corpus2::whole::Relation>; +%template(ConstRelationPtr) boost::shared_ptr<const Corpus2::whole::Relation>; +%template(RelationPtrVector) std::vector< boost::shared_ptr<Corpus2::whole::Relation> >; namespace Corpus2 { +namespace whole { class DirectionPoint { public: DirectionPoint(const std::string, const std::string, const int); @@ -50,9 +51,11 @@ namespace Corpus2 { %rename(rel_name) name() const; const std::string name() const; }; -} +} // whole ns +} // Corpus2 ns using namespace std; using namespace Corpus2; +using namespace Corpus2::whole; #endif /* SWIG_LIBCORPUS2_RELATION_I */ diff --git a/swig/relationreader.i b/swig/relationreader.i index bcf567aed3cdb936bd3e87f3f71ff6c8bc874cf6..b763980a4e2c3e91013f5b9046572e600111bd5a 100644 --- a/swig/relationreader.i +++ b/swig/relationreader.i @@ -7,14 +7,17 @@ %} namespace Corpus2 { +namespace whole { class RelationReader { public: RelationReader(const std::string &rela_path); const std::vector< boost::shared_ptr<Relation> >& relations(); }; -} +} // whole ns +} // Corpus2 ns using namespace std; using namespace Corpus2; +using namespace Corpus2::whole; #endif /* SWIG_LIBCORPUS2_RELATIONREADER_I */