From 75b314aa92c378b4b65b85726596bfa2899dfa79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20K=C4=99dzia?= <Pawel.Kedzia@pwr.wroc.pl> Date: Tue, 3 Jan 2012 12:36:44 +0100 Subject: [PATCH] Document contains the path to the file that was read. --- libcorpus2_whole/document.cpp | 4 ++-- libcorpus2_whole/document.h | 17 ++++++++++++++--- libcorpus2_whole/io/documentreader.cpp | 9 ++++++++- libcorpus2_whole/io/documentreader.h | 10 ++++++++++ swig/document.i | 4 +++- 5 files changed, 37 insertions(+), 7 deletions(-) diff --git a/libcorpus2_whole/document.cpp b/libcorpus2_whole/document.cpp index 6b949b3..9b391b8 100644 --- a/libcorpus2_whole/document.cpp +++ b/libcorpus2_whole/document.cpp @@ -20,8 +20,8 @@ or FITNESS FOR A PARTICULAR PURPOSE. namespace Corpus2 { namespace whole { -Document::Document() - : paragraphs_(), relations_() +Document::Document(const std::string& path) + : paragraphs_(), relations_(), path_(path) { } diff --git a/libcorpus2_whole/document.h b/libcorpus2_whole/document.h index 97ad4b8..d02a043 100644 --- a/libcorpus2_whole/document.h +++ b/libcorpus2_whole/document.h @@ -33,7 +33,10 @@ namespace whole { class Document { public: - Document(); + /** + * Path to the file; if not set, it defaults to an empty string + */ + Document(const std::string& path = ""); ~Document(); /// Adds paragraphs to document @@ -56,12 +59,20 @@ public: return relations_; } + /// Returns the path to the document + const std::string& path() const { + return path_; + } + protected: /// Paragraphs in document - std::vector< boost::shared_ptr<Chunk> > paragraphs_; + std::vector<boost::shared_ptr<Chunk> > paragraphs_; /// Relations in document - std::vector< boost::shared_ptr<Relation> > relations_; + std::vector<boost::shared_ptr<Relation> > relations_; + + /// Path to the file (empty if the document does not come from a file) + const std::string path_; }; } // whole ns diff --git a/libcorpus2_whole/io/documentreader.cpp b/libcorpus2_whole/io/documentreader.cpp index 4d35ffe..8aa1a79 100644 --- 
a/libcorpus2_whole/io/documentreader.cpp +++ b/libcorpus2_whole/io/documentreader.cpp @@ -24,6 +24,7 @@ namespace whole { : DocumentReaderI("document") { make_readers(tagset, annot_path, rela_path); + make_id_doc(annot_path, rela_path); } void DocumentReader::make_readers(const Tagset& tagset, @@ -33,10 +34,16 @@ namespace whole { rel_reader_ = boost::make_shared<RelationReader>(rela_path); } + void DocumentReader::make_id_doc(const std::string &annot_path, + const std::string &rela_path) + { + id_ = (annot_path + ";" + rela_path); + } + boost::shared_ptr<Document> DocumentReader::read() { boost::shared_ptr<Chunk> chunk; - boost::shared_ptr<Document> document = boost::make_shared<Document>(); + boost::shared_ptr<Document> document = boost::make_shared<Document>(id_); // Read ccl document and makes document while (1) { diff --git a/libcorpus2_whole/io/documentreader.h b/libcorpus2_whole/io/documentreader.h index d0c4af3..8b092de 100644 --- a/libcorpus2_whole/io/documentreader.h +++ b/libcorpus2_whole/io/documentreader.h @@ -78,12 +78,22 @@ private: const std::string &annot_path, const std::string &rela_path); + /** + * Based on the given paths (annotations and relations), builds the document identifier. + * The identifier is stored in the id_ member. + */ + void make_id_doc(const std::string &annot_path, + const std::string &rela_path); + // ------------------------------------------------------------------------- /// Pointer to CclReader boost::shared_ptr<CclReader> ccl_reader_; /// Pointer to RelationReader boost::shared_ptr<RelationReader> rel_reader_; + + /// Future document identifier + std::string id_; }; } // whole ns diff --git a/swig/document.i b/swig/document.i index 900053a..0e8ea3c 100644 --- a/swig/document.i +++ b/swig/document.i @@ -19,7 +19,7 @@ namespace Corpus2 { namespace whole { class Document { public: - Document(); + Document(const std::string& path = ""); ~Document(); void add_paragraph(const boost::shared_ptr<Chunk> para); @@ -27,6 +27,8 @@ namespace 
whole { const std::vector< boost::shared_ptr<Chunk> >& paragraphs() const; const std::vector< boost::shared_ptr<Relation> >& relations() const; + + const std::string& path() const; }; } // whole ns } // Corpus2 ns -- GitLab