// Patch summary: lets a whole::Document remember where it came from.
//
// libcorpus2_whole/document.{h,cpp}
//   - Document::Document() becomes Document(const std::string& path = ""),
//     and the constructor stores the argument in the new member path_.
//   - Adds the accessor path(), which returns path_ by const reference.
//   - Adds the member `const std::string path_`.
//   - Whitespace-only change in the template spelling of paragraphs_ and
//     relations_.
// libcorpus2_whole/io/documentreader.{h,cpp}
//   - Adds the member id_ and the private helper make_id_doc(), which sets
//     id_ to annot_path + ";" + rela_path.
//   - make_id_doc() is called right after make_readers() in the body that
//     carries the DocumentReaderI("document") initializer.
//   - read() now builds the Document with make_shared<Document>(id_).
// swig/document.i
//   - Mirrors the new constructor signature and the path() accessor.
//
// NOTE(review): the value DocumentReader passes as "path" is the combined
//   string "annot_path;rela_path", while the header comment calls it a path
//   to a file. Confirm that callers of path() expect the combined form.
// NOTE(review): path_ is a const non-static data member, so Document has no
//   usable implicit copy-assignment operator after this patch. Confirm that
//   no caller assigns Document objects.
// NOTE(review): the constructor is callable with one std::string and is not
//   marked explicit, so std::string converts implicitly to Document.
//   Confirm this is intended.
// NOTE(review): in this copy the hunk headers and +/- markers appear
//   mid-line, i.e. the patch's line breaks are collapsed. It will need its
//   original line structure restored before it can be applied.
diff --git a/libcorpus2_whole/document.cpp b/libcorpus2_whole/document.cpp index 6b949b349df9ea938cafd63e51cae6c72c4b8b5d..9b391b84fd0101f544a79884efdcd13593e53c9f 100644 --- a/libcorpus2_whole/document.cpp +++ b/libcorpus2_whole/document.cpp @@ -20,8 +20,8 @@ or FITNESS FOR A PARTICULAR PURPOSE. namespace Corpus2 { namespace whole { -Document::Document() - : paragraphs_(), relations_() +Document::Document(const std::string& path) + : paragraphs_(), relations_(), path_(path) { } diff --git a/libcorpus2_whole/document.h b/libcorpus2_whole/document.h index 97ad4b8d480aa0e64a27b04fb3458c753cf1d452..d02a043cd39c2e57fecc16064236cb440160662e 100644 --- a/libcorpus2_whole/document.h +++ b/libcorpus2_whole/document.h @@ -33,7 +33,10 @@ namespace whole { class Document { public: - Document(); + /** + * Path to file, if not set, then default is empty + */ + Document(const std::string& path = ""); ~Document(); /// Adds paragraphs to document @@ -56,12 +59,20 @@ public: return relations_; } + /// Returns path to the document + const std::string& path() const { + return path_; + } + protected: /// Paragraphs in document - std::vector< boost::shared_ptr<Chunk> > paragraphs_; + std::vector<boost::shared_ptr<Chunk> > paragraphs_; /// Relations in document - std::vector< boost::shared_ptr<Relation> > relations_; + std::vector<boost::shared_ptr<Relation> > relations_; + + /// Path to the file (if it's not a file, then is empty) + const std::string path_; }; } // whole ns diff --git a/libcorpus2_whole/io/documentreader.cpp b/libcorpus2_whole/io/documentreader.cpp index 4d35ffee38e76aec35cdef23d27a9dc86fb7a5e8..8aa1a79a421c4e08add2b75b48040fed6ad974af 100644 --- a/libcorpus2_whole/io/documentreader.cpp +++ b/libcorpus2_whole/io/documentreader.cpp @@ -24,6 +24,7 @@ namespace whole { : DocumentReaderI("document") { make_readers(tagset, annot_path, rela_path); + make_id_doc(annot_path, rela_path); } void DocumentReader::make_readers(const Tagset& tagset, @@ -33,10 +34,16 @@ namespace 
whole { rel_reader_ = boost::make_shared<RelationReader>(rela_path); } + void DocumentReader::make_id_doc(const std::string &annot_path, + const std::string &rela_path) + { + id_ = (annot_path + ";" + rela_path); + } + boost::shared_ptr<Document> DocumentReader::read() { boost::shared_ptr<Chunk> chunk; - boost::shared_ptr<Document> document = boost::make_shared<Document>(); + boost::shared_ptr<Document> document = boost::make_shared<Document>(id_); // Read ccl document and makes document while (1) { diff --git a/libcorpus2_whole/io/documentreader.h b/libcorpus2_whole/io/documentreader.h index d0c4af38a662469468a86e07d42020ffa2535379..8b092de21caf029a4e706fd23e606585d041512c 100644 --- a/libcorpus2_whole/io/documentreader.h +++ b/libcorpus2_whole/io/documentreader.h @@ -78,12 +78,22 @@ private: const std::string &annot_path, const std::string &rela_path); + /** + * Based on given paths (annotations and relations) makes document identifier + * Document identifier is set to id_ class-state + */ + void make_id_doc(const std::string &annot_path, + const std::string &rela_path); + // ------------------------------------------------------------------------- /// Pointer to CclReader boost::shared_ptr<CclReader> ccl_reader_; /// Pointer to RelationReader boost::shared_ptr<RelationReader> rel_reader_; + + /// Future document identifier + std::string id_; }; } // whole ns diff --git a/swig/document.i b/swig/document.i index 900053a084eeefe85dc5c07d7369e319296a306c..0e8ea3c7122466c8427eca1dbdaa7bc2f57c5596 100644 --- a/swig/document.i +++ b/swig/document.i @@ -19,7 +19,7 @@ namespace Corpus2 { namespace whole { class Document { public: - Document(); + Document(const std::string& path = ""); ~Document(); void add_paragraph(const boost::shared_ptr<Chunk> para); @@ -27,6 +27,8 @@ namespace whole { const std::vector< boost::shared_ptr<Chunk> >& paragraphs() const; const std::vector< boost::shared_ptr<Relation> >& relations() const; + + const std::string& path() const; }; } // 
whole ns } // Corpus2 ns