Skip to content
Snippets Groups Projects
Commit 79f06f02 authored by Adam Radziszewski
Browse files

Merge branch 'master' of nlp.pwr.wroc.pl:corpus2

parents a7ae417f 19dd793d
No related merge requests found
...@@ -73,8 +73,29 @@ boost::shared_ptr<Document> DocumentReader::read() ...@@ -73,8 +73,29 @@ boost::shared_ptr<Document> DocumentReader::read()
throw Corpus2Error(corpus_type_ + " is an unknown reader type!"); throw Corpus2Error(corpus_type_ + " is an unknown reader type!");
} }
/**
 * Reads the next document with the "autogen_sent_id" and "autogen_chunk_id"
 * options enabled on the CCL/relations reader.
 *
 * - "poliqarp" corpus type (only when compiled WITH_POLIQARP): the call is
 *   delegated unchanged to the stored reader.
 * - "document" corpus type: one line is taken from corpus_file, a
 *   CclRelReader is obtained for it through get_cclrel_reader(), both
 *   options are set on that reader and its read() result is returned.
 *
 * @return the document that was read; when corpus_file yields no further
 *         line, a Document constructed with "End".
 * @throws Corpus2Error when corpus_type_ is none of the handled types.
 */
boost::shared_ptr<Document> DocumentReader::read_with_auto_id()
{
#ifdef WITH_POLIQARP
	if (corpus_type_ == "poliqarp") {
		return this->reader->read();
	}
#endif
	if (corpus_type_ == "document") {
		std::string line;
		if (!std::getline(corpus_file, line)) {
			// No more lines in the corpus file: hand back the "End" document.
			return boost::make_shared<Document>("End");
		}
		boost::shared_ptr<CclRelReader> cclrel_reader = get_cclrel_reader(line);
		// Both options must be *set*; the previous code called get_option()
		// for the sentence-id option and discarded the result, so that
		// option was never enabled.
		cclrel_reader->set_option("autogen_sent_id");
		cclrel_reader->set_option("autogen_chunk_id");
		return cclrel_reader->read();
	}
	throw Corpus2Error(corpus_type_ + " is an unknown reader type!");
}
boost::shared_ptr<DocumentReaderI> DocumentReader::get_cclrel_reader(std::string& line) boost::shared_ptr<CclRelReader> DocumentReader::get_cclrel_reader(std::string& line)
{ {
std::string ann_path, rel_path; std::string ann_path, rel_path;
......
...@@ -20,6 +20,7 @@ or FITNESS FOR A PARTICULAR PURPOSE. ...@@ -20,6 +20,7 @@ or FITNESS FOR A PARTICULAR PURPOSE.
#include <string> #include <string>
#include <libcorpus2_whole/corpus.h> #include <libcorpus2_whole/corpus.h>
#include <libcorpus2_whole/io/reader_i.h> #include <libcorpus2_whole/io/reader_i.h>
#include <libcorpus2_whole/io/cclrelreader.h>
namespace Corpus2 { namespace Corpus2 {
namespace whole { namespace whole {
...@@ -30,10 +31,11 @@ public: ...@@ -30,10 +31,11 @@ public:
DocumentReader(const Tagset& tagset, const std::string& corpus_type, const std::string& corpus_file_path, const std::string& corpus_reader); DocumentReader(const Tagset& tagset, const std::string& corpus_type, const std::string& corpus_file_path, const std::string& corpus_reader);
DocumentReader(const Tagset& tagset, const std::string& corpus_type, const std::string& corpus_file_path); DocumentReader(const Tagset& tagset, const std::string& corpus_type, const std::string& corpus_file_path);
boost::shared_ptr<Document> read(); boost::shared_ptr<Document> read();
boost::shared_ptr<Document> read_with_auto_id();
private: private:
boost::shared_ptr<DocumentReaderI> get_cclrel_reader(std::string& line); boost::shared_ptr<CclRelReader> get_cclrel_reader(std::string& line);
boost::shared_ptr<DocumentReaderI> reader; boost::shared_ptr<DocumentReaderI> reader;
std::ifstream corpus_file; std::ifstream corpus_file;
private: private:
......
...@@ -22,6 +22,7 @@ namespace whole { ...@@ -22,6 +22,7 @@ namespace whole {
DocumentReader(const Tagset& tagset, const std::string& corpus_type,const std::string& corpus_file_path, const std::string& corpus_reader); DocumentReader(const Tagset& tagset, const std::string& corpus_type,const std::string& corpus_file_path, const std::string& corpus_reader);
DocumentReader(const Tagset& tagset, const std::string& corpus_type,const std::string& corpus_file_path); DocumentReader(const Tagset& tagset, const std::string& corpus_type,const std::string& corpus_file_path);
boost::shared_ptr<Document> read(); boost::shared_ptr<Document> read();
boost::shared_ptr<Document> read_with_auto_id();
}; };
} // whole ns } // whole ns
} // Corpus2 ns } // Corpus2 ns
......
...@@ -67,6 +67,8 @@ namespace Corpus2 { ...@@ -67,6 +67,8 @@ namespace Corpus2 {
boost::shared_ptr<TokenMetaData> get_metadata() const; boost::shared_ptr<TokenMetaData> get_metadata() const;
void set_metadata(TokenMetaData& md); void set_metadata(TokenMetaData& md);
void set_metadata_ptr(boost::shared_ptr<TokenMetaData> md); void set_metadata_ptr(boost::shared_ptr<TokenMetaData> md);
void create_metadata();
}; };
%extend Token { %extend Token {
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment