Skip to content
Snippets Groups Projects
Commit cd5e51ab authored by Paweł Kędzia's avatar Paweł Kędzia
Browse files

Added CorpusReader

parent 7d492649
Branches
No related merge requests found
#include <libcorpus2_whole/io/corpusreader.h>
#include <libcorpus2_whole/io/poliqarpcorpusreader.h>
#include <libcorpus2_whole/io/documentcorpusreader.h>
namespace Corpus2 {
namespace whole{
/**
 * Creates a corpus reader for the given corpus type.
 *
 * No validation of corpus_type happens here; an unsupported type is only
 * detected when a concrete reader is requested.
 *
 * @arg tagset Tagset handed to the concrete reader
 * @arg corpus_type Name of the corpus type ("poliqarp" or "document")
 */
CorpusReader::CorpusReader(
		const Tagset& tagset,
		const std::string& corpus_type)
	: corpus_type_(corpus_type),
	  tagset_(tagset)
{
}
/**
 * Reads a corpus by delegating to the reader matching the configured
 * corpus type.
 *
 * @arg corpus_file_path Path passed unchanged to the concrete reader
 * @return Corpus produced by the concrete reader
 * @throws Corpus2Error when the configured corpus type is not supported
 */
boost::shared_ptr<Corpus> CorpusReader::read(const std::string& corpus_file_path)
{
	// A fresh concrete reader is created for every call.
	return get_corpus_reader_by_type()->read(corpus_file_path);
}
//
/**
 * Builds the concrete reader that corresponds to corpus_type_.
 *
 * Supported types:
 *  - "poliqarp" -> PoliqarpCorpusReader
 *  - "document" -> DocumentCorpusReader
 *
 * @return Newly allocated reader constructed with tagset_
 * @throws Corpus2Error when corpus_type_ matches none of the supported types
 */
boost::shared_ptr<CorpusReaderI> CorpusReader::get_corpus_reader_by_type()
{
	typedef boost::shared_ptr<CorpusReaderI> ReaderPtr;

	if (corpus_type_ == "poliqarp") {
		return ReaderPtr(new PoliqarpCorpusReader(tagset_));
	}
	if (corpus_type_ == "document") {
		return ReaderPtr(new DocumentCorpusReader(tagset_));
	}

	// None of the known type names matched.
	throw Corpus2Error(corpus_type_ + " is unknown reader type!");
}
} // whole ns
} // Corpus2 ns
#ifndef LIBCORPUS2_WHOLE_CORPUSREADER_H
#define LIBCORPUS2_WHOLE_CORPUSREADER_H
#include <string>
#include <libcorpus2_whole/corpus.h>
#include <libcorpus2_whole/io/reader_i.h>
namespace Corpus2 {
namespace whole {
/**
 * Reads a whole corpus using a concrete reader chosen by corpus type.
 */
class CorpusReader
{
public:
	/**
	 * @arg tagset Tagset to use. It is held by reference, so it must
	 * outlive this CorpusReader.
	 * @arg corpus_type may be:
	 *  - document (contains relations)
	 *  - poliqarp
	 * The value is copied, so passing a temporary or a string literal
	 * is safe.
	 */
	CorpusReader(const Tagset& tagset, const std::string& corpus_type);

	/**
	 * Reads corpus from given path
	 * @arg corpus_file Path to a file containing paths to corpus files.
	 * Depending on the corpus type, each line in this file should contain
	 * either the path to one document from the corpus, or (in particular
	 * for DocReader) the paths to annotations and relations in one line:
	 * first the path to annotations, then the path to relations, with the
	 * two paths separated by a semicolon.
	 * @return The corpus that was read
	 */
	boost::shared_ptr<Corpus> read(const std::string& corpus_file);

private:
	/// Returns reader based on corpus type (poliqarp/document)
	boost::shared_ptr<CorpusReaderI> get_corpus_reader_by_type();

private:
	/// Type of corpus, set only once in the constructor.
	/// Stored by value: a reference member would dangle as soon as the
	/// constructor argument (often a temporary) is destroyed.
	const std::string corpus_type_;

	/// Tagset to use, set only once in the constructor (not owned)
	const Tagset& tagset_;
};
} // whole ns
} // Corpus2 ns
#endif // LIBCORPUS2_WHOLE_CORPUSREADER_H
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment