Skip to content
Snippets Groups Projects
Commit 6d77a112 authored by Adam Radziszewski's avatar Adam Radziszewski
Browse files

docreader stub

parent b5b781b1
Branches
No related merge requests found
......@@ -41,6 +41,7 @@ SET(libcorpus2_STAT_SRC
ann/iob.cpp
ann/view.cpp
chunk.cpp
document.cpp
exception.cpp
lexeme.cpp
sentence.cpp
......@@ -53,6 +54,7 @@ SET(libcorpus2_STAT_SRC
tokenmetadata.cpp
io/cclreader.cpp
io/cclwriter.cpp
io/docreader.cpp
io/helpers.cpp
io/fastxces.cpp
io/iob-chan.cpp
......
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE and COPYING files for more details.
*/
#include <libcorpus2/document.h>
#include <boost/make_shared.hpp>
namespace Corpus2 {
/**
 * Default constructor: value-initialises the paragraph container, so a freshly
 * constructed Document holds no paragraphs.
 */
Document::Document() : paragraphs_() {}
/**
 * Destructor. Performs no explicit work; the paragraph container is destroyed
 * as an ordinary member.
 */
Document::~Document() {}
} /* end ns Corpus2 */
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE and COPYING files for more details.
*/
#ifndef LIBCORPUS2_DOCUMENT_H
#define LIBCORPUS2_DOCUMENT_H
#include <libcorpus2/chunk.h>
#include <boost/shared_ptr.hpp>
namespace Corpus2 {
/**
* A whole document, consisting of consecutive paragraphs ("chunks"), being
* sequences of sentences.
* Usage of this class assumes that a whole document is read into memory before
* any further processing takes place.
*/
class Document
{
public:
	/** Creates a document; the definition lives in document.cpp. */
	Document();

	/** Destroys the document; the definition lives in document.cpp. */
	~Document();

	/**
	 * Appends a paragraph at the end of the document.
	 *
	 * The pointer is taken by const reference so that the only shared_ptr
	 * copy made is the one stored in the container (passing it by value
	 * would add a redundant reference-count increment/decrement per call).
	 *
	 * @param para shared pointer to the paragraph ("chunk") to append; it is
	 *             stored as given, without any validity check
	 */
	void add_paragraph(const boost::shared_ptr<Chunk>& para) {
		paragraphs_.push_back(para);
	}

	/**
	 * Read-only access to the paragraphs, in the order they were added.
	 *
	 * @return const reference to the internal paragraph vector
	 */
	const std::vector< boost::shared_ptr<Chunk> >& paragraphs() const {
		return paragraphs_;
	}

protected:
	/** Paragraphs of this document, in insertion order. */
	std::vector< boost::shared_ptr<Chunk> > paragraphs_;
};
} /* end ns Corpus2 */
#endif // LIBCORPUS2_DOCUMENT_H
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE and COPYING files for more details.
*/
#include <libcorpus2/io/docreader.h>
#include <boost/make_shared.hpp>
namespace Corpus2 {
// Stub: this translation unit currently defines nothing in namespace Corpus2.
// NOTE(review): docreader.h declares a DocumentReader constructor; no
// definition is visible here -- confirm it is provided elsewhere or add one
// before the class is used.
} /* end ns Corpus2 */
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE and COPYING files for more details.
*/
#ifndef LIBCORPUS2_DOCREADER_H
#define LIBCORPUS2_DOCREADER_H
#include <libcorpus2/io/reader.h>
namespace Corpus2 {
/**
* A reader for whole documents. Note that a whole document is read into memory
* before any processing may take place.
*/
class DocumentReader {
public:
/**
* Reads a whole document using the two given paths: the morphosyntax and
* chunk-style annotations are read from annot_path, while relations
* between chunk-style annotations are read from rela_path.
* Both paths may, in particular, be the same.
*
* @param annot_path path to the morphosyntax and chunk-style annotations
* @param rela_path path to the relations between chunk-style annotations
* @param rdr_class_id defaults to "ccl"; presumably identifies the
*        underlying reader class to use -- TODO confirm
*
* TODO!
*/
DocumentReader(const std::string &annot_path,
const std::string &rela_path,
const std::string &rdr_class_id = "ccl");
};
} /* end ns Corpus2 */
#endif // LIBCORPUS2_DOCREADER_H
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment