An error occurred while loading the file. Please try again.
-
Adam Radziszewski authored69950e72
relreader.cpp 4.09 KiB
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski, Paweł Kędzia
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE and COPYING files for more details.
*/
#include <libpwrutils/foreach.h>
#include <libcorpus2/exception.h>
#include <libcorpus2/io/relreader.h>
#include <fstream>
#include <boost/make_shared.hpp>
namespace Corpus2 {
/**
 * Creates a reader bound to the relation file at rela_path.
 *
 * All parser state flags start out cleared. The file is opened for reading
 * immediately; if the stream is not in a good state after opening, the
 * stream handle is released and a Corpus2Error is thrown.
 *
 * @param rela_path path of the relation file to read
 * @throws Corpus2Error when the file stream cannot be opened
 */
RelationReader::RelationReader(const std::string &rela_path)
	: rela_path_(rela_path)
{
	// Nothing has been parsed yet and we are outside of any element.
	in_relations_ = false;
	in_relation_ = false;
	readed_ = false;

	file_.reset(new std::ifstream(rela_path.c_str(), std::ifstream::in));
	if (file_->good()) {
		return;
	}

	// Opening failed: drop the unusable stream before reporting the error.
	file_.reset();
	throw Corpus2Error("File not found!");
}
/**
 * Reads the relation file and feeds it to the parser chunk by chunk.
 *
 * The stream is consumed in blocks of up to 1024 bytes; every block
 * (including a possibly empty final one) is handed to parse_chunk_raw().
 * Once end-of-file is reached, finish_chunk_parsing() is called. Afterwards
 * the reader is flagged as having read the document.
 *
 * @throws Corpus2Error when no file stream is available
 */
void RelationReader::read()
{
	static const int BUFSIZE = 1024;

	if (!file_) {
		throw Corpus2Error("File not found!");
	}
	std::istream& input = *file_;

	unsigned char chunk[BUFSIZE + 1];
	while (input.good()) {
		input.read(reinterpret_cast<char*>(chunk), BUFSIZE);
		// gcount() is the number of bytes actually obtained by the read above
		parse_chunk_raw(chunk, input.gcount());
		if (input.eof()) {
			finish_chunk_parsing();
		}
	}

	// remember that the document has been read
	readed_ = true;
}
/**
 * Handler for an opening tag.
 *
 * - RELATIONS_TAG: marks that we are inside the relations container.
 * - RELATION_TAG (only inside the container): marks that we are inside a
 *   relation and resets/reads its per-relation data via
 *   parse_relation_name().
 * - RELATION_DIRECT_FROM / RELATION_DIRECT_TO (only inside a relation):
 *   forwarded to parse_direction_from() / parse_direction_to().
 * Any other element is ignored.
 *
 * @param name       name of the element being opened
 * @param attributes attributes of that element
 */
void RelationReader::on_start_element(const Glib::ustring& name,
		const AttributeList& attributes) {
	if (name == RELATIONS_TAG) {
		in_relations_ = true;
		return;
	}
	if (in_relations_ && name == RELATION_TAG) {
		in_relation_ = true;
		parse_relation_name(attributes);
		return;
	}
	if (!in_relation_) {
		// direction elements only matter inside a relation
		return;
	}
	if (name == RELATION_DIRECT_FROM) {
		parse_direction_from(attributes);
	}
	else if (name == RELATION_DIRECT_TO) {
		parse_direction_to(attributes);
	}
}
/**
 * Handler for a closing tag.
 *
 * Closing RELATIONS_TAG leaves the relations container. Closing
 * RELATION_TAG inside the container leaves the current relation, validates
 * the data collected for it and, when valid, stores the relation.
 * Any other element is ignored.
 *
 * @param name name of the element being closed
 * @throws Corpus2Error propagated unchanged from validate() when the
 *         collected relation data is incomplete
 */
void RelationReader::on_end_element(const Glib::ustring& name) {
	if (name == RELATIONS_TAG) {
		in_relations_ = false;
	}
	else if (in_relations_ && name == RELATION_TAG) {
		in_relation_ = false;
		// No try/catch here: a handler that only rethrows adds nothing,
		// exceptions from these calls reach the caller either way.
		validate();
		add_current_relation();
	}
}
/**
 * Handler for character data.
 *
 * While inside a relation, the raw bytes of the text are appended to the
 * annotation-number buffer; text outside of a relation is discarded.
 *
 * @param text character data reported by the parser
 */
void RelationReader::on_characters(const Glib::ustring &text)
{
	if (!in_relation_) {
		return;
	}
	ann_number_.append(text.raw());
}
/**
 * Checks that the data collected for the current relation is complete.
 *
 * The checks run in a fixed order and the first failing one is reported:
 * relation name, "from" point, "to" point, annotation-number text.
 *
 * @throws Corpus2Error describing the first missing piece of data
 */
void RelationReader::validate()
{
	if (rel_name_.empty()) {
		throw Corpus2Error("Relation name is empty!");
	}

	// The remaining messages all end with the relation name, so only the
	// prefix of the first failing check is selected here.
	const char* problem = NULL;
	if (!rel_from_) {
		problem = "Cannot find \"from\" in relation ";
	}
	else if (!rel_to_) {
		problem = "Cannot find \"to\" in relation ";
	}
	else if (ann_number_.empty()) {
		problem = "Cannot find annotation number in relation ";
	}

	if (problem != NULL) {
		throw Corpus2Error(problem + rel_name_);
	}
}
/**
 * Builds a Relation from the currently collected name and both direction
 * points and appends it to the list of relations read so far.
 */
void RelationReader::add_current_relation()
{
	relations_.push_back(
		boost::make_shared<Relation>(rel_name_, rel_from_, rel_to_));
}
/**
 * Starts collecting data for a new relation.
 *
 * Clears the annotation-number buffer and both direction points left over
 * from any previous relation, then reads the relation name from the
 * RELATION_NAME attribute (empty when the attribute is absent).
 *
 * @param attributes attributes of the relation element
 */
void RelationReader::parse_relation_name(const AttributeList& attributes)
{
	// forget everything that belonged to the previous relation
	rel_to_.reset();
	rel_from_.reset();
	ann_number_.clear();

	rel_name_ = get_attribute_value(attributes, RELATION_NAME);
}
// Handles the "from" direction element of the current relation: forwards the
// element's attributes to parse_direction(), which stores the resulting
// direction point in rel_from_.
void RelationReader::parse_direction_from(const AttributeList& attributes)
{
parse_direction(attributes, rel_from_);
}
// Handles the "to" direction element of the current relation: forwards the
// element's attributes to parse_direction(), which stores the resulting
// direction point in rel_to_.
void RelationReader::parse_direction_to(const AttributeList& attributes)
{
parse_direction(attributes, rel_to_);
}
/**
 * Creates a DirectionPoint from a direction element and stores it in direct.
 *
 * The sentence id and channel name come from the RELATION_SENTENCE_ID and
 * RELATION_CHANNEL_NAME attributes (empty string when absent). The
 * annotation number is parsed from the text currently held in ann_number_;
 * the variable is pre-set to the placeholder 99999999 before parsing.
 *
 * NOTE(review): this runs from on_start_element(), while ann_number_ is
 * only filled by on_characters() and only cleared in parse_relation_name().
 * The text parsed here is therefore whatever was collected since the
 * enclosing relation started, presumably not the content of the element
 * being opened -- confirm that this is the intended behaviour. The result
 * of a failed extraction is not checked.
 *
 * @param attributes attributes of the direction element
 * @param direct     receives the newly created direction point
 */
void RelationReader::parse_direction(const AttributeList& attributes,
		boost::shared_ptr<DirectionPoint>& direct)
{
	const std::string sent_id =
		get_attribute_value(attributes, RELATION_SENTENCE_ID);
	const std::string chan_name =
		get_attribute_value(attributes, RELATION_CHANNEL_NAME);

	int ann_no = 99999999;
	std::istringstream number_stream(ann_number_);
	number_stream >> ann_no;

	direct = boost::make_shared<DirectionPoint>(sent_id, chan_name, ann_no);
}
std::string RelationReader::get_attribute_value(
const AttributeList& attributes, const std::string& name)
{
foreach (const Attribute& a, attributes) {
if (a.name == name) {
return a.value;
}
}
return "";
}
} /* end ns Corpus2 */