From 9916f226f110a714d3698367b975e2f7bd8e84e2 Mon Sep 17 00:00:00 2001 From: lburdka <lburdka@wojtek-desktop.(none)> Date: Wed, 2 May 2012 10:37:14 +0200 Subject: [PATCH] Extended corpus2_whole to fit anaphora requirements --- libcorpus2_whole/CMakeLists.txt | 1 + libcorpus2_whole/io/relwriter.cpp | 105 ++++++++++++++++++++++++++++++ libcorpus2_whole/io/relwriter.h | 85 ++++++++++++++++++++++++ libcorpus2_whole/relation.cpp | 22 +++++++ libcorpus2_whole/relation.h | 21 ++++-- swig/relation.i | 4 ++ swig/relationwriter.i | 29 +++++++++ 7 files changed, 263 insertions(+), 4 deletions(-) mode change 100644 => 100755 libcorpus2_whole/CMakeLists.txt create mode 100755 libcorpus2_whole/io/relwriter.cpp create mode 100755 libcorpus2_whole/io/relwriter.h mode change 100644 => 100755 libcorpus2_whole/relation.cpp mode change 100644 => 100755 libcorpus2_whole/relation.h mode change 100644 => 100755 swig/relation.i create mode 100755 swig/relationwriter.i diff --git a/libcorpus2_whole/CMakeLists.txt b/libcorpus2_whole/CMakeLists.txt old mode 100644 new mode 100755 index 0e07fa9..9606df2 --- a/libcorpus2_whole/CMakeLists.txt +++ b/libcorpus2_whole/CMakeLists.txt @@ -11,6 +11,7 @@ SET(libcorpus2_whole_SRC relation.cpp io/reader_i.h io/relreader.cpp + io/relwriter.cpp io/cclrelreader.cpp io/documentcorpusreader.cpp io/corpusreader.cpp diff --git a/libcorpus2_whole/io/relwriter.cpp b/libcorpus2_whole/io/relwriter.cpp new file mode 100755 index 0000000..6255719 --- /dev/null +++ b/libcorpus2_whole/io/relwriter.cpp @@ -0,0 +1,105 @@ +/* + Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski, Paweł Kędzia + Part of the libcorpus2 project + + This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3 of the License, or (at your option) +any later version. 
+
+    This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.
+
+    See the LICENSE and COPYING files for more details.
+*/
+
+#include <libpwrutils/foreach.h>
+#include <libcorpus2/exception.h>
+#include <libcorpus2_whole/io/relwriter.h>
+
+#include <fstream>
+#include <sstream>
+#include <boost/make_shared.hpp>
+
+namespace Corpus2 {
+namespace whole {
+
+namespace {
+
+/// Escapes characters that are not allowed inside an XML attribute value.
+std::string escape_xml(const std::string& raw)
+{
+	std::string out;
+	out.reserve(raw.size());
+	for (std::string::size_type i = 0; i < raw.size(); ++i) {
+		switch (raw[i]) {
+		case '&': out += "&amp;"; break;
+		case '<': out += "&lt;"; break;
+		case '>': out += "&gt;"; break;
+		case '"': out += "&quot;"; break;
+		default: out += raw[i];
+		}
+	}
+	return out;
+}
+
+/// Writes one <from>/<to> element for dp, indented by 6 spaces.
+void write_point(std::ostream& os, const char* tag, const DirectionPoint& dp)
+{
+	os << "      <" << tag << " sent=\"" << escape_xml(dp.sentence_id())
+		<< "\" chan=\"" << escape_xml(dp.channel_name()) << "\">"
+		<< static_cast<int>(dp.annotation_number()) << "</" << tag << ">\n";
+}
+
+} // anonymous ns
+
+RelationWriter::RelationWriter(const std::string &rela_path)
+	: rela_path_(rela_path)
+{
+}
+
+void RelationWriter::write(
+		const std::vector< boost::shared_ptr<Relation> >& relations)
+{
+	file_.reset(new std::ofstream(rela_path_.c_str()));
+	if (!file_->is_open()) {
+		file_.reset();
+		throw Corpus2Error("Cannot open relation file for writing: "
+				+ rela_path_);
+	}
+
+	do_header();
+	foreach (const boost::shared_ptr<Relation>& r, relations) {
+		write_relation(r);
+	}
+	do_footer();
+
+	// Close explicitly so that the data is flushed before write() returns.
+	file_->close();
+}
+
+void RelationWriter::write_relation(const boost::shared_ptr<Relation>& r)
+{
+	// Maybe additional info about the set is needed in the Relation class,
+	// e.g. a set="Anaphora" attribute next to the name.
+	*file_ << "   <rel name=\"" << escape_xml(r->name()) << "\">\n";
+	write_point(*file_, "from", *r->from());
+	write_point(*file_, "to", *r->to());
+	*file_ << "   </rel>\n";
+}
+
+void RelationWriter::do_header()
+{
+	*file_ << "<relations>\n";
+}
+
+void RelationWriter::do_footer()
+{
+	// No newline is written after the closing tag.
+	*file_ << "</relations>";
+}
+
+} // whole ns
+} // Corpus2 ns
+
diff --git a/libcorpus2_whole/io/relwriter.h b/libcorpus2_whole/io/relwriter.h
new file mode 100755
index 0000000..7ce5469
--- /dev/null
+++ b/libcorpus2_whole/io/relwriter.h
@@ -0,0 +1,85 @@
+/*
+    Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski, Paweł Kędzia
+    Part of the libcorpus2 project
+
+    This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+    This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.
+
+    See the LICENSE and COPYING files for more details.
+*/
+
+#ifndef LIBCORPUS2_WHOLE_RELWRITER_H
+#define LIBCORPUS2_WHOLE_RELWRITER_H
+
+#include <vector>
+#include <boost/shared_ptr.hpp>
+#include <boost/scoped_ptr.hpp>
+
+#include <libcorpus2_whole/relation.h>
+
+#include <libcorpus2/io/xmlwriter.h>
+
+#include <fstream>
+#include <iostream>
+#include <string>
+
+namespace Corpus2 {
+namespace whole {
+
+/**
+ * A writer for relation documents.
+ * Serialises a list of relations into an XML file of the form:
+ * @verbatim
+ * <relations>
+ *    <rel name="...">
+ *       <from sent="..." chan="...">annotation number</from>
+ *       <to sent="..." chan="...">annotation number</to>
+ *    </rel>
+ * </relations>
+ * @endverbatim
+ */
+class RelationWriter {
+public:
+	/**
+	 * Creates a writer bound to the given output file.
+	 * The file is not opened until write() is called.
+	 * @param rela_path path to the file the relations are written to
+	 */
+	RelationWriter(const std::string &rela_path);
+
+	/**
+	 * Writes the given relations to the file, replacing its previous content.
+	 * Attribute values are XML-escaped.
+	 * @param relations relations to write, in output order
+	 * @throws Corpus2Error if the file cannot be opened for writing
+	 */
+	void write(const std::vector< boost::shared_ptr<Relation> >& relations);
+
+private:
+	/// Writes a single <rel> element to the currently open file
+	void write_relation(const boost::shared_ptr<Relation>& r);
+
+	/// Writes the opening <relations> tag
+	void do_header();
+
+	/// Writes the closing </relations> tag
+	void do_footer();
+
+	/// Path to file with relations
+	std::string rela_path_;
+
+	/// Output stream; opened by write() and closed before write() returns
+	boost::shared_ptr<std::ofstream> file_;
+};
+
+} // whole ns
+} // Corpus2 ns
+
+#endif // LIBCORPUS2_WHOLE_RELWRITER_H
+
diff --git a/libcorpus2_whole/relation.cpp b/libcorpus2_whole/relation.cpp
old mode 100644
new mode 100755
index d1df54f..a069a02
--- a/libcorpus2_whole/relation.cpp
+++ b/libcorpus2_whole/relation.cpp
@@ -16,6 +16,7 @@ or FITNESS FOR A PARTICULAR PURPOSE.
 
#include <boost/make_shared.hpp> #include <libcorpus2_whole/relation.h> +#include <stdio.h> namespace Corpus2 { namespace whole { @@ -27,6 +28,27 @@ Relation::Relation(const std::string& name, { } +boost::shared_ptr<Relation> Relation::rel_pt(){ + relation_=boost::shared_ptr<Relation>(new Relation(name_,from_,to_)); + return relation_; +} + +void Relation::set_to(const DirectionPoint& dp){ + boost::shared_ptr<const DirectionPoint> temp(new DirectionPoint(dp.sentence_id(),dp.channel_name(),dp.annotation_number())); + to_=temp; + +} + +void Relation::set_from(const DirectionPoint& dp){ + boost::shared_ptr<const DirectionPoint> temp(new DirectionPoint(dp.sentence_id(),dp.channel_name(),dp.annotation_number())); + from_=temp; + +} + +void Relation::set_name(const std::string& s){ + name_=s; +} + Relation::Relation(const std::string& name, const DirectionPoint& from, const DirectionPoint& to) diff --git a/libcorpus2_whole/relation.h b/libcorpus2_whole/relation.h old mode 100644 new mode 100755 index 8e8ed1d..136f5ca --- a/libcorpus2_whole/relation.h +++ b/libcorpus2_whole/relation.h @@ -94,6 +94,15 @@ public: ~Relation(); + //Setter of "form" direction point + void set_from(const DirectionPoint& dp); + + //Setter of "to" direction point + void set_to(const DirectionPoint& dp); + + //Setterf of name + void set_name(const std::string& s); + /// Accessor to "from" direction point const boost::shared_ptr<const DirectionPoint>& from() const { return from_; @@ -109,13 +118,17 @@ public: return name_; } + boost::shared_ptr<Relation> rel_pt(); + private: /// Direction name - const std::string name_; - + std::string name_; + + ///Pointer to relation + boost::shared_ptr<Relation> relation_; /// Direction points: from and to - const boost::shared_ptr<const DirectionPoint> from_; - const boost::shared_ptr<const DirectionPoint> to_; + boost::shared_ptr<const DirectionPoint> from_; + boost::shared_ptr<const DirectionPoint> to_; }; } // whole ns diff --git a/swig/relation.i 
b/swig/relation.i old mode 100644 new mode 100755 index 507e77e..ad4b110 --- a/swig/relation.i +++ b/swig/relation.i @@ -40,7 +40,11 @@ namespace whole { const DirectionPoint& to); ~Relation(); + void set_from(const DirectionPoint& dp); + void set_to(const DirectionPoint& dp); + void set_name(const std::string& s); + boost::shared_ptr<Relation> rel_pt(); /* It must be renamed because "from" is python keyword */ %rename(rel_from) from() const; const boost::shared_ptr<const DirectionPoint>& from() const; diff --git a/swig/relationwriter.i b/swig/relationwriter.i new file mode 100755 index 0000000..41f23f4 --- /dev/null +++ b/swig/relationwriter.i @@ -0,0 +1,29 @@ +#ifndef SWIG_LIBCORPUS2_RELATIONWRITER_I +#define SWIG_LIBCORPUS2_RELATIONWRITER_I + +%module libcorpusrelationwriter +%{ + #include <libcorpus2_whole/io/relwriter.h> +%} + + +namespace Corpus2 { +namespace whole { + class RelationWriter { + public: + RelationWriter(const std::string &rela_path); + void write(const std::vector< boost::shared_ptr<Relation> > relations); + private: + void do_header(); + void do_footer(); + //void write_relation(const boost::shared_ptr<Relation>& r); +}; +} // whole ns +} // Corpus2 ns + +using namespace std; +using namespace Corpus2; +using namespace Corpus2::whole; + +#endif /* SWIG_LIBCORPUS2_RELATIONWRITER_I */ + -- GitLab