From 457811ce616780ff405356ed4652bbce616a896d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20K=C4=99dzia?= <Pawel.Kedzia@pwr.wroc.pl> Date: Thu, 28 Apr 2011 09:26:32 +0200 Subject: [PATCH] Corpus2::Annotation wrapper --- swig/Makefile | 25 ++++++--- swig/libcorpusannotationchannel.i | 84 +++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 6 deletions(-) create mode 100644 swig/libcorpusannotationchannel.i diff --git a/swig/Makefile b/swig/Makefile index 03b0031..04f2b15 100644 --- a/swig/Makefile +++ b/swig/Makefile @@ -21,7 +21,8 @@ CBIN=libcorpustag.o \ libcorpuschunk.o \ libcorpustokenwriter.o \ libcorpustokenreader.o \ - libcorpusiob.o + libcorpusiob.o \ + libcorpusannotationchannel.o CBINOUT=_boost_shared_ptr.so \ _libcorpustag.so \ @@ -33,7 +34,8 @@ CBINOUT=_boost_shared_ptr.so \ _libcorpuschunk.so \ _libcorpustokenwriter.so \ _libcorpustokenreader.so \ - _libcorpusiob.so + _libcorpusiob.so \ + _libcorpusannotationchannel.so CWRAP=boost_shared_ptr_wrap.cxx \ libcorpustag_wrap.cxx \ @@ -45,7 +47,8 @@ CWRAP=boost_shared_ptr_wrap.cxx \ libcorpuschunk_wrap.cxx \ libcorpustokenwriter_wrap.cxx \ libcorpustokenreader_wrap.cxx \ - libcorpusiob_wrap.cxx + libcorpusiob_wrap.cxx \ + libcorpusannotationchannel_wrap.cxx CWRAPBIN=boost_shared_ptr_wrap.o \ libcorpustag_wrap.o \ @@ -57,7 +60,8 @@ CWRAPBIN=boost_shared_ptr_wrap.o \ libcorpuschunk_wrap.o \ libcorpustokenwriter_wrap.o \ libcorpustokenreader_wrap.o \ - libcorpusiob_wrap.o + libcorpusiob_wrap.o \ + libcorpusannotationchannel_wrap.o PYMODULES=boost_shared_ptr.py \ libcorpustag.py \ @@ -69,7 +73,8 @@ PYMODULES=boost_shared_ptr.py \ libcorpuschunk.py \ libcorpustokenwriter.py \ libcorpustokenreader.py \ - libcorpusiob.py + libcorpusiob.py \ + libcorpusannotationchannel.py PYCBIN=boost_shared_ptr.pyc \ libcorpustag.pyc \ @@ -81,7 +86,8 @@ PYCBIN=boost_shared_ptr.pyc \ libcorpuschunk.pyc \ libcorpustokenwriter.pyc \ libcorpustokenreader.pyc \ - libcorpusiob.pyc + libcorpusiob.pyc \ + 
libcorpusannotationchannel.pyc # ----------------------------------------------------------------------------- all:boost_shared_ptr.o $(CBIN) @@ -164,6 +170,13 @@ libcorpusiob.o: $(CPP) -shared libcorpusiob_wrap.o \ $(PWRUTILBIN) $(CORPUS2BIN) -o _libcorpusiob.so +# AnnotationChannel +libcorpusannotationchannel.o: + $(SWIG) $(SWIGOPTS_LANG) libcorpusannotationchannel.i + $(CPP) -c libcorpusannotationchannel_wrap.cxx -I$(PYTHONDIR) $(CPPFLAGS) + $(CPP) -shared libcorpusannotationchannel_wrap.o \ + $(PWRUTILBIN) $(CORPUS2BIN) -o _libcorpusannotationchannel.so + # ----------------------------------------------------------------------------- clean: rm -f $(CBIN) $(CBINOUT) $(CWRAP) $(CWRAPBIN) $(PYMODULES) $(PYCBIN) diff --git a/swig/libcorpusannotationchannel.i b/swig/libcorpusannotationchannel.i new file mode 100644 index 0000000..91a7929 --- /dev/null +++ b/swig/libcorpusannotationchannel.i @@ -0,0 +1,84 @@ +#ifndef SWIG_LIBCORPUS2_ANNOTATIONCHANNEL_I +#define SWIG_LIBCORPUS2_ANNOTATIONCHANNEL_I + /* SWIG interface for the module libcorpusannotationchannel; the generated wrapper code includes <libcorpus2/ann/channel.h>. */ +%module libcorpusannotationchannel +%{ + #include <libcorpus2/ann/channel.h> +%} + +%include "std_string.i" +%include "std_vector.i" +%include "libcorpusiob.i" + /* Name the std::vector instantiations that appear in the signatures declared below. */ +%template(IntVector) std::vector<int>; +%template(IOBVector) std::vector<Corpus2::IOB::Enum>; +%template(AnnotationVector) std::vector<Corpus2::Annotation>; + /* Declarations for SWIG to wrap. NOTE(review): presumably these mirror the header included above -- confirm they stay in sync. */ +namespace Corpus2 { + struct Annotation { + Annotation() : indices(), head_index(-1) {} /* starts with empty indices and head_index == -1 */ + std::vector<int> indices; + int head_index; + bool empty() const; + bool sane() const; + }; + + class AnnotationChannel { + public: + AnnotationChannel(); + explicit AnnotationChannel(int size); + ~AnnotationChannel(); + + /* --------------------------------------------------------------------- */ + + int size() const; + void resize(int size); + + void make_iob_from_segments(); + void make_segments_from_iob(); + + int renumber_segments(); + int get_new_segment_index() const; + + /* --------------------------------------------------------------------- */ + enum 
AnnotationVectorMode { + O_DISJOINT_EXCLUSIVE = 0, + O_CONTINUOUS = 1, + O_INCLUSIVE = 2, + O_CONTINUOUS_INCLUSIVE = 3 + }; + /* AnnotationVectorMode is the mode argument of make_annotation_vector(), which defaults to O_DISJOINT_EXCLUSIVE. */ + std::vector<Annotation> make_annotation_vector( + AnnotationVectorMode mode = O_DISJOINT_EXCLUSIVE) const; + + /* --------------------------------------------------------------------- */ + std::vector<int>& segments(); + const std::vector<int>& segments() const; /* NOTE(review): differs from the overload above only by constness; check which one SWIG actually wraps. */ + + int get_segment_at(int idx) const; + void set_segment_at(int token_idx, int segment_idx); + + /* --------------------------------------------------------------------- */ + const std::vector<IOB::Enum>& iobs() const; + IOB::Enum get_iob_at(int idx); + + /* --------------------------------------------------------------------- */ + void set_iob_at(int idx, IOB::Enum iob); + bool is_head_at(int idx) const; + void set_head_at(int idx, bool v); + + /* --------------------------------------------------------------------- */ + std::string dump_iob() const; + std::string dump_segments() const; + std::string dump_heads() const; + std::string dump_alpha() const; + + /* --------------------------------------------------------------------- */ + void do_counts(int& annotations, int& disjoint, int& unannotated) const; /* NOTE(review): the three int& parameters look like outputs; confirm they are usable from the generated wrapper (may need an OUTPUT typemap). */ + }; +} + +using namespace std; +using namespace Corpus2; + +#endif /* SWIG_LIBCORPUS2_ANNOTATIONCHANNEL_I */ -- GitLab