From e69b53e6d450809babf08c639fa0fdeb0c760a7e Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Fri, 18 Feb 2011 13:30:41 +0100
Subject: [PATCH] update AnnotationChannel interface, fix make_iob_from_segments bug, upgrade make_annotation_vector, annotation heads

---
Review notes (ignored by git-am, not part of the commit message):
 * make_annotation_vector() now bounds-checks heads_ before reading it: the
   non-const segments() accessor added below lets callers resize segments_
   independently of heads_, so heads_[i] could be read out of range.
 * Fixed the "accesor" typo in the newly added header comment (the identical
   pre-existing context line is left untouched so the hunk still matches).
 * Documented the new heads_ member like its siblings.
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch; tab indentation is assumed -- confirm against the tree
   before applying. The "index" blob ids predate these amendments.

 libcorpus2/ann/channel.cpp | 64 ++++++++++++++++++++++++++++++++------
 libcorpus2/ann/channel.h   | 32 ++++++++++++++++++-
 2 files changed, 85 insertions(+), 11 deletions(-)

diff --git a/libcorpus2/ann/channel.cpp b/libcorpus2/ann/channel.cpp
index 5ff16f9..cb5fa31 100644
--- a/libcorpus2/ann/channel.cpp
+++ b/libcorpus2/ann/channel.cpp
@@ -1,16 +1,18 @@
 #include <libcorpus2/ann/channel.h>
+#include <libpwrutils/foreach.h>
 #include <algorithm>
 #include <boost/bind.hpp>
+#include <sstream>
 
 namespace Corpus2 {
 
 AnnotationChannel::AnnotationChannel()
-	: segments_(), iobs_()
+	: segments_(), iobs_(), heads_()
 {
 }
 
 AnnotationChannel::AnnotationChannel(int size)
-	: segments_(size), iobs_(size)
+	: segments_(size), iobs_(size), heads_(size)
 {
 }
 
@@ -20,7 +22,7 @@ void AnnotationChannel::make_iob_from_segments()
 	for (size_t i = 0; i < segments_.size(); ++i) {
 		if (segments_[i] == 0) {
 			iobs_[i] = IOB::O;
-		} else if (segments_[i] != prev_seg) {
+		} else if (segments_[i] == prev_seg) {
 			iobs_[i] = IOB::I;
 		} else {
 			iobs_[i] = IOB::B;
@@ -67,6 +69,15 @@ int AnnotationChannel::renumber_segments()
 	return next;
 }
 
+int AnnotationChannel::get_segment_at(int idx) const
+{
+	if (idx >= 0 && idx < static_cast<int>(segments_.size())) {
+		return segments_[idx];
+	} else {
+		return 0;
+	}
+}
+
 IOB::Enum AnnotationChannel::get_iob_at(int idx)
 {
 	if (idx >= 0 && idx < static_cast<int>(iobs_.size())) {
@@ -83,24 +94,57 @@ void AnnotationChannel::set_iob_at(int idx, IOB::Enum iob)
 	}
 }
 
+bool AnnotationChannel::is_head_at(int idx) const
+{
+	if (idx >= 0 && idx < static_cast<int>(heads_.size())) {
+		return heads_[idx];
+	} else {
+		return false;
+	}
+}
+
+void AnnotationChannel::set_head_at(int idx, bool v)
+{
+	if (idx >= 0 && idx < static_cast<int>(heads_.size())) {
+		heads_[idx] = v;
+	}
+}
+
 std::vector<Annotation> AnnotationChannel::make_annotation_vector() const
 {
 	std::vector<Annotation> rv;
 	int smax = 0;
 	for (size_t i = 0; i < segments_.size(); ++i) {
 		int s = segments_[i];
-		if (s > smax) {
-			rv.resize(smax = s);
+		if (s > 0) {
+			if (s > smax) {
+				rv.resize(smax = s);
+			}
+			rv[s - 1].indices.push_back(i);
+			// heads_ may be shorter than segments_ (segments() is mutable)
+			if (i < heads_.size() && heads_[i]) {
+				rv[s - 1].head_index = i;
+			}
 		}
-		rv[s].indices.push_back(i);
-		if (rv[s].head_index == -1) {
-			rv[s].head_index = i;
+	}
+	rv.erase(std::remove_if(rv.begin(), rv.end(),
+		boost::bind(&Annotation::empty, _1)), rv.end());
+	foreach (Annotation& a, rv) {
+		if (a.head_index == -1) {
+			a.head_index = a.indices[0];
 		}
 	}
 	std::sort(rv.begin(), rv.end(), AnnotationHeadCompare());
-	rv.erase(std::remove_if(rv.begin(), rv.end(),
-		boost::bind(&Annotation::empty, _1)));
 	return rv;
 }
 
+std::string AnnotationChannel::dump_iob() const
+{
+	std::stringstream ss;
+	foreach (Corpus2::IOB::Enum e, iobs()) {
+		ss << Corpus2::IOB::to_string(e);
+	}
+	return ss.str();
+}
+
 } /* end ns Corpus2 */
diff --git a/libcorpus2/ann/channel.h b/libcorpus2/ann/channel.h
index 0637638..cea905e 100644
--- a/libcorpus2/ann/channel.h
+++ b/libcorpus2/ann/channel.h
@@ -84,10 +84,22 @@ public:
 	/**
 	 * The segment-index array accesor
 	 */
+	std::vector<int>& segments() {
+		return segments_;
+	}
+
+	/**
+	 * The segment-index array accessor, const
+	 */
 	const std::vector<int>& segments() const {
 		return segments_;
 	}
 
+	/**
+	 * Segment index getter, 0 (no segment) if idx is out of range.
+	 */
+	int get_segment_at(int idx) const;
+
 	/**
 	 * The IOB data vector
 	 */
@@ -96,7 +108,7 @@ public:
 	}
 
 	/**
-	 * IOB getter, returns IOB::O if idx is out of range
+	 * IOB getter, returns IOB::O if idx is out of range.
 	 */
 	IOB::Enum get_iob_at(int idx);
 
@@ -105,12 +117,30 @@ public:
 	 */
 	void set_iob_at(int idx, IOB::Enum iob);
 
+	/**
+	 * Head flag getter, false if out of range.
+	 */
+	bool is_head_at(int idx) const;
+
+	/**
+	 * Head flag setter, out of range indices are not processed.
+	 */
+	void set_head_at(int idx, bool v);
+
+	/**
+	 * Compose a string consisting of all IOB markers in order.
+	 */
+	std::string dump_iob() const;
+
 private:
 	/// segment indices
 	std::vector<int> segments_;
 
 	/// IOB data
 	std::vector<IOB::Enum> iobs_;
+
+	/// head flags
+	std::vector<bool> heads_;
 };
 
 } /* end ns Corpus2 */
-- 
GitLab