From e69b53e6d450809babf08c639fa0fdeb0c760a7e Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Fri, 18 Feb 2011 13:30:41 +0100
Subject: [PATCH] update AnnotationChannel interface, fix
 make_iob_from_segments bug, upgrade make_annotation_vector, annotation heads

---
 libcorpus2/ann/channel.cpp | 63 ++++++++++++++++++++++++++++++++------
 libcorpus2/ann/channel.h   | 31 ++++++++++++++++++-
 2 files changed, 83 insertions(+), 11 deletions(-)

diff --git a/libcorpus2/ann/channel.cpp b/libcorpus2/ann/channel.cpp
index 5ff16f9..cb5fa31 100644
--- a/libcorpus2/ann/channel.cpp
+++ b/libcorpus2/ann/channel.cpp
@@ -1,16 +1,18 @@
 #include <libcorpus2/ann/channel.h>
+#include <libpwrutils/foreach.h>
 #include <algorithm>
 #include <boost/bind.hpp>
+#include <sstream>
 
 namespace Corpus2 {
 
 AnnotationChannel::AnnotationChannel()
-	: segments_(), iobs_()
+	: segments_(), iobs_(), heads_() // heads_ starts empty, like the other two arrays
 {
 }
 
 AnnotationChannel::AnnotationChannel(int size)
-	: segments_(size), iobs_(size)
+	: segments_(size), iobs_(size), heads_(size) // one head flag per slot (all false), same length as segments_/iobs_
 {
 }
 
@@ -20,7 +22,7 @@ void AnnotationChannel::make_iob_from_segments()
 	for (size_t i = 0; i < segments_.size(); ++i) {
 		if (segments_[i] == 0) {
 			iobs_[i] = IOB::O;
-		} else if (segments_[i] != prev_seg) {
+		} else if (segments_[i] == prev_seg) {
 			iobs_[i] = IOB::I;
 		} else {
 			iobs_[i] = IOB::B;
@@ -67,6 +69,15 @@ int AnnotationChannel::renumber_segments()
 	return next;
 }
 
+/// Segment index stored at idx, or 0 when idx falls outside segments_.
+int AnnotationChannel::get_segment_at(int idx) const
+{
+	if (idx < 0 || idx >= static_cast<int>(segments_.size())) {
+		return 0;
+	}
+	return segments_[idx];
+}
+
 IOB::Enum AnnotationChannel::get_iob_at(int idx)
 {
 	if (idx >= 0 && idx < static_cast<int>(iobs_.size())) {
@@ -83,24 +94,56 @@ void AnnotationChannel::set_iob_at(int idx, IOB::Enum iob)
 	}
 }
 
+/// Head flag stored at idx, or false when idx falls outside heads_.
+bool AnnotationChannel::is_head_at(int idx) const
+{
+	if (idx < 0 || idx >= static_cast<int>(heads_.size())) {
+		return false;
+	}
+	return heads_[idx];
+}
+
+/// Stores v as the head flag at idx; an idx outside heads_ is ignored.
+void AnnotationChannel::set_head_at(int idx, bool v)
+{
+	if (idx < 0 || idx >= static_cast<int>(heads_.size())) return;
+	heads_[idx] = v;
+}
+
 std::vector<Annotation> AnnotationChannel::make_annotation_vector() const
 {
 	std::vector<Annotation> rv;
 	int smax = 0;
 	for (size_t i = 0; i < segments_.size(); ++i) {
 		int s = segments_[i];
-		if (s > smax) {
-			rv.resize(smax = s);
+		if (s > 0) { // non-positive indices (0 = no segment) contribute nothing
+			if (s > smax) {
+				rv.resize(smax = s); // segment s is collected in slot s - 1
+			}
+			rv[s - 1].indices.push_back(i);
+			if (is_head_at(static_cast<int>(i))) { // checked read: heads_ may be shorter than segments_
+				rv[s - 1].head_index = i;
+			}
 		}
-		rv[s].indices.push_back(i);
-		if (rv[s].head_index == -1) {
-			rv[s].head_index = i;
+	}
+	rv.erase(std::remove_if(rv.begin(), rv.end(), // drop slots of segment numbers that never occurred
+		boost::bind(&Annotation::empty, _1)), rv.end());
+	foreach (Annotation& a, rv) {
+		if (a.head_index == -1) { // -1 presumably means "no head set": fall back to the first token
+			a.head_index = a.indices[0];
 		}
 	}
 	std::sort(rv.begin(), rv.end(), AnnotationHeadCompare());
-	rv.erase(std::remove_if(rv.begin(), rv.end(),
-		boost::bind(&Annotation::empty, _1)));
 	return rv;
 }
 
+std::string AnnotationChannel::dump_iob() const
+{
+	std::ostringstream out; // markers are appended back to back, no separator
+	foreach (Corpus2::IOB::Enum marker, iobs()) {
+		out << Corpus2::IOB::to_string(marker);
+	}
+	return out.str();
+}
+
 } /* end ns Corpus2 */
diff --git a/libcorpus2/ann/channel.h b/libcorpus2/ann/channel.h
index 0637638..cea905e 100644
--- a/libcorpus2/ann/channel.h
+++ b/libcorpus2/ann/channel.h
@@ -84,10 +84,22 @@ public:
 	/**
 	 * The segment-index array accesor
 	 */
+	std::vector<int>& segments() { // NOTE(review): resizing through this reference leaves iobs_ and heads_ at their old length
+		return segments_;
+	}
+
+	/**
+	 * The segment-index array accessor, const
+	 */
 	const std::vector<int>& segments() const {
 		return segments_;
 	}
 
+	/**
+	 * Segment index getter, 0 (no segment) if idx is out of range.
+	 */
+	int get_segment_at(int idx) const;
+
 	/**
 	 * The IOB data vector
 	 */
@@ -96,7 +108,7 @@ public:
 	}
 
 	/**
-	 * IOB getter, returns IOB::O if idx is out of range
+	 * IOB getter, returns IOB::O if idx is out of range.
 	 */
 	IOB::Enum get_iob_at(int idx);
 
@@ -105,12 +117,29 @@ public:
 	 */
 	void set_iob_at(int idx, IOB::Enum iob);
 
+	/**
+	 * Head flag getter, false if out of range.
+	 */
+	bool is_head_at(int idx) const;
+
+	/**
+	 * Head flag setter, out of range indices are not processed.
+	 */
+	void set_head_at(int idx, bool v);
+
+	/**
+	 * Compose a string consisting of all IOB markers in order.
+	 */
+	std::string dump_iob() const;
+
 private:
 	/// segment indices
 	std::vector<int> segments_;
 
 	/// IOB data
 	std::vector<IOB::Enum> iobs_;
+
+	std::vector<bool> heads_; ///< head flags, read and written by is_head_at() / set_head_at()
 };
 
 } /* end ns Corpus2 */
-- 
GitLab