From 2605b8cce84eba5880ce7da688487447ab15c1ff Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Tue, 12 Apr 2011 17:17:10 +0200
Subject: [PATCH] extend AnnotationChannel interface: add dump_alpha and
 get_new_segment_index

---
 libcorpus2/ann/channel.cpp | 35 +++++++++++++++++++++++++++++++++++
 libcorpus2/ann/channel.h   | 16 ++++++++++++++--
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/libcorpus2/ann/channel.cpp b/libcorpus2/ann/channel.cpp
index 394dbb0..0138795 100644
--- a/libcorpus2/ann/channel.cpp
+++ b/libcorpus2/ann/channel.cpp
@@ -3,6 +3,7 @@
 #include <algorithm>
 #include <boost/bind.hpp>
 #include <sstream>
+#include <iostream>
 #include <set>
 
 namespace Corpus2 {
@@ -76,6 +77,19 @@ int AnnotationChannel::renumber_segments()
 	}
 	return next;
 }
+int AnnotationChannel::get_new_segment_index() const
+{
+	// Recomputed on every call (not cached). Flag each index in
+	// [0, segments_.size()] that occurs in the channel; others are ignored.
+	std::vector<bool> taken(segments_.size() + 1, false);
+	foreach (size_t seg, segments_) {
+		if (seg < taken.size()) taken[seg] = true;
+	}
+	// Start at 1: slot 0 is never handed out (0 appears to mean "no segment").
+	size_t candidate = 1;
+	while (candidate < taken.size() && taken[candidate]) ++candidate;
+	return static_cast<int>(candidate);
+}
 
 int AnnotationChannel::get_segment_at(int idx) const
 {
@@ -188,6 +202,27 @@ std::string AnnotationChannel::dump_heads() const
 	return ss.str();
 }
 
+std::string AnnotationChannel::dump_alpha() const
+{
+	// One output character per position: '_' = no segment, '#' = head flag
+	// without a segment, 'a'..'z' = segments 1..26, upper case when the
+	// position is a head. Indices outside 1..26 have no letter, so they are
+	// shown as '?' instead of spilling into punctuation or the other case.
+	std::stringstream ss;
+	for (int i = 0; i < size(); ++i) {
+		const int seg = segments_[i];
+		const bool head = heads_[i];
+		if (seg == 0) {
+			ss << (head ? '#' : '_');
+		} else if (seg < 0 || seg > 26) {
+			ss << '?';
+		} else {
+			ss << static_cast<char>((head ? 'A' : 'a') + (seg - 1));
+		}
+	}
+	return ss.str();
+}
+
 void AnnotationChannel::do_counts(int& annotations, int& disjoint, int& unannotated) const
 {
 	std::set<int> used_sids;
diff --git a/libcorpus2/ann/channel.h b/libcorpus2/ann/channel.h
index 2c3a76f..d4b02bc 100644
--- a/libcorpus2/ann/channel.h
+++ b/libcorpus2/ann/channel.h
@@ -74,6 +74,11 @@ public:
 	 */
 	int renumber_segments();
 
+	/**
+	 * Return the lowest segment index >= 1 that no position currently uses.
+	 */
+	int get_new_segment_index() const;
+
 	enum AnnotationVectorMode
 	{
 		O_DISJOINT_EXCLUSIVE = 0,
@@ -148,15 +153,22 @@ public:
 	std::string dump_iob() const;
 
 	/**
-	 * Compose a string consisting of all segment indices in order.
+	 * Compose a string consisting of all segment indices in order, e.g. "01102"
 	 */
 	std::string dump_segments() const;
 
 	/**
-	 * Compose a string consisting of all head flags in order
+	 * Compose a string consisting of all head flags in order, e.g. " H  H"
 	 */
 	std::string dump_heads() const;
 
+	/**
+	 * Compose a string with one character per position: '_' for no segment
+	 * ('#' if the head flag is set anyway), otherwise a letter for the segment
+	 * index (1 -> 'a', 2 -> 'b', ...), capitalised for heads, e.g. "_Aa_B"
+	 */
+	std::string dump_alpha() const;
+
 	void do_counts(int& annotations, int& disjoint, int& unannotated) const;
 
 private:
-- 
GitLab