Skip to content
Snippets Groups Projects
Commit 2605b8cc authored by ilor's avatar ilor
Browse files

extend AnnotationChannel interface: add dump_alpha and get_new_segment_index

parent 903e88c7
No related branches found
No related tags found
No related merge requests found
......@@ -3,6 +3,7 @@
#include <algorithm>
#include <boost/bind.hpp>
#include <sstream>
#include <iostream>
#include <set>
namespace Corpus2 {
......@@ -76,6 +77,19 @@ int AnnotationChannel::renumber_segments()
}
return next;
}
/**
 * Find the lowest segment index, counting from 1, that does not occur
 * anywhere in segments_. Index 0 is never returned.
 *
 * With N entries in segments_, the result always lies in [1, N + 1]:
 * N entries cannot occupy all of the N + 1 candidate indices, so stored
 * indices greater than N can be ignored safely.
 *
 * The lookup table is rebuilt on every call (the original author left an
 * open question about caching the result).
 */
int AnnotationChannel::get_new_segment_index() const
{
	// taken[k] is true when index k occurs in segments_; slot 0 is present
	// in the table but never considered as a candidate.
	std::vector<bool> taken(segments_.size() + 1);
	for (size_t pos = 0; pos < segments_.size(); ++pos) {
		const size_t sid = segments_[pos];
		if (sid < taken.size()) {
			taken[sid] = true;
		}
	}
	// First free slot at or after 1; end() maps to taken.size(), which is
	// the "every index 1..N is in use" answer.
	return static_cast<int>(
		std::find(taken.begin() + 1, taken.end(), false) - taken.begin());
}
int AnnotationChannel::get_segment_at(int idx) const
{
......@@ -188,6 +202,27 @@ std::string AnnotationChannel::dump_heads() const
return ss.str();
}
/**
 * Render the channel as one character per position, combining segment
 * index and head flag:
 *  - no segment (index 0): '_', or '#' when the head flag is set,
 *  - segment k: the k-th lowercase letter ('a' for 1), or the k-th
 *    uppercase letter when the head flag is set.
 *
 * The character is computed as letter - 1 + index and truncated to an
 * unsigned char, so indices above 26 yield characters outside the
 * alphabet.
 */
std::string AnnotationChannel::dump_alpha() const
{
	std::stringstream out;
	for (int pos = 0; pos < size(); ++pos) {
		const bool is_head = heads_[pos];
		if (segments_[pos] != 0) {
			// Heads are written in upper case, other members in lower case.
			const char first_letter = is_head ? 'A' : 'a';
			out << static_cast<unsigned char>(first_letter - 1 + segments_[pos]);
		} else {
			out << (is_head ? '#' : '_');
		}
	}
	return out.str();
}
void AnnotationChannel::do_counts(int& annotations, int& disjoint, int& unannotated) const
{
std::set<int> used_sids;
......
......@@ -74,6 +74,11 @@ public:
*/
int renumber_segments();
/**
 * Figure out an index for a new segment.
 *
 * @return the lowest index, counting from 1, that is not currently
 *         present among this channel's segment indices (0 is never
 *         returned)
 */
int get_new_segment_index() const;
enum AnnotationVectorMode
{
O_DISJOINT_EXCLUSIVE = 0,
......@@ -148,15 +153,22 @@ public:
std::string dump_iob() const;
/**
 * Compose a string consisting of all segment indices in order, e.g. "01102"
 */
std::string dump_segments() const;
/**
 * Compose a string consisting of all head flags in order, e.g. " H H"
 */
std::string dump_heads() const;
/**
 * Compose a string consisting of segment/head info in alphabetic format,
 * one character per position: the letter encodes the segment index
 * ('a' for segment 1), capital letters denote the head flag, and an
 * underscore indicates no segment, so e.g. "_Aa_B". A position with the
 * head flag set but no segment is written as '#'. Segment indices above
 * 26 produce characters outside the alphabet.
 */
std::string dump_alpha() const;
void do_counts(int& annotations, int& disjoint, int& unannotated) const;
private:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment