Skip to content
Snippets Groups Projects
Commit 2605b8cc authored by ilor's avatar ilor
Browse files

extend AnnotationChannel interface: add dump_alpha and get_new_segment_index

parent 903e88c7
No related merge requests found
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <algorithm> #include <algorithm>
#include <boost/bind.hpp> #include <boost/bind.hpp>
#include <sstream> #include <sstream>
#include <iostream>
#include <set> #include <set>
namespace Corpus2 { namespace Corpus2 {
...@@ -76,6 +77,19 @@ int AnnotationChannel::renumber_segments() ...@@ -76,6 +77,19 @@ int AnnotationChannel::renumber_segments()
} }
return next; return next;
} }
int AnnotationChannel::get_new_segment_index() const
{
	// Returns the lowest index >= 1 that does not occur in segments_.
	// With n entries at most n distinct indices can be taken, so tracking
	// the candidates 0..n is enough; larger values can never block the
	// answer and are ignored.
	// NOTE: recomputed from scratch on every call; could be cached.
	const size_t slot_count = segments_.size() + 1;
	std::vector<bool> taken(slot_count, false);
	for (size_t pos = 0; pos < segments_.size(); ++pos) {
		const size_t sid = segments_[pos];
		if (sid < slot_count) {
			taken[sid] = true;
		}
	}

	// Index 0 is never handed out: the search starts at 1.
	const int limit = static_cast<int>(slot_count);
	int candidate = 1;
	for (; candidate < limit; ++candidate) {
		if (!taken[candidate]) {
			break;
		}
	}
	return candidate;
}
int AnnotationChannel::get_segment_at(int idx) const int AnnotationChannel::get_segment_at(int idx) const
{ {
...@@ -188,6 +202,27 @@ std::string AnnotationChannel::dump_heads() const ...@@ -188,6 +202,27 @@ std::string AnnotationChannel::dump_heads() const
return ss.str(); return ss.str();
} }
std::string AnnotationChannel::dump_alpha() const
{
	// Emits exactly one character per position:
	//   '_'  no segment (index 0), head flag not set
	//   '#'  no segment (index 0), head flag set
	//   letter offset by the segment index (1 -> 'a', 2 -> 'b', ...),
	//   upper case when the head flag is set.
	// NOTE(review): indices above 26 run past 'z' / 'Z' and yield
	// non-letter characters.
	std::stringstream out;
	const int count = size();
	for (int pos = 0; pos < count; ++pos) {
		if (segments_[pos] == 0) {
			out << (heads_[pos] ? '#' : '_');
			continue;
		}
		const char base = heads_[pos] ? 'A' : 'a';
		out << static_cast<unsigned char>(base - 1 + segments_[pos]);
	}
	return out.str();
}
void AnnotationChannel::do_counts(int& annotations, int& disjoint, int& unannotated) const void AnnotationChannel::do_counts(int& annotations, int& disjoint, int& unannotated) const
{ {
std::set<int> used_sids; std::set<int> used_sids;
......
...@@ -74,6 +74,11 @@ public: ...@@ -74,6 +74,11 @@ public:
*/ */
int renumber_segments(); int renumber_segments();
/**
 * Figure out an index for a new segment.
 *
 * The channel itself is not modified.
 *
 * @return the smallest index greater than zero that is not currently
 *         used as a segment index in this channel
 */
int get_new_segment_index() const;
enum AnnotationVectorMode enum AnnotationVectorMode
{ {
O_DISJOINT_EXCLUSIVE = 0, O_DISJOINT_EXCLUSIVE = 0,
...@@ -148,15 +153,22 @@ public: ...@@ -148,15 +153,22 @@ public:
std::string dump_iob() const; std::string dump_iob() const;
/** /**
* Compose a string consisting of all segment indices in order. * Compose a string consisting of all segment indices in order, e.g. "01102"
*/ */
std::string dump_segments() const; std::string dump_segments() const;
/** /**
* Compose a string consisting of all head flags in order * Compose a string consisting of all head flags in order, e.g. " H H"
*/ */
std::string dump_heads() const; std::string dump_heads() const;
/**
 * Compose a string consisting of segment/head info in alphabetic format,
 * one character per position: segment index 1 is written as 'a', 2 as 'b'
 * and so on, capital letters denote the head flag, underscore indicates
 * no segment and '#' marks a head flag set where there is no segment,
 * so e.g. "_Aa_B"
 */
std::string dump_alpha() const;
void do_counts(int& annotations, int& disjoint, int& unannotated) const; void do_counts(int& annotations, int& disjoint, int& unannotated) const;
private: private:
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment