diff --git a/libcorpus2/ann/channel.cpp b/libcorpus2/ann/channel.cpp index 34e75491fd27aa77cdc3a294ad5df50590e181c6..581c9dde1537d36dab1d6102305177c47f6128cc 100644 --- a/libcorpus2/ann/channel.cpp +++ b/libcorpus2/ann/channel.cpp @@ -152,6 +152,7 @@ std::vector<Annotation> AnnotationChannel::make_annotation_vector( rv.resize(smax = s); } rv[s - 1].indices.push_back(i); + rv[s - 1].seg_number = s; if (heads_[i]) { rv[s - 1].head_index = i; } diff --git a/libcorpus2/ann/channel.h b/libcorpus2/ann/channel.h index 204ee2c9c4a7fad62f63dc22ae126666f5d7a36d..d0ba7218a190c10ee069a6e5c76ca516d5fb9a8b 100644 --- a/libcorpus2/ann/channel.h +++ b/libcorpus2/ann/channel.h @@ -13,12 +13,16 @@ namespace Corpus2 { /** * A general indexed annotation spanning a possibly disjoint group of * tokens, with a distinguished 'head' token. + * The annotation contains information about token indices (array subscripts + * referring to the original sentence), the index of a token marked as head, + * as well as the original segment number (segment index). */ struct Annotation { - Annotation() : indices(), head_index(-1) {} + Annotation() : indices(), head_index(-1), seg_number(0) {} std::vector<int> indices; int head_index; + int seg_number; bool empty() const { return indices.empty(); } diff --git a/swig/annotationchannel.i b/swig/annotationchannel.i index 1d9c8da92b2db76899fc4fde8e07e49c6e2bb8e2..8ffeb1db171648af87c95b367710cde1be92c870 100644 --- a/swig/annotationchannel.i +++ b/swig/annotationchannel.i @@ -18,9 +18,10 @@ namespace Corpus2 { struct Annotation { - Annotation() : indices(), head_index(-1) {} + Annotation() : indices(), head_index(-1), seg_number(0) {} std::vector<int> indices; int head_index; + int seg_number; bool empty() const; bool sane() const; }; diff --git a/tests/ann_basic.cpp b/tests/ann_basic.cpp index 54a1e0ddbbf0e74b0b62ce177377a044ffa0df1d..5a4204b7baf6fb38e0b4bf19992e61cea697fb25 100644 --- a/tests/ann_basic.cpp +++ b/tests/ann_basic.cpp @@ -119,6 +119,10 @@ BOOST_AUTO_TEST_CASE( make_ann ) BOOST_CHECK_EQUAL(anns[2].indices[1], 4); BOOST_CHECK_EQUAL(anns[3].indices[0], 5); BOOST_CHECK_EQUAL(anns[2].head_index, 3); + BOOST_CHECK_EQUAL(anns[0].seg_number, 1); + BOOST_CHECK_EQUAL(anns[1].seg_number, 2); + BOOST_CHECK_EQUAL(anns[2].seg_number, 3); + BOOST_CHECK_EQUAL(anns[3].seg_number, 4); }