From 8897ad73ba1dd3f8d168c6ec6e6db509e821f13f Mon Sep 17 00:00:00 2001
From: jezozwierzak <jezozwierzak@gmail.com>
Date: Mon, 8 Oct 2012 10:33:47 +0200
Subject: [PATCH] Added training and chunking heads

---
 iobber/chunker.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/iobber/chunker.py b/iobber/chunker.py
index d872499..e049369 100644
--- a/iobber/chunker.py
+++ b/iobber/chunker.py
@@ -163,6 +163,8 @@ class Chunker:
 							else:
 								non_O_chan = chan_name
 								non_O_tag = there_iob
+							if chan.is_head_at(tok_idx):
+								non_O_chan += '-H'
 					# B-NP, I-VP etc. or O
 					class_label = 'O' if non_O_chan is None else '%s-%s' % (non_O_tag, non_O_chan)
 					# generate training example and store to file
@@ -218,13 +220,20 @@ class Chunker:
 					decsn = classify.classify_token(model, tok_idx)
 					non_O_chan = None
 					non_O_tag = 'O'
+					is_head = None
 					if decsn != 'O':
-						non_O_tag, non_O_chan = decsn.split('-')
+						decsn_array = decsn.split('-')
+						if len(decsn_array) == 2:
+							non_O_tag, non_O_chan = decsn_array
+						elif len(decsn_array) == 3:
+							non_O_tag, non_O_chan, is_head = decsn_array
 					for chan_name in chans:
 						chan = asent.get_channel(chan_name)
 						# TODO: rename the from_string in corpus2 and fix it here
 						tag_to_set = 'O' if chan_name != non_O_chan else non_O_tag
 						chan.set_iob_at(tok_idx, corpus2.from_string(tag_to_set))
+						if tag_to_set != 'O' and is_head:
+							chan.set_head_at(tok_idx, True)
 				# switch back to segments
 				for chan_name in chans:
 					chan = asent.get_channel(chan_name)
-- 
GitLab