Skip to content
Snippets Groups Projects
Commit 40e19bc4 authored by jezozwierzak
Browse files

Merge branch 'tag_heads'

parents 5072f7c2 e2ebf2b5
No related merge requests found
......@@ -164,6 +164,8 @@ class Chunker:
else:
non_O_chan = chan_name
non_O_tag = there_iob
if chan.is_head_at(tok_idx):
non_O_chan += '-H'
# B-NP, I-VP etc. or O
class_label = 'O' if non_O_chan is None else '%s-%s' % (non_O_tag, non_O_chan)
# generate training example and store to file
......@@ -219,13 +221,22 @@ class Chunker:
decsn = classify.classify_token(model, tok_idx)
non_O_chan = None
non_O_tag = 'O'
is_head = None
if decsn != 'O':
non_O_tag, non_O_chan = decsn.split('-')
decsn_array = decsn.split('-')
if len(decsn_array) == 2:
non_O_tag, non_O_chan = decsn_array
elif len(decsn_array) == 3:
non_O_tag, non_O_chan, is_head = decsn_array
else:
raise IOError('Unexpected label returned from classifier: ' + decsn)
for chan_name in chans:
chan = asent.get_channel(chan_name)
# TODO: rename the from_string in corpus2 and fix it here
tag_to_set = 'O' if chan_name != non_O_chan else non_O_tag
chan.set_iob_at(tok_idx, corpus2.from_string(tag_to_set))
if tag_to_set != 'O' and is_head:
chan.set_head_at(tok_idx, True)
# switch back to segments
for chan_name in chans:
chan = asent.get_channel(chan_name)
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment