Skip to content
Snippets Groups Projects
Commit f5c56e89 authored by Adam Radziszewski
Browse files

fix iobber chunker: write sent boundaries to train files as expected by crf_learn

parent 5cbaa67d
No related merge requests found
......@@ -138,6 +138,7 @@ class Chunker:
class_label = corpus2.to_string(chan.get_iob_at(tok_idx))
# generate training example and store to file
classify.write_example(tr_file, feat_vals, class_label)
classify.write_end_of_sent(tr_file)
self.stats.num_sents += 1
self.stats.num_toks += sent.tokens().size()
......
......@@ -38,6 +38,10 @@ def write_example(tr_file, feat_vals, class_label):
tr_file.write(class_label)
tr_file.write('\n')
def write_end_of_sent(tr_file):
    """Write the end-of-sentence marker to the training file.

    The marker is a single newline character. Because every training
    example written before it already ends with a newline, this leaves
    an empty line after the last token of the sentence, which is the
    sentence boundary the commit message says crf_learn expects.

    tr_file -- writable text file object (anything with a write method)
    that receives the training examples.
    """
    tr_file.write('\n')
def train_and_save(conf, model_name, config_dir, data_dir, chan_name):
"""Trains a CRF classifier for the given chan_name. The trained model
is saved to filenames (generated using model_name and conf)."""
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment