diff --git a/iobber/chunker.py b/iobber/chunker.py index 616029bfb546801718b919dad50004b0928ab2d4..16ed3437d37209634a326148d0ee6c1a2a75018e 100644 --- a/iobber/chunker.py +++ b/iobber/chunker.py @@ -138,6 +138,7 @@ class Chunker: class_label = corpus2.to_string(chan.get_iob_at(tok_idx)) # generate training example and store to file classify.write_example(tr_file, feat_vals, class_label) + classify.write_end_of_sent(tr_file) self.stats.num_sents += 1 self.stats.num_toks += sent.tokens().size() diff --git a/iobber/classify.py b/iobber/classify.py index f70a7762b8c28e275bf54b925d195367439fdb59..bce6a1da74dcef9fc8f074e6fc8efbcd604cb560 100644 --- a/iobber/classify.py +++ b/iobber/classify.py @@ -38,6 +38,10 @@ def write_example(tr_file, feat_vals, class_label): tr_file.write(class_label) tr_file.write('\n') +def write_end_of_sent(tr_file): + """Writes end-of-sentence marker to the training file.""" + tr_file.write('\n') + def train_and_save(conf, model_name, config_dir, data_dir, chan_name): """Trains a CRF classifier for the given chan_name. The trained model is saved to filenames (generated using model_name and conf)."""