diff --git a/iobber/chunker.py b/iobber/chunker.py index ee5b0303ca5e4d48e713b86e5dd0a609a4cc239b..14c053e0f46c401a51c6b84d94ca162ff22dd487 100644 --- a/iobber/chunker.py +++ b/iobber/chunker.py @@ -214,11 +214,7 @@ class Chunker: sys.stderr.write('done!\n') self.stats.dump() - def tag_sentence(self, sent): - """Chunks the given sentence.""" - # wrap the sentence as an AnnotatedSentence - asent = corpus2.AnnotatedSentence.wrap_sentence(sent) - + def _tag_sentence(self, sent, asent): # iterate over layers for layer_idx, layer in enumerate(self.layers): # get model for current layer @@ -283,6 +279,26 @@ class Chunker: if self.verbose: self.stats.maybe_report() + def tag_sentence(self, sent): + """Chunks the given sentence.""" + # wrap the sentence as an AnnotatedSentence + asent = corpus2.AnnotatedSentence.wrap_sentence(sent) + self._tag_sentence(sent, asent) + + def tag_sentence_sane(self, sent): + """ + A sane version of tag_sentence that doesn't require the sentence to be + preprocessed in any way to actually get something sensible out of it. + The approach is gleaned from iobber_txt. + Ideally this would replace tag_sentence, but first someone has to make + sense of this absurd class. + """ + + asent = corpus2.AnnotatedSentence.wrap_sentence(sent) + new_sent = corpus2.AnnotatedSentence.cast_as_sentence(asent) + self._tag_sentence(new_sent, asent) + return new_sent + def tag_input(self, in_path, out_path, input_format, output_format, preserve_pars): """Chunks the input and writes processed input to out_path or stdout if