From 946184be83ce3a2d2d17fec08b5e927c54d8e3b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Kali=C5=84ski?= <168023@student.pwr.wroc.pl>
Date: Tue, 18 Nov 2014 14:50:07 +0100
Subject: [PATCH] New, sane tag_sentence

Added a new method that can be used as-is to tag a sentence and return the
result, without any AnnotatedSentence magic on the caller's side
---
 iobber/chunker.py | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/iobber/chunker.py b/iobber/chunker.py
index ee5b030..14c053e 100644
--- a/iobber/chunker.py
+++ b/iobber/chunker.py
@@ -214,11 +214,7 @@ class Chunker:
 				sys.stderr.write('done!\n')
 				self.stats.dump()
 
-	def tag_sentence(self, sent):
-		"""Chunks the given sentence."""
-		# wrap the sentence as an AnnotatedSentence
-		asent = corpus2.AnnotatedSentence.wrap_sentence(sent)
-		
+	def _tag_sentence(self, sent, asent):
 		# iterate over layers
 		for layer_idx, layer in enumerate(self.layers):
 			# get model for current layer
@@ -283,6 +279,26 @@ class Chunker:
 		if self.verbose:
 			self.stats.maybe_report()
 
+	def tag_sentence(self, sent):
+		"""Chunks the given sentence."""
+		# wrap the sentence as an AnnotatedSentence
+		asent = corpus2.AnnotatedSentence.wrap_sentence(sent)
+		self._tag_sentence(sent, asent)
+
+	def tag_sentence_sane(self, sent):
+		"""
+		A sane version of tag_sentence, that doesn't require the sentence to be
+		preprocessed in any way to actually get something sensible out of it.
+		The way is gleaned from iobber_txt
+		Ideally this would replace tag_sentence, but first someone has to make
+		sense of this absurd class
+		"""
+
+		asent = corpus2.AnnotatedSentence.wrap_sentence(sent)
+		new_sent = corpus2.AnnotatedSentence.cast_as_sentence(asent)
+		self._tag_sentence(new_sent, asent)
+		return new_sent
+
 	def tag_input(self, in_path, out_path, input_format, output_format,
 			preserve_pars):
 		"""Chunks the input and writes processed input to out_path or stdout if
-- 
GitLab