From a0fa6b8a34d87396c1acb40e55c1c9f22388c9f2 Mon Sep 17 00:00:00 2001
From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl>
Date: Thu, 20 Oct 2011 12:16:30 +0200
Subject: [PATCH] tagger-eval: report segmentation differences when run in debug mode

---
 utils/tagger-eval.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/utils/tagger-eval.py b/utils/tagger-eval.py
index a69395b..f12f401 100755
--- a/utils/tagger-eval.py
+++ b/utils/tagger-eval.py
@@ -27,7 +27,7 @@ changelog = """
 * separate stats for unknown forms
 """
 
-def text(tok_seq, respect_spaces):
+def text(tok_seq, respect_spaces, mark_boundaries = False):
 	"""Extracts text from a sequence of tokens. If respect_spaces, will append
 	spaces between tokens where no no-space markers present."""
 	buff = StringIO()
@@ -35,10 +35,14 @@ def text(tok_seq, respect_spaces):
 	for tok in tok_seq:
 		if nonfirst and respect_spaces and tok.after_space():
 			buff.write(' ')
+		if mark_boundaries:
+			buff.write('[')
 		buff.write(tok.orth_utf8().decode('utf-8'))
+		if mark_boundaries:
+			buff.write(']')
 		nonfirst = True
 	return buff.getvalue()
-		
+
 def next_tok(rdr):
 	while True:
 		tok = rdr.get_next_token()
@@ -253,6 +257,8 @@ class TokComp:
 			pre_feat_sets[0].add(Feat.SEG_NOCHANGE)
 			pre_feat_sets[0].update(self.cmp_toks(tag_seq[0], ref_seq[0]))
 		else:
+			if self.debug:
+				print 'SEGCHANGE\t%s\t%s' % (text(tag_seq, True, True), text(ref_seq, True, True))
 			# mark all as subjected to segmentation changes
 			for feats in pre_feat_sets: feats.add(Feat.SEG_CHANGE)
 			# check if all ref and tagged toks are punctuation
-- 
GitLab