From f2a2d53e16504350125610eaede615c39fbc9c68 Mon Sep 17 00:00:00 2001 From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl> Date: Tue, 18 Oct 2011 13:12:54 +0200 Subject: [PATCH] tagger-eval script reports percentage unknown and segchange --- utils/tagger-eval.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/utils/tagger-eval.py b/utils/tagger-eval.py index 508e690..d87921a 100755 --- a/utils/tagger-eval.py +++ b/utils/tagger-eval.py @@ -138,6 +138,9 @@ class Metric: # heur recover PUNCHIT_PUNCONLY = ([Feat.ALLPUNC_HIT], None) PUNCHIT_AROUND = ([Feat.PUNCAROUND_HIT], None) + # percentage of known and unknown tokens + KN = ([Feat.KNOWN], None) + UNK = ([Feat.UNKNOWN], None) # percentage of tokens subjected to seg change SEG_CHANGE = ([Feat.SEG_CHANGE], None) SEG_NOCHANGE = ([Feat.SEG_NOCHANGE], None) @@ -355,6 +358,9 @@ def go(): unk_weak = 0.0 unk_strong_pos = 0.0 + perc_unk = 0.0 + perc_segchange = 0.0 + for fold_idx in range(num_folds): tag_fn = args[fold_idx] # filename of tagged fold @ fold_idx ref_fn = args[fold_idx + num_folds] # ... reference fold @ fold_idx @@ -375,6 +381,8 @@ def go(): strong_pos += res.value_of(Metric.POS_SC) unk_weak += res.value_of(Metric.UNK_WC) unk_strong_pos += res.value_of(Metric.UNK_POS_SC) + perc_unk += res.value_of(Metric.UNK) + perc_segchange += res.value_of(Metric.SEG_CHANGE) print 'AVG weak corr lower bound\t%.4f%%' % (weak_lower_bound / num_folds) print 'AVG weak corr upper bound\t%.4f%%' % (weak_upper_bound / num_folds) @@ -382,6 +390,8 @@ def go(): print 'AVG POS strong corr\t%.4f%%' % (strong_pos / num_folds) print 'AVG UNK weak corr (heur)\t%.4f%%' % (unk_weak / num_folds) print 'AVG UNK POS strong corr\t%.4f%%' % (unk_strong_pos / num_folds) + print 'AVG percentage UNK\t%.4f%%' % (perc_unk / num_folds) + print 'AVG percentage seg change\t%.4f%%' % (perc_segchange / num_folds) if __name__ == '__main__': -- GitLab