diff --git a/utils/tagger-eval.py b/utils/tagger-eval.py
index 0e781f79a77a3b9d8723bcf07d6a329ef62b1277..e6209b8228c08ee0d8d9cdeb4d2a2f5c44bdcf50 100755
--- a/utils/tagger-eval.py
+++ b/utils/tagger-eval.py
@@ -21,6 +21,7 @@ changelog = """
 * higher frac precision in output
 * extract measures to functions for averaging
 * averaging over folds
+* separate stats for unknown forms
 """
 
 def text(tok_seq, respect_spaces):
@@ -153,6 +154,13 @@ class TokComp:
         self.ref_toks_amb_weak_punc_hit = 0 # tokens subjected to s.a. that were weakly hit thanks to punc-around
         self.ref_toks_amb_weak_puncplus_hit = 0
 
+        # ref toks presumably unknown (unk tag among ref tok tags)
+        self.ref_toks_unk = 0
+        # ref toks that contribute to WC, amb + noamb
+        self.ref_toks_unk_weak_hit = 0
+        # ref toks that contribute to SC on POS level, amb + noamb
+        self.ref_toks_unk_pos_strong_hit = 0
+
         self.tag_toks = 0 # all tokens in tagger output
         self.tag_toks_amb = 0 # tokens in tagger output subjected to s.a.
 
@@ -161,9 +169,9 @@
         tok_tags = set([self.tagset.tag_to_string(lex.tag()) for lex in tok.lexemes() if lex.is_disamb()])
         return tok_tags == set([self.punc_tag])
 
-    def is_unknown(self, tok):
+    def is_unk(self, tok):
         tok_tags = [self.tagset.tag_to_string(lex.tag()) for lex in tok.lexemes()]
-        return unk_tag in tok_tags
+        return self.unk_tag in tok_tags
 
     def tagstrings_of_token(self, tok):
         """Returns a set of strings, corresponding to disamb tags
@@ -212,17 +220,20 @@
 
         self.tag_toks += len(tag_seq)
         self.ref_toks += len(ref_seq)
-
+        unk_tokens = sum(self.is_unk(ref_tok) for ref_tok in ref_seq)
+        self.ref_toks_unk += unk_tokens
 
         # first variant: no segmentation mess
         if len(tag_seq) == 1 and len(ref_seq) == 1:
             tagval, posval = self.cmp_toks(tag_seq[0], ref_seq[0])
             if tagval > 0:
                 self.ref_toks_noamb_weak_hit += len(ref_seq)
+                self.ref_toks_unk_weak_hit += unk_tokens
             if tagval == 2:
                 self.ref_toks_noamb_strong_hit += len(ref_seq)
             if posval == 2:
                 self.ref_toks_noamb_pos_strong_hit += len(ref_seq)
+                self.ref_toks_unk_pos_strong_hit += unk_tokens
             if self.debug:
                 print '\t\tnormal', tagval, posval
         else:
@@ -238,6 +249,8 @@
                 self.ref_toks_amb_weak_hit += len(ref_seq)
                 self.ref_toks_amb_strong_hit += len(ref_seq)
                 self.ref_toks_amb_pos_strong_hit += len(ref_seq)
+                self.ref_toks_unk_weak_hit += unk_tokens # unlikely that unk_tokens > 0
+                self.ref_toks_unk_pos_strong_hit += unk_tokens # as above
                 if self.debug: print '\t\tpunc hit, ref len', len(ref_seq)
             else:
                 nonpunc_ref = [tok for tok in ref_seq if not self.is_punc(tok)]
@@ -249,6 +262,7 @@
                 if tagval > 0:
                     self.ref_toks_amb_weak_hit += len(ref_seq)
                     self.ref_toks_amb_weak_puncplus_hit += len(ref_seq)
+                    self.ref_toks_unk_weak_hit += unk_tokens
                     if self.debug: print '\t\tpuncPLUS weak hit, ref len', len(ref_seq)
                 if tagval == 2:
                     self.ref_toks_amb_strong_hit += len(ref_seq)
@@ -258,6 +272,7 @@
                 if self.debug: print '\t\tMISS, ref len', len(ref_seq)
             if posval == 2:
                 self.ref_toks_amb_pos_strong_hit += len(ref_seq)
+                self.ref_toks_unk_pos_strong_hit += unk_tokens
 
     def weak_lower_bound(self):
         """Returns weak correctness percentage counting only hits where
@@ -296,10 +311,17 @@
         """Upper bound for SC."""
         upper_strong_hits = self.ref_toks_noamb_strong_hit + self.ref_toks_amb
         return 100.0 * upper_strong_hits / self.ref_toks
+
+    def unk_weak_corr(self):
+        return 100.0 * self.ref_toks_unk_weak_hit / self.ref_toks_unk
+
+    def unk_pos_strong_corr(self):
+        return 100.0 * self.ref_toks_unk_pos_strong_hit / self.ref_toks_unk
 
     def dump(self):
         print '----'
         print 'REF-toks\t%d' % self.ref_toks
+        print 'REF-toks-unk\t%d\t%.4f%%' % (self.ref_toks_unk, 100.0 * self.ref_toks_unk / self.ref_toks)
         print 'TAGGER-toks\t%d' % self.tag_toks
         print 'REF-amb-toks\t%d\t%.4f%%' % (self.ref_toks_amb, 100.0 * self.ref_toks_amb / self.ref_toks)
         print 'TAGGER-amb-toks\t%d\t%.4f%%' % (self.tag_toks_amb, 100.0 * self.tag_toks_amb / self.tag_toks)
@@ -356,6 +378,9 @@ def go():
     parser.add_option('-p', '--punc-tag', type='string', action='store',
         dest='punc_tag', default='interp',
         help='set the tag used for punctuation; default: interp')
+    parser.add_option('-u', '--unk-tag', type='string', action='store',
+        dest='unk_tag', default='ign',
+        help='set the tag used for unknown forms; default: ign')
     parser.add_option('-k', '--keep-optional', action='store_false',
         default=True, dest='expand_optional',
         help='do not expand unspecified optional attributes to multiple tags')
@@ -380,6 +405,9 @@
     weak_upper_bound = 0.0
     weak = 0.0
     strong_pos = 0.0
+
+    unk_weak = 0.0
+    unk_strong_pos = 0.0
 
     for fold_idx in range(num_folds):
         tag_fn = args[fold_idx] # filename of tagged fold @ fold_idx
@@ -389,7 +417,7 @@
         tag_rdr = corpus2.TokenReader.create_path_reader(options.input_format, tagset, tag_fn)
         ref_rdr = corpus2.TokenReader.create_path_reader(options.input_format, tagset, ref_fn)
 
-        res = TokComp(tagset, options.punc_tag,
+        res = TokComp(tagset, options.punc_tag, options.unk_tag,
             options.expand_optional, options.debug_mode)
         for tag_seq, ref_seq in tok_seqs(tag_rdr, ref_rdr, options.respect_spaces, options.verbose, options.debug_mode):
             res.update(tag_seq, ref_seq)
@@ -399,11 +427,15 @@
         weak_upper_bound += res.weak_upper_bound()
         weak += res.weak_corr()
         strong_pos += res.pos_strong_corr()
+        unk_weak += res.unk_weak_corr()
+        unk_strong_pos += res.unk_pos_strong_corr()
 
     print 'AVG weak corr lower bound\t%.4f%%' % (weak_lower_bound / num_folds)
     print 'AVG weak corr upper bound\t%.4f%%' % (weak_upper_bound / num_folds)
     print 'AVG weak corr (heur)\t%.4f%%' % (weak / num_folds)
     print 'AVG POS strong corr\t%.4f%%' % (strong_pos / num_folds)
+    print 'AVG UNK weak corr (heur)\t%.4f%%' % (unk_weak / num_folds)
+    print 'AVG UNK POS strong corr\t%.4f%%' % (unk_strong_pos / num_folds)
 
 if __name__ == '__main__':
     go()
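
Note on the new measures: unk_weak_corr() and unk_pos_strong_corr() divide by
self.ref_toks_unk, so a fold whose reference part contains no unknown forms
(no token carrying the unk tag, 'ign' by default) would raise ZeroDivisionError
inside the per-fold averaging loop. A zero-safe variant could look like the
sketch below; returning 0.0 for such folds is an assumption made here, not
part of the patch:

    def unk_weak_corr(self):
        # Sketch (hypothetical): avoid dividing by zero when the fold
        # contains no unknown forms; 0.0 is an assumed fallback value.
        if self.ref_toks_unk == 0:
            return 0.0
        return 100.0 * self.ref_toks_unk_weak_hit / self.ref_toks_unk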