Skip to content
Snippets Groups Projects
Commit c096e30f authored by macias
Browse files

add stats.py script to check containment of names in chunk_np

parent cc2ce37c
No related merge requests found
#!/usr/bin/python
# -*- coding: utf-8 -*-
import corpus2
# Path of the corpus file that main() opens with the 'ccl' reader.
# NOTE(review): hard-coded, machine-specific location - adjust before running elsewhere.
files = '/home/macias/kpwr-1.1-nm-chunk-ccl-folds/ccl-test10.xml'
def main(path=None):
    """Print statistics on how name annotations relate to ``chunk_np``.

    The corpus is read sentence by sentence with the ``'ccl'`` reader and
    the ``'nkjp'`` tagset.  For every sentence the ``chunk_np`` channel and
    all channels whose name ends with ``_nam`` are collected and handed to
    ``check_sentence``; its per-sentence counts are summed into three totals
    which are printed at the end, preceded by the path that was read.

    Args:
        path: corpus file to analyse.  Defaults to the module-level
            ``files`` constant, which keeps the original zero-argument
            call ``main()`` working unchanged.
    """
    if path is None:
        path = files

    tagset = corpus2.get_named_tagset('nkjp')
    reader = corpus2.TokenReader.create_path_reader('ccl', tagset, path)

    ok = 0      # names accepted by the chunk_np check
    err = 0     # names rejected, in sentences that have a chunk_np channel
    no_np = 0   # names found in sentences without any chunk_np channel

    sent = reader.get_next_sentence()
    while sent:
        asent = corpus2.AnnotatedSentence.wrap_sentence(sent)
        tab = []
        for chan_name in asent.all_channels():
            if chan_name == 'chunk_np' or chan_name.endswith("_nam"):
                chan = asent.get_channel(chan_name)
                tab.append([chan_name, chan.make_annotation_vector()])
        # Sentences with none of the interesting channels are skipped.
        if tab:
            sent_ok, sent_err, has_np = check_sentence(tab, sent.size())
            ok += sent_ok
            if has_np:
                err += sent_err
            else:
                no_np += sent_err
        sent = reader.get_next_sentence()

    # Single-argument print(...) parses both as a Python 2 statement and as
    # a Python 3 call; "%s %s" reproduces the single space that the old
    # ``print label, value`` statement inserted between its items.
    print(path)
    print("%s %s" % ("Nazwy własne znajdujące się poprawnie w chunk_np: ", ok))
    print("%s %s" % ("Nazwy własne nie zawierające sie w chunk_np: ", err))
    print("%s %s" % ("Brak chunk_np w zdaniu:", no_np))
def check_sentence(tab, size):
    """Count accepted and rejected name annotations for one sentence.

    Args:
        tab: list of ``[channel_name, annotations]`` pairs; the entry named
            ``"chunk_np"`` (if any) is the reference channel, every other
            entry is treated as a name channel.
        size: number of tokens in the sentence.

    Returns:
        ``[ok, err, has_np]``.  ``has_np`` is True when a ``"chunk_np"``
        entry was present; the counts then come from ``isOK``.  Otherwise
        the counts come from ``count_all_nonzero_nam``.
    """
    np_row = []
    name_rows = []
    for channel in tab:
        row = create_row(channel, size)
        if channel[0] == "chunk_np":
            np_row = row
        else:
            name_rows.append(row)

    # create_row always appends the channel name, so the reference row is
    # non-empty exactly when a chunk_np entry was seen above.
    has_np = len(np_row) > 0
    if has_np:
        partial = [isOK(np_row, row) for row in name_rows]
    else:
        partial = [count_all_nonzero_nam(row) for row in name_rows]

    ok = sum(counts[0] for counts in partial)
    err = sum(counts[1] for counts in partial)
    return [ok, err, has_np]
def count_all_nonzero_nam(chunk):
    """Count the distinct non-zero integer ids that occur in ``chunk``.

    Non-integer entries (such as a trailing channel name) and zeros are
    ignored.

    Returns:
        ``[0, count]`` - the first slot (the "ok" counter) is always zero,
        the second is the number of distinct ids found.
    """
    distinct_ids = set(
        tok for tok in chunk if isinstance(tok, int) and tok != 0
    )
    return [0, len(distinct_ids)]
def create_row(ann, size):
    """Flatten one channel into a per-token row of annotation numbers.

    Args:
        ann: ``[channel_name, annotations]``; every annotation must expose
            an ``indices`` iterable of token positions.
        size: number of tokens in the sentence.

    Returns:
        A list of ``size`` integers followed by the channel name.  Position
        ``k`` holds the 1-based number of the annotation covering token
        ``k``, or 0 when no annotation covers it.
    """
    row = [0] * size
    row.append(ann[0])
    for number, annotation in enumerate(ann[1], 1):
        for position in sorted(annotation.indices):
            row[position] = number
    return row
def isOK(comparer, chunks):
    """Judge every name annotation of one channel against the reference row.

    Args:
        comparer: per-token row of the reference channel.
        chunks: per-token row of a name channel, parallel to ``comparer``.

    Returns:
        ``[ok, err]`` - how many distinct non-zero integer ids in
        ``chunks`` were accepted, respectively rejected, by
        ``check_one_channel``.
    """
    # Collect each annotation id once, in order of first appearance.
    name_ids = []
    for tok in chunks:
        if tok == 0:
            continue
        if not have_value(tok, name_ids):
            continue
        if not isinstance(tok, int):
            continue
        name_ids.append(tok)

    verdicts = [check_one_channel(comparer, chunks, nam_id)
                for nam_id in name_ids]
    accepted = sum(1 for verdict in verdicts if verdict)
    return [accepted, len(verdicts) - accepted]
def check_one_channel(comparer, chunks, nam_id):
    """Tell whether name annotation ``nam_id`` lies inside a single chunk.

    Args:
        comparer: per-token row of the reference channel; 0 marks a token
            that no annotation covers.
        chunks: per-token row of the name channel, parallel to ``comparer``.
        nam_id: id of the name annotation to check.

    Returns:
        True only when all tokens carrying ``nam_id`` in ``chunks`` map to
        one and the same non-zero id in ``comparer``.  False when the name
        spans several chunks, touches an uncovered token, or has no tokens
        at all.
    """
    covering = [ref_id for ref_id, tok in zip(comparer, chunks)
                if tok == nam_id]
    if not covering:
        # Nothing to check: an absent name is not contained in anything.
        return False
    first = covering[0]
    if first == 0:
        # Id 0 means "outside every chunk"; previously such names were
        # reported as correctly contained because all ids were equal.
        return False
    return all(ref_id == first for ref_id in covering)
def have_value(toks, tab):
    """Return True when ``toks`` is NOT yet present in ``tab``.

    Despite its name the function answers "is this value new?": it yields
    False as soon as an element equal to ``toks`` is found, and True for an
    empty ``tab`` or one without a match.
    """
    return not any(seen == toks for seen in tab)
# Run the statistics only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment