diff --git a/cclutils/_base.py b/cclutils/_base.py index f26bf889566714834ec224561c39c28f20e5b7ae..48bd5e8423706219d58ce628ae859ea2d9481757 100644 --- a/cclutils/_base.py +++ b/cclutils/_base.py @@ -162,13 +162,14 @@ def get_tagset(tagset): def read_chunks_it(filepath, tagset='nkjp'): -""" Returns a iterable sentence generator. +""" Returns an iterable chunk generator. Args: filepath: a path to CCL file tagset: the name of the tagset that is used in the document or a tagset object itself. - Returns: a iterable sentence generator. + Returns: + an iterable chunk generator. """ tagset = get_tagset(tagset) reader = corpus2.TokenReader_create_path_reader('ccl', tagset, filepath) @@ -176,10 +177,9 @@ def read_chunks_it(filepath, tagset='nkjp'): while True: chunk = reader.get_next_chunk() - if chunk: - yield chunk - else: + if not chunk: break + yield chunk del reader @@ -200,11 +200,10 @@ def read_sentences_it(filepath, tagset='nkjp'): while True: sentence = reader.get_next_sentence() - - if sentence: - yield sentence - else: + + if not sentence: break + yield sentence del reader