Skip to content
Snippets Groups Projects
Commit 50043d34 authored by ilor
Browse files

corpus-get upgrade: stop reading corpus after reading the last to-be-returned chunk/sentence

parent 80696f5a
Branches
No related tags found
No related merge requests found
......@@ -52,7 +52,7 @@ def chunks(rdr):
yield chunk
def write_selected_sentences(sents, writer, selection):
def write_selected_sentences(sents, writer, selection, maxsel = None):
sid = 0
for sent in sents:
if sid in selection:
......@@ -65,6 +65,7 @@ def write_selected_sentences(sents, writer, selection):
writer.write_token(tok)
tid += 1
sid += 1
if maxsel is not None and sid > maxsel: break
def go():
parser = OptionParser(usage=descr)
......@@ -110,6 +111,7 @@ def go():
return
else:
selection.update(izip(parse_range_info(arg), repeat(())))
maxsel = max(selection.keys())
if selection == {}:
if options.chunks:
for chunk in chunks(reader):
......@@ -127,8 +129,9 @@ def go():
else:
write_selected_sentences(chunk.sentences(), writer, selection[cid])
cid += 1
if cid > maxsel: break
else:
write_selected_sentences(sentences(reader), writer, selection)
write_selected_sentences(sentences(reader), writer, selection, maxsel)
if __name__ == '__main__':
go()
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment