Skip to content
Snippets Groups Projects
Commit 50043d34 authored by ilor
Browse files

corpus-get upgrade: stop reading corpus after reading the last to-be-returned chunk/sentence

parent 80696f5a
No related merge requests found
...@@ -52,7 +52,7 @@ def chunks(rdr): ...@@ -52,7 +52,7 @@ def chunks(rdr):
yield chunk yield chunk
def write_selected_sentences(sents, writer, selection): def write_selected_sentences(sents, writer, selection, maxsel = None):
sid = 0 sid = 0
for sent in sents: for sent in sents:
if sid in selection: if sid in selection:
...@@ -65,6 +65,7 @@ def write_selected_sentences(sents, writer, selection): ...@@ -65,6 +65,7 @@ def write_selected_sentences(sents, writer, selection):
writer.write_token(tok) writer.write_token(tok)
tid += 1 tid += 1
sid += 1 sid += 1
if maxsel is not None and sid > maxsel: break
def go(): def go():
parser = OptionParser(usage=descr) parser = OptionParser(usage=descr)
...@@ -110,6 +111,7 @@ def go(): ...@@ -110,6 +111,7 @@ def go():
return return
else: else:
selection.update(izip(parse_range_info(arg), repeat(()))) selection.update(izip(parse_range_info(arg), repeat(())))
maxsel = max(selection.keys())
if selection == {}: if selection == {}:
if options.chunks: if options.chunks:
for chunk in chunks(reader): for chunk in chunks(reader):
...@@ -127,8 +129,9 @@ def go(): ...@@ -127,8 +129,9 @@ def go():
else: else:
write_selected_sentences(chunk.sentences(), writer, selection[cid]) write_selected_sentences(chunk.sentences(), writer, selection[cid])
cid += 1 cid += 1
if cid > maxsel: break
else: else:
write_selected_sentences(sentences(reader), writer, selection) write_selected_sentences(sentences(reader), writer, selection, maxsel)
if __name__ == '__main__': if __name__ == '__main__':
go() go()
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment