diff --git a/corpus2tools/corpus-get b/corpus2tools/corpus-get index 6e62ae334e252d0305672247b0fccc2d681ba7f4..98a5af0b1fc6d03a7c4f9d94ac84593ff62b0ed9 100755 --- a/corpus2tools/corpus-get +++ b/corpus2tools/corpus-get @@ -52,7 +52,7 @@ def chunks(rdr): yield chunk -def write_selected_sentences(sents, writer, selection): +def write_selected_sentences(sents, writer, selection, maxsel = None): sid = 0 for sent in sents: if sid in selection: @@ -65,6 +65,7 @@ def write_selected_sentences(sents, writer, selection): writer.write_token(tok) tid += 1 sid += 1 + if maxsel is not None and sid > maxsel: break def go(): parser = OptionParser(usage=descr) @@ -110,6 +111,7 @@ def go(): return else: selection.update(izip(parse_range_info(arg), repeat(()))) + maxsel = max(selection.keys()) if selection == {}: if options.chunks: for chunk in chunks(reader): @@ -127,8 +129,9 @@ def go(): else: write_selected_sentences(chunk.sentences(), writer, selection[cid]) cid += 1 + if cid > maxsel: break else: - write_selected_sentences(sentences(reader), writer, selection) + write_selected_sentences(sentences(reader), writer, selection, maxsel) if __name__ == '__main__': go()