Migrated wccl-rules from py2 to py3

af5d164e · Jarema Radom · 17d9424d · af5d164e
Commit af5d164e authored 3 years ago by Jarema Radom
--- a/src/doc/wccl-rules.py
+++ b/src/doc/wccl-rules.py
 #!/usr/bin/python
-# -*- coding: utf-8 -*-
 import sys
 from optparse import OptionParser

@@ -69,12 +68,12 @@ def process_sent(asent, wccl_file, shall_print_ann_info):
 		match_rules = wccl_file.get_match_rules_ptr()
 		match_rules.apply_all(asent)
 	if shall_print_ann_info:
-		print '####', ' '.join(tok.orth_utf8() for tok in asent.tokens())
-		print asent.annotation_info()
+		print('####', ' '.join(tok.orth_utf8() for tok in asent.tokens()))
+		print(asent.annotation_info())
 		for chan_name in asent.all_channels():
 			chan = asent.get_channel(chan_name)
 			# get the internal representation -- annotation id vector
-			print chan_name, '\t', chan.dump_segments()
+			print(chan_name, '\t', chan.dump_segments())
 			# if you want to get it as an int vector, use chan.segments()
 			# or to get the one at idx -- chan.get_segment_at(idx)
 			# 0 means no annot there, positive values==ann indices
@@ -84,7 +83,7 @@ def process_sent(asent, wccl_file, shall_print_ann_info):
 			for ann in ann_vec:
 				idx_text = ','.join(str(idx) for idx in ann.indices)
 				orth_text = ' '.join(asent.tokens()[idx].orth_utf8() for idx in ann.indices)
-				print '\t%s\t%s\t(%s)' % (chan_name, idx_text, orth_text)
+				print('\t%s\t%s\t(%s)' % (chan_name, idx_text, orth_text))


 def go():
@@ -147,32 +146,25 @@ def go():
 	writer = get_writer(options.out_path, tagset, options.output_format)
 	
 	# processing paragraph-by-paragraph
-	if options.preserve_chunks:
-		while True:
-			chunk = reader.get_next_chunk()
-			if not chunk:
-				break # end of input
-			# process each sentence separately
-			for sent in chunk.sentences():
-				# wrap the sentence as an AnnotatedSentence
-				asent = corpus2.AnnotatedSentence.wrap_sentence(sent)
-				process_sent(sent, wccl_file, options.ann_info)
-			# save processed chunk
-			# NOTE: if the input sent was not AnnotatedSentence, the changes
-			# will be discarded
-			writer.write_chunk(chunk)
-	else:
-		while True:
-			sent = reader.get_next_sentence()
-			if not sent:
-				break # end of input
+
+	while True:
+		chunk = reader.get_next_chunk()
+		if not chunk:
+			break # end of input
+		# process each sentence separately
+		for sent in chunk.sentences():
 			# wrap the sentence as an AnnotatedSentence
 			asent = corpus2.AnnotatedSentence.wrap_sentence(sent)
 			process_sent(asent, wccl_file, options.ann_info)
-			# save processed sentence (safe)
-			# NOTE: if the input sent was not AnnotatedSentence, the changes
-			# will be discarded
-			writer.write_sentence(sent)
+		# save processed chunk
+		# NOTE: if the input sent was not AnnotatedSentence, the changes
+		# will be discarded
+		if writer:
+			if options.preserve_chunks:
+				writer.write_chunk(chunk)
+			else:
+				writer.write_sentence(sent)
+	writer.finish()

 if __name__ == '__main__':
 	go()