"""Copy cached ASR responses from ``RESPONSE_DIR`` into ``RESULT_DIR``.

Every entry listed in ``RESPONSE_DIR`` is parsed as JSON and written back,
unchanged, under the same file name in ``RESULT_DIR``.
"""
import json
import os
from dataclasses import asdict
from pprint import pprint

from sziszapangma.integration.service_core.asr.asr_result import AsrResult

RESPONSE_DIR = 'asr_responses_cache/common_voice_wav2vec2'
RESULT_DIR = 'experiment_data/cached_asr/common_voice_wav2vec2'


def _copy_response(file_name: str) -> None:
    """Load one JSON response from ``RESPONSE_DIR`` and dump it to ``RESULT_DIR``.

    Args:
        file_name: Name of the file inside ``RESPONSE_DIR``; the same name is
            used for the output file inside ``RESULT_DIR``.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    # Context managers guarantee both handles are closed (and the output is
    # flushed) even when parsing or serialization raises.
    with open(os.path.join(RESPONSE_DIR, file_name), 'r', encoding='utf-8') as src:
        response = json.load(src)

    # NOTE: an earlier revision (previously kept here as commented-out code)
    # reshaped the response into a dict with the keys 'words_time_alignment'
    # (None), 'transcription' (the 'word' entries of each result's first
    # alternative) and 'full_text' (the first alternatives' transcripts joined
    # with spaces). The response is now stored exactly as it was loaded.
    with open(os.path.join(RESULT_DIR, file_name), 'w', encoding='utf-8') as dst:
        json.dump(response, dst)


if __name__ == '__main__':
    # Create the output directory up front so the first write cannot fail
    # merely because the directory is missing.
    os.makedirs(RESULT_DIR, exist_ok=True)
    for file_name in os.listdir(RESPONSE_DIR):
        print(file_name)
        _copy_response(file_name)