import json
import os

import numpy as np
from experiment.voicelab.voicelab_dependency import get_record_provider
from scipy.io.wavfile import write as write_wav
from sziszapangma.integration.asr_processor import AsrWebClient

# Directory holding one cached ASR response (``<record_id>.json``) per record.
_CACHE_DIR = '/home/marcinwatroba/PWR_ASR/asr-benchmarks/asr_responses_cache/voicelab_wav2vec2'
# Endpoint and token handed to AsrWebClient.
_ASR_URL = 'http://localhost:5439/process_asr'
_ASR_TOKEN = '__example_token__'


def t():
    """Fill the ASR response cache for every record that has no cached result.

    Iterates over all record ids returned by the record provider. For each id
    whose ``<record_id>.json`` file does not yet exist in the cache directory,
    prints the id, sends the record's audio path to the ASR web client and
    stores the returned response as JSON. Records that already have a cache
    file are skipped, so the function can be re-run after an interruption.
    """
    record_provider = get_record_provider()
    # The client is built from constants only, so one instance is reused for
    # all records instead of being re-created on every iteration.
    client = AsrWebClient(_ASR_URL, _ASR_TOKEN)

    for record_id in record_provider.get_all_records():
        result_path = f'{_CACHE_DIR}/{record_id}.json'
        if os.path.exists(result_path):
            continue

        print(record_id)
        path = record_provider.get_path(record_id)
        # Disabled alternative: export the audio to a temporary WAV first.
        # tmp_file = 'audio.wav'
        # AudioSegment.from_file(path).export(tmp_file, format='wav')
        # client.call_recognise(path)

        # Query the service before opening the output file, so a failed
        # request does not leave an empty cache file that would be mistaken
        # for a finished record on the next run.
        response = client.call_recognise(path)
        # The context manager guarantees the handle is flushed and closed.
        with open(result_path, 'w') as result_file:
            json.dump(response, result_file)


if __name__ == '__main__':
    t()
    # Disabled one-off snippet: dump dataset audio arrays to 16-bit WAV files.
    # dataset = get_dataset()
    # os.mkdir('experiment_data/audio/fleurs_audio')
    # for it in dataset:
    #     it_id = it['audio']['path']
    #     path = f'experiment_data/audio/fleurs_audio/{it_id}'
    #     arr = (it['audio']['array'] * 32768).astype(np.int16)
    #     write_wav(path, it['audio']['sampling_rate'], arr)