Select Git revision
main.py 976 B
from pydub import AudioSegment
from sziszapangma.integration.service_core.asr.asr_base_processor import AsrBaseProcessor
from sziszapangma.integration.service_core.asr.asr_result import AsrResult, WordTimeAlignment
from huggingsound import SpeechRecognitionModel
class SpeechbrainAsrProcessor(AsrBaseProcessor):
    """ASR processor that transcribes audio with the module-level ``model``.

    Despite the class name, the recognizer used here is the huggingsound
    ``SpeechRecognitionModel`` bound to the module global ``model``.
    """

    @staticmethod
    def _word_spans(text: str) -> list:
        """Return ``(start, end)`` character index pairs for each word.

        Words are maximal runs of non-whitespace characters; ``end`` is
        exclusive, so ``text[start:end]`` is the word itself.
        """
        spans = []
        word_start = None
        for index, char in enumerate(text):
            if char.isspace():
                if word_start is not None:
                    spans.append((word_start, index))
                    word_start = None
            elif word_start is None:
                word_start = index
        if word_start is not None:
            # The text ended in the middle of a word.
            spans.append((word_start, len(text)))
        return spans

    def process_asr(self, audio_file_path: str) -> AsrResult:
        """Transcribe one audio file and wrap the output in an ``AsrResult``.

        Args:
            audio_file_path: Path of the audio file handed to
                ``model.transcribe``.

        Returns:
            An ``AsrResult`` holding the whitespace-separated words, the full
            transcription text and one ``WordTimeAlignment`` per word.

        Raises:
            NameError: If the module global ``model`` has not been created;
                in the visible file it is only assigned under the
                ``__main__`` guard.
        """
        transcription = model.transcribe([audio_file_path])[0]
        text = transcription['transcription']
        starts = transcription['start_timestamps']
        ends = transcription['end_timestamps']

        # Derive words and alignments from the same scan so both lists always
        # have matching entries, and empty "words" (from an empty
        # transcription or repeated spaces) are never emitted.
        spans = self._word_spans(text)
        words = [text[start:end] for start, end in spans]

        if len(starts) == len(text) and len(ends) == len(text):
            # huggingsound documents its timestamps as one entry per character
            # of the transcription; a word therefore runs from the start of
            # its first character to the end of its last one.
            # NOTE(review): values are passed through unchanged (huggingsound
            # reports milliseconds) - confirm the unit WordTimeAlignment expects.
            alignment = [
                WordTimeAlignment(starts[start], ends[end - 1])
                for start, end in spans
            ]
        else:
            # The timestamps do not line up with the characters, so keep one
            # alignment per reported timestamp pair, as before.
            alignment = [
                WordTimeAlignment(start_time, end_time)
                for start_time, end_time in zip(starts, ends)
            ]

        return AsrResult(
            words=words,
            full_text=text,
            words_time_alignment=alignment,
        )
if __name__ == '__main__':
    # `model` is deliberately a module-level name: process_asr looks it up as
    # a global, so it has to exist before the processor starts handling work.
    model = SpeechRecognitionModel(
        "jonatasgrosman/wav2vec2-xls-r-1b-polish"
    )

    # Hand control to the base-class processing entry point.
    SpeechbrainAsrProcessor().start_processor()