Skip to content
Snippets Groups Projects
Select Git revision
  • 4d5a8bb0d47ee35f678e9ce491305f42e46d2ee7
  • main default protected
  • change_data_model
  • feature/add_auth_asr_service
  • fix/incorrect_import
  • feature/change_registry_clarin
  • feature/add_base_asr_service
  • feature/add_poetry
  • feature/add_word_ids
  • feature/add_sziszapangma
10 results

main.py

Blame
  • user avatar
    Marcin Wątroba authored
    4d5a8bb0
    History
    main.py 976 B
    from pydub import AudioSegment
    
    from sziszapangma.integration.service_core.asr.asr_base_processor import AsrBaseProcessor
    from sziszapangma.integration.service_core.asr.asr_result import AsrResult, WordTimeAlignment
    from huggingsound import SpeechRecognitionModel
    
    
    class SpeechbrainAsrProcessor(AsrBaseProcessor):
        """ASR processor that transcribes audio through the module-level ``model``.

        Despite the class name, this file creates ``model`` as a huggingsound
        ``SpeechRecognitionModel`` inside its ``__main__`` guard, before the
        processor is started.
        """

        def process_asr(self, audio_file_path: str) -> AsrResult:
            """Transcribe one audio file and wrap the output in an ``AsrResult``.

            Args:
                audio_file_path: Path of the audio file passed to
                    ``model.transcribe``.

            Returns:
                An ``AsrResult`` with the whitespace-separated words, the full
                transcription text, and one ``WordTimeAlignment`` for every
                start/end timestamp pair found in the model output.

            Raises:
                NameError: If the module-level ``model`` has not been created,
                    e.g. when this module is imported instead of run as a
                    script.
            """
            # A single-element list is sent, so only the first result is used.
            asr_result = model.transcribe([audio_file_path])[0]
            transcription = asr_result['transcription']
            return AsrResult(
                # split() with no separator discards empty tokens: an empty
                # transcription gives [] rather than [''], and repeated spaces
                # do not produce empty-string "words".
                words=transcription.split(),
                full_text=transcription,
                # Start and end timestamps are paired positionally.
                # NOTE(review): it is not visible here whether the model
                # reports timestamps per word or per decoded character --
                # confirm that the alignment count matches what consumers of
                # ``words_time_alignment`` expect.
                words_time_alignment=[
                    WordTimeAlignment(start, end)
                    for start, end in zip(
                        asr_result['start_timestamps'],
                        asr_result['end_timestamps'],
                    )
                ],
            )
    
    
    if __name__ == '__main__':
        # Script entry point. The model is bound to the module-level name
        # ``model`` because SpeechbrainAsrProcessor.process_asr reads it as a
        # global; it has to exist before the processor starts.
        model_id = "jonatasgrosman/wav2vec2-xls-r-1b-polish"
        model = SpeechRecognitionModel(model_id)

        processor = SpeechbrainAsrProcessor()
        processor.start_processor()