# whisper_processor.py

from typing import Dict, Any

import whisper
from whisper import Whisper

from sziszapangma.model.model_creators import create_new_word


class WhisperAsrProcessor:
    """Speech-recognition processor backed by a Whisper model.

    The model is loaded once in the constructor and reused by every
    ``call_recognise`` invocation.
    """

    # Whisper model instance used for all transcriptions.
    _whisper: Whisper

    def __init__(self, model_name: str):
        """Load the Whisper model and print the device it reports.

        Args:
            model_name: Forwarded unchanged to ``whisper.load_model``.
        """
        model = whisper.load_model(model_name, in_memory=True)
        self._whisper = model
        # Show which device the freshly loaded model reports.
        print(model.device)

    def call_recognise(self, file_path: str) -> Dict[str, Any]:
        """Transcribe the audio at ``file_path`` and package the result.

        Args:
            file_path: Forwarded unchanged to the model's ``transcribe``.

        Returns:
            A dictionary with the keys:

            * ``transcription`` -- one ``create_new_word`` result per
              whitespace-separated token of the recognised text,
            * ``full_text`` -- the recognised text as returned by the model,
            * ``words_time_alignment`` -- always ``None``,
            * ``language`` -- the ``language`` entry of the model output,
            * ``segments`` -- the ``segments`` entry of the model output.
        """
        recognition = self._whisper.transcribe(file_path)
        full_text = recognition['text']
        # Tokenise on whitespace and wrap each token in a word object.
        words = list(map(create_new_word, full_text.split()))
        return {
            "transcription": words,
            "full_text": full_text,
            "words_time_alignment": None,
            "language": recognition['language'],
            "segments": recognition['segments'],
        }