# Skip to content
# Snippets Groups Projects
# voicelab_gold_transcript_processor.py 995 B
from typing import Any, Dict, List

from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider
from sziszapangma.integration.gold_transcript_processor import GoldTranscriptProcessor


class VoicelabGoldTranscriptProcessor(GoldTranscriptProcessor):
    """Gold transcript processor backed by a Voicelab telco record provider.

    The transcript of a record is built from the word items referenced by the
    record's ``'Document'`` item, as exposed by the provider's relation manager.
    """

    # Source of the per-record relation managers.
    _record_provider: VoicelabTelcoRecordProvider

    def __init__(self, record_provider: VoicelabTelcoRecordProvider):
        """Store the provider used to look up relation managers by record id."""
        self._record_provider = record_provider

    def parse_word(self, word: Dict[str, Any]) -> Dict[str, Any]:
        """Convert a word item into a gold transcript entry.

        Args:
            word: Mapping holding at least the ``'id'`` and ``'text'`` keys.

        Returns:
            A dict with the word's ``'id'`` and its text stored under ``'word'``.

        Raises:
            KeyError: If ``'id'`` or ``'text'`` is missing from ``word``.
        """
        return {
            'id': word['id'],
            'word': word['text']
        }

    def get_gold_transcript(self, record_id: str) -> List[Dict[str, Any]]:
        """Return the gold transcript words of the given record.

        Args:
            record_id: Identifier passed to the record provider.

        Returns:
            One ``{'id': ..., 'word': ...}`` dict per id listed in the
            document's ``'word_ids'``, in that order.

        Raises:
            IndexError: If the record has no item of type ``'Document'``.
        """
        relation_manager = self._record_provider.get_relation_manager(record_id)
        # Only the first 'Document' item is used, so stop scanning once found.
        document = next(
            (item for item in relation_manager.get_all_items()
             if item['type'] == 'Document'),
            None,
        )
        if document is None:
            # Same exception type as the former ``[...][0]`` lookup, but with
            # a message that names the failing record.
            raise IndexError(
                f"No item of type 'Document' found for record {record_id!r}"
            )
        return [
            self.parse_word(relation_manager.get_item_by_id(word_id))
            for word_id in document['word_ids']
        ]