from typing import Any, Dict, List

from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider
from sziszapangma.integration.gold_transcript_processor import GoldTranscriptProcessor


class VoicelabGoldTranscriptProcessor(GoldTranscriptProcessor):
    """Build gold transcripts from records served by a Voicelab telco provider.

    For a given record id, the relation manager obtained from the provider
    is searched for the first item whose ``'type'`` is ``'Document'``; the
    items referenced by that document's ``'word_ids'`` become the transcript.
    """

    # Source of per-record relation managers.
    _record_provider: VoicelabTelcoRecordProvider

    def __init__(self, record_provider: VoicelabTelcoRecordProvider):
        """Store the provider used to look up relation managers by record id."""
        self._record_provider = record_provider

    def parse_word(self, word) -> Dict[str, Any]:
        """Convert a word item into a transcript entry.

        Args:
            word: Object indexable by the keys ``'id'`` and ``'text'``.

        Returns:
            A dict with ``'id'`` copied from ``word['id']`` and ``'word'``
            copied from ``word['text']``.

        Raises:
            KeyError: If ``word`` lacks ``'id'`` or ``'text'`` (for dict-like
                inputs).
        """
        return {
            'id': word['id'],
            'word': word['text'],
        }

    def get_gold_transcript(self, record_id: str) -> List[Dict[str, Any]]:
        """Return the gold transcript for ``record_id``.

        Args:
            record_id: Identifier passed to the provider's
                ``get_relation_manager``.

        Returns:
            One entry per id in the document's ``'word_ids'``, in that order,
            each produced by :meth:`parse_word`.

        Raises:
            IndexError: If the relation manager holds no item whose
                ``'type'`` is ``'Document'``.
        """
        relation_manager = self._record_provider.get_relation_manager(record_id)

        # Take the first Document item; stop scanning as soon as it is found.
        document = next(
            (
                item
                for item in relation_manager.get_all_items()
                if item['type'] == 'Document'
            ),
            None,
        )
        if document is None:
            # IndexError is kept (rather than a new exception type) because
            # that is what the previous ``[...][0]`` lookup raised.
            raise IndexError(
                f"No item of type 'Document' found for record {record_id!r}"
            )

        return [
            self.parse_word(relation_manager.get_item_by_id(word_id))
            for word_id in document['word_ids']
        ]