An error occurred while loading the file. Please try again.
-
Marcin Wątroba authored8d234117
voicelab_gold_transcript_processor.py 995 B
from typing import Any, Dict, List

from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider
from sziszapangma.integration.gold_transcript_processor import GoldTranscriptProcessor
class VoicelabGoldTranscriptProcessor(GoldTranscriptProcessor):
    """Build gold transcripts from Voicelab relation data.

    The transcript of a record is read from the relation manager that the
    injected record provider returns for that record id.
    """

    # Source of the per-record relation managers.
    _record_provider: VoicelabTelcoRecordProvider

    def __init__(self, record_provider: VoicelabTelcoRecordProvider):
        """Store the provider used to look up relation managers by record id."""
        self._record_provider = record_provider

    def parse_word(self, word: Dict[str, Any]) -> Dict[str, Any]:
        """Convert a relation word item into a gold-transcript entry.

        Args:
            word: Mapping holding at least the ``'id'`` and ``'text'`` keys.

        Returns:
            A dict with the item's ``'id'`` and its ``'text'`` stored under
            the ``'word'`` key.

        Raises:
            KeyError: If ``'id'`` or ``'text'`` is missing from ``word``.
        """
        return {
            'id': word['id'],
            'word': word['text'],
        }

    def get_gold_transcript(self, record_id: str) -> List[Dict[str, Any]]:
        """Return the gold transcript of ``record_id`` as a list of words.

        The first item of type ``'Document'`` found in the record's relation
        manager is used; each id in its ``'word_ids'`` is resolved through the
        relation manager and converted with :meth:`parse_word`, preserving the
        order of ``'word_ids'``.

        Args:
            record_id: Identifier passed to the record provider.

        Returns:
            One ``{'id': ..., 'word': ...}`` dict per word of the document.

        Raises:
            IndexError: If the record contains no ``'Document'`` item.
        """
        relation_manager = self._record_provider.get_relation_manager(record_id)

        # Stop at the first match rather than collecting every Document item.
        document = next(
            (
                item
                for item in relation_manager.get_all_items()
                if item['type'] == 'Document'
            ),
            None,
        )
        if document is None:
            # Same exception type as the former ``[...][0]`` lookup, but with
            # a message that names the offending record.
            raise IndexError(
                f"no 'Document' item found for record {record_id!r}"
            )

        return [
            self.parse_word(relation_manager.get_item_by_id(word_id))
            for word_id in document['word_ids']
        ]