from typing import Any, Dict, List

from experiment.luna.luna_record_provider import LunaRecordProvider
from sziszapangma.integration.gold_transcript_processor import GoldTranscriptProcessor
from sziszapangma.model.relation_manager import RelationManager


class LunaGoldTranscriptProcessor(GoldTranscriptProcessor):
    """Gold transcript processor backed by a ``LunaRecordProvider``.

    For a given record the provider supplies a ``RelationManager``; the
    transcript is the ordered list of words referenced by the record's
    ``Document`` item, each annotated with its part-of-speech value.
    """

    _record_provider: LunaRecordProvider

    def __init__(self, record_provider: LunaRecordProvider):
        """Store the provider used to look up per-record relation managers."""
        self._record_provider = record_provider

    def parse_word(
        self,
        word: Dict[str, Any],
        relation_manager: RelationManager,
    ) -> Dict[str, Any]:
        """Convert one word item into a transcript entry.

        Args:
            word: Word item; its ``'id'`` and ``'text'`` keys are read.
            relation_manager: Source of the word's relations and of the
                related part-of-speech item.

        Returns:
            A dict with keys ``'id'``, ``'word'`` and ``'pos'``.

        Raises:
            IndexError: If the word has no relation whose ``'second_type'``
                is ``'pos'``.
        """
        relations = relation_manager.get_all_relations_for_item(word['id'])
        pos_ids = [
            relation['second_id']
            for relation in relations
            if relation['second_type'] == 'pos'
        ]
        if not pos_ids:
            # Same exception type as indexing an empty list, but with a
            # message that identifies the offending word.
            raise IndexError(f"word {word['id']!r} has no 'pos' relation")
        # Only the first 'pos' relation is used, even if several exist.
        pos_item = relation_manager.get_item_by_id(pos_ids[0])
        return {
            'id': word['id'],
            'word': word['text'],
            'pos': pos_item['value'],
        }

    def get_gold_transcript(self, record_id: str) -> List[Dict[str, Any]]:
        """Return the gold transcript for ``record_id``.

        Args:
            record_id: Identifier passed to the record provider to obtain
                the record's relation manager.

        Returns:
            One entry per id in the document's ``'word_ids'``, in that
            order, each produced by :meth:`parse_word`.

        Raises:
            IndexError: If the record has no item of type ``'Document'``,
                or if one of its words has no ``'pos'`` relation.
        """
        relation_manager = self._record_provider.get_relation_manager(record_id)
        documents = [
            item
            for item in relation_manager.get_all_items()
            if item['type'] == 'Document'
        ]
        if not documents:
            raise IndexError(f"record {record_id!r} has no 'Document' item")
        # Only the first 'Document' item is used, even if several exist.
        document = documents[0]
        document_words = [
            relation_manager.get_item_by_id(word_id)
            for word_id in document['word_ids']
        ]
        return [self.parse_word(word, relation_manager) for word in document_words]