#! /usr/bin/python
# -*- coding: utf-8 -*-

from importer.Phrase import phrase_from_tree
from syntax.models import SyntacticFunction, Control, NaturalLanguageDescription
import syntax.models

from entries.phrase_descriptions.descriptions import phrase_description2


class DoubleControlError(Exception):
    """Raised by Position.store when a position has two non-predicative controls."""


class DoublePredControlError(Exception):
    """Raised by Position.store when a position has two predicative controls."""


class Function:
    """Holds the value of a position's syntactic function as read from the tree."""

    def __init__(self, value):
        self._value = value

    @classmethod
    def fromTree(cls, tree):
        """Build a Function from the ``value`` attribute of the tree's first child."""
        value = tree._children[0]._attrs['value']
        return cls(value)


# TODO: two kinds of control
# NOTE: this class shadows the ``Control`` name imported from syntax.models;
# the database model is therefore always referenced as syntax.models.Control.
class Control:
    """Holds a single control value (stored in ``_function``) of a position."""

    def __init__(self, function):
        self._function = function


class Position:
    """A syntactic position: optional function, optional controls and phrases.

    Attributes set here:
        _id: sorted tuple built from the phrase identifiers (see fromTree).
        _function: a Function or None.
        _control: a list of Control objects or None.
        _phrases: list of phrase objects.
        _phrase_ids: dict mapping phrase identifier -> phrase.
        _db_id: database id of the stored position; None until store() runs.
    """

    def __init__(self, position_id, function, controls, phrases, phrase_ids):
        self._id = position_id
        self._function = function
        self._control = controls
        self._phrases = phrases
        self._phrase_ids = phrase_ids
        self._db_id = None

    @classmethod
    def fromTree(cls, tree):
        """Build a Position from a tree whose children are named features.

        Children are dispatched on their ``name`` attribute: ``function``,
        ``control`` and ``phrases``; any other child is ignored. Every parsed
        phrase gets its ``_position`` attribute set to the new Position.
        """
        function = None
        controls = None
        phrases = []
        phrase_ids = {}
        position_id = []
        for subtree in tree._children:
            name = subtree._attrs['name']
            if name == 'function':
                function = Function.fromTree(subtree)
            elif name == 'control':
                controls = [Control(c._attrs['value'])
                            for c in subtree._children[0]._children]
            elif name == 'phrases':
                for phrase_tree in subtree._children[0]._children:
                    phrase = phrase_from_tree(phrase_tree)
                    phrases.append(phrase)
                    phrase_id = phrase.getId()
                    if phrase_id is not None:
                        phrase_ids[phrase_id] = phrase
                        # Numeric key: the text before the first '-', then its
                        # last '.'-separated component, converted to int.
                        position_id.append(
                            int(phrase_id.split('-')[0].split('.')[-1]))
                    else:
                        # TODO So far, only schema positions were stored in
                        # database and all phrases had an id from xml:id
                        # attribute. Now we add modification positions where
                        # phrases have no id, so the text_rep is used. Is this
                        # enough? Perhaps text_rep could also be used for
                        # schema position phrases?
                        phr_id = str(phrase)
                        phrase_ids[phr_id] = phrase
                        position_id.append(phr_id)
        position_id.sort()
        position_id = tuple(position_id)

        result = cls(position_id, function, controls, phrases, phrase_ids)
        for phrase in phrases:
            phrase._position = result
        return result

    def store(self, schema, stored_positions, schema_positions, negativity):
        """Store this position in the database and attach it to ``schema``.

        Args:
            schema: object whose ``positions`` manager supports ``add`` and
                ``create``.
            stored_positions: mapping from extended position id to the
                database id of an already stored position; updated when a new
                position is created and ``self._id`` is not None.
            schema_positions: set of extended ids already used within the
                current schema; the id chosen here is added to it.
            negativity: value used as the fourth label component and for the
                natural language description lookup/creation.

        Raises:
            DoubleControlError: more than one non-predicative control.
            DoublePredControlError: more than one predicative control.
        """
        # label = [function, control, pred_control, negativity]
        label = [None, None, None, negativity]
        if self._function is not None:
            function = SyntacticFunction.objects.get(name=self._function._value)
            label[0] = self._function._value
        else:
            function = None

        control = None
        pred_control = None
        if self._control is not None:
            for c in self._control:
                if c._function[:4] == 'pred':
                    if pred_control is not None:
                        raise DoublePredControlError(
                            'more than one predicative control: %s' % c._function)
                    pred_control = syntax.models.PredicativeControl.objects.get(
                        name=c._function)
                    label[2] = c._function
                else:
                    if control is not None:
                        raise DoubleControlError(
                            'more than one control: %s' % c._function)
                    control = syntax.models.Control.objects.get(name=c._function)
                    label[1] = c._function

        # Pick the first index that makes (label, id, index) unique within
        # the positions already registered for this schema.
        i = 1
        extended_id = (tuple(label), self._id, i)
        while extended_id in schema_positions:
            i += 1
            extended_id = (tuple(label), self._id, i)
        schema_positions.add(extended_id)

        # self._id is None for parts of lex atr
        # TODO (KK) ^^^
        if self._id is not None and extended_id in stored_positions:
            # Reuse the position that is already in the database.
            position = syntax.models.Position.objects.get(
                id=stored_positions[extended_id])
            self._db_id = position.id
            schema.positions.add(position)
        else:
            position = schema.positions.create(function=function,
                                               control=control,
                                               pred_control=pred_control,
                                               phrases_count=len(self._phrases))
            self._db_id = position.id

            # NOTE(review): self._schema is not assigned anywhere in this
            # class; presumably the owning schema sets it after parsing -
            # confirm against the caller.
            controller = None
            if control and control.name == 'controllee':
                # no controllee and pred_controllee at the same time
                assert(not pred_control or pred_control.name == 'pred_controller')
                controller = self._schema.getController('controllee')
            if pred_control and pred_control.name == 'pred_controllee':
                # no controllee and pred_controllee at the same time
                assert(not control or control.name == 'controller')
                controller = self._schema.getController('pred_controllee')

            for phrase in self._phrases:
                phrase.store(position, stored_positions)
                phrase_text = str(phrase)
                desc_count = NaturalLanguageDescription.objects.filter(
                    negativity=negativity,
                    function=position.function,
                    control=position.control,
                    pred_control=position.pred_control,
                    phrase_str=phrase_text).count()
                if desc_count == 0:
                    for lang in ('pl', 'en'):
                        desc_text = phrase_description2(
                            phrase, self, negativity, lang, controller=controller)
                        # TODO also index by controller function? other controller info?
                        desc = NaturalLanguageDescription(
                            lang=lang,
                            negativity=negativity,
                            function=position.function,
                            control=position.control,
                            pred_control=position.pred_control,
                            phrase_str=phrase_text,
                            description=desc_text)
                        desc.save()
            if self._id is not None:
                stored_positions[extended_id] = position.id

    def hasControl(self, control):
        """Return True if any of this position's controls equals ``control``."""
        if not self._control:
            return False
        return any(c._function == control for c in self._control)

    def getCase(self):
        """Return the single case shared by the phrases of this position.

        Falsy results of ``phrase.getCase()`` are ignored.

        Raises:
            AssertionError: the phrases yield no case or more than one case;
                diagnostics are printed first.
        """
        cases = set()
        for phrase in self._phrases:
            case = phrase.getCase()
            if case:
                cases.add(case)
        # Explicit check instead of ``assert`` so it also runs under -O.
        if len(cases) != 1:
            print('COULDN’T DETERMINE POSITION CASE:')
            print(' + '.join(map(str, self._phrases)))
            raise AssertionError(
                'expected exactly one case, found %d' % len(cases))
        return cases.pop()

    def getPhraseIds(self):
        """Return the dict mapping phrase identifiers to phrases."""
        return self._phrase_ids

    def __unicode__(self):
        joined = ','.join([phrase.toUnicode(self._function)
                           for phrase in self._phrases])
        if self._function is None:
            return '[' + joined + ']'
        return self._function._value + '([' + joined + '])'

    def toUnicode(self, function, phrases=None):
        """Render the position as text, wrapped in its controls.

        Args:
            function: unused; kept for interface compatibility.
            phrases: if given, only phrases contained in it are rendered.

        Returns:
            The textual form, or None when the function value is something
            other than ``subj``, ``obj`` or ``head``.
        """
        pre = ''
        post = ''
        if self._control is not None:
            for control in self._control:
                pre += control._function + '('
                post += ')'

        name = None if self._function is None else self._function._value
        if phrases is None:
            # Existing behaviour: without a phrase filter an ``obj`` position
            # is rendered bare, exactly like a position with no function.
            bare = name is None or name == 'obj'
            selected = self._phrases
        else:
            bare = name is None
            selected = [phrase for phrase in self._phrases if phrase in phrases]

        if not bare and name not in ('subj', 'obj', 'head'):
            # Other function values have no textual form here.
            return None

        joined = ','.join([phrase.toUnicode(self._function)
                           for phrase in selected])
        if bare:
            return pre + '[' + joined + ']' + post
        return pre + name + '([' + joined + '])' + post

    def subposition(self, phrases=None):
        """Render as ``function,controls{phrases}`` using ``str`` of each phrase.

        Args:
            phrases: if given, only phrases contained in it are rendered.
        """
        c = ''
        if self._control is not None:
            c = ','.join([control._function for control in self._control])
        f = ''
        if self._function is not None:
            f = self._function._value
        if f != '' and c != '':
            f += ','
        if phrases is None:
            selected = self._phrases
        else:
            selected = [phrase for phrase in self._phrases if phrase in phrases]
        return f + c + '{' + ','.join([str(phrase) for phrase in selected]) + '}'