#! /usr/bin/python
# -*- coding: utf-8 -*-
"""Parse example sentences from an XML-like tree and store them with Django models.

An ``Example`` couples a sentence with the syntactic phrases it illustrates
(``IllustratesSyntax``) and, when a meaning is given, with the semantic
arguments those phrases realise (``IllustratesSemantics``).
"""

from collections import defaultdict

import examples.models
from connections.models import ExampleConnection, SchemaHook
from syntax.models import Schema, Position
from syntax.models_phrase import PhraseType
from semantics.models import Argument


class UnknownError(Exception):
    """Raised when an example tree has a layout this module does not handle.

    The name was previously referenced without being defined or imported, so
    reaching the ``raise`` produced a ``NameError`` instead.
    """


def clean_sentence(sentence):
    """Return *sentence* with every line break replaced by a single space.

    ``'\\r\\n'`` is replaced first so that a Windows line ending yields one
    space rather than two.
    """
    return sentence.replace('\r\n', ' ').replace('\n', ' ').replace('\r', ' ')


def _add_schema_hooks(connection, subentry, schema, position, subposition,
                      alternation):
    """Create one ``SchemaHook`` per phrase of *subposition* and attach it to *connection*."""
    for phrase_obj in subposition:
        phrase = PhraseType.objects.get(text_rep=str(phrase_obj))
        hook = SchemaHook(subentry=subentry, schema=schema, position=position,
                          phrase_type=phrase, alternation=alternation)
        hook.save()
        connection.schema_connections.add(hook)


class Example:
    """An example sentence together with what it illustrates."""

    def __init__(self, eid, sentence, source, opinion, note,
                 illustrated_syntax, meaning, illustrated_semantics):
        """Store the parsed pieces of an example.

        Args:
            eid: integer identifier of the example (used as primary key).
            sentence: the example text.
            source: key of the example source.
            opinion: key of the example opinion.
            note: free-text note, or ``None``.
            illustrated_syntax: an ``IllustratesSyntax`` instance.
            meaning: integer meaning identifier, or ``None``.
            illustrated_semantics: an ``IllustratesSemantics`` instance, or
                ``None`` when the example is not connected to semantics.
        """
        self._eid = eid
        self._sentence = sentence
        self._source = source
        self._opinion = opinion
        self._note = note
        self._phrases = illustrated_syntax
        self._meaning = meaning
        self._arguments = illustrated_semantics

    @classmethod
    def fromTree(cls, example_tree, phrases, entry_semantics, base, meanings,
                 in_data, out_file, misconnected):
        """Build an ``Example`` from its tree node.

        The children of *example_tree* are laid out as
        ``[meaning,] syntax, sentence, source, opinion[, note]``; which of the
        optional parts are present is decided from the number of children
        (and, for five children, from the ``name`` attribute of the first).

        Args:
            example_tree: node exposing ``_attrs`` and ``_children``.
            phrases: mapping from phrase id to phrase object.
            entry_semantics: object passed to ``IllustratesSemantics.interfere``.
            base: label of the entry, used in diagnostic output.
            meanings: object passed to ``IllustratesSemantics.interfere``.
            in_data: passed through to ``IllustratesSemantics.interfere``.
            out_file: writable stream for diagnostics.
            misconnected: writable stream for misconnected examples.

        Raises:
            UnknownError: if the node has an unexpected number of children.
        """
        eid = int(example_tree._attrs['xml:id'].split('.')[1].split('-')[0])
        children = example_tree._children
        count = len(children)

        if count == 6:
            has_meaning, has_note = True, True
        elif count == 5:
            # Five children: exactly one of meaning / note is present.
            has_meaning = children[0]._attrs['name'] == 'meaning'
            has_note = not has_meaning
        elif count == 4:
            has_meaning, has_note = False, False
        else:
            print(example_tree)
            raise UnknownError()

        # A leading meaning node shifts every other child by one.
        offset = 1 if has_meaning else 0

        meaning = None
        if has_meaning:
            same_as = children[0]._children[0]._attrs['sameAs'][1:]
            meaning = int(same_as.split("_")[1].split("-")[0])

        sentence = clean_sentence(children[offset + 1]._children[0]._content)
        illustrated_syntax = IllustratesSyntax.fromTree(
            children[offset], phrases, base, sentence, out_file)

        illustrated_semantics = None
        if has_meaning:
            illustrated_semantics = IllustratesSemantics.interfere(
                sentence, base, meaning, illustrated_syntax, entry_semantics,
                meanings, eid, in_data, out_file, misconnected)

        source = children[offset + 2]._children[0]._attrs['value']
        opinion = children[offset + 3]._children[0]._attrs['value']
        note = None
        if has_note:
            note = children[offset + 4]._children[0]._content

        return cls(eid, sentence, source, opinion, note, illustrated_syntax,
                   meaning, illustrated_semantics)

    def store(self, entry, meanings):
        """Save the example and its syntax/semantics connections.

        The example row is always saved. A connection is created only when
        the example is connected to syntax with at least one phrase; it is
        linked to a lexical unit and arguments only when semantics could be
        attached as well.

        Args:
            entry: the entry the example belongs to.
            meanings: mapping from meaning id to a ``(lemma, meaning)`` pair
                whose second element provides ``get()``.
        """
        opinion = examples.models.ExampleOpinion.objects.get(key=self._opinion)
        source = examples.models.ExampleSource.objects.get(key=self._source)
        example = examples.models.Example(entry=entry,
                                          sentence=self._sentence,
                                          opinion=opinion,
                                          source=source,
                                          note=self._note)
        example.pk = self._eid
        example.save()

        if not self._phrases.exists():
            print(' STORING EXAMPLE:', self._sentence)
            print(' example not connected to syntax')
            return

        if self._arguments is None:
            subpositions = self._phrases._subpositions
            if not subpositions:
                # A schema was referenced but none of its phrases were found.
                # Indexing subpositions[0][0] would raise IndexError after the
                # connection was already saved, so handle it the same way as
                # the final case of this method.
                print(' STORING EXAMPLE:', self._sentence)
                print(' example connected to schema but not to any phrases in it; treated as not connected to syntax')
                return

            print(' STORING EXAMPLE:', self._sentence)
            print(' example connected to syntax but not connected to semantics')
            connection = ExampleConnection(example=example)
            connection.save()
            # Subentry and schema are taken from the first phrase only.
            first_position = subpositions[0][0]._position
            subentry = first_position._schema.getSubentry(entry)
            schema = Schema.objects.get(id=first_position._schema._db_id)
            for subposition in subpositions:
                position = Position.objects.get(
                    id=subposition[0]._position._db_id)
                _add_schema_hooks(connection, subentry, schema, position,
                                  subposition, 1)
            return

        if self._arguments.exists():
            # Connected to both syntax and semantics; the connection can be
            # transferred.
            lemma, meaning = meanings[self._meaning]
            meaning = meaning.get()
            connection = ExampleConnection(example=example,
                                           lexical_unit=meaning)
            connection.save()
            for role_illustration in self._arguments._arguments:
                if role_illustration._argument is not None:
                    argument = Argument.objects.get(
                        id=role_illustration._argument._db_id)
                    connection.arguments.add(argument)
                # Phrases without a matching argument still get schema hooks.
                subposition = role_illustration._subposition
                phrase_position = subposition[0]._position
                subentry = phrase_position._schema.getSubentry(entry)
                schema = Schema.objects.get(id=phrase_position._schema._db_id)
                position = Position.objects.get(id=phrase_position._db_id)
                _add_schema_hooks(connection, subentry, schema, position,
                                  subposition, role_illustration._alternation)
            return

        # Connected to both syntax and semantics but the connection cannot be
        # transferred, or connected to a schema but to none of its phrases:
        # the example is treated as not connected to syntax.
        print(' STORING EXAMPLE:', self._sentence)
        print(' example connected to both syntax and semantics but connection cannot be transfered or connected to schema but not to any phrases in it; treated as not connected to syntax')


class IllustratesSyntax:
    """The schema and the phrases (grouped by position) an example illustrates."""

    def __init__(self, schema_key, subpositions):
        """Keep the schema key (or ``None``) and the list of phrase groups."""
        self._schema_key = schema_key
        self._subpositions = subpositions

    @classmethod
    def fromTree(cls, tree, phrases, base, sentence, out_file):
        """Collect the illustrated phrases referenced under *tree*.

        Each referenced id has the form ``<x>.<schema>.<position>...`` after
        its first character is dropped; phrases are grouped by the position
        component. Ids missing from *phrases* are reported to *out_file* and
        skipped, but still set the schema key.

        Args:
            tree: node whose first child holds the phrase references.
            phrases: mapping from phrase id to phrase object.
            base: label of the entry, used in diagnostic output.
            sentence: the example sentence, used in diagnostic output.
            out_file: writable stream for diagnostics.
        """
        schema_key = None
        positions = defaultdict(list)
        for subtree in tree._children[0]._children:
            phrase_id = subtree._attrs['sameAs'][1:]
            id_parts = phrase_id.split('.')
            schema_key = int(id_parts[1])
            position_key = int(id_parts[2])
            if phrase_id in phrases:
                positions[position_key].append(phrases[phrase_id])
            else:
                # NOTE(review): no trailing newline is written here, unlike
                # the other diagnostics in this module - confirm intended.
                out_file.write('@@@ ' + base + ':\t' + sentence)
        return cls(schema_key, list(positions.values()))

    def exists(self):
        """Return ``True`` when at least one phrase reference was seen."""
        return self._schema_key is not None


class IllustratesSemanticRole:
    """A group of phrases paired with the argument it realises, if any."""

    def __init__(self, argument, subposition, alternation):
        """Store the pairing.

        Args:
            argument: the matching argument, or ``None`` for a loose group.
            subposition: non-empty list of phrase objects sharing a position.
            alternation: 1-based index of the realization this pairing
                comes from.
        """
        self._argument = argument
        self._subposition = subposition
        position = self._subposition[0]._position
        # Text form of the group as rendered by the position object.
        self._subposition_str = position.subposition(self._subposition)
        self._alternation = alternation


class IllustratesSemantics:
    """The frame and the role illustrations inferred for an example."""

    def __init__(self, frame, arguments):
        """Keep the frame and the list of ``IllustratesSemanticRole`` objects."""
        self._frame = frame
        self._arguments = arguments

    @classmethod
    def interfere(cls, sentence, base, meaning, illustrated_syntax, semantics,
                  meanings, eid, in_data, out_file, misconnected):
        """Infer which semantic arguments the illustrated phrases realise.

        Only realizations whose schema key equals the illustrated schema key
        are considered. For each of them every phrase group is matched
        against the realization; the realization(s) with the most matches
        win. When several realizations tie, their illustrations are all
        concatenated and, if their role descriptions differ, the ambiguity is
        reported to *out_file*.

        Args:
            sentence: the example sentence (diagnostics only).
            base: label of the entry (diagnostics only).
            meaning: meaning identifier passed to ``semantics.findFrame``.
            illustrated_syntax: an ``IllustratesSyntax`` instance.
            semantics: object providing ``findFrame(meaning)``.
            meanings: object providing ``locate(meaning)``.
            eid: example identifier (diagnostics only).
            in_data: unused; kept for interface compatibility.
            out_file: writable stream for ambiguity reports.
            misconnected: writable stream for examples whose schema has no
                realization in the frame.

        Returns:
            An ``IllustratesSemantics`` instance, or ``None`` when no
            realization uses the illustrated schema.
        """
        frame, all_realizations = semantics.findFrame(meaning)
        schema_key = illustrated_syntax._schema_key
        realizations = [
            realization for realization in all_realizations
            if int(realization._schema._id.split('.')[1].split('-')[0])
            == schema_key
        ]

        # Number of matched arguments -> list of (matched, loose) candidates.
        candidates_by_size = defaultdict(list)
        for alternation, realization in enumerate(realizations, start=1):
            matched = []
            loose = []
            for subposition in illustrated_syntax._subpositions:
                argument = realization.findMatchingArgument(subposition)
                illustration = IllustratesSemanticRole(argument, subposition,
                                                       alternation)
                if argument is not None:
                    matched.append(illustration)
                else:
                    loose.append(illustration)
            candidates_by_size[len(matched)].append((matched, loose))

        if not candidates_by_size:
            # Example connected to a schema that no realization uses.
            lu = meanings.locate(meaning)
            misconnected.write('% ' + sentence + '\n')
            misconnected.write(base + '\t' + str(lu) + '\t' + str(schema_key)
                               + '\t' + str(eid) + '\n\n')
            return None

        # Every key holds at least one candidate, so ``best`` is never empty.
        best = candidates_by_size[max(candidates_by_size)]
        if len(best) == 1:
            matched, loose = best[0]
            return cls(frame, matched + loose)

        lu = meanings.locate(meaning)
        sentence_line = '% ' + sentence + '\n'
        info_line = (base + '\t' + str(lu) + '\t' + str(schema_key) + '\t'
                     + str(eid) + '\n')
        summaries = []
        equal = True
        for matched, _ in best:
            roles = []
            for illustration in matched:
                role_str = str(illustration._argument._semantic_role)
                argument_str = role_str + ': ' + illustration._subposition_str
                roles.append(argument_str)
                # Compare against the first candidate's summary line.
                if summaries and argument_str not in summaries[0]:
                    equal = False
            summaries.append('\t' + '\t'.join(roles))

        # Tied candidates are all kept, whether or not they agree.
        arguments = []
        for matched, loose in best:
            arguments += matched
            arguments += loose
        print(('\n'.join(summaries) + '\n'))

        if not equal:
            out_file.write(sentence_line)
            out_file.write(info_line)
            out_file.write('\n'.join(summaries) + '\n')
            out_file.write('\n')

        return cls(frame, arguments)

    def exists(self):
        """Return ``True`` when at least one role illustration was found."""
        return len(self._arguments) > 0