Skip to content
Snippets Groups Projects
Commit 020cca6b authored by Tomasz Bartosiak
Browse files

Added schema definition part, packed schema representation

parent 8be25336
Branches
No related tags found
No related merge requests found
#-*- coding:utf-8 -*-
import datetime
from collections import defaultdict
XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
class SchemaPacker():
    """Assign shared integer ids to (subentry, schema) pairs with equal content.

    A subentry is "packed" into a tuple of the names of its inherent_sie,
    aspect, negativity and predicativity; a schema is packed into its opinion
    key plus the sorted tuple of its position ids.  Objects whose packed
    tuples are equal receive the same id, so callers can emit each distinct
    combination only once.
    """

    def __init__(self):
        # packed_subentry_id -> packed_schema_id -> (Subentry, Schema) (any that matches)
        self.unique_schemas = defaultdict(lambda: defaultdict(lambda: None))
        # packed_subentry -> id (-1 means "no id assigned yet")
        self.free_subentry_id = 0
        self.unique_subentry_id = defaultdict(lambda: -1)
        # packed_schema -> id (-1 means "no id assigned yet")
        self.free_schema_id = 0
        self.unique_schema_id = defaultdict(lambda: -1)
        # subentry -> schema -> (packed_subentry, packed_schema)
        self.packed_schemas = defaultdict(lambda: defaultdict(lambda: None))

    def _pack_subentry(self, subentry):
        """Return (sie, aspect, negativity, predicativity) names for `subentry`.

        A missing (None) aspect or negativity is represented by ''.
        """
        sie = subentry.inherent_sie.name
        aspect = ''
        if subentry.aspect is not None:
            aspect = subentry.aspect.name
        negativity = ''
        # Guard on negativity itself: the previous check tested `aspect`,
        # which dropped negativity for aspect-less subentries and crashed on
        # subentries that had an aspect but no negativity.
        if subentry.negativity is not None:
            negativity = subentry.negativity.name
        predicativity = subentry.predicativity.name
        return (sie, aspect, negativity, predicativity)

    def _pack_schema(self, schema):
        """Return (opinion key, sorted tuple of position ids) for `schema`.

        An opinion whose key is None is represented by 'unk'.
        """
        opinion = 'unk'
        if schema.opinion.key is not None:
            opinion = schema.opinion.key
        positions = tuple(sorted(position.id for position in schema.positions.all()))
        return (opinion, positions)

    def _pack(self, subentry, schema):
        """Return the cached (packed_subentry, packed_schema) pair, computing it once."""
        if self.packed_schemas[subentry][schema] is None:
            self.packed_schemas[subentry][schema] = (
                self._pack_subentry(subentry),
                self._pack_schema(schema),
            )
        return self.packed_schemas[subentry][schema]

    def add(self, subentry, schema):
        """Register a (subentry, schema) pair, allocating ids for new packed values.

        The pair becomes the representative stored in `unique_schemas` for its
        (packed_subentry_id, packed_schema_id), replacing any earlier one.
        """
        # _pack() also records the packed pair in self.packed_schemas, which
        # get_ids() relies on.
        packed_subentry, packed_schema = self._pack(subentry, schema)
        if self.unique_subentry_id[packed_subentry] == -1:
            self.unique_subentry_id[packed_subentry] = self.free_subentry_id
            self.free_subentry_id += 1
        packed_subentry_id = self.unique_subentry_id[packed_subentry]
        if self.unique_schema_id[packed_schema] == -1:
            self.unique_schema_id[packed_schema] = self.free_schema_id
            self.free_schema_id += 1
        packed_schema_id = self.unique_schema_id[packed_schema]
        self.unique_schemas[packed_subentry_id][packed_schema_id] = (subentry, schema)

    def get_ids(self, subentry, schema):
        """Return (packed_subentry_id, packed_schema_id) for a previously added pair."""
        packed_subentry, packed_schema = self.packed_schemas[subentry][schema]
        return (self.unique_subentry_id[packed_subentry], self.unique_schema_id[packed_schema])
......@@ -10,6 +10,8 @@ from unifier.models import UnifiedFrame2SlowalFrameMapping, \
UnifiedFrameArgumentSlowalFrameMapping
from connections.models import ArgumentConnection, ExampleConnection
from common.management.commands.schema_packer import SchemaPacker
from collections import defaultdict
XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
......@@ -72,7 +74,7 @@ def write_content(root, unified_frames):
body = etree.SubElement(text, 'body')
schemata = etree.SubElement(body, 'div')
frames = etree.SubElement(body, 'div')
used_schemata = set()
used_schemata = SchemaPacker()
write_unified_frames(frames, unified_frames, used_schemata)
write_used_schemata(schemata, used_schemata)
......@@ -342,7 +344,7 @@ def write_alternations(parent, unified_frame, lexical_unit, mapping, used_schema
argument_fs.attrib['type'] = 'argument'
argument_fs.attrib['sameAs'] = u'#unif_%d.%d-arg' % (unified_frame.id, argument.id)
phrases_f = etree.SubElement(connection_fs, 'f')
write_phrases_coll(phrases_f, schema_hooks)
write_phrases_coll(phrases_f, schema_hooks, used_schemata)
def prepare_alternations(lexical_unit, mapping, used_schemata):
connections_info = analyse_connections(mapping)
......@@ -353,13 +355,20 @@ def prepare_alternations(lexical_unit, mapping, used_schemata):
uargument = argument_mapping.unified_agrument
sargument = argument_mapping.slowal_agrument
success = False
try:
argument_realization = ArgumentConnection.objects.get(argument = sargument)
success = True
except:
print(sargument.id)
if success:
by_schema_realizations = argument_realization.schema_connections.all()
for schema_hook in by_schema_realizations:
if valid_connection(lexical_unit, schema_hook, connections_info):
subentry = schema_hook.subentry
schema = schema_hook.schema
used_schemata.add((subentry, schema))
used_schemata.add(subentry, schema)
alternation = schema_hook.alternation
alternations[(subentry.id, schema.id, alternation)][uargument].append(schema_hook)
......@@ -400,13 +409,14 @@ def valid_connection(lexical_unit, schema_hook, connections_info):
return (in_lemma == in_schema)
return False
def write_phrases_coll(parent, phrases_list):
def write_phrases_coll(parent, phrases_list, used_schemata):
vColl = etree.SubElement(parent, 'vColl')
vColl.attrib['org'] = 'set'
for phrase in phrases_list:
phrase_fs = etree.SubElement(vColl, 'fs')
phrase_fs.attrib['type'] = 'phrase'
phrase_fs.attrib['sameAs'] = u'#unif_%d.%d.%d.%d-phr' %(phrase.subentry.id, phrase.schema.id, phrase.position.id, phrase.phrase_type.id)
subentry_id, schema_id = used_schemata.get_ids(phrase.subentry, phrase.schema)
phrase_fs.attrib['sameAs'] = u'#unif_%d.%d.%d.%d-phr' %(subentry_id, schema_id, phrase.position.id, phrase.phrase_type.id)
#=================== DIV -- SYNTACTIC SCHEMATA ===================#
......@@ -415,23 +425,74 @@ def write_used_schemata(parent, used_schemata):
schemata_head = etree.SubElement(parent, 'head')
schemata_head.text = 'Syntactic Schemata'
for subentry, schema in used_schemata:
write_schema_entry(parent, subentry, schema)
for subentry_id in used_schemata.unique_schemas:
for schema_id in used_schemata.unique_schemas[subentry_id]:
subentry, schema = used_schemata.unique_schemas[subentry_id][schema_id]
write_schema_entry(parent, subentry, subentry_id, schema, schema_id)
def write_schema_entry(parent, subentry, schema):
entry_xml_id = u'unif_%d.%d-schent' %(subentry.id, schema.id)
def write_schema_entry(parent, subentry, subentry_id, schema, schema_id):
entry_xml_id = u'unif_%d.%d-schent' %(subentry_id, schema_id)
entry = etree.SubElement(parent, 'entry')
entry.attrib[etree.QName(XML_NAMESPACE, 'id')] = entry_xml_id
write_schema_definition(entry, subentry, schema)
write_schema_definition(entry, subentry, subentry_id, schema, schema_id)
write_schema(entry, subentry, schema)
write_schema(entry, subentry, subentry_id, schema, schema_id)
def write_schema_definition(parent, subentry, schema):
pass
def write_schema_definition(parent, subentry, subentry_id, schema, schema_id):
    """Append a <def> child to `parent` holding the schema's textual form.

    `subentry_id` and `schema_id` are accepted for signature parity with the
    other schema writers; they are not used here.
    """
    definition = etree.SubElement(parent, 'def')
    definition.text = schema_textual_representation(subentry, schema)
def write_schema(parent, subentry, schema):
schema_xml_id = u'unif_%d.%d-sch' %(subentry.id, schema.id)
def schema_textual_representation(subentry, schema):
    """Build the colon-separated textual description of a schema.

    The fields, in order, are: inherent "się", opinion label, negativity,
    predicativity and aspect, followed by the schema's positions joined
    with ' + '.

    :param subentry: object exposing inherent_sie, negativity, predicativity
        and aspect (negativity and aspect may be None).
    :param schema: object exposing opinion.key and positions.all().
    :return: the assembled string.
    """
    # Opinion key -> label written to the output; unknown keys map to 'brak'.
    opinion_labels = {
        'vul': ' wulgarny:',
        'col': ' potoczny:',
        'dat': ' archaiczny:',
        'bad': ' zły:',
        'unc': ' wątpliwy:',
        'cer': ' pewny:',
    }

    result = ''
    if subentry.inherent_sie.name == 'true':
        result += ' się:'
    else:
        # Was `rerult += ':'`, which raised UnboundLocalError.
        result += ':'
    result += opinion_labels.get(schema.opinion.key, ' brak:')
    if subentry.negativity is not None:
        result += ' ' + subentry.negativity.name + ':'
    else:
        result += ' :'
    if subentry.predicativity.name == 'true':
        result += ' pred:'
    else:
        # Same typo fix as above.
        result += ':'
    if subentry.aspect is not None:
        result += ' ' + subentry.aspect.name + ':'
    else:
        result += ' :'
    positions_rep = [str(position) for position in schema.positions.all()]
    result += ' ' + ' + '.join(positions_rep)
    return result
def write_schema(parent, subentry, subentry_id, schema, schema_id):
schema_xml_id = u'unif_%d.%d-sch' %(subentry_id, schema_id)
schema_fs = etree.SubElement(parent, 'fs')
schema_fs.attrib[etree.QName(XML_NAMESPACE, 'id')] = schema_xml_id
......@@ -484,28 +545,33 @@ def write_schema(parent, subentry, schema):
predicativity_binary.attrib['value'] = predicativity
# positions
write_positions(schema_fs, subentry, schema)
write_positions(schema_fs, subentry_id, schema, schema_id)
def write_positions(parent, subentry, schema):
def write_positions(parent, subentry_id, schema, schema_id):
positions = schema.positions.all()
positions_f = etree.SubElement(parent, 'f')
positions_f.attrib['name'] = 'positions'
vColl = etree.SubElement(positions_f, 'vColl')
vColl.attrib['org'] = 'set'
for position in positions:
write_position(vColl, subentry, schema, position)
write_position(vColl, subentry_id, schema_id, position)
def write_position(parent, subentry, schema, position):
position_xml_id = u'unif_%d.%d.%d-psn' %(subentry.id, schema.id, position.id)
def write_position(parent, subentry_id, schema_id, position):
position_xml_id = u'unif_%d.%d.%d-psn' %(subentry_id, schema_id, position.id)
position_fs = etree.SubElement(parent, 'fs')
position_fs.attrib['type'] = 'position'
position_fs.attrib[etree.QName(XML_NAMESPACE, 'id')] = position_xml_id
text_rep_f = etree.SubElement(position_fs, 'f')
text_rep_f.attrib['name'] = 'textual_representation'
text_rep_string = etree.SubElement(text_rep_f, 'string')
text_rep_string.text = str(position)
write_function(position_fs, position)
write_control(position_fs, position)
write_phrases(position_fs, subentry, schema, position)
write_phrases(position_fs, subentry_id, schema_id, position)
def write_function(parent, position):
function = position.function
......@@ -532,17 +598,17 @@ def write_control(parent, position):
pred_control_symbol = etree.SubElement(vColl, 'symbol')
pred_control_symbol.attrib['value'] = control
def write_phrases(parent, subentry, schema, position):
def write_phrases(parent, subentry_id, schema_id, position):
phrases = position.phrase_types.all()
phrases_f = etree.SubElement(parent, 'f')
phrases_f.attrib['name'] = 'phrases'
vColl = etree.SubElement(phrases_f, 'vColl')
vColl.attrib['org'] = 'set'
for phrase in phrases:
write_phrase(vColl, subentry, schema, position, phrase)
write_phrase(vColl, subentry_id, schema_id, position, phrase)
def write_phrase(parent, subentry, schema, position, phrase):
phrase_xml_id = u'unif_%d.%d.%d.%d-phr' %(subentry.id, schema.id, position.id, phrase.id)
def write_phrase(parent, subentry_id, schema_id, position, phrase):
phrase_xml_id = u'unif_%d.%d.%d.%d-phr' %(subentry_id, schema_id, position.id, phrase.id)
phrase_fs = etree.SubElement(parent, 'fs')
phrase_fs.attrib[etree.QName(XML_NAMESPACE, 'id')] = phrase_xml_id
phrase_fs.attrib['type'] = phrase.main_type.name
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment