Skip to content
Snippets Groups Projects
Commit 020cca6b authored by Tomasz Bartosiak's avatar Tomasz Bartosiak
Browse files

Added schema definition part, packed schema representation

parent 8be25336
Branches
No related tags found
No related merge requests found
#-*- coding:utf-8 -*-
import datetime
from collections import defaultdict
XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
class SchemaPacker():
    """Assign compact, shared ids to (subentry, schema) pairs.

    Every pair is reduced to a "packed" form made only of plain values:

    * packed subentry: (inherent_sie name, aspect name, negativity name,
      predicativity name), with '' standing in for a missing aspect or
      negativity;
    * packed schema: (opinion key or 'unk', sorted tuple of position ids).

    Subentries with equal packed forms receive the same subentry id and
    schemas with equal packed forms receive the same schema id.  Ids are
    handed out consecutively from 0 in order of first appearance.

    Subentries and schemas are used as dictionary keys, so they must be
    hashable.
    """

    def __init__(self):
        # packed_subentry_id -> packed_schema_id -> (subentry, schema);
        # holds the most recently added pair for that id combination
        self.unique_schemas = defaultdict(lambda: defaultdict(lambda: None))
        # packed_subentry -> id (-1 means "no id assigned yet")
        self.free_subentry_id = 0
        self.unique_subentry_id = defaultdict(lambda: -1)
        # packed_schema -> id (-1 means "no id assigned yet")
        self.free_schema_id = 0
        self.unique_schema_id = defaultdict(lambda: -1)
        # subentry -> schema -> (packed_subentry, packed_schema)
        self.packed_schemas = defaultdict(lambda: defaultdict(lambda: None))

    def _pack_subentry(self, subentry):
        """Return the tuple of names that identifies a subentry."""
        sie = subentry.inherent_sie.name
        aspect = ''
        if subentry.aspect is not None:
            aspect = subentry.aspect.name
        negativity = ''
        # Guard on negativity itself; checking the aspect here would drop
        # the negativity of aspect-less subentries and fail on subentries
        # that have an aspect but no negativity.
        if subentry.negativity is not None:
            negativity = subentry.negativity.name
        predicativity = subentry.predicativity.name
        return (sie, aspect, negativity, predicativity)

    def _pack_schema(self, schema):
        """Return (opinion key, sorted position ids) for a schema."""
        opinion = schema.opinion.key
        if opinion is None:
            opinion = 'unk'
        # Sorting makes the result independent of the order in which the
        # positions are returned.
        positions = tuple(sorted(
            position.id for position in schema.positions.all()))
        return (opinion, positions)

    def _pack(self, subentry, schema):
        """Return the cached packed form of the pair, computing it once."""
        if self.packed_schemas[subentry][schema] is None:
            self.packed_schemas[subentry][schema] = (
                self._pack_subentry(subentry), self._pack_schema(schema))
        return self.packed_schemas[subentry][schema]

    def add(self, subentry, schema):
        """Register the pair, assigning fresh ids to unseen packed forms."""
        packed_subentry, packed_schema = self._pack(subentry, schema)

        if self.unique_subentry_id[packed_subentry] == -1:
            self.unique_subentry_id[packed_subentry] = self.free_subentry_id
            self.free_subentry_id += 1
        packed_subentry_id = self.unique_subentry_id[packed_subentry]

        if self.unique_schema_id[packed_schema] == -1:
            self.unique_schema_id[packed_schema] = self.free_schema_id
            self.free_schema_id += 1
        packed_schema_id = self.unique_schema_id[packed_schema]

        # The representative pair is overwritten on every call; _pack has
        # already stored the packed form in self.packed_schemas.
        self.unique_schemas[packed_subentry_id][packed_schema_id] = (
            subentry, schema)

    def get_ids(self, subentry, schema):
        """Return (subentry id, schema id) of a pair registered with add().

        For a pair that was never added the stored packed form is None,
        so the unpacking below raises TypeError.
        """
        packed_subentry, packed_schema = self.packed_schemas[subentry][schema]
        return (self.unique_subentry_id[packed_subentry],
                self.unique_schema_id[packed_schema])
...@@ -10,6 +10,8 @@ from unifier.models import UnifiedFrame2SlowalFrameMapping, \ ...@@ -10,6 +10,8 @@ from unifier.models import UnifiedFrame2SlowalFrameMapping, \
UnifiedFrameArgumentSlowalFrameMapping UnifiedFrameArgumentSlowalFrameMapping
from connections.models import ArgumentConnection, ExampleConnection from connections.models import ArgumentConnection, ExampleConnection
from common.management.commands.schema_packer import SchemaPacker
from collections import defaultdict from collections import defaultdict
XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace' XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
...@@ -72,7 +74,7 @@ def write_content(root, unified_frames): ...@@ -72,7 +74,7 @@ def write_content(root, unified_frames):
body = etree.SubElement(text, 'body') body = etree.SubElement(text, 'body')
schemata = etree.SubElement(body, 'div') schemata = etree.SubElement(body, 'div')
frames = etree.SubElement(body, 'div') frames = etree.SubElement(body, 'div')
used_schemata = set() used_schemata = SchemaPacker()
write_unified_frames(frames, unified_frames, used_schemata) write_unified_frames(frames, unified_frames, used_schemata)
write_used_schemata(schemata, used_schemata) write_used_schemata(schemata, used_schemata)
...@@ -342,7 +344,7 @@ def write_alternations(parent, unified_frame, lexical_unit, mapping, used_schema ...@@ -342,7 +344,7 @@ def write_alternations(parent, unified_frame, lexical_unit, mapping, used_schema
argument_fs.attrib['type'] = 'argument' argument_fs.attrib['type'] = 'argument'
argument_fs.attrib['sameAs'] = u'#unif_%d.%d-arg' % (unified_frame.id, argument.id) argument_fs.attrib['sameAs'] = u'#unif_%d.%d-arg' % (unified_frame.id, argument.id)
phrases_f = etree.SubElement(connection_fs, 'f') phrases_f = etree.SubElement(connection_fs, 'f')
write_phrases_coll(phrases_f, schema_hooks) write_phrases_coll(phrases_f, schema_hooks, used_schemata)
def prepare_alternations(lexical_unit, mapping, used_schemata): def prepare_alternations(lexical_unit, mapping, used_schemata):
connections_info = analyse_connections(mapping) connections_info = analyse_connections(mapping)
...@@ -353,13 +355,20 @@ def prepare_alternations(lexical_unit, mapping, used_schemata): ...@@ -353,13 +355,20 @@ def prepare_alternations(lexical_unit, mapping, used_schemata):
uargument = argument_mapping.unified_agrument uargument = argument_mapping.unified_agrument
sargument = argument_mapping.slowal_agrument sargument = argument_mapping.slowal_agrument
success = False
try:
argument_realization = ArgumentConnection.objects.get(argument = sargument) argument_realization = ArgumentConnection.objects.get(argument = sargument)
success = True
except:
print(sargument.id)
if success:
by_schema_realizations = argument_realization.schema_connections.all() by_schema_realizations = argument_realization.schema_connections.all()
for schema_hook in by_schema_realizations: for schema_hook in by_schema_realizations:
if valid_connection(lexical_unit, schema_hook, connections_info): if valid_connection(lexical_unit, schema_hook, connections_info):
subentry = schema_hook.subentry subentry = schema_hook.subentry
schema = schema_hook.schema schema = schema_hook.schema
used_schemata.add((subentry, schema)) used_schemata.add(subentry, schema)
alternation = schema_hook.alternation alternation = schema_hook.alternation
alternations[(subentry.id, schema.id, alternation)][uargument].append(schema_hook) alternations[(subentry.id, schema.id, alternation)][uargument].append(schema_hook)
...@@ -400,13 +409,14 @@ def valid_connection(lexical_unit, schema_hook, connections_info): ...@@ -400,13 +409,14 @@ def valid_connection(lexical_unit, schema_hook, connections_info):
return (in_lemma == in_schema) return (in_lemma == in_schema)
return False return False
def write_phrases_coll(parent, phrases_list): def write_phrases_coll(parent, phrases_list, used_schemata):
vColl = etree.SubElement(parent, 'vColl') vColl = etree.SubElement(parent, 'vColl')
vColl.attrib['org'] = 'set' vColl.attrib['org'] = 'set'
for phrase in phrases_list: for phrase in phrases_list:
phrase_fs = etree.SubElement(vColl, 'fs') phrase_fs = etree.SubElement(vColl, 'fs')
phrase_fs.attrib['type'] = 'phrase' phrase_fs.attrib['type'] = 'phrase'
phrase_fs.attrib['sameAs'] = u'#unif_%d.%d.%d.%d-phr' %(phrase.subentry.id, phrase.schema.id, phrase.position.id, phrase.phrase_type.id) subentry_id, schema_id = used_schemata.get_ids(phrase.subentry, phrase.schema)
phrase_fs.attrib['sameAs'] = u'#unif_%d.%d.%d.%d-phr' %(subentry_id, schema_id, phrase.position.id, phrase.phrase_type.id)
#=================== DIV -- SYNTACTIC SCHEMATA ===================# #=================== DIV -- SYNTACTIC SCHEMATA ===================#
...@@ -415,23 +425,74 @@ def write_used_schemata(parent, used_schemata): ...@@ -415,23 +425,74 @@ def write_used_schemata(parent, used_schemata):
schemata_head = etree.SubElement(parent, 'head') schemata_head = etree.SubElement(parent, 'head')
schemata_head.text = 'Syntactic Schemata' schemata_head.text = 'Syntactic Schemata'
for subentry, schema in used_schemata: for subentry_id in used_schemata.unique_schemas:
write_schema_entry(parent, subentry, schema) for schema_id in used_schemata.unique_schemas[subentry_id]:
subentry, schema = used_schemata.unique_schemas[subentry_id][schema_id]
write_schema_entry(parent, subentry, subentry_id, schema, schema_id)
def write_schema_entry(parent, subentry, schema): def write_schema_entry(parent, subentry, subentry_id, schema, schema_id):
entry_xml_id = u'unif_%d.%d-schent' %(subentry.id, schema.id) entry_xml_id = u'unif_%d.%d-schent' %(subentry_id, schema_id)
entry = etree.SubElement(parent, 'entry') entry = etree.SubElement(parent, 'entry')
entry.attrib[etree.QName(XML_NAMESPACE, 'id')] = entry_xml_id entry.attrib[etree.QName(XML_NAMESPACE, 'id')] = entry_xml_id
write_schema_definition(entry, subentry, schema) write_schema_definition(entry, subentry, subentry_id, schema, schema_id)
write_schema(entry, subentry, schema) write_schema(entry, subentry, subentry_id, schema, schema_id)
def write_schema_definition(parent, subentry, schema): def write_schema_definition(parent, subentry, subentry_id, schema, schema_id):
pass d = etree.SubElement(parent, 'def')
d.text = schema_textual_representation(subentry, schema)
def schema_textual_representation(subentry, schema):
    """Build the one-line textual description of a schema.

    The result is a sequence of colon-terminated fields followed by the
    positions:

        <się>:<opinion>:<negativity>:<pred>:<aspect>: <pos> + <pos> ...

    * ' się' is emitted when subentry.inherent_sie.name is 'true';
    * the opinion key is mapped to its label, unknown keys give ' brak';
    * negativity and aspect names are emitted when present, otherwise a
      single blank fills the field;
    * ' pred' is emitted when subentry.predicativity.name is 'true';
    * positions are rendered with str() and joined with ' + '.
    """
    opinion_labels = {
        'vul': ' wulgarny:',
        'col': ' potoczny:',
        'dat': ' archaiczny:',
        'bad': ' zły:',
        'unc': ' wątpliwy:',
        'cer': ' pewny:',
    }

    fields = []

    if subentry.inherent_sie.name == 'true':
        fields.append(' się:')
    else:
        fields.append(':')

    fields.append(opinion_labels.get(schema.opinion.key, ' brak:'))

    if subentry.negativity is not None:
        fields.append(' ' + subentry.negativity.name + ':')
    else:
        fields.append(' :')

    if subentry.predicativity.name == 'true':
        fields.append(' pred:')
    else:
        fields.append(':')

    if subentry.aspect is not None:
        fields.append(' ' + subentry.aspect.name + ':')
    else:
        fields.append(' :')

    positions_rep = [str(position) for position in schema.positions.all()]
    fields.append(' ' + ' + '.join(positions_rep))

    return ''.join(fields)
def write_schema(parent, subentry, subentry_id, schema, schema_id):
schema_xml_id = u'unif_%d.%d-sch' %(subentry_id, schema_id)
schema_fs = etree.SubElement(parent, 'fs') schema_fs = etree.SubElement(parent, 'fs')
schema_fs.attrib[etree.QName(XML_NAMESPACE, 'id')] = schema_xml_id schema_fs.attrib[etree.QName(XML_NAMESPACE, 'id')] = schema_xml_id
...@@ -484,28 +545,33 @@ def write_schema(parent, subentry, schema): ...@@ -484,28 +545,33 @@ def write_schema(parent, subentry, schema):
predicativity_binary.attrib['value'] = predicativity predicativity_binary.attrib['value'] = predicativity
# positions # positions
write_positions(schema_fs, subentry, schema) write_positions(schema_fs, subentry_id, schema, schema_id)
def write_positions(parent, subentry, schema): def write_positions(parent, subentry_id, schema, schema_id):
positions = schema.positions.all() positions = schema.positions.all()
positions_f = etree.SubElement(parent, 'f') positions_f = etree.SubElement(parent, 'f')
positions_f.attrib['name'] = 'positions' positions_f.attrib['name'] = 'positions'
vColl = etree.SubElement(positions_f, 'vColl') vColl = etree.SubElement(positions_f, 'vColl')
vColl.attrib['org'] = 'set' vColl.attrib['org'] = 'set'
for position in positions: for position in positions:
write_position(vColl, subentry, schema, position) write_position(vColl, subentry_id, schema_id, position)
def write_position(parent, subentry, schema, position): def write_position(parent, subentry_id, schema_id, position):
position_xml_id = u'unif_%d.%d.%d-psn' %(subentry.id, schema.id, position.id) position_xml_id = u'unif_%d.%d.%d-psn' %(subentry_id, schema_id, position.id)
position_fs = etree.SubElement(parent, 'fs') position_fs = etree.SubElement(parent, 'fs')
position_fs.attrib['type'] = 'position' position_fs.attrib['type'] = 'position'
position_fs.attrib[etree.QName(XML_NAMESPACE, 'id')] = position_xml_id position_fs.attrib[etree.QName(XML_NAMESPACE, 'id')] = position_xml_id
text_rep_f = etree.SubElement(position_fs, 'f')
text_rep_f.attrib['name'] = 'textual_representation'
text_rep_string = etree.SubElement(text_rep_f, 'string')
text_rep_string.text = str(position)
write_function(position_fs, position) write_function(position_fs, position)
write_control(position_fs, position) write_control(position_fs, position)
write_phrases(position_fs, subentry, schema, position) write_phrases(position_fs, subentry_id, schema_id, position)
def write_function(parent, position): def write_function(parent, position):
function = position.function function = position.function
...@@ -532,17 +598,17 @@ def write_control(parent, position): ...@@ -532,17 +598,17 @@ def write_control(parent, position):
pred_control_symbol = etree.SubElement(vColl, 'symbol') pred_control_symbol = etree.SubElement(vColl, 'symbol')
pred_control_symbol.attrib['value'] = control pred_control_symbol.attrib['value'] = control
def write_phrases(parent, subentry, schema, position): def write_phrases(parent, subentry_id, schema_id, position):
phrases = position.phrase_types.all() phrases = position.phrase_types.all()
phrases_f = etree.SubElement(parent, 'f') phrases_f = etree.SubElement(parent, 'f')
phrases_f.attrib['name'] = 'phrases' phrases_f.attrib['name'] = 'phrases'
vColl = etree.SubElement(phrases_f, 'vColl') vColl = etree.SubElement(phrases_f, 'vColl')
vColl.attrib['org'] = 'set' vColl.attrib['org'] = 'set'
for phrase in phrases: for phrase in phrases:
write_phrase(vColl, subentry, schema, position, phrase) write_phrase(vColl, subentry_id, schema_id, position, phrase)
def write_phrase(parent, subentry, schema, position, phrase): def write_phrase(parent, subentry_id, schema_id, position, phrase):
phrase_xml_id = u'unif_%d.%d.%d.%d-phr' %(subentry.id, schema.id, position.id, phrase.id) phrase_xml_id = u'unif_%d.%d.%d.%d-phr' %(subentry_id, schema_id, position.id, phrase.id)
phrase_fs = etree.SubElement(parent, 'fs') phrase_fs = etree.SubElement(parent, 'fs')
phrase_fs.attrib[etree.QName(XML_NAMESPACE, 'id')] = phrase_xml_id phrase_fs.attrib[etree.QName(XML_NAMESPACE, 'id')] = phrase_xml_id
phrase_fs.attrib['type'] = phrase.main_type.name phrase_fs.attrib['type'] = phrase.main_type.name
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment