Added schema definition part, packed schema representation

020cca6b · Tomasz Bartosiak · 8be25336 · 020cca6b · 020cca6b · 8be25336
Commit 020cca6b authored Nov 9, 2023 by Tomasz Bartosiak
--- a/common/management/commands/schema_packer.py
+++ b/common/management/commands/schema_packer.py
+#-*- coding:utf-8 -*-
+import datetime
+from collections import defaultdict
+XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
+class SchemaPacker():
+    """Deduplicate (subentry, schema) pairs by their packed representation.
+    A subentry packs to (sie, aspect, negativity, predicativity) and a schema
+    to (opinion, sorted position ids).  Equal packed values share one integer
+    id; ids are handed out from 0 in the order new packed values are added.
+    """
+    def __init__(self):
+        # packed_subentry_id -> packed_schema_id -> (Subentry, Schema) (any that matches)
+        self.unique_schemas = defaultdict(lambda: defaultdict(lambda: None))
+        # packed_subentry -> id (-1 marks a packed subentry without an id yet)
+        self.free_subentry_id = 0
+        self.unique_subentry_id = defaultdict(lambda: -1)
+        # packed_schema -> id (-1 marks a packed schema without an id yet)
+        self.free_schema_id = 0
+        self.unique_schema_id = defaultdict(lambda: -1)
+        # subentry -> schema -> (packed_subentry, packed_schema)
+        self.packed_schemas = defaultdict(lambda: defaultdict(lambda: None))
+    def _pack_subentry(self, subentry):
+        """Return the (sie, aspect, negativity, predicativity) tuple of names.
+        A subentry without aspect or negativity gets '' in that slot.
+        """
+        sie = subentry.inherent_sie.name
+        aspect = ''
+        if subentry.aspect is not None:
+            aspect = subentry.aspect.name
+        negativity = ''
+        # Each optional attribute is guarded by its own None check, so a
+        # missing aspect does not hide the negativity and vice versa.
+        if subentry.negativity is not None:
+            negativity = subentry.negativity.name
+        predicativity = subentry.predicativity.name
+        return (sie, aspect, negativity, predicativity)
+    def _pack_schema(self, schema):
+        """Return (opinion key, or 'unk' when the key is None; sorted tuple of position ids)."""
+        opinion = 'unk'
+        if schema.opinion.key is not None:
+            opinion = schema.opinion.key
+        # Sorting makes the packed value independent of the order in which
+        # schema.positions.all() yields the positions.
+        positions = tuple(sorted(position.id for position in schema.positions.all()))
+        return (opinion, positions)
+    def _pack(self, subentry, schema):
+        """Return (packed_subentry, packed_schema), computing and caching it on first use."""
+        if self.packed_schemas[subentry][schema] is None:
+            self.packed_schemas[subentry][schema] = (self._pack_subentry(subentry), self._pack_schema(schema))
+        return self.packed_schemas[subentry][schema]
+    def add(self, subentry, schema):
+        """Register a (subentry, schema) pair, assigning ids to unseen packed values."""
+        # _pack also stores the packed pair in self.packed_schemas, which
+        # get_ids relies on later.
+        packed_subentry, packed_schema = self._pack(subentry, schema)
+        if self.unique_subentry_id[packed_subentry] == -1:
+            self.unique_subentry_id[packed_subentry] = self.free_subentry_id
+            self.free_subentry_id += 1
+        packed_subentry_id = self.unique_subentry_id[packed_subentry]
+        if self.unique_schema_id[packed_schema] == -1:
+            self.unique_schema_id[packed_schema] = self.free_schema_id
+            self.free_schema_id += 1
+        packed_schema_id = self.unique_schema_id[packed_schema]
+        # The most recently added pair becomes the representative for these ids.
+        self.unique_schemas[packed_subentry_id][packed_schema_id] = (subentry, schema)
+    def get_ids(self, subentry, schema):
+        """Return (packed_subentry_id, packed_schema_id) for a pair registered with add().
+        For a pair that was never added the stored entry is None and the
+        unpacking below raises TypeError.
+        """
+        packed_subentry, packed_schema = self.packed_schemas[subentry][schema]
+        return (self.unique_subentry_id[packed_subentry], self.unique_schema_id[packed_schema])
--- a/common/management/commands/valunifier_tei.py
+++ b/common/management/commands/valunifier_tei.py
@@ -10,6 +10,8 @@ from unifier.models import UnifiedFrame2SlowalFrameMapping, \
                           UnifiedFrameArgumentSlowalFrameMapping
 from connections.models import ArgumentConnection, ExampleConnection
+from common.management.commands.schema_packer import SchemaPacker
 from collections import defaultdict
 XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
@@ -72,7 +74,7 @@ def write_content(root, unified_frames):
    body = etree.SubElement(text, 'body')
    schemata = etree.SubElement(body, 'div')
    frames = etree.SubElement(body, 'div')
-    used_schemata = set()
+    used_schemata = SchemaPacker()
    write_unified_frames(frames, unified_frames, used_schemata)
    write_used_schemata(schemata, used_schemata)
@@ -342,7 +344,7 @@ def write_alternations(parent, unified_frame, lexical_unit, mapping, used_schema
            argument_fs.attrib['type'] = 'argument'
            argument_fs.attrib['sameAs'] = u'#unif_%d.%d-arg' % (unified_frame.id, argument.id)
            phrases_f = etree.SubElement(connection_fs, 'f')
-            write_phrases_coll(phrases_f, schema_hooks)
+            write_phrases_coll(phrases_f, schema_hooks, used_schemata)
 def prepare_alternations(lexical_unit, mapping, used_schemata):
    connections_info = analyse_connections(mapping)
@@ -353,13 +355,20 @@ def prepare_alternations(lexical_unit, mapping, used_schemata):
        uargument = argument_mapping.unified_agrument
        sargument = argument_mapping.slowal_agrument
+        success = False
+        try:
            argument_realization = ArgumentConnection.objects.get(argument = sargument)
+            success = True
+        except:
+            print(sargument.id)
+        if success:
            by_schema_realizations = argument_realization.schema_connections.all()
            for schema_hook in by_schema_realizations:
                if valid_connection(lexical_unit, schema_hook, connections_info):
                    subentry = schema_hook.subentry
                    schema = schema_hook.schema
-                used_schemata.add((subentry, schema))
+                    used_schemata.add(subentry, schema)
                    alternation = schema_hook.alternation
                    alternations[(subentry.id, schema.id, alternation)][uargument].append(schema_hook)
@@ -400,13 +409,14 @@ def valid_connection(lexical_unit, schema_hook, connections_info):
        return (in_lemma == in_schema)
    return False
-def write_phrases_coll(parent, phrases_list):
+def write_phrases_coll(parent, phrases_list, used_schemata):
+    # Appends to parent a 'vColl' (org='set') holding one 'fs' of type 'phrase'
+    # per item of phrases_list; each 'fs' only carries a 'sameAs' reference.
+    # used_schemata must provide get_ids(subentry, schema) -- presumably the
+    # SchemaPacker created in write_content; confirm against the callers.
    vColl = etree.SubElement(parent, 'vColl')
    vColl.attrib['org'] = 'set'
    for phrase in phrases_list:
        phrase_fs = etree.SubElement(vColl, 'fs')
        phrase_fs.attrib['type'] = 'phrase'
-        phrase_fs.attrib['sameAs'] = u'#unif_%d.%d.%d.%d-phr' %(phrase.subentry.id, phrase.schema.id, phrase.position.id, phrase.phrase_type.id)
+        # The subentry/schema part of the reference uses the ids returned by
+        # used_schemata.get_ids instead of the objects' own ids.
+        subentry_id, schema_id = used_schemata.get_ids(phrase.subentry, phrase.schema)
+        phrase_fs.attrib['sameAs'] = u'#unif_%d.%d.%d.%d-phr' %(subentry_id, schema_id, phrase.position.id, phrase.phrase_type.id)
 #=================== DIV -- SYNTACTIC SCHEMATA ===================#
@@ -415,23 +425,74 @@ def write_used_schemata(parent, used_schemata):
    schemata_head = etree.SubElement(parent, 'head')
    schemata_head.text = 'Syntactic Schemata'
-    for subentry, schema in used_schemata:
+    for subentry_id in used_schemata.unique_schemas:
-        write_schema_entry(parent, subentry, schema)
+        for schema_id in used_schemata.unique_schemas[subentry_id]:
+            subentry, schema = used_schemata.unique_schemas[subentry_id][schema_id]
+            write_schema_entry(parent, subentry, subentry_id, schema, schema_id)
-def write_schema_entry(parent, subentry, schema):
+def write_schema_entry(parent, subentry, subentry_id, schema, schema_id):
+    # Appends an 'entry' element for one (subentry, schema) pair to parent.
+    # The xml:id is built from the subentry_id and schema_id arguments; the
+    # entry content is produced by write_schema_definition and write_schema.
-    entry_xml_id = u'unif_%d.%d-schent' %(subentry.id, schema.id)
+    entry_xml_id = u'unif_%d.%d-schent' %(subentry_id, schema_id)
    entry = etree.SubElement(parent, 'entry')
    entry.attrib[etree.QName(XML_NAMESPACE, 'id')] = entry_xml_id
-    write_schema_definition(entry, subentry, schema)
+    write_schema_definition(entry, subentry, subentry_id, schema, schema_id)
-    write_schema(entry, subentry, schema)
+    write_schema(entry, subentry, subentry_id, schema, schema_id)
-def write_schema_definition(parent, subentry, schema):
+def write_schema_definition(parent, subentry, subentry_id, schema, schema_id):
+    # Appends a 'def' element to parent whose text is the result of
+    # schema_textual_representation(subentry, schema).
+    # subentry_id and schema_id are accepted but not used in this function.
-    pass
+    d = etree.SubElement(parent, 'def')
+    d.text = schema_textual_representation(subentry, schema)
-def write_schema(parent, subentry, schema):
+def schema_textual_representation(subentry, schema):
-    schema_xml_id = u'unif_%d.%d-sch' %(subentry.id, schema.id)
+    """Return a one-line textual description of a schema within a subentry.
+    The result is a sequence of colon-terminated fields, in this order:
+    inherent 'się', opinion label, negativity name, 'pred' marker and aspect
+    name, followed by the str() of every schema position joined with ' + '.
+    A field that does not apply is left empty but keeps its colon.
+    """
+    # Labels emitted for the known opinion keys; any other key gives 'brak'.
+    opinion_labels = {
+        'vul': 'wulgarny',
+        'col': 'potoczny',
+        'dat': 'archaiczny',
+        'bad': 'zły',
+        'unc': 'wątpliwy',
+        'cer': 'pewny',
+    }
+    result = ''
+    if subentry.inherent_sie.name == 'true':
+        result += ' się:'
+    else:
+        result += ':'
+    result += ' ' + opinion_labels.get(schema.opinion.key, 'brak') + ':'
+    if subentry.negativity is not None:
+        result += ' ' + subentry.negativity.name + ':'
+    else:
+        result += ' :'
+    if subentry.predicativity.name == 'true':
+        result += ' pred:'
+    else:
+        result += ':'
+    if subentry.aspect is not None:
+        result += ' ' + subentry.aspect.name + ':'
+    else:
+        result += ' :'
+    positions_rep = [str(position) for position in schema.positions.all()]
+    result += ' ' + ' + '.join(positions_rep)
+    return result
+def write_schema(parent, subentry, subentry_id, schema, schema_id):
+    schema_xml_id = u'unif_%d.%d-sch' %(subentry_id, schema_id)
    schema_fs = etree.SubElement(parent, 'fs')
    schema_fs.attrib[etree.QName(XML_NAMESPACE, 'id')] = schema_xml_id
@@ -484,28 +545,33 @@ def write_schema(parent, subentry, schema):
    predicativity_binary.attrib['value'] = predicativity
    # positions
-    write_positions(schema_fs, subentry, schema)
+    write_positions(schema_fs, subentry_id, schema, schema_id)
-def write_positions(parent, subentry, schema):
+def write_positions(parent, subentry_id, schema, schema_id):
+    # Appends an 'f' element named 'positions' to parent, containing a 'vColl'
+    # (org='set') that write_position fills with one item per schema position.
+    # subentry_id and schema_id are only passed on to write_position.
    positions = schema.positions.all()
    positions_f = etree.SubElement(parent, 'f')
    positions_f.attrib['name'] = 'positions'
    vColl = etree.SubElement(positions_f, 'vColl')
    vColl.attrib['org'] = 'set'
    for position in positions:
-        write_position(vColl, subentry, schema, position)
+        write_position(vColl, subentry_id, schema_id, position)
-def write_position(parent, subentry_id, schema_id, position):
+def write_position(parent, subentry_id, schema_id, position):
+    # Appends an 'fs' of type 'position' to parent. Its xml:id combines
+    # subentry_id, schema_id and position.id; its content is the textual
+    # representation followed by the output of write_function, write_control
+    # and write_phrases.
-    position_xml_id = u'unif_%d.%d.%d-psn' %(subentry.id, schema.id, position.id)
+    position_xml_id = u'unif_%d.%d.%d-psn' %(subentry_id, schema_id, position.id)
    position_fs = etree.SubElement(parent, 'fs')
    position_fs.attrib['type'] = 'position'
    position_fs.attrib[etree.QName(XML_NAMESPACE, 'id')] = position_xml_id
+    # Human-readable form of the position: str(position) stored in an
+    # 'f' element named 'textual_representation'.
+    text_rep_f = etree.SubElement(position_fs, 'f')
+    text_rep_f.attrib['name'] = 'textual_representation'
+    text_rep_string = etree.SubElement(text_rep_f, 'string')
+    text_rep_string.text = str(position)
    write_function(position_fs, position)
    write_control(position_fs, position)
-    write_phrases(position_fs, subentry, schema, position)
+    write_phrases(position_fs, subentry_id, schema_id, position)
 def write_function(parent, position):
    function = position.function
@@ -532,17 +598,17 @@ def write_control(parent, position):
            pred_control_symbol = etree.SubElement(vColl, 'symbol')
            pred_control_symbol.attrib['value'] = control
-def write_phrases(parent, subentry, schema, position):
+def write_phrases(parent, subentry_id, schema_id, position):
+    # Appends an 'f' element named 'phrases' to parent, containing a 'vColl'
+    # (org='set') that write_phrase fills with one item per phrase type of
+    # the position. subentry_id and schema_id are only passed on to write_phrase.
    phrases = position.phrase_types.all()
    phrases_f = etree.SubElement(parent, 'f')
    phrases_f.attrib['name'] = 'phrases'
    vColl = etree.SubElement(phrases_f, 'vColl')
    vColl.attrib['org'] = 'set'
    for phrase in phrases:
-        write_phrase(vColl, subentry, schema, position, phrase)
+        write_phrase(vColl, subentry_id, schema_id, position, phrase)
-def write_phrase(parent, subentry, schema, position, phrase):
+def write_phrase(parent, subentry_id, schema_id, position, phrase):
-    phrase_xml_id = u'unif_%d.%d.%d.%d-phr' %(subentry.id, schema.id, position.id, phrase.id)
+    phrase_xml_id = u'unif_%d.%d.%d.%d-phr' %(subentry_id, schema_id, position.id, phrase.id)
    phrase_fs = etree.SubElement(parent, 'fs')
    phrase_fs.attrib[etree.QName(XML_NAMESPACE, 'id')] = phrase_xml_id
    phrase_fs.attrib['type'] = phrase.main_type.name

--- a/data/tei/Leksykograf.xml
+++ b/data/tei/Leksykograf.xml