Skip to content
Snippets Groups Projects
Commit ac2fd530 authored by Tomasz Bartosiak
Browse files

Removed most of the irrelevant syntactic realizations from tei-xml

parent 7370ff5c
No related merge requests found
...@@ -8,7 +8,7 @@ from xml.sax.saxutils import escape ...@@ -8,7 +8,7 @@ from xml.sax.saxutils import escape
from unifier.models import UnifiedFrame2SlowalFrameMapping, \ from unifier.models import UnifiedFrame2SlowalFrameMapping, \
UnifiedFrameArgumentSlowalFrameMapping UnifiedFrameArgumentSlowalFrameMapping
from connections.models import ArgumentConnection from connections.models import ArgumentConnection, ExampleConnection
from collections import defaultdict from collections import defaultdict
...@@ -325,7 +325,7 @@ def write_lexical_unit(parent, lexical_unit, mapping): ...@@ -325,7 +325,7 @@ def write_lexical_unit(parent, lexical_unit, mapping):
opinion_symbol.attrib['value'] = slowal_frame.opinion.key opinion_symbol.attrib['value'] = slowal_frame.opinion.key
def write_alternations(parent, unified_frame, lexical_unit, mapping, used_schemata): def write_alternations(parent, unified_frame, lexical_unit, mapping, used_schemata):
alternations = prepare_alternations(mapping, used_schemata) alternations = prepare_alternations(lexical_unit, mapping, used_schemata)
for key in sorted(alternations.keys()): for key in sorted(alternations.keys()):
alternation_fs = etree.SubElement(parent, 'fs') alternation_fs = etree.SubElement(parent, 'fs')
alternation_fs.attrib['type'] = 'aternation' alternation_fs.attrib['type'] = 'aternation'
...@@ -344,7 +344,9 @@ def write_alternations(parent, unified_frame, lexical_unit, mapping, used_schema ...@@ -344,7 +344,9 @@ def write_alternations(parent, unified_frame, lexical_unit, mapping, used_schema
phrases_f = etree.SubElement(connection_fs, 'f') phrases_f = etree.SubElement(connection_fs, 'f')
write_phrases_coll(phrases_f, schema_hooks) write_phrases_coll(phrases_f, schema_hooks)
def prepare_alternations(mapping, used_schemata): def prepare_alternations(lexical_unit, mapping, used_schemata):
connections_info = analyse_connections(mapping)
argument_mappings = UnifiedFrameArgumentSlowalFrameMapping.objects.filter(unified_frame_mapping = mapping) argument_mappings = UnifiedFrameArgumentSlowalFrameMapping.objects.filter(unified_frame_mapping = mapping)
alternations = defaultdict(lambda: defaultdict(lambda: [])) alternations = defaultdict(lambda: defaultdict(lambda: []))
for argument_mapping in argument_mappings: for argument_mapping in argument_mappings:
...@@ -354,14 +356,41 @@ def prepare_alternations(mapping, used_schemata): ...@@ -354,14 +356,41 @@ def prepare_alternations(mapping, used_schemata):
argument_realization = ArgumentConnection.objects.get(argument = sargument) argument_realization = ArgumentConnection.objects.get(argument = sargument)
by_schema_realizations = argument_realization.schema_connections.all() by_schema_realizations = argument_realization.schema_connections.all()
for schema_hook in by_schema_realizations: for schema_hook in by_schema_realizations:
subentry = schema_hook.subentry if valid_connection(lexical_unit, schema_hook, connections_info):
schema = schema_hook.schema subentry = schema_hook.subentry
used_schemata.add((subentry, schema)) schema = schema_hook.schema
alternation = schema_hook.alternation used_schemata.add((subentry, schema))
alternations[(subentry.id, schema.id, alternation)][uargument].append(schema_hook) alternation = schema_hook.alternation
alternations[(subentry.id, schema.id, alternation)][uargument].append(schema_hook)
return alternations return alternations
def analyse_connections(mapping):
    """Collect which schemata are attested by examples of each lexical unit.

    Walks every lexical unit of ``mapping.slowal_frame`` and every
    ``ExampleConnection`` attached to it.  For each example that has at
    least one schema connection, only the first connection is used; it is
    identified by the pair ``(subentry.id, schema.id)``.

    Args:
        mapping: object exposing ``slowal_frame.lexical_units.all()``.

    Returns:
        A tuple ``(lu_schema_connections, connected_lu_count)`` where

        * ``lu_schema_connections`` maps a lexical unit to the set of
          schema ids found in its examples, and
        * ``connected_lu_count`` maps a schema id to the number of
          lexical units whose examples use it.

        Both are ``defaultdict`` instances (defaults: empty set / ``0``).
    """
    lu_schema_connections = defaultdict(set)
    connected_lu_count = defaultdict(int)
    for lu in mapping.slowal_frame.lexical_units.all():
        for example in ExampleConnection.objects.filter(lexical_unit=lu):
            scs = example.schema_connections.all()
            if not scs:
                # Example is not linked to any schema; nothing to record.
                continue
            schema_hook = scs[0]
            schema_id = (schema_hook.subentry.id, schema_hook.schema.id)
            # Count each lexical unit at most once per schema, so the
            # counter really is a number of lexical units and not a number
            # of examples.
            if schema_id not in lu_schema_connections[lu]:
                lu_schema_connections[lu].add(schema_id)
                connected_lu_count[schema_id] += 1
    return (lu_schema_connections, connected_lu_count)
def valid_connection(lexical_unit, schema_hook, connections_info):
    """Decide whether ``schema_hook`` should be kept for ``lexical_unit``.

    The hook's schema is identified by ``(subentry.id, schema.id)``.  The
    connection is accepted when either

    * that schema id is recorded for ``lexical_unit`` in the first element
      of ``connections_info``, or
    * the second element of ``connections_info`` records a count of zero
      (or nothing at all) for that schema id.

    Args:
        lexical_unit: hashable key into the per-lexical-unit mapping.
        schema_hook: object exposing ``subentry.id`` and ``schema.id``.
        connections_info: pair ``(lu_schema_connections,
            connected_lu_count)`` of mappings, as returned by
            ``analyse_connections``.

    Returns:
        ``True`` if the connection is accepted, ``False`` otherwise.
    """
    schema_id = (schema_hook.subentry.id, schema_hook.schema.id)
    lu_schema_connections, connected_lu_count = connections_info
    # Use .get() rather than indexing: indexing a defaultdict inserts an
    # empty entry for every key that is merely probed, which would mutate
    # the shared connections_info on each call (and would raise KeyError
    # for a plain dict).
    if schema_id in lu_schema_connections.get(lexical_unit, ()):
        return True
    # @TODO: check for "się"
    return connected_lu_count.get(schema_id, 0) == 0
def write_phrases_coll(parent, phrases_list): def write_phrases_coll(parent, phrases_list):
vColl = etree.SubElement(parent, 'vColl') vColl = etree.SubElement(parent, 'vColl')
vColl.attrib['org'] = 'set' vColl.attrib['org'] = 'set'
......
This diff is collapsed.
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment