Skip to content
Snippets Groups Projects
Commit 3dc849a3 authored by dcz's avatar dcz
Browse files

Dumping unified frames to xml scripts.

parent 9b562850
No related branches found
No related tags found
No related merge requests found
#-*- coding:utf-8 -*-
import datetime
import os
from django.core.management.base import BaseCommand
from optparse import make_option
from unifier.models import UnifiedFrame
from django.contrib.auth.models import User
from users.models import Assignment
from common.valunifier_tei import createteixml
BASEPATH = '.'
class Command(BaseCommand):
    """Management command that dumps unified frames to TEI XML files.

    Without ``--individual`` all ``UnifiedFrame`` objects go to a single
    ``valunifier_<YYYYMMDD>.xml`` file.  With it, one
    ``<username>_<YYYYMMDD>.xml`` file is produced for every user who has at
    least one assignment whose subject is a ``UnifiedFrame``.  All files are
    placed under ``BASEPATH``.
    """

    args = ''
    help = 'Export ValUnifier in TEI format'

    def add_arguments(self, parser):
        """Register the ``-i/--individual`` switch on *parser*."""
        parser.add_argument(
            '-i', '--individual',
            action='store_true',
            help='Gen individual files.'
        )

    def handle(self, **options):
        """Run the export, either for the whole dictionary or per user."""
        stamp = datetime.datetime.now().strftime('%Y%m%d')

        if options['individual']:
            for user in User.objects.all():
                print("Part for " + user.username)
                # Keep only assignments whose subject is a unified frame and
                # resolve each one to the frame object itself.
                assigned = [
                    item.subject_ct.get_object_for_this_type(id=item.subject_id)
                    for item in Assignment.objects.filter(user=user)
                    if item.subject_ct.model_class() == UnifiedFrame
                ]
                target = os.path.join(BASEPATH, user.username + '_' + stamp + '.xml')
                # Users without any unified frame get no file at all.
                if assigned:
                    createteixml(target, assigned)
            return

        print("Full dictionary")
        target = os.path.join(BASEPATH, 'valunifier_' + stamp + '.xml')
        createteixml(target, UnifiedFrame.objects.all())
#-*- coding:utf-8 -*-
import datetime
from django.db.models import Count, Min, Max
from lxml import etree
from xml.sax.saxutils import escape
from unifier.models import UnifiedFrame2SlowalFrameMapping, \
UnifiedFrameArgumentSlowalFrameMapping
from connections.models import ArgumentConnection
from collections import defaultdict
XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
def createteixml(outpath, unified_frames):
    """Build a TEI document for *unified_frames* and save it to *outpath*.

    The document consists of the root element, the header and the content
    section.  It is serialized as pretty-printed UTF-8 with an XML
    declaration and a ``tei_all.dtd`` doctype; the file is opened in binary
    mode.
    """
    tei_root = write_root()
    write_header(tei_root)
    write_content(tei_root, unified_frames)

    serialization = dict(
        encoding='UTF-8',
        pretty_print=True,
        xml_declaration=True,
        doctype=u'<!DOCTYPE TEI SYSTEM "tei_all.dtd">',
    )
    with open(outpath, 'wb') as handle:
        tei_root.getroottree().write(handle, **serialization)
def write_root():
    """Create and return the document's root ``TEI`` element.

    The element carries ``xml:lang="pl"`` and an ``xmlns`` attribute holding
    the TEI namespace URI.
    """
    tei = etree.Element('TEI')
    tei.set(etree.QName(XML_NAMESPACE, 'lang'), u'pl')
    tei.set('xmlns', u'http://www.tei-c.org/ns/1.0')
    return tei
def write_header(root, extensions_file=False):
    """Append the ``teiHeader`` subtree to *root*.

    The header holds a ``fileDesc`` with the title statement, the
    publication statement (publisher, today's date in ``when`` and the
    licence block) and a placeholder source description.

    *extensions_file* is accepted but not used by this function.
    """
    header = etree.SubElement(root, 'teiHeader')
    description = etree.SubElement(header, 'fileDesc')

    # titleStmt / title
    titles = etree.SubElement(description, 'titleStmt')
    etree.SubElement(titles, 'title').text = u'ValUnifier (?)'

    # publicationStmt: publisher, date, availability
    publication = etree.SubElement(description, 'publicationStmt')
    etree.SubElement(publication, 'publisher').text = (
        u'Institute of Computer Science, Polish Academy of Sciences (IPI PAN)'
    )
    published = etree.SubElement(publication, 'date')
    published.set('when', datetime.datetime.now().strftime('%Y-%m-%d'))
    write_license_elem(publication)

    # sourceDesc with a placeholder paragraph
    source = etree.SubElement(description, 'sourceDesc')
    etree.SubElement(source, 'p').text = u'?'
def write_license_elem(parent):
    """Append an ``availability``/``licence`` block to *parent*.

    The ``licence`` element points at the CC BY-SA 4.0 URL through its
    ``target`` attribute and contains three ``p`` paragraphs: the copyright
    notice, the licence statement and a ``?`` placeholder.
    """
    paragraphs = (
        u'(C) Copyright 2012–2018 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN)',
        u'This work is distributed under a CC BY-SA license: http://creativecommons.org/licenses/by-sa/4.0/',
        '?',
    )
    availability = etree.SubElement(parent, 'availability')
    licence = etree.SubElement(availability, 'licence')
    licence.set('target', u'http://creativecommons.org/licenses/by-sa/4.0/')
    for content in paragraphs:
        etree.SubElement(licence, 'p').text = content
def write_content(root, unified_frames):
    """Append the ``text``/``body`` section with its two ``div`` elements.

    The first ``div`` is reserved for syntactic schemata and the second for
    the frames.  Frames are written first, because writing them is what
    fills the set of schemata that the first ``div`` is then built from.
    """
    body = etree.SubElement(etree.SubElement(root, 'text'), 'body')
    schemata_div = etree.SubElement(body, 'div')
    frames_div = etree.SubElement(body, 'div')

    referenced = set()
    write_unified_frames(frames_div, unified_frames, referenced)
    write_used_schemata(schemata_div, referenced)
#=================== DIV -- SEMANTIC FRAMES ===================#
def write_unified_frames(parent, unified_frames, used_schemata):
    """Write the 'Semantic Frames' heading and one entry per unified frame.

    *used_schemata* is passed through to every entry writer.
    """
    heading = etree.SubElement(parent, 'head')
    heading.text = 'Semantic Frames'
    for frame in unified_frames:
        write_unified_frame_entry(parent, frame, used_schemata)
def write_unified_frame_entry(parent, unified_frame, used_schemata):
    """Append one ``entry`` element describing *unified_frame*.

    The entry gets the ``xml:id`` ``unif_<frame id>-ent`` and, in order, the
    definition, the status info, the frame itself and its realizations.
    """
    entry = etree.SubElement(parent, 'entry')
    entry.set(etree.QName(XML_NAMESPACE, 'id'), u'unif_%d-ent' % unified_frame.id)

    for section_writer in (write_unified_frame_definition,
                           write_status_info,
                           write_unified_frame):
        section_writer(entry, unified_frame)
    write_unified_frames_realizations(entry, unified_frame, used_schemata)
def write_unified_frame_definition(entry, unified_frame):
    """Append a ``def`` element whose text is the frame's ``title``."""
    etree.SubElement(entry, 'def').text = unified_frame.title
def write_status_info(parent, unified_frame):
    """Append a ``general_info`` feature structure holding the frame status.

    Produces ``fs[type=general_info] / f[name=status] / string`` with the
    value of ``unified_frame.status`` as the string's text.
    """
    info = etree.SubElement(parent, 'fs')
    info.set('type', 'general_info')
    status = etree.SubElement(info, 'f')
    status.set('name', 'status')
    value = etree.SubElement(status, 'string')
    value.text = unified_frame.status
#=================== FS TYPE = "FRAME" ===================#
def write_unified_frame(parent, unified_frame):
    """Append the ``fs[type=frame]`` element for *unified_frame*.

    The element's ``xml:id`` is ``unif_<frame id>-frm``; its arguments are
    written beneath it.
    """
    frame_node = etree.SubElement(parent, 'fs')
    frame_node.set(etree.QName(XML_NAMESPACE, 'id'), u'unif_%d-frm' % unified_frame.id)
    frame_node.set('type', 'frame')
    write_frame_arguments(frame_node, unified_frame)
def write_frame_arguments(parent, unified_frame):
    """Append ``f[name=arguments]`` with one entry per frame argument.

    Arguments are taken in the order returned by
    ``unified_frame.sorted_arguments()``.
    """
    feature = etree.SubElement(parent, 'f')
    feature.set('name', 'arguments')
    collection = etree.SubElement(feature, 'vColl')
    collection.set('org', 'set')
    for argument in unified_frame.sorted_arguments():
        write_frame_argument(collection, unified_frame, argument)
def write_frame_argument(parent, frame, arg):
    """Append the ``fs[type=argument]`` element for one frame argument.

    The ``xml:id`` is ``unif_<frame id>.<argument id>-arg``.  The frame part
    of that id (``unif_<frame id>``) is also handed to the selectional
    preferences writer.
    """
    frame_prefix = u'unif_%d' % frame.id
    argument_id = frame_prefix + u'.%d-arg' % arg.id

    node = etree.SubElement(parent, 'fs')
    node.set(etree.QName(XML_NAMESPACE, 'id'), argument_id)
    node.set('type', 'argument')

    write_roles(node, arg)
    write_selectional_preferences(node, arg, frame_prefix)
def write_roles(parent, arg):
    """Append the role-related features of *arg* to *parent*.

    Written features, in order:

    * ``f[name=role_type]`` -- only when ``arg.role_type`` is set;
    * ``f[name=role]`` -- always; its symbol value is ``arg.role.role.role``
      or, when ``arg.role`` is ``None``, the proposed role names joined
      with ``/``;
    * the attributes collection -- when ``arg.role`` is set and has an
      ``attribute`` and/or a ``sub_attribute``.

    The attributes collection used to be written only when *both*
    ``attribute`` and ``sub_attribute`` were set, which silently dropped a
    lone attribute.  It is now written when either is present.
    """
    if arg.role_type is not None:
        role_type_f = etree.SubElement(parent, 'f')
        role_type_f.attrib['name'] = 'role_type'
        role_type_symbol = etree.SubElement(role_type_f, 'symbol')
        role_type_symbol.attrib['value'] = arg.role_type.type

    role_f = etree.SubElement(parent, 'f')
    role_f.attrib['name'] = 'role'
    role_symbol = etree.SubElement(role_f, 'symbol')

    role = arg.role
    if role is None:
        # No role chosen yet: export every proposed role name instead.
        role_symbol.attrib['value'] = '/'.join(
            [r.role.role for r in arg.proposed_roles.all()])
        return

    role_symbol.attrib['value'] = role.role.role

    # Collect whichever of the two optional attributes is present.
    attribute_values = []
    if role.attribute is not None:
        attribute_values.append(role.attribute.attribute)
    if role.sub_attribute is not None:
        attribute_values.append(role.sub_attribute.sub_attribute)
    if not attribute_values:
        return

    attributes_f = etree.SubElement(parent, 'f')
    # NOTE(review): 'arributes' looks like a typo for 'attributes'; kept
    # unchanged because it is part of the emitted format -- confirm with the
    # consumers of the export before renaming.
    attributes_f.attrib['name'] = 'arributes'
    vColl = etree.SubElement(attributes_f, 'vColl')
    vColl.attrib['org'] = 'set'
    for value in attribute_values:
        write_role_attribute(vColl, value)
def write_role_attribute(parent, symbol):
    """Append a ``symbol`` element whose ``value`` is *symbol*."""
    node = etree.SubElement(parent, 'symbol')
    node.set('value', symbol)
def write_selectional_preferences(parent, arg, arg_base_id):
    """Append ``f[name=sel_prefs]`` when *arg* has any selectional preference.

    Nothing is written when the predefined, synset and relation collections
    of *arg* are all empty.  Otherwise a ``sel_prefs_groups`` feature
    structure is created and filled with synsets, predefined preferences
    and relations, in that order.
    """
    has_preferences = (
        len(arg.predefined.all()) > 0
        or len(arg.synsets.all()) > 0
        or len(arg.relations.all()) > 0
    )
    if not has_preferences:
        return

    feature = etree.SubElement(parent, 'f')
    feature.set('name', 'sel_prefs')
    groups = etree.SubElement(feature, 'fs')
    groups.set('type', 'sel_prefs_groups')

    write_synsets_selprefs(groups, arg)
    write_predefined_selprefs(groups, arg)
    write_relation_selprefs(groups, arg, arg_base_id)
def write_synsets_selprefs(parent, arg):
    """Append ``f[name=synsets]`` listing the synsets of *arg*, if any.

    Nothing is written when ``arg.synsets.all()`` is empty.
    """
    members = arg.synsets.all()
    if len(members) == 0:
        return
    feature = etree.SubElement(parent, 'f')
    feature.set('name', 'synsets')
    collection = etree.SubElement(feature, 'vColl')
    collection.set('org', 'set')
    for member in members:
        write_synset(collection, member)
def write_synset(parent, synset):
    """Append a ``numeric`` element whose ``value`` is the synset id as text."""
    node = etree.SubElement(parent, 'numeric')
    node.set('value', str(synset.id))
def write_predefined_selprefs(parent, arg):
    """Append ``f[name=predefs]`` listing the predefined preferences, if any.

    Nothing is written when ``arg.predefined.all()`` is empty.
    """
    members = arg.predefined.all()
    if len(members) == 0:
        return
    feature = etree.SubElement(parent, 'f')
    feature.set('name', 'predefs')
    collection = etree.SubElement(feature, 'vColl')
    collection.set('org', 'set')
    for member in members:
        write_predef(collection, member)
def write_predef(parent, predef):
    """Append a ``symbol`` element whose ``value`` is ``predef.name``."""
    node = etree.SubElement(parent, 'symbol')
    node.set('value', predef.name)
def write_relation_selprefs(parent, arg, arg_base_id):
    """Append ``f[name=relations]`` listing the relations of *arg*, if any.

    Nothing is written when ``arg.relations.all()`` is empty.
    *arg_base_id* is forwarded to the writer of each relation.
    """
    members = arg.relations.all()
    if len(members) == 0:
        return
    feature = etree.SubElement(parent, 'f')
    feature.set('name', 'relations')
    collection = etree.SubElement(feature, 'vColl')
    collection.set('org', 'set')
    for member in members:
        write_relation(collection, member, arg_base_id)
def write_relation(parent, relation, arg_base_id):
    """Append an ``fs[type=relation]`` describing one relation preference.

    The structure has two features: ``type`` (a symbol holding
    ``relation.relation.key``) and ``to`` (an ``fs`` whose ``sameAs`` is
    ``#<arg_base_id>.<target argument id>-arg``).
    """
    node = etree.SubElement(parent, 'fs')
    node.set('type', 'relation')

    # f[name=type] / symbol
    kind = etree.SubElement(node, 'f')
    kind.set('name', 'type')
    kind_symbol = etree.SubElement(kind, 'symbol')
    kind_symbol.set('value', relation.relation.key)

    # f[name=to] / fs linking to the target argument
    target = etree.SubElement(node, 'f')
    target.set('name', 'to')
    link = '#%s.%d-arg' % (arg_base_id, relation.to.id)
    reference = etree.SubElement(target, 'fs')
    reference.set('sameAs', link)
    reference.set('type', 'argument')
#=================== FS TYPE = "FRAME_REALIZATIONS" ===================#
def write_unified_frames_realizations(entry, unified_frame, used_schemata):
    """Append the ``fs[type=frame_realizations]`` section to *entry*.

    It holds ``f[name=realizations]`` with a ``vColl`` that receives one
    item per lexical unit realization of *unified_frame*.
    """
    section = etree.SubElement(entry, 'fs')
    section.set('type', 'frame_realizations')
    feature = etree.SubElement(section, 'f')
    feature.set('name', 'realizations')
    collection = etree.SubElement(feature, 'vColl')
    collection.set('org', 'set')
    write_lexical_units_realizations(collection, unified_frame, used_schemata)
def write_lexical_units_realizations(parent, unified_frame, used_schemata):
    """Write realizations for every lexical unit mapped to *unified_frame*.

    Lexical units are gathered from the slowal frames of all non-removed
    frame mappings and written ordered by ``(base, sense)``.  When the same
    key is seen more than once, the last one encountered is the one kept.
    """
    active_mappings = UnifiedFrame2SlowalFrameMapping.objects.filter(
        unified_frame=unified_frame, removed=False)

    unit_by_signature = {}
    mapping_by_unit = {}
    for frame_mapping in active_mappings:
        for unit in frame_mapping.slowal_frame.lexical_units.all():
            mapping_by_unit[unit] = frame_mapping
            unit_by_signature[(unit.base, unit.sense)] = unit

    for signature in sorted(unit_by_signature):
        unit = unit_by_signature[signature]
        write_lexical_unit_realization(
            parent, unified_frame, unit, mapping_by_unit[unit], used_schemata)
def write_lexical_unit_realization(parent, unified_frame, lexical_unit, mapping, used_schemata):
    """Append one ``fs[type=lexical_unit_realizations]`` to *parent*.

    The structure has two features: ``lexical_unit`` (the description of
    the unit) and ``syntactic_realizations`` (a ``vColl`` filled with the
    alternations found for *mapping*).
    """
    realization = etree.SubElement(parent, "fs")
    realization.set('type', "lexical_unit_realizations")

    unit_feature = etree.SubElement(realization, "f")
    unit_feature.set('name', 'lexical_unit')
    write_lexical_unit(unit_feature, lexical_unit, mapping)

    syntax_feature = etree.SubElement(realization, 'f')
    syntax_feature.set('name', 'syntactic_realizations')
    collection = etree.SubElement(syntax_feature, 'vColl')
    collection.set('org', 'set')
    write_alternations(collection, unified_frame, lexical_unit, mapping, used_schemata)
def write_lexical_unit(parent, lexical_unit, mapping):
    """Append an ``fs[type=lexical_unit]`` describing *lexical_unit*.

    Features written, in order:

    * ``name`` -- string, ``lexical_unit.base``;
    * ``variant`` -- string, ``lexical_unit.sense``;
    * ``plwnluid`` -- numeric, only when ``lexical_unit.luid`` is set;
    * ``plwnsid`` -- numeric, only when the unit has a synset with a
      positive id;
    * ``opinion`` -- symbol, only when the slowal frame of *mapping* has an
      opinion.
    """
    unit_fs = etree.SubElement(parent, 'fs')
    unit_fs.set('type', 'lexical_unit')

    def add_feature(name, value_tag):
        # Create f[name=<name>] under the unit and return its value element.
        feature = etree.SubElement(unit_fs, 'f')
        feature.set('name', name)
        return etree.SubElement(feature, value_tag)

    name_node = add_feature('name', 'string')
    name_node.text = lexical_unit.base

    variant_node = add_feature('variant', 'string')
    variant_node.text = lexical_unit.sense

    if lexical_unit.luid is not None:
        luid_node = add_feature('plwnluid', 'numeric')
        luid_node.set('value', str(lexical_unit.luid))

    if lexical_unit.synset is not None and lexical_unit.synset.id > 0:
        synset_node = add_feature('plwnsid', 'numeric')
        synset_node.set('value', str(lexical_unit.synset.id))

    source_frame = mapping.slowal_frame
    if source_frame.opinion is not None:
        opinion_node = add_feature('opinion', 'symbol')
        opinion_node.set('value', source_frame.opinion.key)
def write_alternations(parent, unified_frame, lexical_unit, mapping, used_schemata):
    """Append one alternation feature structure per alternation of *mapping*.

    Alternations come from ``prepare_alternations`` and are written in
    sorted key order.  Each one holds ``f[name=connections]`` with a
    ``connection`` structure per unified argument: a link to the argument
    (``#unif_<frame id>.<argument id>-arg``) and the collection of phrases
    realizing it.

    The feature wrapping the phrase collection is now named ``phrases``.
    It was previously emitted without a ``name`` attribute, unlike every
    other ``f`` element produced by this module.

    *lexical_unit* is accepted but not used by this function.
    """
    alternations = prepare_alternations(mapping, used_schemata)
    for key in sorted(alternations.keys()):
        alternation_fs = etree.SubElement(parent, 'fs')
        # NOTE(review): 'aternation' looks like a typo for 'alternation';
        # kept unchanged because it is part of the emitted format -- confirm
        # with the consumers of the export before renaming.
        alternation_fs.attrib['type'] = 'aternation'
        connections_f = etree.SubElement(alternation_fs, 'f')
        connections_f.attrib['name'] = 'connections'
        vColl = etree.SubElement(connections_f, 'vColl')
        vColl.attrib['org'] = 'set'
        for argument, schema_hooks in alternations[key].items():
            connection_fs = etree.SubElement(vColl, 'fs')
            connection_fs.attrib['type'] = 'connection'

            # Link back to the unified argument written in the frame section.
            argument_f = etree.SubElement(connection_fs, 'f')
            argument_f.attrib['name'] = 'argument'
            argument_fs = etree.SubElement(argument_f, 'fs')
            argument_fs.attrib['type'] = 'argument'
            argument_fs.attrib['sameAs'] = u'#unif_%d.%d-arg' % (unified_frame.id, argument.id)

            # Phrases realizing the argument in this alternation.
            phrases_f = etree.SubElement(connection_fs, 'f')
            phrases_f.attrib['name'] = 'phrases'
            write_phrases_coll(phrases_f, schema_hooks)
def prepare_alternations(mapping, used_schemata):
    """Group the schema hooks of *mapping* by alternation and argument.

    Returns a nested mapping
    ``{(subentry id, schema id, alternation): {unified argument: [hooks]}}``.
    As a side effect every ``(subentry, schema)`` pair encountered is added
    to *used_schemata*.
    """
    argument_links = UnifiedFrameArgumentSlowalFrameMapping.objects.filter(
        unified_frame_mapping=mapping)
    grouped = defaultdict(lambda: defaultdict(list))

    for link in argument_links:
        unified_argument = link.unified_agrument
        slowal_argument = link.slowal_agrument
        connection = ArgumentConnection.objects.get(argument=slowal_argument)
        for hook in connection.schema_connections.all():
            subentry = hook.subentry
            schema = hook.schema
            used_schemata.add((subentry, schema))
            group_key = (subentry.id, schema.id, hook.alternation)
            grouped[group_key][unified_argument].append(hook)

    return grouped
def write_phrases_coll(parent, phrases_list):
    """Append a ``vColl`` of phrase links to *parent*.

    Every item of *phrases_list* becomes an ``fs[type=phrase]`` whose
    ``sameAs`` is
    ``#unif_<subentry id>.<schema id>.<position id>.<phrase type id>-phr``.
    """
    collection = etree.SubElement(parent, 'vColl')
    collection.set('org', 'set')
    for hook in phrases_list:
        link = etree.SubElement(collection, 'fs')
        link.set('type', 'phrase')
        id_parts = (hook.subentry.id, hook.schema.id,
                    hook.position.id, hook.phrase_type.id)
        link.set('sameAs', u'#unif_%d.%d.%d.%d-phr' % id_parts)
#=================== DIV -- SYNTACTIC SCHEMATA ===================#
def write_used_schemata(parent, used_schemata):
    """Write the 'Syntactic Schemata' heading and one entry per used schema.

    *used_schemata* is a collection of ``(subentry, schema)`` pairs.  The
    pairs are written ordered by ``(subentry.id, schema.id)`` so that the
    output does not depend on set iteration order and stays stable between
    runs over the same data.
    """
    schemata_head = etree.SubElement(parent, 'head')
    schemata_head.text = 'Syntactic Schemata'
    ordered = sorted(used_schemata, key=lambda pair: (pair[0].id, pair[1].id))
    for subentry, schema in ordered:
        write_schema_entry(parent, subentry, schema)
def write_schema_entry(parent, subentry, schema):
    """Append an ``entry`` element for one ``(subentry, schema)`` pair.

    The entry's ``xml:id`` is ``unif_<subentry id>.<schema id>-schent``; the
    schema definition and the schema itself are written into it.
    """
    entry = etree.SubElement(parent, 'entry')
    entry.set(etree.QName(XML_NAMESPACE, 'id'),
              u'unif_%d.%d-schent' % (subentry.id, schema.id))
    for section_writer in (write_schema_definition, write_schema):
        section_writer(entry, subentry, schema)
def write_schema_definition(parent, subentry, schema):
    """Placeholder: currently writes nothing for the schema definition."""
    return None
def write_schema(parent, subentry, schema):
    """Append the ``fs[type=schema]`` element for a ``(subentry, schema)`` pair.

    The element's ``xml:id`` is ``unif_<subentry id>.<schema id>-sch``.
    Features written, in order: ``opinion``, ``inherent_sie``, ``aspect``,
    ``negativity``, ``predicativity`` and ``positions``.  The ``aspect`` and
    ``negativity`` features are always present but get a symbol child only
    when the subentry has a non-empty value for them.

    The opinion falls back to ``'unk'`` when the schema has no opinion
    object or the opinion's key is ``None``.  A missing opinion object used
    to raise ``AttributeError`` before the fallback could apply.
    """
    schema_xml_id = u'unif_%d.%d-sch' % (subentry.id, schema.id)
    schema_fs = etree.SubElement(parent, 'fs')
    schema_fs.attrib[etree.QName(XML_NAMESPACE, 'id')] = schema_xml_id
    schema_fs.attrib['type'] = 'schema'

    # textual representation @TODO -- not present in the database
    # text_rep_f_elem = etree.SubElement(schema_fs_elem, 'f')
    # text_rep_f_elem.attrib['name'] = 'text_rep'
    # text_rep_string = etree.SubElement(text_rep_f_elem, 'string')
    # text_rep = schema.get_position_spaced_text_rep()
    # if schema.characteristics.filter(type=u'ZWROTNOŚĆ', value__value=u'się').exists():
    #     text_rep = ' ' + text_rep
    # text_rep_string.text = lemma.entry_obj.name + text_rep.replace(':',': ')

    # schema opinion
    opinion = schema.opinion
    schema_opinion = opinion.key if opinion is not None else None
    if schema_opinion is None:
        schema_opinion = 'unk'
    opinion_f = etree.SubElement(schema_fs, 'f')
    opinion_f.attrib['name'] = 'opinion'
    opinion_symbol = etree.SubElement(opinion_f, 'symbol')
    opinion_symbol.attrib['value'] = schema_opinion

    # inherent "się"
    reflex = subentry.inherent_sie.name
    selfmark_f = etree.SubElement(schema_fs, 'f')
    selfmark_f.attrib['name'] = 'inherent_sie'
    selfmark_binary = etree.SubElement(selfmark_f, 'binary')
    selfmark_binary.attrib['value'] = reflex

    # aspect
    aspect_f = etree.SubElement(schema_fs, 'f')
    aspect_f.attrib['name'] = 'aspect'
    if subentry.aspect is not None and subentry.aspect.name != '':
        aspect_symbol = etree.SubElement(aspect_f, 'symbol')
        aspect_symbol.attrib['value'] = subentry.aspect.name

    # negativity
    negativity_f = etree.SubElement(schema_fs, 'f')
    negativity_f.attrib['name'] = 'negativity'
    if subentry.negativity is not None and subentry.negativity.name != '':
        negativity_symbol = etree.SubElement(negativity_f, 'symbol')
        negativity_symbol.attrib['value'] = subentry.negativity.name

    # predicativity
    predicativity = subentry.predicativity.name
    predicativity_f = etree.SubElement(schema_fs, 'f')
    predicativity_f.attrib['name'] = 'predicativity'
    predicativity_binary = etree.SubElement(predicativity_f, 'binary')
    predicativity_binary.attrib['value'] = predicativity

    # positions
    write_positions(schema_fs, subentry, schema)
def write_positions(parent, subentry, schema):
    """Append ``f[name=positions]`` with one item per schema position.

    Positions are read from ``schema.positions.all()`` before any element
    is created.
    """
    schema_positions = schema.positions.all()
    feature = etree.SubElement(parent, 'f')
    feature.set('name', 'positions')
    collection = etree.SubElement(feature, 'vColl')
    collection.set('org', 'set')
    for item in schema_positions:
        write_position(collection, subentry, schema, item)
def write_position(parent, subentry, schema, position):
    """Append the ``fs[type=position]`` element for one schema position.

    The ``xml:id`` is ``unif_<subentry id>.<schema id>.<position id>-psn``.
    The function, control and phrases of the position are written beneath
    it, in that order.
    """
    id_parts = (subentry.id, schema.id, position.id)
    node = etree.SubElement(parent, 'fs')
    node.set('type', 'position')
    node.set(etree.QName(XML_NAMESPACE, 'id'), u'unif_%d.%d.%d-psn' % id_parts)

    write_function(node, position)
    write_control(node, position)
    write_phrases(node, subentry, schema, position)
def write_function(parent, position):
    """Append ``f[name=function]`` when the position has a function.

    The symbol's value is ``position.function.name``.  Nothing is written
    when ``position.function`` is ``None``.
    """
    syntactic_function = position.function
    if syntactic_function is None:
        return
    feature = etree.SubElement(parent, 'f')
    feature.set('name', 'function')
    symbol = etree.SubElement(feature, 'symbol')
    symbol.set('value', syntactic_function.name)
def write_control(parent, position):
    """Append ``f[name=control]`` when the position has any control value.

    The collection lists the name of ``position.control`` first and the
    name of ``position.pred_control`` second, skipping whichever is
    ``None``.  Nothing is written when both are ``None``.
    """
    plain_control = position.control
    predicative_control = position.pred_control
    if plain_control is None and predicative_control is None:
        return

    feature = etree.SubElement(parent, 'f')
    feature.set('name', 'control')
    collection = etree.SubElement(feature, 'vColl')
    collection.set('org', 'set')

    if plain_control is not None:
        label = plain_control.name
        etree.SubElement(collection, 'symbol').set('value', label)
    if predicative_control is not None:
        label = predicative_control.name
        etree.SubElement(collection, 'symbol').set('value', label)
def write_phrases(parent, subentry, schema, position):
    """Append ``f[name=phrases]`` with one item per phrase type of *position*.

    Phrase types are read from ``position.phrase_types.all()`` before any
    element is created.
    """
    phrase_types = position.phrase_types.all()
    feature = etree.SubElement(parent, 'f')
    feature.set('name', 'phrases')
    collection = etree.SubElement(feature, 'vColl')
    collection.set('org', 'set')
    for phrase_type in phrase_types:
        write_phrase(collection, subentry, schema, position, phrase_type)
def write_phrase(parent, subentry, schema, position, phrase):
    """Append the ``fs`` element for one phrase type of a position.

    The ``xml:id`` is
    ``unif_<subentry id>.<schema id>.<position id>.<phrase id>-phr`` and the
    ``type`` attribute is ``phrase.main_type.name``.  The textual
    representation of the phrase is written beneath it.
    """
    id_parts = (subentry.id, schema.id, position.id, phrase.id)
    node = etree.SubElement(parent, 'fs')
    node.set(etree.QName(XML_NAMESPACE, 'id'), u'unif_%d.%d.%d.%d-phr' % id_parts)
    node.set('type', phrase.main_type.name)
    # @TODO -- currently no expansions file
    # if phrase.realizations.exists() and write_expansions_id:
    #     write_expansions_link(phrase_fs_elem, phrase)
    write_phrase_textrep(node, phrase)
def write_phrase_textrep(parent, phrase):
    """Append ``f[name=textual_representation]`` holding ``phrase.text_rep``."""
    representation = phrase.text_rep
    feature = etree.SubElement(parent, 'f')
    feature.set('name', 'textual_representation')
    holder = etree.SubElement(feature, 'string')
    holder.text = representation
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment