Skip to content
Snippets Groups Projects
Commit 22148d8f authored by dcz's avatar dcz
Browse files

Importing unified frames

parent e4e154ec
No related branches found
No related tags found
No related merge requests found
Showing
with 4678 additions and 101 deletions
......@@ -8,6 +8,10 @@ In order to run the development environment locally:
./docker/scripts/run-docker
docker-compose stop backend — because reset_db.sh does not like anything being connected to the database
docker-compose run backend bash — and run ./reset_db.sh inside it
docker-compose start backend
## Working in the development environment
Whenever you need to establish an interactive bash session in the running application container, execute:
......
<?xml version='1.0' encoding='UTF-8'?>
<matching>
<unifier_frame>
<argument id="1">
<semantic_role type="role"/>
<roles>
<role name="Initiator"/>
</roles>
</argument>
<argument id="2">
<semantic_role type="alernative">
<roles>
<role name="Theme"/>
<role name="Recipient"/>
</roles>
</semantic_role>
</argument>
<argument id="3">
<semantic_role type="role"/>
<roles>
<role name="Manner"/>
</roles>
</argument>
<argument id="4">
<semantic_role type="role"/>
<roles>
<role name="Instrument"/>
</roles>
</argument>
<argument id="5">
<semantic_role type="modifier"/>
<roles>
<role name="Condition"/>
</roles>
</argument>
</unifier_frame>
<connections>
<slowal_frame id="19813">
<arguments_connections>
<arguments_connection unifier_argument_id="1" slowal_id="57748"/>
<arguments_connection unifier_argument_id="2" slowal_id="57749"/>
<arguments_connection unifier_argument_id="3" slowal_id="57750"/>
<arguments_connection unifier_argument_id="4" slowal_id="57751"/>
</arguments_connections>
</slowal_frame>
<slowal_frame id="3532">
<arguments_connections>
<arguments_connection unifier_argument_id="1" slowal_id="10073"/>
<arguments_connection unifier_argument_id="2" slowal_id="10076"/>
<arguments_connection unifier_argument_id="3" slowal_id="10074"/>
<arguments_connection unifier_argument_id="4" slowal_id="10077"/>
<arguments_connection unifier_argument_id="5" slowal_id="10075"/>
</arguments_connections>
</slowal_frame>
<slowal_frame id="24231">
<arguments_connections>
<arguments_connection unifier_argument_id="1" slowal_id="71024"/>
<arguments_connection unifier_argument_id="2" slowal_id="71025"/>
<arguments_connection unifier_argument_id="3" slowal_id="71026"/>
</arguments_connections>
</slowal_frame>
<slowal_frame id="20103">
<arguments_connections>
<arguments_connection unifier_argument_id="1" slowal_id="58605"/>
<arguments_connection unifier_argument_id="2" slowal_id="58606"/>
<arguments_connection unifier_argument_id="5" slowal_id="58607"/>
</arguments_connections>
</slowal_frame>
<slowal_frame id="8377">
<arguments_connections>
<arguments_connection unifier_argument_id="1" slowal_id="23921"/>
<arguments_connection unifier_argument_id="2" slowal_id="23923"/>
<arguments_connection unifier_argument_id="3" slowal_id="23922"/>
</arguments_connections>
</slowal_frame>
<slowal_frame id="12173">
<arguments_connections>
<arguments_connection unifier_argument_id="1" slowal_id="35045"/>
<arguments_connection unifier_argument_id="2" slowal_id="35046"/>
<arguments_connection unifier_argument_id="3" slowal_id="35047"/>
<arguments_connection unifier_argument_id="5" slowal_id="35048"/>
</arguments_connections>
</slowal_frame>
</connections>
</matching>
<?xml version='1.0' encoding='UTF-8'?>
<matching>
<unifier_frame>
<argument id="1">
<semantic_role type="role"/>
<roles>
<role name="Initiator"/>
</roles>
</argument>
<argument id="2">
<semantic_role type="alernative">
<roles>
<role name="Theme"/>
<role name="Recipient"/>
</roles>
</semantic_role>
</argument>
<argument id="3">
<semantic_role type="role"/>
<roles>
<role name="Manner"/>
</roles>
</argument>
<argument id="4">
<semantic_role type="role"/>
<roles>
<role name="Instrument"/>
</roles>
</argument>
<argument id="5">
<semantic_role type="modifier"/>
<roles>
<role name="Condition"/>
</roles>
</argument>
<connections>
<slowal_frame id="1">
<arguments_connections>
<arguments_connection unifier_argument_id="1" slowal_id="1"/>
<arguments_connection unifier_argument_id="2" slowal_id="2"/>
<arguments_connection unifier_argument_id="3" slowal_id="3"/>
</arguments_connections>
</slowal_frame>
<slowal_frame id="2">
<arguments_connections>
<arguments_connection unifier_argument_id="1" slowal_id="5"/>
<arguments_connection unifier_argument_id="2" slowal_id="4"/>
</arguments_connections>
</slowal_frame>
<slowal_frame id="3">
<arguments_connections>
<arguments_connection unifier_argument_id="3" slowal_id="6"/>
</arguments_connections>
</slowal_frame>
</connections>
</unifier_frame>
</matching>
This diff is collapsed.
#! /usr/bin/python
# -*- coding: utf-8 -*-
from xml.sax import handler
from importer.unification import UnifiedFrameImport
class XMLNode:
    """Minimal in-memory tree node built from SAX events.

    Attributes (all treated as internal by the importer):
        _name:     element name.
        _attrs:    mapping-like object with the element's attributes
                   (must provide ``keys()`` and ``values()``).
        _children: child ``XMLNode`` objects in document order.
        _parent:   parent node, or ``None`` for the root of the subtree.
        _content:  text content assigned through ``setContent``.
    """

    def __init__(self, name, attrs, parent):
        self._name = name
        self._attrs = attrs
        self._children = []
        self._parent = parent
        self._content = ""

    def addChild(self, child):
        """Append *child* to this node's list of children."""
        self._children.append(child)

    def setContent(self, content):
        """Replace the text content stored on this node."""
        self._content = content

    def __str__(self):
        """Return ``name[attribute pairs](child;child;...)`` recursively."""
        # Materialise the pairs into a list: str() of a bare zip object on
        # Python 3 is just "<zip object at 0x...>", which hides the
        # attributes this representation is meant to show.
        att = list(zip(self._attrs.keys(), self._attrs.values()))
        children = ';'.join(str(child) for child in self._children)
        return self._name + '[' + str(att) + '](' + children + ')'
class UnificationPreprocessHandler(handler.ContentHandler):
    """SAX handler that turns every ``<unifier_frame>`` element into a tree.

    While the parser is inside a ``<unifier_frame>`` element each start tag
    becomes an ``XMLNode``; when the frame's end tag arrives the finished
    subtree is handed to ``UnifiedFrameImport.storeUnifiedFrame``. Elements
    outside a ``<unifier_frame>`` are ignored.
    """

    def __init__(self):
        handler.ContentHandler.__init__(self)
        self._subtree = None        # root node of the frame being built
        self._current = None        # node whose end tag is expected next
        self._constructing = False  # True only while inside <unifier_frame>
        self._content = ""          # character data collected so far
        self.entry_meanings = {}
        self.meanings = {}
        self.frames = {}

    def startElement(self, name, attrs):
        """Open a new tree node when inside (or entering) a unifier frame."""
        if name == 'unifier_frame':
            self._constructing = True
            self._content = ""
        if not self._constructing:
            return
        # The SAX parser is allowed to reuse the attrs object between
        # callbacks, so keep a copy rather than the object itself.
        node = XMLNode(name, attrs.copy(), self._current)
        if self._current is not None:
            self._current.addChild(node)
        else:
            self._subtree = node
        self._current = node

    def endElement(self, name):
        """Close the current node; store the frame when its end tag is seen."""
        if self._current is not None:
            self._current.setContent(self._content.strip())
            self._current = self._current._parent
        if name == 'unifier_frame':
            UnifiedFrameImport.UnifiedFrameImport.storeUnifiedFrame(self._subtree)
            # Leave construction mode. Without this reset, elements that
            # follow the frame keep producing nodes, a wrapper element of
            # the next frame becomes the subtree root, and that frame is
            # stored without any of its arguments.
            self._constructing = False
            self._subtree = None
            self._current = None
        self._content = ''

    def characters(self, content):
        """Accumulate character data until the next end tag."""
        self._content += content
#! /usr/bin/python
# -*- coding: utf-8 -*-
import unifier
from importer.Argument import SemanticRole
from semantics.models import FrameOpinion
import semantics.models
from unifier.models import UnifiedFrameArgument, UnifiedFrame, UnifiedFrame2SlowalFrameMapping, \
UnifiedFrameArgumentSlowalFrameMapping
class UnifiedFrameImport:
    """Persist a parsed ``<unifier_frame>`` subtree as unifier model rows.

    All methods work on tree nodes exposing ``_name``, ``_attrs`` and
    ``_children``.
    """

    @classmethod
    def storeAndGetRoleType(cls, type):
        """Return the ``RoleType`` row whose ``type`` equals *type*.

        Despite the name nothing is created here; the row is looked up with
        ``objects.get`` and must already exist.
        """
        return semantics.models.RoleType.objects.get(type=type)

    @classmethod
    def _roleNames(cls, roles_node):
        """Return the ``name`` attribute of every child of a ``<roles>`` node."""
        return [role_node._attrs['name'] for role_node in roles_node._children]

    @classmethod
    def _parseArgument(cls, tree):
        """Extract ``(role_type, role_names)`` from an ``<argument>`` node.

        ``<roles>`` is accepted both as a direct child of ``<argument>`` and
        nested inside ``<semantic_role>``; the sample data uses the nested
        form for arguments of type "alernative".

        Raises:
            ValueError: on an unexpected child element, or when the argument
                has no ``<semantic_role>`` child.
        """
        role_type = None
        role_names = []
        for subtree in tree._children:
            if subtree._name == 'semantic_role':
                role_type = subtree._attrs['type']
                for nested in subtree._children:
                    if nested._name == 'roles':
                        role_names.extend(cls._roleNames(nested))
            elif subtree._name == 'roles':
                role_names.extend(cls._roleNames(subtree))
            else:
                raise ValueError(
                    'Unexpected element <%s> inside <argument id="%s">'
                    % (subtree._name, tree._attrs['id']))
        if role_type is None:
            raise ValueError(
                '<argument id="%s"> has no <semantic_role> element'
                % tree._attrs['id'])
        return role_type, role_names

    @classmethod
    def storeUnifiedFrameArgument(cls, tree, unifiedFrame):
        """Create the ``UnifiedFrameArgument`` described by an ``<argument>`` node.

        Args:
            tree: the ``<argument>`` node.
            unifiedFrame: the ``UnifiedFrame`` the argument belongs to.

        Raises:
            ValueError: when the node is malformed (see ``_parseArgument``).
        """
        argument_id = tree._attrs['id']
        # Validate the whole node before touching the database.
        role_type, role_names = cls._parseArgument(tree)
        role_type_obj = cls.storeAndGetRoleType(role_type)

        # NOTE(review): the XML ids look frame-local ("1", "2", ...) yet are
        # used as the primary key, so an argument of a later frame would
        # overwrite an earlier one with the same id -- confirm intent.
        argument = UnifiedFrameArgument(id=argument_id,
                                        role_type=role_type_obj,
                                        unified_frame=unifiedFrame)
        argument.role = None
        # Save the row first, then reset the many-to-many relation.
        argument.save()
        argument.proposed_roles.set([])

        attribute = None
        for role_name in role_names:
            semantic_role_obj = SemanticRole(role_name, attribute).store()
            semantic_role_obj.save()
            argument.proposed_roles.add(semantic_role_obj)

    @classmethod
    def storeSlowalFrame(cls, tree, unifiedFrame):
        """Store the mapping described by a ``<slowal_frame>`` node.

        One ``UnifiedFrame2SlowalFrameMapping`` is created for the frame and
        one ``UnifiedFrameArgumentSlowalFrameMapping`` for each
        ``<arguments_connection>`` found under ``<arguments_connections>``.
        """
        frame_mapping = UnifiedFrame2SlowalFrameMapping(
            unified_frame=unifiedFrame,
            slowal_frame_id=tree._attrs['id'])
        frame_mapping.save()

        for subtree in tree._children:
            if subtree._name != 'arguments_connections':
                continue
            for connection in subtree._children:
                if connection._name != 'arguments_connection':
                    continue
                # "agrument" is the spelling of the model's field names.
                argument_mapping = UnifiedFrameArgumentSlowalFrameMapping(
                    unified_frame_mapping=frame_mapping,
                    unified_agrument_id=connection._attrs['unifier_argument_id'],
                    slowal_agrument_id=connection._attrs['slowal_id'])
                argument_mapping.save()

    @classmethod
    def storeUnifiedFrame(cls, frame_tree):
        """Create a ``UnifiedFrame`` from a ``<unifier_frame>`` subtree.

        ``<argument>`` children become frame arguments; ``<slowal_frame>``
        elements under ``<connections>`` become frame mappings. Other
        children are ignored.
        """
        unifiedFrame = UnifiedFrame()
        unifiedFrame.save()
        for node in frame_tree._children:
            if node._name == 'argument':
                cls.storeUnifiedFrameArgument(node, unifiedFrame)
            elif node._name == 'connections':
                for subnode in node._children:
                    if subnode._name == 'slowal_frame':
                        cls.storeSlowalFrame(subnode, unifiedFrame)
......@@ -134,6 +134,9 @@ def import_plWordnet():
print()
STORE_HYPERNYMS_DISABLED = os.getenv('STORE_HYPERNYMS_DISABLED')
if STORE_HYPERNYMS_DISABLED != 'true':
print("Storing hypernyms...")
hypernyms = parser.getContentHandler()._hypernymy_to_base
print(len(hypernyms))
......@@ -153,3 +156,5 @@ def import_plWordnet():
print(i, child, parents)
child.hypernyms.add(*parents)
print("...DONE")
else:
print("Storing hypernyms disabled.")
......@@ -18,6 +18,7 @@ time python manage.py start_import
time python manage.py import_expansions
time python manage.py import_plWordnet
time python manage.py import_tei
time python manage.py import_unification
head import.log
wc import.log
......
......@@ -8,7 +8,7 @@ from . import choices
class Frame(models.Model):
lexical_units = models.ManyToManyField(LexicalUnit, related_name='frames')
opinion = models.ForeignKey('FrameOpinion', on_delete=models.PROTECT)
arguments_count = models.PositiveIntegerField(null=False, default=0)
arguments_count = models.PositiveIntegerField(null=False, default=0) #na potrzeby filtrowania
status = models.TextField(
max_length=10,
choices=choices.LexicalUnitStatus.choices,
......@@ -35,11 +35,18 @@ class FrameOpinion(models.Model):
class Argument(models.Model):
#rola
role = models.ForeignKey('ArgumentRole', on_delete=models.PROTECT)
#3 typy preferencji
predefined = models.ManyToManyField('PredefinedSelectionalPreference')
synsets = models.ManyToManyField(Synset)
relations = models.ManyToManyField('RelationalSelectionalPreference')
#odwołanie do ramy
frame = models.ForeignKey(Frame, related_name='arguments', on_delete=models.PROTECT)
#do wyszukiwania
preferences_count = models.PositiveIntegerField(null=False, default=0)
def __str__(self):
......@@ -81,6 +88,13 @@ class RoleAttribute(models.Model):
return self.attribute
class RoleType(models.Model):
    """Named type of a semantic role, stored as a short string."""
    # The type name, at most 20 characters.
    type = models.CharField(max_length=20)

    def __str__(self):
        """Return the stored type name."""
        return self.type
class PredefinedSelectionalPreference(models.Model):
key = models.CharField(max_length=20, unique=True)
# name = TODO: wymaga lokalizacji
......
......@@ -64,6 +64,7 @@ INSTALLED_APPS = [
'users.apps.UsersConfig',
'crispy_forms',
'django_extensions',
'unifier.apps.UnifierConfig',
]
CRISPY_TEMPLATE_PACK = 'bootstrap4'
......
......@@ -17,7 +17,7 @@ from syntax.models_phrase import (
LemmaOperator, LemmaCooccur,
ModificationType,
)
from semantics.models import FrameOpinion, ArgumentRole, SemanticRole, RoleAttribute, PredefinedSelectionalPreference, SelectionalPreferenceRelation
from semantics.models import FrameOpinion, ArgumentRole, SemanticRole, RoleAttribute, PredefinedSelectionalPreference, SelectionalPreferenceRelation, RoleType
from common.models import ImportInProgress
class Command(BaseCommand):
......@@ -32,7 +32,8 @@ def import_tei():
logging.basicConfig(filename='import.log', level=logging.DEBUG)
#xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20200926_smaller.xml')
xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20210913_smaller.xml')
# xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20210913_smaller.xml')
xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20210913_smallest.xml')
xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), xml_file)
......@@ -71,6 +72,7 @@ def import_constants():
import_phrase_attributes()
import_lemma_operators()
import_modification_types()
import_semantic_role_types()
def import_poses():
poses = [u'unk', u'adj', u'noun', u'adv', u'verb']
......@@ -175,6 +177,13 @@ def import_semantic_roles():
r = ArgumentRole(role=role, attribute=attribute)
r.save()
def import_semantic_role_types():
    """Insert one ``RoleType`` row for each known semantic role type.

    Each entry pairs a number with the type name; only the name is stored.
    """
    known_types = ((10, u'role'), (20, u'alernative'), (30, u'modifier'))
    for _number, type_name in known_types:
        RoleType(type=type_name).save()
def import_predefined_preferences():
predefs = [u'ALL', u'LUDZIE', u'ISTOTY', u'PODMIOTY', u'KOMUNIKAT', u'KONCEPCJA', u'WYTWÓR', u'JADŁO', u'CZAS', u'OBIEKTY', u'CECHA', u'CZYNNOŚĆ', u'KIEDY', u'CZEMU', u'ILOŚĆ', u'POŁOŻENIE', u'DOBRA', u'MIEJSCE', u'SYTUACJA', u'OTOCZENIE']
for name in predefs:
......
#! /usr/bin/python
# -*- coding: utf-8 -*-
from django.core.management.base import BaseCommand
import os, logging
from xml.sax import handler, make_parser
from importer.unification.UnificationPreprocessXML import UnificationPreprocessHandler
from shellvalier.settings import BASE_DIR
from common.models import ImportInProgress
class Command(BaseCommand):
    """Management command that runs the unified-frame import."""
    args = 'none'
    help = ''

    def handle(self, **options):
        """Run the import; the command options are not used."""
        import_unification()
def import_unification():
    """Parse the example unification XML file and import its frames.

    Logging goes to ``import_unification.log``. The file is read with a SAX
    parser (external general entities disabled) driven by
    ``UnificationPreprocessHandler``. Once parsing has finished, all
    ``ImportInProgress`` rows are deleted.
    """
    logging.basicConfig(filename='import_unification.log', level=logging.DEBUG)

    relative_to_base = os.path.join(BASE_DIR, 'data', 'unification', 'unify_example_small.xml')
    module_dir = os.path.dirname(os.path.abspath(__file__))
    source_path = os.path.join(module_dir, relative_to_base)

    sax_parser = make_parser()
    sax_parser.setFeature(handler.feature_external_ges, False)
    sax_parser.setContentHandler(UnificationPreprocessHandler())
    sax_parser.parse(source_path)

    ImportInProgress.objects.all().delete()
from django.apps import AppConfig


class UnifierConfig(AppConfig):
    """Django application configuration for the ``unifier`` app."""
    name = 'unifier'
from django.db import models
from meanings.models import LexicalUnit, Synset
from semantics.models import choices, PredefinedSelectionalPreference, RelationalSelectionalPreference, ArgumentRole, \
RoleType, Argument, Frame
class UnifiedLexicalUnitCluster(models.Model):
lexical_units = models.ManyToManyField(LexicalUnit, related_name='UnifiedLexicalUnitCluster')
def __str__(self):
return '%s: %s' % (self.opinion, ' + '.join([str(arg) for arg in self.sorted_arguments()]))
class UnifiedFrameStatus(models.Model):
    """Named status, stored as a short string."""
    # The status name, at most 20 characters.
    status = models.CharField(max_length=20)

    def __str__(self):
        """Return the stored status name."""
        return self.status
class UnifiedFrame(models.Model):
unified_lexical_unit_cluster = models.ForeignKey(UnifiedLexicalUnitCluster, on_delete=models.PROTECT)
status = models.ForeignKey(UnifiedFrameStatus, on_delete=models.PROTECT)
arguments_count = models.PositiveIntegerField(null=False, default=0)
status = models.TextField(
max_length=10,
choices=choices.LexicalUnitStatus.choices,
default=choices.LexicalUnitStatus.PROCESSING,
)
title = models.CharField(max_length=200, default=None, blank=True, null=True)
def sorted_arguments(self): # TODO: zaimplementowac wlasciwe sortowanie
return Argument.objects.filter(frame=self)
return UnifiedFrameArgument.objects.filter(frame=self)
def __str__(self):
return '%s: %s' % (self.opinion, ' + '.join([str(arg) for arg in self.sorted_arguments()]))
class UnifiedFrameArgument(models.Model):
id = models.CharField(max_length=20, primary_key=True)
role_type = models.ForeignKey(RoleType, on_delete=models.PROTECT)
#rola - wybrana przez użytkownika
role = models.ForeignKey(ArgumentRole, on_delete=models.PROTECT, default=None, blank=True, null=True)
#role zaproponowane przez system unifikacyjny
proposed_roles = models.ManyToManyField(ArgumentRole, related_name='proposed_roles')
class Argument(models.Model):
role = models.ForeignKey('ArgumentRole', on_delete=models.PROTECT)
predefined = models.ManyToManyField('PredefinedSelectionalPreference')
#3 typy preferencji - wybrane przez użytkownika
predefined = models.ManyToManyField(PredefinedSelectionalPreference)
synsets = models.ManyToManyField(Synset)
relations = models.ManyToManyField('RelationalSelectionalPreference')
frame = models.ForeignKey(Frame, related_name='arguments', on_delete=models.PROTECT)
preferences_count = models.PositiveIntegerField(null=False, default=0)
relations = models.ManyToManyField(RelationalSelectionalPreference)
def __str__(self):
return str(self.role)
class ArgumentRole(models.Model):
role = models.ForeignKey('SemanticRole', on_delete=models.PROTECT)
attribute = models.ForeignKey('RoleAttribute', null=True, on_delete=models.PROTECT)
#odwołanie do ramy
unified_frame = models.ForeignKey(UnifiedFrame, related_name='unified_arguments', default=None, blank=True, null=True, on_delete=models.PROTECT)
def __str__(self):
if self.attribute is None:
return str(self.role)
else:
return '{}, {}'.format(str(self.role), str(self.attribute))
class SemanticRole(models.Model):
role = models.CharField(max_length=20)
color = models.CharField(max_length=11, null=True)
priority = models.PositiveIntegerField()
class Meta:
ordering = ['priority']
def __str__(self):
return self.role
class RoleAttribute(models.Model):
attribute = models.CharField(max_length=20)
gradient = models.CharField(max_length=10, null=True)
priority = models.PositiveIntegerField()
class Meta:
ordering = ['priority']
def __str__(self):
return self.attribute
class UnifiedFrame2SlowalFrameMapping(models.Model):
    """Links one ``UnifiedFrame`` to one ``Frame``."""
    # Both sides use PROTECT, so a referenced frame cannot be deleted
    # while a mapping row still points at it.
    unified_frame = models.ForeignKey(UnifiedFrame, related_name='unified_frame', on_delete=models.PROTECT)
    slowal_frame = models.ForeignKey(Frame, related_name='slowal_frame', on_delete=models.PROTECT)
class PredefinedSelectionalPreference(models.Model):
key = models.CharField(max_length=20, unique=True)
# name = TODO: wymaga lokalizacji
def __str__(self):
return self.key
class UnifiedFrameArgumentSlowalFrameMapping(models.Model):
    """Links one ``UnifiedFrameArgument`` to one ``Argument`` within a frame mapping."""
    # NOTE: "agrument" is the actual spelling of these field names; code that
    # sets ``unified_agrument_id`` / ``slowal_agrument_id`` relies on it.
    unified_agrument = models.ForeignKey(UnifiedFrameArgument, related_name='unified_agrument', on_delete=models.PROTECT)
    slowal_agrument = models.ForeignKey(Argument, related_name='slowal_agrument', on_delete=models.PROTECT)
    # The frame-level mapping this argument pairing belongs to.
    unified_frame_mapping = models.ForeignKey(UnifiedFrame2SlowalFrameMapping, related_name='unified_frame_mapping', on_delete=models.PROTECT)
class RelationalSelectionalPreference(models.Model):
relation = models.ForeignKey('SelectionalPreferenceRelation', on_delete=models.PROTECT)
to = models.ForeignKey('Argument', on_delete=models.PROTECT)
def __str__(self):
return '%s -> %s' % (self.relation, self.to)
class SelectionalPreferenceRelation(models.Model):
key = models.CharField(max_length=40, unique=True)
# name = TODO: wymaga lokalizacji
plwn_id = models.IntegerField(null=True)
class Meta:
ordering = ['key']
def __str__(self):
return self.key
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment