From e20a6dcd6c074dabf513f6a82cd93f85822a13eb Mon Sep 17 00:00:00 2001 From: dcz <dcz@ipipan.waw.pl> Date: Thu, 9 Jun 2022 16:36:44 +0200 Subject: [PATCH] Importing data with original primary keys --- data/unification/unify_example_small.xml | 18 +++++++++--------- .../templates/unification_edit_display.html | 4 ++-- importer/Argument.py | 1 + importer/Entry.py | 3 ++- importer/Example.py | 6 ++++-- importer/Frame.py | 1 + importer/unification/UnifiedFrameImport.py | 2 +- .../management/commands/import_plWordnet.py | 2 +- unifier/models.py | 4 +--- 9 files changed, 22 insertions(+), 19 deletions(-) diff --git a/data/unification/unify_example_small.xml b/data/unification/unify_example_small.xml index 3276082..4dbecd2 100644 --- a/data/unification/unify_example_small.xml +++ b/data/unification/unify_example_small.xml @@ -33,22 +33,22 @@ </roles> </argument> <connections> - <slowal_frame id="1"> + <slowal_frame id="104274"> <arguments_connections> - <arguments_connection unifier_argument_id="1" slowal_id="1"/> - <arguments_connection unifier_argument_id="2" slowal_id="2"/> - <arguments_connection unifier_argument_id="3" slowal_id="3"/> + <arguments_connection unifier_argument_id="1" slowal_id="141952"/> + <arguments_connection unifier_argument_id="2" slowal_id="141953"/> + <arguments_connection unifier_argument_id="3" slowal_id="143669"/> </arguments_connections> </slowal_frame> - <slowal_frame id="2"> + <slowal_frame id="104232"> <arguments_connections> - <arguments_connection unifier_argument_id="1" slowal_id="5"/> - <arguments_connection unifier_argument_id="2" slowal_id="4"/> + <arguments_connection unifier_argument_id="1" slowal_id="141913"/> + <arguments_connection unifier_argument_id="2" slowal_id="141914"/> </arguments_connections> </slowal_frame> - <slowal_frame id="3"> + <slowal_frame id="104238"> <arguments_connections> - <arguments_connection unifier_argument_id="3" slowal_id="6"/> + <arguments_connection unifier_argument_id="3" slowal_id="141916"/> </arguments_connections> </slowal_frame> </connections> diff --git a/entries/templates/unification_edit_display.html b/entries/templates/unification_edit_display.html index bae37f4..af4a9b0 100644 --- a/entries/templates/unification_edit_display.html +++ b/entries/templates/unification_edit_display.html @@ -51,7 +51,7 @@ Błędna </td> <td id="extract-frame" style="padding: 10px 15px 10px 15px; color: #000000;" - onclick="extract_frames_to_new_frame(1, [1,2], null)"> + onclick="extract_frames_to_new_frame(1, [104274,104238], null)"> Rozdziel </td> <td id="hide-slowal-frame" style="padding: 10px 15px 10px 15px; color: #000000;"> @@ -69,7 +69,7 @@ Niepasująca </td> <td id="move-slowal-frame" style="padding: 10px 15px 10px 15px; color: #000000;" - onclick="extract_frames_to_new_frame(7, [1], 1)"> + onclick="extract_frames_to_new_frame(3, [104274], 1)"> Przerzuć </td> <td id="show-slowal-frame" style="padding: 10px 15px 10px 15px; color: #000000;"> diff --git a/importer/Argument.py b/importer/Argument.py index 956d304..7cf48ff 100644 --- a/importer/Argument.py +++ b/importer/Argument.py @@ -158,6 +158,7 @@ class Argument: argument = semantics.models.Argument(role=role, frame=frame, preferences_count=len(self._selectional_preferences)) + argument.pk = int(self._id.split(".")[1].split("-")[0]) argument.save() self._db_id = argument.id return argument diff --git a/importer/Entry.py b/importer/Entry.py index ad552b7..9bd173e 100644 --- a/importer/Entry.py +++ b/importer/Entry.py @@ -12,6 +12,7 @@ import connections.models class Entry: def __init__(self, entry_tree, entry_meanings, meanings, frames, examples_in_data, examples_out_file, misconnected_out_file): + self._id = int(entry_tree._attrs['xml:id'].split("_")[1].split("-")[0]) self._base = entry_tree._children[0]._children[0]._content self._pos = entry_tree._children[0]._children[1]._content print("processing: " + self._base) @@ -29,7 +30,7 @@ class Entry: pos = POS.objects.get(tag=self._pos) status = Status.objects.get(key=self._status) # @TODO: nie ma frequency w xml-u - entry = connections.models.Entry(name=self._base, pos=pos, status=status, frequency_1M=0, frequency_300M=0) + entry = connections.models.Entry(pk=self._id, name=self._base, pos=pos, status=status, frequency_1M=0, frequency_300M=0) entry.save() try: self._syntax.store(entry, stored_positions) diff --git a/importer/Example.py b/importer/Example.py index bd93adb..4623bd1 100644 --- a/importer/Example.py +++ b/importer/Example.py @@ -14,7 +14,8 @@ def clean_sentence(sentence): class Example: - def __init__(self, sentence, source, opinion, note, illustrated_syntax, meaning, illustrated_semantics): + def __init__(self, eid, sentence, source, opinion, note, illustrated_syntax, meaning, illustrated_semantics): + self._eid = eid self._sentence = sentence self._source = source self._opinion = opinion @@ -66,7 +67,7 @@ class Example: print(example_tree) raise UnknownError() - return cls(sentence, source, opinion, note, illustrated_syntax, meaning, illustrated_semantics) + return cls(eid, sentence, source, opinion, note, illustrated_syntax, meaning, illustrated_semantics) def store(self, entry, meanings): # self._phrases = illustrated_syntax @@ -79,6 +80,7 @@ class Example: opinion=opinion, source=source, note=self._note) + example.pk = self._eid example.save() if not self._phrases.exists(): print(' STORING EXAMPLE:', self._sentence) diff --git a/importer/Frame.py b/importer/Frame.py index d728626..ee6f94b 100644 --- a/importer/Frame.py +++ b/importer/Frame.py @@ -52,6 +52,7 @@ class Frame: if len(frames) == 0 and self._base is not None: opinion = FrameOpinion.objects.get(key=self._opinion) frame = semantics.models.Frame(opinion=opinion) + frame.pk = int(self._id.split("_")[1].split("-")[0]) frame.save() self._db_id = frame.id arguments = {} diff --git a/importer/unification/UnifiedFrameImport.py b/importer/unification/UnifiedFrameImport.py index 7774f8f..cef6d69 100644 --- a/importer/unification/UnifiedFrameImport.py +++ b/importer/unification/UnifiedFrameImport.py @@ -35,7 +35,7 @@ class UnifiedFrameImport: role_type_obj = UnifiedFrameImport.storeAndGetRoleType(role_type) role_type_obj.save() - argument = UnifiedFrameArgument(id=id, + argument = UnifiedFrameArgument(pk=int(id), role_type=role_type_obj, unified_frame=unifiedFrame) argument.proposed_roles.set([]) diff --git a/meanings/management/commands/import_plWordnet.py b/meanings/management/commands/import_plWordnet.py index 4cf34a6..9449a47 100644 --- a/meanings/management/commands/import_plWordnet.py +++ b/meanings/management/commands/import_plWordnet.py @@ -72,7 +72,7 @@ class PlWNHandler(handler.ContentHandler): if desc == 'brak danych': desc = '' pos = POS_MAP[pos] - lu = LexicalUnit(luid=luid, base=lubase, sense=lusense, pos=pos, synset=s, gloss='', definition=desc, text_rep='{}-{}'.format(lubase, lusense)) + lu = LexicalUnit(pk=luid, luid=luid, base=lubase, sense=lusense, pos=pos, synset=s, gloss='', definition=desc, text_rep='{}-{}'.format(lubase, lusense)) # print luid, lubase, lusense self._lexical_units_to_base.append(lu) self._unit = False diff --git a/unifier/models.py b/unifier/models.py index 98efe6d..4962b3e 100644 --- a/unifier/models.py +++ b/unifier/models.py @@ -43,7 +43,7 @@ class UnifiedFrame(models.Model): cnt = UnifiedFrameArgument.objects.count()+1 old_2_new_argument_mapping = {} for unified_frame_argument in unified_frame_arguments: - new_unified_frame_argument = UnifiedFrameArgument.objects.create(id=cnt, + new_unified_frame_argument = UnifiedFrameArgument.objects.create(pk=cnt, role_type=unified_frame_argument.role_type, role=unified_frame_argument.role, unified_frame=new_frame) @@ -79,8 +79,6 @@ class UnifiedFrame(models.Model): class UnifiedFrameArgument(models.Model): - id = models.CharField(max_length=20, primary_key=True) - role_type = models.ForeignKey(RoleType, on_delete=models.PROTECT) #rola - wybrana przez użytkownika role = models.ForeignKey(ArgumentRole, on_delete=models.PROTECT, default=None, blank=True, null=True) -- GitLab