From ab61355adda08e78458e041c0cbc30aa68275c16 Mon Sep 17 00:00:00 2001 From: dcz2 <dcz@ipipan.waw.pl> Date: Wed, 6 Apr 2022 20:47:07 +0200 Subject: [PATCH] Make reset_db great again --- .gitignore | 1 + .../management/commands/import_plWordnet.py | 18 +++++++++++------- reset_db.sh | 9 +++++---- syntax/management/commands/import_tei.py | 1 - 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 84e6ffe..90568fa 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ */migrations/*_auto_*.py /.datastore/ /.idea/ +/import.log diff --git a/meanings/management/commands/import_plWordnet.py b/meanings/management/commands/import_plWordnet.py index a92a662..bd75b3b 100644 --- a/meanings/management/commands/import_plWordnet.py +++ b/meanings/management/commands/import_plWordnet.py @@ -1,5 +1,6 @@ #! /usr/bin/python # -*- coding: utf-8 -*- +import zipfile from django.core.management.base import BaseCommand @@ -93,13 +94,14 @@ class Command(BaseCommand): import_plWordnet() def import_plWordnet(): - xml_file = os.path.join(BASE_DIR, 'data', 'plwordnet', 'plwordnet_2_1.xml') - xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), xml_file) + wordnet_dir = os.path.join(BASE_DIR, 'data', 'plwordnet') + zipped_xml_file = os.path.join(wordnet_dir, 'plwordnet_2_1.xml.zip') parser = make_parser() parser.setContentHandler(PlWNHandler()) print("Parsing Wordnet...") - parser.parse(xml_path) + with zipfile.ZipFile(zipped_xml_file, 'r') as zip_file: + parser.parse(zip_file.open("data/plwordnet/plwordnet_2_1.xml")) print("...DONE") print() @@ -140,12 +142,14 @@ def import_plWordnet(): i += 1 try: child = Synset.objects.get(id=child_id) - parents = [Synset.objects.get(id=parent_id) for parent_id in parent_ids] - except: - print('************', child_id, parent_ids) + except Synset.DoesNotExist: + print(f'************ Missing Synset {child_id}') continue + parents = list(Synset.objects.filter(id__in=parent_ids).only("id")) + missing_parent_ids = set(parent_ids) - {p.id for p in parents} + if missing_parent_ids: + print(f'************ Missing parent Synsets for {child_id}: {missing_parent_ids}') if i % 2000 == 0: print(i, child, parents) child.hypernyms.add(*parents) - child.save() print("...DONE") diff --git a/reset_db.sh b/reset_db.sh index c3b1cc4..d4446d2 100755 --- a/reset_db.sh +++ b/reset_db.sh @@ -1,7 +1,8 @@ #!/bin/bash -sudo su postgres -c "dropdb shellvalier" -sudo su postgres -c "createdb shellvalier -E UTF8 -T template0 -l pl_PL.utf8" +set -e + +python manage.py reset_db --noinput # reset the migrations since we create the DB from scratch find . -path "*/migrations/*.py" -not -name "__init__.py" -delete @@ -10,7 +11,7 @@ find . -path "*/migrations/*.pyc" -delete python manage.py makemigrations python manage.py migrate -rm import.log +rm import.log || true time python manage.py start_import time python manage.py import_expansions @@ -21,7 +22,7 @@ head import.log wc import.log #python manage.py check_descriptions -python manage.py generate_semantics_css +#python manage.py generate_semantics_css # TODO dev only!!! python manage.py shell -c "from django.contrib.auth.models import User; User.objects.create_superuser('shell', '', 'valier')" diff --git a/syntax/management/commands/import_tei.py b/syntax/management/commands/import_tei.py index da37586..121729a 100644 --- a/syntax/management/commands/import_tei.py +++ b/syntax/management/commands/import_tei.py @@ -71,7 +71,6 @@ def import_constants(): import_phrase_attributes() import_lemma_operators() import_modification_types() - pass def import_poses(): poses = [u'unk', u'adj', u'noun', u'adv', u'verb'] -- GitLab