From d5ae117b83a2f36eafc0427d65afb364776af149 Mon Sep 17 00:00:00 2001 From: dcz <dcz@ipipan.waw.pl> Date: Thu, 18 May 2023 17:49:10 +0200 Subject: [PATCH] Loading selected walenty dict Readme --- README.md | 25 +++++++ syntax/management/commands/import_tei.py | 88 ++++++++++++++++++------ 2 files changed, 93 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index a485892..44db7df 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,13 @@ In order to run the development environment locally: -> run ./reset_db.sh script in interactive bash -> exit interactive bash by typing ctrl-d docker-compose start backend + +By default the database is populated with a small subset of the Polish Valence Dictionary. +To load a different dictionary file one has to run the following tasks before executing the ./reset_db.sh script: + + * download the full Walenty dataset (the TEI format can be downloaded from http://zil.ipipan.waw.pl/Walenty) + * unpack the zip archive and place the xml file in ./data/walenty + * set the environment variable WALENTY_FILE_NAME to the name of the file (e.g. export WALENTY_FILE_NAME=walenty_20210913.xml) In order to reinstall a database instance a folder specified by the DATABASE_DIR should be removed. @@ -35,3 +42,21 @@ In order to build the frontend Vue.js application for production execute the following commands: docker-compose run frontend yarn build Compiled application files will be located in `frontend/dist/`. + +## Default users + +#### Admin user + + * Login: shell + * Password: valier + +#### Leksykograf user + + * Login: Leksykograf + * Password: valier111 + +#### Super leksykograf user + + * Login: Superleksykograf + * Password: valier111 + diff --git a/syntax/management/commands/import_tei.py b/syntax/management/commands/import_tei.py index d41b1b3..627b7d6 100644 --- a/syntax/management/commands/import_tei.py +++ b/syntax/management/commands/import_tei.py @@ -1,26 +1,32 @@ #!
/usr/bin/python # -*- coding: utf-8 -*- +import logging +import os +from xml.sax import handler, make_parser + from django.core.management.base import BaseCommand -import sys, os, shutil, codecs, copy, errno, logging -from xml.sax import saxutils, handler, make_parser -from importer.WalentyXML import WalentyTeiHandler -from importer.WalentyPreprocessXML import WalentyPreprocessTeiHandler -from shellvalier.settings import BASE_DIR +from common.models import ImportInProgress from connections.models import POS, Status from examples.models import ExampleOpinion, ExampleSource +from importer.WalentyPreprocessXML import WalentyPreprocessTeiHandler +from importer.WalentyXML import WalentyTeiHandler +from semantics.models import FrameOpinion, ArgumentRole, SemanticRole, RoleAttribute, \ + SelectionalPreferenceRelation, RoleType +from shellvalier.environment import get_environment +from shellvalier.settings import BASE_DIR from syntax.management.commands.add_predefined_preferences import create_predefined_preferences from syntax.management.commands.import_relations import import_relations -from syntax.models import SchemaOpinion, Aspect, InherentSie, Negativity, Predicativity, SyntacticFunction, Control, PredicativeControl, Position +from syntax.models import SchemaOpinion, Aspect, InherentSie, Negativity, Predicativity, SyntacticFunction, \ + Control, PredicativeControl, Position from syntax.models_phrase import ( Case, PhraseAspect, AdverbialCategory, PhraseNegativity, PhraseInherentSie, Number, Gender, Degree, LemmaOperator, LemmaCooccur, ModificationType, ) -from semantics.models import FrameOpinion, ArgumentRole, SemanticRole, RoleAttribute, PredefinedSelectionalPreference, SelectionalPreferenceRelation, RoleType -from common.models import ImportInProgress + class Command(BaseCommand): args = 'none' @@ -29,16 +35,17 @@ class Command(BaseCommand): def handle(self, **options): import_tei() -def import_tei(): +def import_tei(): logging.basicConfig(filename='import.log', 
level=logging.DEBUG) - xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20210913_smaller.xml') - #xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20210913_smaller.xml') + xml_file_name = get_environment('WALENTY_FILE_NAME', default='walenty_20210913_smaller.xml') + + xml_file = os.path.join(BASE_DIR, 'data', 'walenty', xml_file_name) + # xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20210913_smaller.xml') # xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20210913_smallest.xml') # xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20210913.xml') - xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), xml_file) import_constants() @@ -57,6 +64,7 @@ def import_tei(): parser.parse(xml_path) ImportInProgress.objects.all().delete() + def import_constants(): import_poses() import_statuses() @@ -81,60 +89,73 @@ def import_constants(): create_predefined_preferences() import_relations() + def import_poses(): poses = [u'unk', u'adj', u'noun', u'adv', u'verb'] for pos_tag in poses: pos = POS(tag=pos_tag) pos.save() + def import_statuses(): - statuses = [(10, u'do obróbki'), (20, u'w obróbce'), (25, u'do usuniÄ™cia'), (30, u'gotowe'), (35, u'zalążkowe'), (40, u'sprawdzone'), (50, u'(F) w obróbce'), (60, u'(F) gotowe'), (70, u'(F) sprawdzone'), (80, u'(S) w obróbce'), (90, u'(S) gotowe'), (100, u'(S) sprawdzone')] + statuses = [(10, u'do obróbki'), (20, u'w obróbce'), (25, u'do usuniÄ™cia'), (30, u'gotowe'), (35, u'zalążkowe'), + (40, u'sprawdzone'), (50, u'(F) w obróbce'), (60, u'(F) gotowe'), (70, u'(F) sprawdzone'), + (80, u'(S) w obróbce'), (90, u'(S) gotowe'), (100, u'(S) sprawdzone')] for pri, name in statuses: status = Status(key=name, priority=pri) status.save() + def import_schema_opinions(): opinions = [(60, u'vul'), (50, u'col'), (40, u'dat'), (30, u'bad'), (20, u'unc'), (10, u'cer')] for pri, short in opinions: opinion = SchemaOpinion(key=short, priority=pri) opinion.save() + def 
import_frame_opinions(): - opinions = [(70, u'met'), (60, u'vul'), (50, u'col'), (40, u'dat'), (30, u'bad'), (20, u'unc'), (10, u'cer'), (80, u'dom'), (90, u'rar'), (100, u'unk')] + opinions = [(70, u'met'), (60, u'vul'), (50, u'col'), (40, u'dat'), (30, u'bad'), (20, u'unc'), (10, u'cer'), + (80, u'dom'), (90, u'rar'), (100, u'unk')] for pri, short in opinions: opinion = FrameOpinion(key=short, priority=pri) opinion.save() + def import_aspects(): aspects = [(10, u'imperf'), (20, u'perf'), (32, u'_'), (42, u'')] for pri, name in aspects: aspect = Aspect(name=name, priority=pri) aspect.save() + def import_inherent_sies(): sies = [(10, u'false'), (20, u'true')] for pri, name in sies: sie = InherentSie(name=name, priority=pri) sie.save() + def import_negativities(): negativities = [(20, u'aff'), (10, u'neg'), (31, u'_'), (41, u'')] for pri, name in negativities: neg = Negativity(name=name, priority=pri) neg.save() + def import_predicativities(): predicativities = [(20, u'false'), (10, u'true')] for pri, name in predicativities: pred = Predicativity(name=name, priority=pri) pred.save() + def import_syntactic_functions(): functions = [(0, u'subj'), (20, u'head'), (10, u'obj')] for pri, name in functions: sf = SyntacticFunction(name=name, priority=pri) sf.save() + def import_control_tags(): controls = [(10, u'controller'), (20, u'controllee'), (30, u'controller2'), (40, u'controllee2')] for pri, name in controls: @@ -145,6 +166,7 @@ def import_control_tags(): cont = PredicativeControl(name=name, priority=pri) cont.save() + def import_semantic_roles(): roles = [ (10, u'Initiator', u'91,106,217', None), @@ -169,7 +191,8 @@ def import_semantic_roles(): # priorities set so that, when role and attribute priorities are added, # Role_Source < Role_Foreground < Role_Background < Role_Goal # and Role can be inserted anywhere into that hierarchy - attributes = [(1, u'Source', None, u'left'), (3, u'Foreground', None, u'top'), (5, u'Background', None, u'bottom'), (7, u'Goal', None, 
u'right')] + attributes = [(1, u'Source', None, u'left'), (3, u'Foreground', None, u'top'), (5, u'Background', None, u'bottom'), + (7, u'Goal', None, u'right')] for pri, role, color, gradient in roles: role = SemanticRole(role=role, color=color, priority=pri) role.save() @@ -191,31 +214,44 @@ def import_semantic_role_types(): cont = RoleType(type=name) cont.save() + # def import_predefined_preferences(): -# predefs = [u'ALL', u'LUDZIE', u'ISTOTY', u'PODMIOTY', u'KOMUNIKAT', u'KONCEPCJA', u'WYTWÓR', u'JADÅO', u'CZAS', u'OBIEKTY', u'CECHA', u'CZYNNOŚĆ', u'KIEDY', u'CZEMU', u'ILOŚĆ', u'POÅOÅ»ENIE', u'DOBRA', u'MIEJSCE', u'SYTUACJA', u'OTOCZENIE'] +# predefs = [u'ALL', u'LUDZIE', u'ISTOTY', u'PODMIOTY', u'KOMUNIKAT', +# u'KONCEPCJA', u'WYTWÓR', u'JADÅO', u'CZAS', u'OBIEKTY', u'CECHA', +# u'CZYNNOŚĆ', u'KIEDY', u'CZEMU', u'ILOŚĆ', u'POÅOÅ»ENIE', u'DOBRA', u'MIEJSCE', u'SYTUACJA', u'OTOCZENIE'] # for name in predefs: # predef = PredefinedSelectionalPreference(key=name) # predef.save() def import_preference_relations(): - relations = [(14, u'meronimia'), (15, u'holonimia'), (20, u'meronimia (typu część)'), (21, u'meronimia (typu porcja)'), (22, u'meronimia (typu miejsce)'), (23, u'meronimia (typu element)'), (24, u'meronimia (typu materiaÅ‚)'), (25, u'holonimia (typu część)'), (26, u'holonimia (typu porcja)'), (27, u'holonimia (typu miejsce)'), (28, u'holonimia (typu element)'), (29, u'holonimia (typu materiaÅ‚)'), (51, u'nosiciel stanu/cechy'), (52, u'stan/cecha'), (61, u'synonimia miÄ™dzyparadygmatyczna'), (64, u'meronimia (typu element taksonomiczny)'), (65, u'holonimia (typu element taksonomiczny)'), (108, u'fuzzynimia synsetów'), (-1, u'RELAT')] + relations = [(14, u'meronimia'), (15, u'holonimia'), (20, u'meronimia (typu część)'), + (21, u'meronimia (typu porcja)'), (22, u'meronimia (typu miejsce)'), (23, u'meronimia (typu element)'), + (24, u'meronimia (typu materiaÅ‚)'), (25, u'holonimia (typu część)'), (26, u'holonimia (typu porcja)'), + (27, u'holonimia (typu 
miejsce)'), (28, u'holonimia (typu element)'), + (29, u'holonimia (typu materiaÅ‚)'), (51, u'nosiciel stanu/cechy'), (52, u'stan/cecha'), + (61, u'synonimia miÄ™dzyparadygmatyczna'), (64, u'meronimia (typu element taksonomiczny)'), + (65, u'holonimia (typu element taksonomiczny)'), (108, u'fuzzynimia synsetów'), (-1, u'RELAT')] for id, name in relations: relat = SelectionalPreferenceRelation(plwn_id=id, key=name) relat.save() def import_examples_sources(): - sources = [(0, u'NKJP0.5M'), (1, u'NKJP1.2M'), (2, u'NKJP30M'), (3, u'NKJP250M'), (4, u'NKJP300M'), (5, u'NKJP500M'), (6, u'NKJP1800M'), (7, u'linguistic_literature'), (8, u'other_literature'), (9, u'own')] + sources = [(0, u'NKJP0.5M'), (1, u'NKJP1.2M'), (2, u'NKJP30M'), (3, u'NKJP250M'), (4, u'NKJP300M'), + (5, u'NKJP500M'), (6, u'NKJP1800M'), (7, u'linguistic_literature'), (8, u'other_literature'), + (9, u'own')] for pri, name in sources: es = ExampleSource(key=name, priority=pri) es.save() + def import_examples_opinions(): opinions = [(0, 'zÅ‚y'), (1, 'wÄ…tpliwy'), (2, 'dobry')] for pri, name in opinions: eo = ExampleOpinion(key=name, priority=pri) eo.save() + def import_phrase_attributes(): import_cases() import_phrase_aspects() @@ -231,54 +267,65 @@ def import_phrase_attributes(): dummy_position.save() assert (dummy_position.id == 1) + def import_cases(): - cases = [(0, u'str'), (1, u'nom'), (2, u'gen'), (3, u'dat'), (4, u'acc'), (5, u'inst'), (6, u'loc'), (10, u'pred'), (11, u'part'), (12, u'postp'), (13, u'agr')] + cases = [(0, u'str'), (1, u'nom'), (2, u'gen'), (3, u'dat'), (4, u'acc'), (5, u'inst'), (6, u'loc'), (10, u'pred'), + (11, u'part'), (12, u'postp'), (13, u'agr')] for pri, name in cases: case = Case(name=name, priority=pri) case.save() + def import_phrase_aspects(): aspects = [(10, u'imperf'), (20, u'perf'), (30, u'_')] for pri, name in aspects: aspect = PhraseAspect(name=name, priority=pri) aspect.save() + def import_phrase_negativities(): negativities = [(10, u'aff'), (20, u'neg'), (30, 
u'_')] for pri, name in negativities: negativity = PhraseNegativity(name=name, priority=pri) negativity.save() + def import_phrase_inherent_sies(): sies = [(10, u'siÄ™'), (20, u'')] for pri, name in sies: sie = PhraseInherentSie(name=name, priority=pri) sie.save() + def import_adverbial_categories(): - advcats = [(1, u'locat'), (2, u'abl'), (3, u'adl'), (4, u'perl'), (5, u'temp'), (6, u'dur'), (7, 'mod'), (8, 'caus'), (9, 'dest'), (10, 'instr'), (11, 'pron'), (12, 'misc')] + advcats = [(1, u'locat'), (2, u'abl'), (3, u'adl'), (4, u'perl'), (5, u'temp'), (6, u'dur'), (7, 'mod'), + (8, 'caus'), (9, 'dest'), (10, 'instr'), (11, 'pron'), (12, 'misc')] for pri, name in advcats: advcat = AdverbialCategory(name=name, priority=pri) advcat.save() + def import_numbers(): numbers = [(1, u'sg'), (2, u'pl'), (10, u'agr'), (20, u'_')] for pri, name in numbers: number = Number(name=name, priority=pri) number.save() + def import_genders(): genders = [(1, u'm1'), (2, u'm2'), (3, u'm3'), (4, u'f'), (5, u'n'), (10, u'agr')] for pri, name in genders: gender = Gender(name=name, priority=pri) gender.save() + def import_degrees(): degrees = [(1, u'pos'), (2, u'com'), (3, u'sup'), (20, u'_')] for pri, name in degrees: degree = Degree(name=name, priority=pri) degree.save() + def import_lemma_operators(): operators = [(1, u'xor'), (2, u'or')] for pri, name in operators: @@ -289,6 +336,7 @@ def import_lemma_operators(): cooccur = LemmaCooccur(name=name, priority=pri) cooccur.save() + def import_modification_types(): modtypes = [(1, u'ratr'), (2, u'ratr1'), (3, u'atr'), (4, u'atr1'), (5, u'natr')] for pri, name in modtypes: -- GitLab