Loading selected walenty dict

Readme

Loading selected walenty dict
Readme
d5ae117b · dcz · 72c644eb · d5ae117b · d5ae117b
Commit d5ae117b authored 2 years ago by dcz
--- a/README.md
+++ b/README.md
@@ -19,6 +19,13 @@ In order to run the development environment locally:
            -> run ./reset_db.sh script in interactive bash
            -> exit interactive bash by typing ctrl-d
        docker-compose start backend
+        
+By default the database is populated with a small subset of the Polish Valence Dictionary.
+To load a different dictionary file, complete the following steps before executing the ./reset_db.sh script:
+ 
+ * download the full Walenty dataset (the TEI format can be downloaded from http://zil.ipipan.waw.pl/Walenty)
+ * unpack the zip archive and place the XML file in ./data/walenty
+ * set the environment variable WALENTY_FILE_NAME to the name of the file (e.g. export WALENTY_FILE_NAME=walenty_20210913.xml)

 In order to reinstall a database instance a folder specified by the DATABASE_DIR should be removed.

@@ -35,3 +42,21 @@ In order to build the frontend Vue.js application for production execute the fol
      docker-compose run frontend yarn build

 Compiled application files will be located in `frontend/dist/`.
+
+## Default users
+
+#### Admin user
+
+ * Login: shell
+ * Password: valier
+
+#### Leksykograf user
+
+ * Login: Leksykograf
+ * Password: valier111
+
+#### Super leksykograf user
+
+ * Login: Superleksykograf
+ * Password: valier111
+
--- a/syntax/management/commands/import_tei.py
+++ b/syntax/management/commands/import_tei.py
 #! /usr/bin/python
 # -*- coding: utf-8 -*-

+import logging
+import os
+from xml.sax import handler, make_parser
+
 from django.core.management.base import BaseCommand

-import sys, os, shutil, codecs, copy, errno, logging
-from xml.sax import saxutils, handler, make_parser
-from importer.WalentyXML import WalentyTeiHandler
-from importer.WalentyPreprocessXML import WalentyPreprocessTeiHandler
-from shellvalier.settings import BASE_DIR
+from common.models import ImportInProgress
 from connections.models import POS, Status
 from examples.models import ExampleOpinion, ExampleSource
+from importer.WalentyPreprocessXML import WalentyPreprocessTeiHandler
+from importer.WalentyXML import WalentyTeiHandler
+from semantics.models import FrameOpinion, ArgumentRole, SemanticRole, RoleAttribute, \
+    SelectionalPreferenceRelation, RoleType
+from shellvalier.environment import get_environment
+from shellvalier.settings import BASE_DIR
 from syntax.management.commands.add_predefined_preferences import create_predefined_preferences
 from syntax.management.commands.import_relations import import_relations
-from syntax.models import SchemaOpinion, Aspect, InherentSie, Negativity, Predicativity, SyntacticFunction, Control, PredicativeControl, Position
+from syntax.models import SchemaOpinion, Aspect, InherentSie, Negativity, Predicativity, SyntacticFunction, \
+    Control, PredicativeControl, Position
 from syntax.models_phrase import (
    Case, PhraseAspect, AdverbialCategory, PhraseNegativity, PhraseInherentSie,
    Number, Gender, Degree,
    LemmaOperator, LemmaCooccur,
    ModificationType,
 )
-from semantics.models import FrameOpinion, ArgumentRole, SemanticRole, RoleAttribute, PredefinedSelectionalPreference, SelectionalPreferenceRelation, RoleType
-from common.models import ImportInProgress
+

 class Command(BaseCommand):
    args = 'none'
@@ -29,16 +35,17 @@ class Command(BaseCommand):
    def handle(self, **options):
        import_tei()

-def import_tei():

+def import_tei():
    logging.basicConfig(filename='import.log', level=logging.DEBUG)

-    xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20210913_smaller.xml')
-    #xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20210913_smaller.xml')
+    xml_file_name = get_environment('WALENTY_FILE_NAME', default='walenty_20210913_smaller.xml')
+
+    xml_file = os.path.join(BASE_DIR, 'data', 'walenty', xml_file_name)
+    # xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20210913_smaller.xml')
    # xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20210913_smallest.xml')
    # xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20210913.xml')

-
    xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), xml_file)

    import_constants()
@@ -57,6 +64,7 @@ def import_tei():
    parser.parse(xml_path)
    ImportInProgress.objects.all().delete()

+
 def import_constants():
    import_poses()
    import_statuses()
@@ -81,60 +89,73 @@ def import_constants():
    create_predefined_preferences()
    import_relations()

+
 def import_poses():
    poses = [u'unk', u'adj', u'noun', u'adv', u'verb']
    for pos_tag in poses:
        pos = POS(tag=pos_tag)
        pos.save()

+
 def import_statuses():
-    statuses = [(10, u'do obróbki'), (20, u'w obróbce'), (25, u'do usunięcia'), (30, u'gotowe'), (35, u'zalążkowe'), (40, u'sprawdzone'), (50, u'(F) w obróbce'), (60, u'(F) gotowe'), (70, u'(F) sprawdzone'), (80, u'(S) w obróbce'), (90, u'(S) gotowe'), (100, u'(S) sprawdzone')]
+    statuses = [(10, u'do obróbki'), (20, u'w obróbce'), (25, u'do usunięcia'), (30, u'gotowe'), (35, u'zalążkowe'),
+                (40, u'sprawdzone'), (50, u'(F) w obróbce'), (60, u'(F) gotowe'), (70, u'(F) sprawdzone'),
+                (80, u'(S) w obróbce'), (90, u'(S) gotowe'), (100, u'(S) sprawdzone')]
    for pri, name in statuses:
        status = Status(key=name, priority=pri)
        status.save()

+
 def import_schema_opinions():
    opinions = [(60, u'vul'), (50, u'col'), (40, u'dat'), (30, u'bad'), (20, u'unc'), (10, u'cer')]
    for pri, short in opinions:
        opinion = SchemaOpinion(key=short, priority=pri)
        opinion.save()

+
 def import_frame_opinions():
-    opinions = [(70, u'met'), (60, u'vul'), (50, u'col'), (40, u'dat'), (30, u'bad'), (20, u'unc'), (10, u'cer'), (80, u'dom'), (90, u'rar'), (100, u'unk')]
+    opinions = [(70, u'met'), (60, u'vul'), (50, u'col'), (40, u'dat'), (30, u'bad'), (20, u'unc'), (10, u'cer'),
+                (80, u'dom'), (90, u'rar'), (100, u'unk')]
    for pri, short in opinions:
        opinion = FrameOpinion(key=short, priority=pri)
        opinion.save()

+
 def import_aspects():
    aspects = [(10, u'imperf'), (20, u'perf'), (32, u'_'), (42, u'')]
    for pri, name in aspects:
        aspect = Aspect(name=name, priority=pri)
        aspect.save()

+
 def import_inherent_sies():
    sies = [(10, u'false'), (20, u'true')]
    for pri, name in sies:
        sie = InherentSie(name=name, priority=pri)
        sie.save()

+
 def import_negativities():
    negativities = [(20, u'aff'), (10, u'neg'), (31, u'_'), (41, u'')]
    for pri, name in negativities:
        neg = Negativity(name=name, priority=pri)
        neg.save()

+
 def import_predicativities():
    predicativities = [(20, u'false'), (10, u'true')]
    for pri, name in predicativities:
        pred = Predicativity(name=name, priority=pri)
        pred.save()

+
 def import_syntactic_functions():
    functions = [(0, u'subj'), (20, u'head'), (10, u'obj')]
    for pri, name in functions:
        sf = SyntacticFunction(name=name, priority=pri)
        sf.save()

+
 def import_control_tags():
    controls = [(10, u'controller'), (20, u'controllee'), (30, u'controller2'), (40, u'controllee2')]
    for pri, name in controls:
@@ -145,6 +166,7 @@ def import_control_tags():
        cont = PredicativeControl(name=name, priority=pri)
        cont.save()

+
 def import_semantic_roles():
    roles = [
        (10, u'Initiator', u'91,106,217', None),
@@ -169,7 +191,8 @@ def import_semantic_roles():
    # priorities set so that, when role and attribute priorities are added,
    # Role_Source < Role_Foreground < Role_Background < Role_Goal
    # and Role can be inserted anywhere into that hierarchy
-    attributes = [(1, u'Source', None, u'left'), (3, u'Foreground', None, u'top'), (5, u'Background', None, u'bottom'), (7, u'Goal', None, u'right')]
+    attributes = [(1, u'Source', None, u'left'), (3, u'Foreground', None, u'top'), (5, u'Background', None, u'bottom'),
+                  (7, u'Goal', None, u'right')]
    for pri, role, color, gradient in roles:
        role = SemanticRole(role=role, color=color, priority=pri)
        role.save()
@@ -191,31 +214,44 @@ def import_semantic_role_types():
        cont = RoleType(type=name)
        cont.save()

+
 # def import_predefined_preferences():
-#     predefs = [u'ALL', u'LUDZIE', u'ISTOTY', u'PODMIOTY', u'KOMUNIKAT', u'KONCEPCJA', u'WYTWÓR', u'JADŁO', u'CZAS', u'OBIEKTY', u'CECHA', u'CZYNNOŚĆ', u'KIEDY', u'CZEMU', u'ILOŚĆ', u'POŁOŻENIE', u'DOBRA', u'MIEJSCE', u'SYTUACJA', u'OTOCZENIE']
+#     predefs = [u'ALL', u'LUDZIE', u'ISTOTY', u'PODMIOTY', u'KOMUNIKAT',
+#     u'KONCEPCJA', u'WYTWÓR', u'JADŁO', u'CZAS', u'OBIEKTY', u'CECHA',
+#     u'CZYNNOŚĆ', u'KIEDY', u'CZEMU', u'ILOŚĆ', u'POŁOŻENIE', u'DOBRA', u'MIEJSCE', u'SYTUACJA', u'OTOCZENIE']
 #     for name in predefs:
 #         predef = PredefinedSelectionalPreference(key=name)
 #         predef.save()

 def import_preference_relations():
-    relations = [(14, u'meronimia'), (15, u'holonimia'), (20, u'meronimia (typu część)'), (21, u'meronimia (typu porcja)'), (22, u'meronimia (typu miejsce)'), (23, u'meronimia (typu element)'), (24, u'meronimia (typu materiał)'), (25, u'holonimia (typu część)'), (26, u'holonimia (typu porcja)'), (27, u'holonimia (typu miejsce)'), (28, u'holonimia (typu element)'), (29, u'holonimia (typu materiał)'), (51, u'nosiciel stanu/cechy'), (52, u'stan/cecha'), (61, u'synonimia międzyparadygmatyczna'), (64, u'meronimia (typu element taksonomiczny)'), (65, u'holonimia (typu element taksonomiczny)'), (108, u'fuzzynimia synsetów'), (-1, u'RELAT')]
+    relations = [(14, u'meronimia'), (15, u'holonimia'), (20, u'meronimia (typu część)'),
+                 (21, u'meronimia (typu porcja)'), (22, u'meronimia (typu miejsce)'), (23, u'meronimia (typu element)'),
+                 (24, u'meronimia (typu materiał)'), (25, u'holonimia (typu część)'), (26, u'holonimia (typu porcja)'),
+                 (27, u'holonimia (typu miejsce)'), (28, u'holonimia (typu element)'),
+                 (29, u'holonimia (typu materiał)'), (51, u'nosiciel stanu/cechy'), (52, u'stan/cecha'),
+                 (61, u'synonimia międzyparadygmatyczna'), (64, u'meronimia (typu element taksonomiczny)'),
+                 (65, u'holonimia (typu element taksonomiczny)'), (108, u'fuzzynimia synsetów'), (-1, u'RELAT')]
    for id, name in relations:
        relat = SelectionalPreferenceRelation(plwn_id=id, key=name)
        relat.save()


 def import_examples_sources():
-    sources = [(0, u'NKJP0.5M'), (1, u'NKJP1.2M'), (2, u'NKJP30M'), (3, u'NKJP250M'), (4, u'NKJP300M'), (5, u'NKJP500M'), (6, u'NKJP1800M'), (7, u'linguistic_literature'), (8, u'other_literature'), (9, u'own')]
+    sources = [(0, u'NKJP0.5M'), (1, u'NKJP1.2M'), (2, u'NKJP30M'), (3, u'NKJP250M'), (4, u'NKJP300M'),
+               (5, u'NKJP500M'), (6, u'NKJP1800M'), (7, u'linguistic_literature'), (8, u'other_literature'),
+               (9, u'own')]
    for pri, name in sources:
        es = ExampleSource(key=name, priority=pri)
        es.save()

+
 def import_examples_opinions():
    opinions = [(0, 'zły'), (1, 'wątpliwy'), (2, 'dobry')]
    for pri, name in opinions:
        eo = ExampleOpinion(key=name, priority=pri)
        eo.save()

+
 def import_phrase_attributes():
    import_cases()
    import_phrase_aspects()
@@ -231,54 +267,65 @@ def import_phrase_attributes():
    dummy_position.save()
    assert (dummy_position.id == 1)

+
 def import_cases():
-    cases = [(0, u'str'), (1, u'nom'), (2, u'gen'), (3, u'dat'), (4, u'acc'), (5, u'inst'), (6, u'loc'), (10, u'pred'), (11, u'part'), (12, u'postp'), (13, u'agr')]
+    cases = [(0, u'str'), (1, u'nom'), (2, u'gen'), (3, u'dat'), (4, u'acc'), (5, u'inst'), (6, u'loc'), (10, u'pred'),
+             (11, u'part'), (12, u'postp'), (13, u'agr')]
    for pri, name in cases:
        case = Case(name=name, priority=pri)
        case.save()

+
 def import_phrase_aspects():
    aspects = [(10, u'imperf'), (20, u'perf'), (30, u'_')]
    for pri, name in aspects:
        aspect = PhraseAspect(name=name, priority=pri)
        aspect.save()

+
 def import_phrase_negativities():
    negativities = [(10, u'aff'), (20, u'neg'), (30, u'_')]
    for pri, name in negativities:
        negativity = PhraseNegativity(name=name, priority=pri)
        negativity.save()

+
 def import_phrase_inherent_sies():
    sies = [(10, u'się'), (20, u'')]
    for pri, name in sies:
        sie = PhraseInherentSie(name=name, priority=pri)
        sie.save()

+
 def import_adverbial_categories():
-    advcats = [(1, u'locat'), (2, u'abl'), (3, u'adl'), (4, u'perl'), (5, u'temp'), (6, u'dur'), (7, 'mod'), (8, 'caus'), (9, 'dest'), (10, 'instr'), (11, 'pron'), (12, 'misc')]
+    advcats = [(1, u'locat'), (2, u'abl'), (3, u'adl'), (4, u'perl'), (5, u'temp'), (6, u'dur'), (7, 'mod'),
+               (8, 'caus'), (9, 'dest'), (10, 'instr'), (11, 'pron'), (12, 'misc')]
    for pri, name in advcats:
        advcat = AdverbialCategory(name=name, priority=pri)
        advcat.save()

+
 def import_numbers():
    numbers = [(1, u'sg'), (2, u'pl'), (10, u'agr'), (20, u'_')]
    for pri, name in numbers:
        number = Number(name=name, priority=pri)
        number.save()

+
 def import_genders():
    genders = [(1, u'm1'), (2, u'm2'), (3, u'm3'), (4, u'f'), (5, u'n'), (10, u'agr')]
    for pri, name in genders:
        gender = Gender(name=name, priority=pri)
        gender.save()

+
 def import_degrees():
    degrees = [(1, u'pos'), (2, u'com'), (3, u'sup'), (20, u'_')]
    for pri, name in degrees:
        degree = Degree(name=name, priority=pri)
        degree.save()

+
 def import_lemma_operators():
    operators = [(1, u'xor'), (2, u'or')]
    for pri, name in operators:
@@ -289,6 +336,7 @@ def import_lemma_operators():
        cooccur = LemmaCooccur(name=name, priority=pri)
        cooccur.save()

+
 def import_modification_types():
    modtypes = [(1, u'ratr'), (2, u'ratr1'), (3, u'atr'), (4, u'atr1'), (5, u'natr')]
    for pri, name in modtypes: