From bb0cecacf190462c3b23471caa02cbadaba8b5ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Grzegorz=20Kubo=C5=84?= <g_kubon@e-science.pl>
Date: Mon, 13 Mar 2023 16:24:34 +0100
Subject: [PATCH] changes accommodating new db schema, fixed tests from last
 commit

---
 plwn/bases.py                                |   1 +
 plwn/enums.py                                |   2 +-
 plwn/readers/nodes.py                        |   2 +-
 plwn/storages/sqlite.py                      | 243 +++++++++++++------
 tests/abstract_cases/test_unit_and_synset.py |  27 +++
 5 files changed, 193 insertions(+), 82 deletions(-)

diff --git a/plwn/bases.py b/plwn/bases.py
index 43f9712..0e66bc7 100644
--- a/plwn/bases.py
+++ b/plwn/bases.py
@@ -939,6 +939,7 @@ class LexicalUnitBase(object):
         """
         lu_dict = {
             u'id': self.id,
+            u'legacy_id': self.legacy_id,
             u'lemma': self.lemma,
             u'pos': self.pos.value,
             u'variant': self.variant,
diff --git a/plwn/enums.py b/plwn/enums.py
index 6dfcc70..de9f4d8 100644
--- a/plwn/enums.py
+++ b/plwn/enums.py
@@ -296,7 +296,7 @@ class Domain(Enum):
 
     bhp = u'najwyższe w hierarchii'
     czy = u'czynności (nazwy)'
-    wytw = u'wytwory ludzkie (nazwy)'
+    wytw = u'wytwory ludzkie(nazwy)'
     cech = u'cechy ludzi i zwierzÄ…t'
     czc = u'części ciała'
     umy = u'związane z myśleniem'
diff --git a/plwn/readers/nodes.py b/plwn/readers/nodes.py
index 9f58999..45fb601 100644
--- a/plwn/readers/nodes.py
+++ b/plwn/readers/nodes.py
@@ -122,7 +122,7 @@ def make_relation_type_node(**props):
     """
     rel = RelationTypeNode(
         name=props.pop('name'),
-        legacy_id=props.pop('legacy_id'),
+        legacy_id=props.pop('legacy_id', None),
         kind=props.pop('kind'),
         parent=props.pop('parent', None),
         aliases=props.pop('aliases', frozenset()),
diff --git a/plwn/storages/sqlite.py b/plwn/storages/sqlite.py
index 904e540..940d4b1 100644
--- a/plwn/storages/sqlite.py
+++ b/plwn/storages/sqlite.py
@@ -64,76 +64,72 @@ PRAGMA foreign_keys = ON;
 
 -- Metadata table. Used for version number, currently
 CREATE TABLE tbl_plwn_meta (
-    name VARCHAR(255) NOT NULL  ,
-    value BLOB NULL
+    name TEXT UNIQUE NOT NULL  ,
+    value BLOB
 );
 
 -- Tables for constant values
 CREATE TABLE tbl_pos (
-    id INTEGER NOT NULL  ,
-    value VARCHAR(255) NOT NULL  ,
-    PRIMARY KEY (id)
+    id INTEGER PRIMARY KEY,
+    value TEXT NOT NULL
 );
 
 CREATE TABLE tbl_verbaspect (
-    id INTEGER NOT NULL  ,
-    value VARCHAR(255) NOT NULL  ,
-    PRIMARY KEY (id)
+    id INTEGER PRIMARY KEY,
+    value TEXT UNIQUE NOT NULL
 );
 
 CREATE TABLE tbl_emotionmark (
-    id INTEGER NOT NULL  ,
-    value VARCHAR(255) NOT NULL  ,
-    PRIMARY KEY (id)
+    id INTEGER PRIMARY KEY,
+    value TEXT UNIQUE NOT NULL
 );
 
 CREATE TABLE tbl_emotionname (
-    id INTEGER NOT NULL  ,
-    value VARCHAR(255) NOT NULL  ,
-    PRIMARY KEY (id)
+    id INTEGER PRIMARY KEY,
+    value TEXT UNIQUE NOT NULL COLLATE locale
 );
 
 CREATE TABLE tbl_emotionvaluation (
-    id INTEGER NOT NULL  ,
-    value VARCHAR(255) NOT NULL  ,
-    PRIMARY KEY (id)
+    id INTEGER PRIMARY KEY,
+    value TEXT NOT NULL COLLATE locale
 );
 
 CREATE TABLE tbl_domain (
-    id INTEGER NOT NULL  ,
-    value VARCHAR(255) NOT NULL  ,
-    PRIMARY KEY (id)
+    id INTEGER PRIMARY KEY,
+    value TEXT NOT NULL COLLATE locale
 );
 CREATE UNIQUE INDEX value ON tbl_domain (value);
 
 -- Synset only gets one simple table
 CREATE TABLE tbl_synset (
-    id BLOB  ,
+    id BLOB PRIMARY KEY,
     legacy_id INTEGER NULL  ,
-    definition TEXT NULL  ,
-    isartificial INTEGER NOT NULL DEFAULT '0' ,
-    PRIMARY KEY (id)
+    definition TEXT COLLATE locale,
+    isartificial INTEGER NOT NULL DEFAULT 0
 );
 
 -- Lexical units have several tables, since they have several list-like
 -- properties. They also need indexes for lookup.
 
 CREATE TABLE tbl_lexicalunit (
-    id BLOB  ,
+    id BLOB PRIMARY KEY,
     legacy_id INTEGER NULL  ,
-    lemma VARCHAR(255) NOT NULL  ,
-    pos INTEGER NOT NULL  ,
+    lemma TEXT NOT NULL COLLATE locale,
+    pos INTEGER NOT NULL
+        REFERENCES tbl_pos (id),
     variant INTEGER NOT NULL  ,
-    synset BLOB NOT NULL  ,
+    synset BLOB NOT NULL
+        REFERENCES tbl_synset (id),
     unitindex INTEGER NOT NULL  ,
-    definition TEXT NULL  ,
-    domain INTEGER NOT NULL  ,
-    verbaspect INTEGER NULL  ,
-    isemotional INTEGER NULL  ,
-    emotionmark INTEGER NULL  ,
-    emotionexample1 TEXT NULL  ,
-    emotionexample2 TEXT NULL  ,
-    PRIMARY KEY (id)
+    definition TEXT COLLATE locale,
+    domain INTEGER NOT NULL
+        REFERENCES tbl_domain (id),
+    verbaspect INTEGER
+        REFERENCES tbl_verbaspect (id),
+    isemotional INTEGER,
+    emotionmark INTEGER,
+    emotionexample1 TEXT COLLATE locale,
+    emotionexample2 TEXT COLLATE locale
 );
 
 CREATE UNIQUE INDEX lemma ON tbl_lexicalunit (lemma, pos, variant);
@@ -143,33 +139,40 @@ CREATE UNIQUE INDEX synset ON tbl_lexicalunit (synset, unitindex);
 
 -- Tables dependant on lexicalunit
 CREATE TABLE tbl_senseexample (
-    unitid BLOB NOT NULL  ,
-    example TEXT NOT NULL  ,
-    source TEXT NOT NULL
+    unitid BLOB NOT NULL
+        REFERENCES tbl_lexicalunit (id),
+    example TEXT NOT NULL COLLATE locale,
+    source TEXT NOT NULL COLLATE locale
 );
 CREATE INDEX sen_i ON tbl_senseexample (unitid);
 
 CREATE TABLE tbl_externallink (
-    unitid BLOB NOT NULL  ,
-    link TEXT NOT NULL
+    unitid BLOB NOT NULL
+        REFERENCES tbl_lexicalunit (id),
+    link TEXT NOT NULL COLLATE locale
 );
 CREATE INDEX link_i ON tbl_externallink (unitid);
 
 CREATE TABLE tbl_usagenote (
-    unitid BLOB NOT NULL  ,
-    note TEXT NOT NULL
+    unitid BLOB NOT NULL
+        REFERENCES tbl_lexicalunit (id),
+    note TEXT NOT NULL COLLATE locale
 );
 CREATE INDEX note_i ON tbl_usagenote (unitid);
 
 CREATE TABLE tbl_unitemotionname (
-    unitid BLOB NOT NULL  ,
-    nameid INTEGER NOT NULL  ,
+    unitid BLOB NOT NULL
+        REFERENCES tbl_lexicalunit (id),
+    nameid INTEGER NOT NULL
+        REFERENCES tbl_emotionname (id),
     PRIMARY KEY (unitid, nameid)
 );
 
 CREATE TABLE tbl_unitemotionvaluation (
-    unitid BLOB NOT NULL  ,
-    valuationid INTEGER NOT NULL  ,
+    unitid BLOB NOT NULL
+        REFERENCES tbl_lexicalunit (id),
+    valuationid INTEGER NOT NULL
+        REFERENCES tbl_emotionvaluation (id),
     PRIMARY KEY (unitid, valuationid)
 );
 
@@ -178,24 +181,20 @@ CREATE TABLE tbl_unitemotionvaluation (
 -- The for below are used to gather combinations of parent / child relation
 -- names.
 CREATE TABLE tbl_synsetrelationparentpart (
-    id INTEGER NOT NULL  ,
-    name VARCHAR(255) NOT NULL  ,
-    PRIMARY KEY (id)
+    id INTEGER PRIMARY KEY,
+    name TEXT UNIQUE NOT NULL COLLATE locale
 );
 CREATE TABLE tbl_synsetrelationchildpart (
-    id INTEGER NOT NULL  ,
-    name VARCHAR(255) NOT NULL  ,
-    PRIMARY KEY (id)
+    id INTEGER PRIMARY KEY,
+    name TEXT UNIQUE NOT NULL COLLATE locale
 );
 CREATE TABLE tbl_lexicalrelationparentpart (
-    id INTEGER NOT NULL  ,
-    name VARCHAR(255) NOT NULL  ,
-    PRIMARY KEY (id)
+    id INTEGER PRIMARY KEY,
+    name TEXT UNIQUE NOT NULL COLLATE locale
 );
 CREATE TABLE tbl_lexicalrelationchildpart (
-    id INTEGER NOT NULL  ,
-    name VARCHAR(255) NOT NULL  ,
-    PRIMARY KEY (id)
+    id INTEGER PRIMARY KEY,
+    name TEXT UNIQUE NOT NULL COLLATE locale
 );
 CREATE UNIQUE INDEX name ON tbl_lexicalrelationchildpart (name);
 
@@ -204,18 +203,22 @@ CREATE UNIQUE INDEX name ON tbl_lexicalrelationchildpart (name);
 -- Parent can't be NULL - the no-parent case will be handled by a special empty
 -- string parent. This is so that UNIQUE works correctly.
 CREATE TABLE tbl_synsetrelationtype (
-    id BLOB  ,
+    id INTEGER PRIMARY KEY,
     legacy_id INTEGER NULL  ,
-    parentpart INTEGER NOT NULL  ,
-    childpart INTEGER NOT NULL  ,
-    PRIMARY KEY (id)
+    parentpart INTEGER NOT NULL
+        REFERENCES tbl_synsetrelationparentpart (id),
+    childpart INTEGER NOT NULL
+        REFERENCES tbl_synsetrelationchildpart (id),
+
+    UNIQUE (parentpart, childpart)
 );
 CREATE TABLE tbl_lexicalrelationtype (
-    id BLOB  ,
+    id INTEGER PRIMARY KEY ,
     legacy_id INTEGER NULL  ,
-    parentpart INTEGER NOT NULL  ,
-    childpart INTEGER NOT NULL  ,
-    PRIMARY KEY (id)
+    parentpart INTEGER NOT NULL
+        REFERENCES tbl_lexicalrelationparentpart (id),
+    childpart INTEGER NOT NULL
+        REFERENCES tbl_lexicalrelationchildpart (id)
 );
 CREATE UNIQUE INDEX parentpart ON tbl_lexicalrelationtype (
         parentpart,
@@ -225,17 +228,17 @@ CREATE UNIQUE INDEX parentpart ON tbl_lexicalrelationtype (
 -- The below tables are simply maps of relation aliases to their main IDs.
 -- Reverse indexes are needed, too.
 CREATE TABLE tbl_synsetrelationalias (
-    name VARCHAR(255) NOT NULL  ,
-    relationid BLOB NOT NULL  ,
-    PRIMARY KEY (name)
+    name TEXT PRIMARY KEY NOT NULL COLLATE locale,
+    relationid BLOB NOT NULL
+        REFERENCES tbl_synsetrelationtype (id)
 );
 CREATE INDEX synsetrelationalias_irev ON tbl_synsetrelationalias (
         relationid
 );
 CREATE TABLE tbl_lexicalrelationalias (
-    name VARCHAR(255) NOT NULL  ,
-    relationid BLOB NOT NULL  ,
-    PRIMARY KEY (name)
+    name TEXT PRIMARY KEY NOT NULL COLLATE locale,
+    relationid BLOB NOT NULL
+        REFERENCES tbl_lexicalrelationtype (id)
 );
 CREATE INDEX lexicalrelationalias_irev ON tbl_lexicalrelationalias (
         relationid
@@ -243,15 +246,23 @@ CREATE INDEX lexicalrelationalias_irev ON tbl_lexicalrelationalias (
 
 -- Next are finally the relation instances
 CREATE TABLE tbl_synsetrelation (
-    source BLOB NOT NULL  ,
-    relationtype BLOB NOT NULL  ,
-    target BLOB NOT NULL  ,
+    source BLOB NOT NULL
+        REFERENCES tbl_synset (id),
+    relationtype BLOB NOT NULL
+        REFERENCES tbl_synsetrelationtype (id),
+    target BLOB NOT NULL
+        REFERENCES tbl_synset (id),
+
     PRIMARY KEY (source, relationtype, target)
 );
 CREATE TABLE tbl_lexicalrelation (
-    source BLOB NOT NULL  ,
-    relationtype BLOB NOT NULL  ,
-    target BLOB NOT NULL  ,
+    source BLOB NOT NULL
+        REFERENCES tbl_lexicalunit (id),
+    relationtype BLOB NOT NULL
+        REFERENCES tbl_lexicalrelationtype (id),
+    target BLOB NOT NULL
+        REFERENCES tbl_lexicalunit (id),
+
     PRIMARY KEY (source, relationtype, target)
 );
 
@@ -404,7 +415,7 @@ class PLWordNet(bs.PLWordNetBase):
             )
 
             lu_q = u"""
-            SELECT lemma, pos.value, variant, synset
+            SELECT lemma, tbl_pos.value, variant, synset
             FROM tbl_lexicalunit
                 JOIN tbl_pos ON tbl_pos.id = tbl_lexicalunit.pos
             WHERE tbl_lexicalunit.id = ?
@@ -1397,6 +1408,33 @@ class _DBBuilder(object):
                 en.RelationKind.lexical,
             )
 
+    def _show(self):
+        """Print every row of the main tables to stdout (debugging aid).
+
+        Tables are dumped one after another, in a fixed order: the synset
+        tables first, then the lexical unit ones. Each table is printed as
+        the list of row tuples returned by ``fetchall``.
+        """
+        # Table names are fixed identifiers, never external input, so
+        # concatenating them into the query is safe (SQL placeholders cannot
+        # stand in for identifiers anyway).
+        table_names = (
+            u'tbl_synset',
+            u'tbl_synsetrelation',
+            u'tbl_synsetrelationtype',
+            u'tbl_synsetrelationparentpart',
+            u'tbl_synsetrelationchildpart',
+            u'tbl_lexicalunit',
+            u'tbl_lexicalrelation',
+            u'tbl_lexicalrelationtype',
+            u'tbl_lexicalrelationparentpart',
+            u'tbl_lexicalrelationchildpart',
+        )
+        for table_name in table_names:
+            with closing(self._db.cursor()) as cur:
+                cur.execute(u'SELECT * FROM ' + table_name)
+                print(cur.fetchall())
+
     def _insert_synset(self, syn_node):
         self._db.execute(
             u"""
diff --git a/tests/abstract_cases/test_unit_and_synset.py b/tests/abstract_cases/test_unit_and_synset.py
index f3885de..4ac8513 100644
--- a/tests/abstract_cases/test_unit_and_synset.py
+++ b/tests/abstract_cases/test_unit_and_synset.py
@@ -94,6 +94,7 @@ class SynsetRelationsTest(ut.TestCase):
             nd.make_synset_node(id=3),
             nd.make_lexical_unit_node(
                 id=11,
+                legacy_id=11,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=1,
@@ -103,6 +104,7 @@ class SynsetRelationsTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=21,
+                legacy_id=21,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=2,
@@ -112,6 +114,7 @@ class SynsetRelationsTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=31,
+                legacy_id=31,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=3,
@@ -258,6 +261,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase):
             nd.make_synset_node(id=8),
             nd.make_lexical_unit_node(
                 id=11,
+                legacy_id=11,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=1,
@@ -267,6 +271,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=21,
+                legacy_id=21,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=2,
@@ -276,6 +281,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=31,
+                legacy_id=31,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=3,
@@ -285,6 +291,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=41,
+                legacy_id=41,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=4,
@@ -294,6 +301,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=51,
+                legacy_id=51,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=5,
@@ -303,6 +311,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=61,
+                legacy_id=61,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=6,
@@ -312,6 +321,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=71,
+                legacy_id=71,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=7,
@@ -321,6 +331,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=81,
+                legacy_id=81,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=8,
@@ -446,6 +457,7 @@ class SynsetRelationsWithArtificialLoopTest(ut.TestCase):
             nd.make_synset_node(id=4),
             nd.make_lexical_unit_node(
                 id=11,
+                legacy_id=11,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=1,
@@ -455,6 +467,7 @@ class SynsetRelationsWithArtificialLoopTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=21,
+                legacy_id=21,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=2,
@@ -464,6 +477,7 @@ class SynsetRelationsWithArtificialLoopTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=31,
+                legacy_id=31,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=3,
@@ -473,6 +487,7 @@ class SynsetRelationsWithArtificialLoopTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=41,
+                legacy_id=41,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=4,
@@ -529,6 +544,7 @@ class LexicalUnitPropertiesTest(ut.TestCase):
             nd.make_synset_node(id=1),
             nd.make_lexical_unit_node(
                 id=11,
+                legacy_id=11,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=1,
@@ -645,6 +661,7 @@ class LexicalUnitRelationsTest(ut.TestCase):
             nd.make_synset_node(id=1),
             nd.make_lexical_unit_node(
                 id=11,
+                legacy_id=11,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=1,
@@ -658,6 +675,7 @@ class LexicalUnitRelationsTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=12,
+                legacy_id=12,
                 lemma=u'bbb',
                 pos=en.PoS.n,
                 variant=1,
@@ -668,6 +686,7 @@ class LexicalUnitRelationsTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=13,
+                legacy_id=13,
                 lemma=u'ccc',
                 pos=en.PoS.n,
                 variant=1,
@@ -767,6 +786,7 @@ class ItemOrderingTest(ut.TestCase):
             nd.make_synset_node(id=3),
             nd.make_lexical_unit_node(
                 id=11,
+                legacy_id=11,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=1,
@@ -776,6 +796,7 @@ class ItemOrderingTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=21,
+                legacy_id=21,
                 lemma=u'bbb',
                 pos=en.PoS.n,
                 variant=1,
@@ -785,6 +806,7 @@ class ItemOrderingTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=22,
+                legacy_id=22,
                 lemma=u'Ä…Ä…Ä…',
                 pos=en.PoS.n,
                 variant=2,
@@ -794,6 +816,7 @@ class ItemOrderingTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=31,
+                legacy_id=31,
                 lemma=u'Ä…Ä…Ä…',
                 pos=en.PoS.n,
                 variant=1,
@@ -844,6 +867,7 @@ class ToDictTest(ut.TestCase):
             nd.make_synset_node(id=3),
             nd.make_lexical_unit_node(
                 id=11,
+                legacy_id=11,
                 lemma=u'aaa',
                 pos=en.PoS.v,
                 variant=1,
@@ -867,6 +891,7 @@ class ToDictTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=21,
+                legacy_id=21,
                 lemma=u'bbb',
                 pos=en.PoS.n,
                 variant=1,
@@ -876,6 +901,7 @@ class ToDictTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=31,
+                legacy_id=31,
                 lemma=u'ccc',
                 pos=en.PoS.n,
                 variant=1,
@@ -886,6 +912,7 @@ class ToDictTest(ut.TestCase):
         ))
         self.__lex11_dict = {
             u'id': 11,
+            u'legacy_id': 11,
             u'lemma': u'aaa',
             u'pos': u'czasownik',
             u'variant': 1,
-- 
GitLab