diff --git a/plwn/bases.py b/plwn/bases.py index 43f9712ef12cdbf7d08832f084686d047e078924..0e66bc704b21000b86afe8068fdb41a16d3d6926 100644 --- a/plwn/bases.py +++ b/plwn/bases.py @@ -939,6 +939,7 @@ class LexicalUnitBase(object): """ lu_dict = { u'id': self.id, + u'legacy_id': self.legacy_id, u'lemma': self.lemma, u'pos': self.pos.value, u'variant': self.variant, diff --git a/plwn/enums.py b/plwn/enums.py index 6dfcc70530dfb08d64189a99370872dacae0002e..de9f4d8086be89a0d12c2e42fd731e76190f0bdf 100644 --- a/plwn/enums.py +++ b/plwn/enums.py @@ -296,7 +296,7 @@ class Domain(Enum): bhp = u'najwyższe w hierarchii' czy = u'czynności (nazwy)' - wytw = u'wytwory ludzkie (nazwy)' + wytw = u'wytwory ludzkie(nazwy)' cech = u'cechy ludzi i zwierząt' czc = u'części ciała' umy = u'związane z myśleniem' diff --git a/plwn/readers/nodes.py b/plwn/readers/nodes.py index 9f58999afc307145030394a1894feafd2ef1a914..45fb60123bc992ecb43f0f3b50be2adc98ec0c0d 100644 --- a/plwn/readers/nodes.py +++ b/plwn/readers/nodes.py @@ -122,7 +122,7 @@ def make_relation_type_node(**props): """ rel = RelationTypeNode( name=props.pop('name'), - legacy_id=props.pop('legacy_id'), + legacy_id=props.pop('legacy_id', None), kind=props.pop('kind'), parent=props.pop('parent', None), aliases=props.pop('aliases', frozenset()), diff --git a/plwn/storages/sqlite.py b/plwn/storages/sqlite.py index 904e5400ce632a08d27da49e89f9ebb99713ed27..940d4b11764bedbf70ef409771d257cd2f997de3 100644 --- a/plwn/storages/sqlite.py +++ b/plwn/storages/sqlite.py @@ -64,76 +64,72 @@ PRAGMA foreign_keys = ON; -- Metadata table. 
Used for version number, currently CREATE TABLE tbl_plwn_meta ( - name VARCHAR(255) NOT NULL , - value BLOB NULL + name TEXT UNIQUE NOT NULL , + value BLOB ); -- Tables for constant values CREATE TABLE tbl_pos ( - id INTEGER NOT NULL , - value VARCHAR(255) NOT NULL , - PRIMARY KEY (id) + id INTEGER PRIMARY KEY, + value TEXT NOT NULL ); CREATE TABLE tbl_verbaspect ( - id INTEGER NOT NULL , - value VARCHAR(255) NOT NULL , - PRIMARY KEY (id) + id INTEGER PRIMARY KEY, + value TEXT UNIQUE NOT NULL ); CREATE TABLE tbl_emotionmark ( - id INTEGER NOT NULL , - value VARCHAR(255) NOT NULL , - PRIMARY KEY (id) + id INTEGER PRIMARY KEY, + value TEXT UNIQUE NOT NULL ); CREATE TABLE tbl_emotionname ( - id INTEGER NOT NULL , - value VARCHAR(255) NOT NULL , - PRIMARY KEY (id) + id INTEGER PRIMARY KEY, + value TEXT UNIQUE NOT NULL COLLATE locale ); CREATE TABLE tbl_emotionvaluation ( - id INTEGER NOT NULL , - value VARCHAR(255) NOT NULL , - PRIMARY KEY (id) + id INTEGER PRIMARY KEY, + value TEXT NOT NULL COLLATE locale ); CREATE TABLE tbl_domain ( - id INTEGER NOT NULL , - value VARCHAR(255) NOT NULL , - PRIMARY KEY (id) + id INTEGER PRIMARY KEY, + value TEXT NOT NULL COLLATE locale ); CREATE UNIQUE INDEX value ON tbl_domain (value); -- Synset only gets one simple table CREATE TABLE tbl_synset ( - id BLOB , + id BLOB PRIMARY KEY, legacy_id INTEGER NULL , - definition TEXT NULL , - isartificial INTEGER NOT NULL DEFAULT '0' , - PRIMARY KEY (id) + definition TEXT COLLATE locale, + isartificial INTEGER NOT NULL DEFAULT 0 ); -- Lexical units have several tables, since they have several list-like -- properties. They also need indexes for lookup. 
CREATE TABLE tbl_lexicalunit ( - id BLOB , + id BLOB PRIMARY KEY, legacy_id INTEGER NULL , - lemma VARCHAR(255) NOT NULL , - pos INTEGER NOT NULL , + lemma TEXT NOT NULL COLLATE locale, + pos INTEGER NOT NULL + REFERENCES tbl_pos (id), variant INTEGER NOT NULL , - synset BLOB NOT NULL , + synset BLOB NOT NULL + REFERENCES tbl_synset (id), unitindex INTEGER NOT NULL , - definition TEXT NULL , - domain INTEGER NOT NULL , - verbaspect INTEGER NULL , - isemotional INTEGER NULL , - emotionmark INTEGER NULL , - emotionexample1 TEXT NULL , - emotionexample2 TEXT NULL , - PRIMARY KEY (id) + definition TEXT COLLATE locale, + domain INTEGER NOT NULL + REFERENCES tbl_domain (id), + verbaspect INTEGER + REFERENCES tbl_verbaspect (id), + isemotional INTEGER, + emotionmark INTEGER, + emotionexample1 TEXT COLLATE locale, + emotionexample2 TEXT COLLATE locale ); CREATE UNIQUE INDEX lemma ON tbl_lexicalunit (lemma, pos, variant); @@ -143,33 +139,40 @@ CREATE UNIQUE INDEX synset ON tbl_lexicalunit (synset, unitindex); -- Tables dependant on lexicalunit CREATE TABLE tbl_senseexample ( - unitid BLOB NOT NULL , - example TEXT NOT NULL , - source TEXT NOT NULL + unitid BLOB NOT NULL + REFERENCES tbl_lexicalunit (id), + example TEXT NOT NULL COLLATE locale, + source TEXT NOT NULL COLLATE locale ); CREATE INDEX sen_i ON tbl_senseexample (unitid); CREATE TABLE tbl_externallink ( - unitid BLOB NOT NULL , - link TEXT NOT NULL + unitid BLOB NOT NULL + REFERENCES tbl_lexicalunit (id), + link TEXT NOT NULL COLLATE locale ); CREATE INDEX link_i ON tbl_externallink (unitid); CREATE TABLE tbl_usagenote ( - unitid BLOB NOT NULL , - note TEXT NOT NULL + unitid BLOB NOT NULL + REFERENCES tbl_lexicalunit (id), + note TEXT NOT NULL COLLATE locale ); CREATE INDEX note_i ON tbl_usagenote (unitid); CREATE TABLE tbl_unitemotionname ( - unitid BLOB NOT NULL , - nameid INTEGER NOT NULL , + unitid BLOB NOT NULL + REFERENCES tbl_lexicalunit (id), + nameid INTEGER NOT NULL + REFERENCES tbl_emotionname (id), 
PRIMARY KEY (unitid, nameid) ); CREATE TABLE tbl_unitemotionvaluation ( - unitid BLOB NOT NULL , - valuationid INTEGER NOT NULL , + unitid BLOB NOT NULL + REFERENCES tbl_lexicalunit (id), + valuationid INTEGER NOT NULL + REFERENCES tbl_emotionvaluation (id), PRIMARY KEY (unitid, valuationid) ); @@ -178,24 +181,20 @@ CREATE TABLE tbl_unitemotionvaluation ( -- The for below are used to gather combinations of parent / child relation -- names. CREATE TABLE tbl_synsetrelationparentpart ( - id INTEGER NOT NULL , - name VARCHAR(255) NOT NULL , - PRIMARY KEY (id) + id INTEGER PRIMARY KEY, + name TEXT UNIQUE NOT NULL COLLATE locale ); CREATE TABLE tbl_synsetrelationchildpart ( - id INTEGER NOT NULL , - name VARCHAR(255) NOT NULL , - PRIMARY KEY (id) + id INTEGER PRIMARY KEY, + name TEXT UNIQUE NOT NULL COLLATE locale ); CREATE TABLE tbl_lexicalrelationparentpart ( - id INTEGER NOT NULL , - name VARCHAR(255) NOT NULL , - PRIMARY KEY (id) + id INTEGER PRIMARY KEY, + name TEXT UNIQUE NOT NULL COLLATE locale ); CREATE TABLE tbl_lexicalrelationchildpart ( - id INTEGER NOT NULL , - name VARCHAR(255) NOT NULL , - PRIMARY KEY (id) + id INTEGER PRIMARY KEY, + name TEXT UNIQUE NOT NULL COLLATE locale ); CREATE UNIQUE INDEX name ON tbl_lexicalrelationchildpart (name); @@ -204,18 +203,22 @@ CREATE UNIQUE INDEX name ON tbl_lexicalrelationchildpart (name); -- Parent can't be NULL - the no-parent case will be handled by a special empty -- string parent. This is so that UNIQUE works correctly. 
CREATE TABLE tbl_synsetrelationtype ( - id BLOB , + id INTEGER PRIMARY KEY, legacy_id INTEGER NULL , - parentpart INTEGER NOT NULL , - childpart INTEGER NOT NULL , - PRIMARY KEY (id) + parentpart INTEGER NOT NULL + REFERENCES tbl_synsetrelationparentpart (id), + childpart INTEGER NOT NULL + REFERENCES tbl_synsetrelationchildpart (id), + + UNIQUE (parentpart, childpart) ); CREATE TABLE tbl_lexicalrelationtype ( - id BLOB , + id INTEGER PRIMARY KEY , legacy_id INTEGER NULL , - parentpart INTEGER NOT NULL , - childpart INTEGER NOT NULL , - PRIMARY KEY (id) + parentpart INTEGER NOT NULL + REFERENCES tbl_lexicalrelationparentpart (id), + childpart INTEGER NOT NULL + REFERENCES tbl_lexicalrelationchildpart (id) ); CREATE UNIQUE INDEX parentpart ON tbl_lexicalrelationtype ( parentpart, @@ -225,17 +228,17 @@ CREATE UNIQUE INDEX parentpart ON tbl_lexicalrelationtype ( -- The below tables are simply maps of relation aliases to their main IDs. -- Reverse indexes are needed, too. CREATE TABLE tbl_synsetrelationalias ( - name VARCHAR(255) NOT NULL , - relationid BLOB NOT NULL , - PRIMARY KEY (name) + name TEXT PRIMARY KEY NOT NULL COLLATE locale, + relationid BLOB NOT NULL + REFERENCES tbl_synsetrelationtype (id) ); CREATE INDEX synsetrelationalias_irev ON tbl_synsetrelationalias ( relationid ); CREATE TABLE tbl_lexicalrelationalias ( - name VARCHAR(255) NOT NULL , - relationid BLOB NOT NULL , - PRIMARY KEY (name) + name TEXT PRIMARY KEY NOT NULL COLLATE locale, + relationid BLOB NOT NULL + REFERENCES tbl_lexicalrelationtype (id) ); CREATE INDEX lexicalrelationalias_irev ON tbl_lexicalrelationalias ( relationid @@ -243,15 +246,23 @@ CREATE INDEX lexicalrelationalias_irev ON tbl_lexicalrelationalias ( -- Next are finally the relation instances CREATE TABLE tbl_synsetrelation ( - source BLOB NOT NULL , - relationtype BLOB NOT NULL , - target BLOB NOT NULL , + source BLOB NOT NULL + REFERENCES tbl_synset (id), + relationtype BLOB NOT NULL + REFERENCES tbl_synsetrelationtype (id), 
+    target BLOB NOT NULL
+        REFERENCES tbl_synset (id),
+
     PRIMARY KEY (source, relationtype, target)
 );
 CREATE TABLE tbl_lexicalrelation (
-    source BLOB NOT NULL ,
-    relationtype BLOB NOT NULL ,
-    target BLOB NOT NULL ,
+    source BLOB NOT NULL
+        REFERENCES tbl_lexicalunit (id),
+    relationtype BLOB NOT NULL
+        REFERENCES tbl_lexicalrelationtype (id),
+    target BLOB NOT NULL
+        REFERENCES tbl_lexicalunit (id),
+
     PRIMARY KEY (source, relationtype, target)
 );
 
@@ -404,7 +415,7 @@ class PLWordNet(bs.PLWordNetBase):
         )
 
         lu_q = u"""
-            SELECT lemma, pos.value, variant, synset
+            SELECT lemma, tbl_pos.value, variant, synset
             FROM tbl_lexicalunit
             JOIN tbl_pos ON tbl_pos.id = tbl_lexicalunit.pos
             WHERE tbl_lexicalunit.id = ?
@@ -1397,6 +1408,34 @@ class _DBBuilder(object):
             en.RelationKind.lexical,
         )
 
+    def _show(self):
+        """Print every row of the core tables (debugging aid).
+
+        Tables are dumped in a fixed order: synsets, synset relations and
+        their type / name-part tables, followed by the lexical unit
+        counterparts. Each table is printed as the list fetchall() returns.
+        """
+        # Table names are fixed identifiers, not user input, so formatting
+        # them into the statement is safe; SQL parameters cannot stand in
+        # for identifiers anyway.
+        tables = (
+            u'tbl_synset',
+            u'tbl_synsetrelation',
+            u'tbl_synsetrelationtype',
+            u'tbl_synsetrelationparentpart',
+            u'tbl_synsetrelationchildpart',
+            u'tbl_lexicalunit',
+            u'tbl_lexicalrelation',
+            u'tbl_lexicalrelationtype',
+            u'tbl_lexicalrelationparentpart',
+            u'tbl_lexicalrelationchildpart',
+        )
+        for table in tables:
+            # Use a fresh cursor per table, closed once its rows are read.
+            with closing(self._db.cursor()) as cur:
+                cur.execute(u'SELECT * FROM {}'.format(table))
+                print(cur.fetchall())
+
     def _insert_synset(self, syn_node):
         self._db.execute(
             u"""
diff --git a/tests/abstract_cases/test_unit_and_synset.py b/tests/abstract_cases/test_unit_and_synset.py
index f3885dea2aae8871ef7ba7b48fe087ee43b98a05..4ac8513a1c366532e4b362d375dbb97783733728 100644
--- a/tests/abstract_cases/test_unit_and_synset.py
+++ b/tests/abstract_cases/test_unit_and_synset.py
@@ -94,6 +94,7 @@ class SynsetRelationsTest(ut.TestCase):
             nd.make_synset_node(id=3),
             nd.make_lexical_unit_node(
                 id=11,
+                legacy_id=11,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=1,
@@ -103,6 +104,7 @@ class SynsetRelationsTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=21,
+                legacy_id=21,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=2,
@@ -112,6 +114,7 @@ class SynsetRelationsTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=31,
+                legacy_id=31,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=3,
@@ -258,6 +261,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase):
             nd.make_synset_node(id=8),
             nd.make_lexical_unit_node(
                 id=11,
+                legacy_id=11,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=1,
@@ -267,6 +271,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=21,
+                legacy_id=21,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=2,
@@ -276,6 +281,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=31,
+                legacy_id=31,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=3,
@@ -285,6 +291,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=41,
+                legacy_id=41,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=4,
@@ -294,6 +301,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=51,
+                legacy_id=51,
                 lemma=u'aaa',
                 pos=en.PoS.n,
                 variant=5,
@@ -303,6 +311,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase):
             ),
             nd.make_lexical_unit_node(
                 id=61,
+                legacy_id=61,
lemma=u'aaa', pos=en.PoS.n, variant=6, @@ -312,6 +321,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase): ), nd.make_lexical_unit_node( id=71, + legacy_id=71, lemma=u'aaa', pos=en.PoS.n, variant=7, @@ -321,6 +331,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase): ), nd.make_lexical_unit_node( id=81, + legacy_id=81, lemma=u'aaa', pos=en.PoS.n, variant=8, @@ -446,6 +457,7 @@ class SynsetRelationsWithArtificialLoopTest(ut.TestCase): nd.make_synset_node(id=4), nd.make_lexical_unit_node( id=11, + legacy_id=11, lemma=u'aaa', pos=en.PoS.n, variant=1, @@ -455,6 +467,7 @@ class SynsetRelationsWithArtificialLoopTest(ut.TestCase): ), nd.make_lexical_unit_node( id=21, + legacy_id=21, lemma=u'aaa', pos=en.PoS.n, variant=2, @@ -464,6 +477,7 @@ class SynsetRelationsWithArtificialLoopTest(ut.TestCase): ), nd.make_lexical_unit_node( id=31, + legacy_id=31, lemma=u'aaa', pos=en.PoS.n, variant=3, @@ -473,6 +487,7 @@ class SynsetRelationsWithArtificialLoopTest(ut.TestCase): ), nd.make_lexical_unit_node( id=41, + legacy_id=41, lemma=u'aaa', pos=en.PoS.n, variant=4, @@ -529,6 +544,7 @@ class LexicalUnitPropertiesTest(ut.TestCase): nd.make_synset_node(id=1), nd.make_lexical_unit_node( id=11, + legacy_id=11, lemma=u'aaa', pos=en.PoS.n, variant=1, @@ -645,6 +661,7 @@ class LexicalUnitRelationsTest(ut.TestCase): nd.make_synset_node(id=1), nd.make_lexical_unit_node( id=11, + legacy_id=11, lemma=u'aaa', pos=en.PoS.n, variant=1, @@ -658,6 +675,7 @@ class LexicalUnitRelationsTest(ut.TestCase): ), nd.make_lexical_unit_node( id=12, + legacy_id=12, lemma=u'bbb', pos=en.PoS.n, variant=1, @@ -668,6 +686,7 @@ class LexicalUnitRelationsTest(ut.TestCase): ), nd.make_lexical_unit_node( id=13, + legacy_id=13, lemma=u'ccc', pos=en.PoS.n, variant=1, @@ -767,6 +786,7 @@ class ItemOrderingTest(ut.TestCase): nd.make_synset_node(id=3), nd.make_lexical_unit_node( id=11, + legacy_id=11, lemma=u'aaa', pos=en.PoS.n, variant=1, @@ -776,6 +796,7 @@ class ItemOrderingTest(ut.TestCase): ), 
nd.make_lexical_unit_node( id=21, + legacy_id=21, lemma=u'bbb', pos=en.PoS.n, variant=1, @@ -785,6 +806,7 @@ class ItemOrderingTest(ut.TestCase): ), nd.make_lexical_unit_node( id=22, + legacy_id=22, lemma=u'ąąą', pos=en.PoS.n, variant=2, @@ -794,6 +816,7 @@ class ItemOrderingTest(ut.TestCase): ), nd.make_lexical_unit_node( id=31, + legacy_id=31, lemma=u'ąąą', pos=en.PoS.n, variant=1, @@ -844,6 +867,7 @@ class ToDictTest(ut.TestCase): nd.make_synset_node(id=3), nd.make_lexical_unit_node( id=11, + legacy_id=11, lemma=u'aaa', pos=en.PoS.v, variant=1, @@ -867,6 +891,7 @@ class ToDictTest(ut.TestCase): ), nd.make_lexical_unit_node( id=21, + legacy_id=21, lemma=u'bbb', pos=en.PoS.n, variant=1, @@ -876,6 +901,7 @@ class ToDictTest(ut.TestCase): ), nd.make_lexical_unit_node( id=31, + legacy_id=31, lemma=u'ccc', pos=en.PoS.n, variant=1, @@ -886,6 +912,7 @@ class ToDictTest(ut.TestCase): )) self.__lex11_dict = { u'id': 11, + u'legacy_id': 11, u'lemma': u'aaa', u'pos': u'czasownik', u'variant': 1,