diff --git a/plwn-api_plwn_dump_new_07-12-2022.sqlite b/plwn-api_plwn_dump_new_07-12-2022.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..19fe03e3c05a58941aea3e5207be305382858d67 Binary files /dev/null and b/plwn-api_plwn_dump_new_07-12-2022.sqlite differ diff --git a/plwn/bases.py b/plwn/bases.py index 0e66bc704b21000b86afe8068fdb41a16d3d6926..09e84e3da0d283c463ebae82412e0a812305f245 100644 --- a/plwn/bases.py +++ b/plwn/bases.py @@ -588,7 +588,7 @@ class SynsetBase(object): :meth:`LexicalUnitBase.to_dict`. """ syn_dict = { - u'id': self.id, + u'id': self.uuid, u'definition': self.definition, u'is_artificial': self.is_artificial, u'units': tuple( @@ -602,7 +602,7 @@ class SynsetBase(object): if include_related: syn_dict[u'related'] = { six.text_type(rel): tuple( - (target.id, target.short_str()) + (target.uuid, target.short_str()) for target in self.related(rel) ) for rel in self.relations @@ -631,7 +631,7 @@ class SynsetBase(object): def __repr__(self): head = self.lexical_units[0] rstr = '<Synset id={!r} lemma={!r} pos={!r} variant={!r}'.format( - self.id, + str(self.uuid), head.lemma, head.pos, head.variant, @@ -938,7 +938,7 @@ class LexicalUnitBase(object): } """ lu_dict = { - u'id': self.id, + u'id': self.uuid, u'legacy_id': self.legacy_id, u'lemma': self.lemma, u'pos': self.pos.value, @@ -949,7 +949,7 @@ class LexicalUnitBase(object): u'external_links': tuple(self.external_links), u'usage_notes': tuple(self.usage_notes), u'domain': self.domain.value, - u'synset': self.synset.id, + u'synset': self.synset.uuid, u'verb_aspect': None if self.verb_aspect is None else self.verb_aspect.value, @@ -966,7 +966,7 @@ class LexicalUnitBase(object): if include_related: lu_dict[u'related'] = { six.text_type(rel): tuple( - (target.id, six.text_type(target)) + (target.uuid, six.text_type(target)) for target in self.related(rel) ) for rel in self.relations @@ -1002,7 +1002,7 @@ class LexicalUnitBase(object): def __repr__(self): return '<LexicalUnit id={!r} lemma={!r} pos={!r} variant={!r}>'.format( - self.id, + str(self.uuid), self.lemma, self.pos, self.variant, diff --git a/plwn/storages/sqlite.py b/plwn/storages/sqlite.py index 940d4b11764bedbf70ef409771d257cd2f997de3..1e7ec64e646256706640296a6aaa2904cb7d47b3 100644 --- a/plwn/storages/sqlite.py +++ b/plwn/storages/sqlite.py @@ -102,7 +102,7 @@ CREATE UNIQUE INDEX value ON tbl_domain (value); -- Synset only gets one simple table CREATE TABLE tbl_synset ( - id BLOB PRIMARY KEY, + id INTEGER PRIMARY KEY, legacy_id INTEGER NULL , definition TEXT COLLATE locale, isartificial INTEGER NOT NULL DEFAULT 0 @@ -112,13 +112,13 @@ CREATE TABLE tbl_synset ( -- properties. They also need indexes for lookup. CREATE TABLE tbl_lexicalunit ( - id BLOB PRIMARY KEY, + id INTEGER PRIMARY KEY, legacy_id INTEGER NULL , lemma TEXT NOT NULL COLLATE locale, pos INTEGER NOT NULL REFERENCES tbl_pos (id), variant INTEGER NOT NULL , - synset BLOB NOT NULL + synset INTEGER NOT NULL REFERENCES tbl_synset (id), unitindex INTEGER NOT NULL , definition TEXT COLLATE locale, @@ -139,7 +139,7 @@ CREATE UNIQUE INDEX synset ON tbl_lexicalunit (synset, unitindex); -- Tables dependant on lexicalunit CREATE TABLE tbl_senseexample ( - unitid BLOB NOT NULL + unitid INTEGER NOT NULL REFERENCES tbl_lexicalunit (id), example TEXT NOT NULL COLLATE locale, source TEXT NOT NULL COLLATE locale @@ -147,21 +147,21 @@ CREATE TABLE tbl_senseexample ( CREATE INDEX sen_i ON tbl_senseexample (unitid); CREATE TABLE tbl_externallink ( - unitid BLOB NOT NULL + unitid INTEGER NOT NULL REFERENCES tbl_lexicalunit (id), link TEXT NOT NULL COLLATE locale ); CREATE INDEX link_i ON tbl_externallink (unitid); CREATE TABLE tbl_usagenote ( - unitid BLOB NOT NULL + unitid INTEGER NOT NULL REFERENCES tbl_lexicalunit (id), note TEXT NOT NULL COLLATE locale ); CREATE INDEX note_i ON tbl_usagenote (unitid); CREATE TABLE tbl_unitemotionname ( - unitid BLOB NOT NULL + unitid INTEGER NOT NULL REFERENCES tbl_lexicalunit (id), nameid INTEGER NOT NULL REFERENCES tbl_emotionname (id), @@ -169,7 +169,7 @@ CREATE TABLE tbl_unitemotionname ( ); CREATE TABLE tbl_unitemotionvaluation ( - unitid BLOB NOT NULL + unitid INTEGER NOT NULL REFERENCES tbl_lexicalunit (id), valuationid INTEGER NOT NULL REFERENCES tbl_emotionvaluation (id), @@ -229,7 +229,7 @@ CREATE UNIQUE INDEX parentpart ON tbl_lexicalrelationtype ( -- Reverse indexes are needed, too. CREATE TABLE tbl_synsetrelationalias ( name TEXT PRIMARY KEY NOT NULL COLLATE locale, - relationid BLOB NOT NULL + relationid INTEGER NOT NULL REFERENCES tbl_synsetrelationtype (id) ); CREATE INDEX synsetrelationalias_irev ON tbl_synsetrelationalias ( @@ -237,7 +237,7 @@ CREATE INDEX synsetrelationalias_irev ON tbl_synsetrelationalias ( ); CREATE TABLE tbl_lexicalrelationalias ( name TEXT PRIMARY KEY NOT NULL COLLATE locale, - relationid BLOB NOT NULL + relationid INTEGER NOT NULL REFERENCES tbl_lexicalrelationtype (id) ); CREATE INDEX lexicalrelationalias_irev ON tbl_lexicalrelationalias ( @@ -246,21 +246,21 @@ CREATE INDEX lexicalrelationalias_irev ON tbl_lexicalrelationalias ( -- Next are finally the relation instances CREATE TABLE tbl_synsetrelation ( - source BLOB NOT NULL + source INTEGER NOT NULL REFERENCES tbl_synset (id), - relationtype BLOB NOT NULL + relationtype INTEGER NOT NULL REFERENCES tbl_synsetrelationtype (id), - target BLOB NOT NULL + target INTEGER NOT NULL REFERENCES tbl_synset (id), PRIMARY KEY (source, relationtype, target) ); CREATE TABLE tbl_lexicalrelation ( - source BLOB NOT NULL + source INTEGER NOT NULL REFERENCES tbl_lexicalunit (id), - relationtype BLOB NOT NULL + relationtype INTEGER NOT NULL REFERENCES tbl_lexicalrelationtype (id), - target BLOB NOT NULL + target INTEGER NOT NULL REFERENCES tbl_lexicalunit (id), PRIMARY KEY (source, relationtype, target) @@ -694,6 +694,7 @@ class LexicalUnit(bs.LexicalUnitBase): self._var = variant self._synid = synid # Rest is unitialized + self._uuid = _UNFETCHED self._leg_id = _UNFETCHED self._syn = _UNFETCHED self._def = _UNFETCHED @@ -745,6 +746,17 @@ class LexicalUnit(bs.LexicalUnitBase): self._leg_id = cur.fetchone()[0] return self._leg_id + @property + def uuid(self): + if self._uuid is _UNFETCHED: + with closing(self._db.cursor()) as cur: + cur.execute( + u"SELECT quote(id) from tbl_lexicalunit WHERE id = ?", + (self._id,), + ) + self._uuid = cur.fetchone()[0][2:-1] + return self._uuid + @property def synset(self): if self._syn is _UNFETCHED or self._syn() is None: @@ -1024,6 +1036,7 @@ class Synset(bs.SynsetBase): self._id = syn_id self._isart = syn_art + self._uuid = _UNFETCHED self._units = _UNFETCHED self._def = _UNFETCHED self._leg_id = _UNFETCHED @@ -1096,6 +1109,17 @@ class Synset(bs.SynsetBase): self._leg_id = cur.fetchone()[0] return self._leg_id + @property + def uuid(self): + if self._uuid is _UNFETCHED: + with closing(self._db.cursor()) as cur: + cur.execute( + u"SELECT quote(id) from tbl_synset WHERE id = ?", + (self._id,), + ) + self._uuid = cur.fetchone()[0][2:-1] + return self._uuid + @property def definition(self): if self._def is _UNFETCHED: @@ -1408,78 +1432,6 @@ class _DBBuilder(object): en.RelationKind.lexical, ) - def _show(self): - with closing(self._db.cursor()) as cur: - cur.execute( - u""" - SELECT * FROM tbl_synset - """ - ) - print(cur.fetchall()) - with closing(self._db.cursor()) as cur: - cur.execute( - u""" - SELECT * FROM tbl_synsetrelation - """ - ) - print(cur.fetchall()) - with closing(self._db.cursor()) as cur: - cur.execute( - u""" - SELECT * FROM tbl_synsetrelationtype - """ - ) - print(cur.fetchall()) - with closing(self._db.cursor()) as cur: - cur.execute( - u""" - SELECT * FROM tbl_synsetrelationparentpart - """ - ) - print(cur.fetchall()) - with closing(self._db.cursor()) as cur: - cur.execute( - u""" - SELECT * FROM tbl_synsetrelationchildpart - """ - ) - print(cur.fetchall()) - with closing(self._db.cursor()) as cur: - cur.execute( - u""" - SELECT * FROM tbl_lexicalunit - """ - ) - print(cur.fetchall()) - with closing(self._db.cursor()) as cur: - cur.execute( - u""" - SELECT * FROM tbl_lexicalrelation - """ - ) - print(cur.fetchall()) - with closing(self._db.cursor()) as cur: - cur.execute( - u""" - SELECT * FROM tbl_lexicalrelationtype - """ - ) - print(cur.fetchall()) - with closing(self._db.cursor()) as cur: - cur.execute( - u""" - SELECT * FROM tbl_lexicalrelationparentpart - """ - ) - print(cur.fetchall()) - with closing(self._db.cursor()) as cur: - cur.execute( - u""" - SELECT * FROM tbl_lexicalrelationchildpart - """ - ) - print(cur.fetchall()) - def _insert_synset(self, syn_node): self._db.execute( u"""