diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index c85983e56ca1a3aafdea2352683c531cfe42c1f6..fe40822f862d95b0dffdfa4329f7aca7540b3efb 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -10,6 +10,7 @@ cache:
 stages:
   - check_style
   - tests
+  - test_load
   - push_wheel
 
 pep8:
@@ -45,3 +46,10 @@ push_wheel:
     --repository-url https://pypi.clarin-pl.eu/
     -u $PIPY_USER -p $PIPY_PASS dist/plwn_api*.whl
+test_load:
+  stage: test_load
+  before_script:
+    - pip install --upgrade pip
+    - pip install .
+  script:
+    - python -c 'import plwn; print(plwn.load_default().lexical_unit("pies", plwn.PoS.noun_pl, 2).definition)'
diff --git a/plwn/bases.py b/plwn/bases.py
index 43f9712ef12cdbf7d08832f084686d047e078924..09e84e3da0d283c463ebae82412e0a812305f245 100644
--- a/plwn/bases.py
+++ b/plwn/bases.py
@@ -588,7 +588,7 @@ class SynsetBase(object):
         :meth:`LexicalUnitBase.to_dict`.
         """
         syn_dict = {
-            u'id': self.id,
+            u'id': self.uuid,
             u'definition': self.definition,
             u'is_artificial': self.is_artificial,
             u'units': tuple(
@@ -602,7 +602,7 @@
         if include_related:
             syn_dict[u'related'] = {
                 six.text_type(rel): tuple(
-                    (target.id, target.short_str())
+                    (target.uuid, target.short_str())
                     for target in self.related(rel)
                 )
                 for rel in self.relations
@@ -631,7 +631,7 @@
     def __repr__(self):
         head = self.lexical_units[0]
         rstr = '<Synset id={!r} lemma={!r} pos={!r} variant={!r}'.format(
-            self.id,
+            str(self.uuid),
             head.lemma,
             head.pos,
             head.variant,
@@ -938,7 +938,8 @@ class LexicalUnitBase(object):
         }
         """
         lu_dict = {
-            u'id': self.id,
+            u'id': self.uuid,
+            u'legacy_id': self.legacy_id,
             u'lemma': self.lemma,
             u'pos': self.pos.value,
             u'variant': self.variant,
@@ -948,7 +949,7 @@
             u'external_links': tuple(self.external_links),
             u'usage_notes': tuple(self.usage_notes),
             u'domain': self.domain.value,
-            u'synset': self.synset.id,
+            u'synset': self.synset.uuid,
             u'verb_aspect': None
             if self.verb_aspect is None
             else self.verb_aspect.value,
@@ -965,7 +966,7 @@
         if include_related:
             lu_dict[u'related'] = {
                 six.text_type(rel): tuple(
-                    (target.id, six.text_type(target))
+                    (target.uuid, six.text_type(target))
                     for target in self.related(rel)
                 )
                 for rel in self.relations
@@ -1001,7 +1002,7 @@
     def __repr__(self):
         return '<LexicalUnit id={!r} lemma={!r} pos={!r} variant={!r}>'.format(
-            self.id,
+            str(self.uuid),
             self.lemma,
             self.pos,
             self.variant,
diff --git a/plwn/config.ini b/plwn/config.ini
index ec3c6ffbc546b7cc4832deaca53288cb9a1a5482..23a2ffb4d237a8c0ff9c2b93b7d2eead0c844ebf 100644
--- a/plwn/config.ini
+++ b/plwn/config.ini
@@ -1,2 +1,2 @@
 [DOWNLOAD]
-default_model = https://minio.clarin-pl.eu/public/models/plwn_api_dumps/plwn_dump_27-03-2018.sqlite
\ No newline at end of file
+default_model = https://minio.clarin-pl.eu/public/models/plwn_api_dumps/plwn_dump_new_07-12-2022.sqlite
diff --git a/plwn/download.py b/plwn/download.py
index f5647645180dea0dcf739020ef29b272d113a24a..9fac08a192f613c75d7bb14063e00df9e9bdd149 100644
--- a/plwn/download.py
+++ b/plwn/download.py
@@ -42,7 +42,7 @@ def download(name="default_model"):
         return
     if name in models:
         url = config["DOWNLOAD"]["default_model"]
-        url = url.replace("plwn_dump_27-03-2018.sqlite", name)
+        url = url.replace("plwn_dump_new_07-12-2022.sqlite", name)
         r = requests.get(url)
         with open(name, "wb") as f:
             f.write(r.content)
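For reference, the new test_load job boils down to the snippet below. This is a sketch only: it assumes the 2022 dump is reachable at the default_model URL configured above and that the "pies" / noun / variant-2 unit exists in that dump (both taken from the CI script, not verified here).

    import plwn

    # load_default() is expected to fetch the configured default dump on
    # first use and open it with the sqlite3 storage backend.
    wn = plwn.load_default()

    # Same lookup the test_load job performs: lemma, part of speech, variant.
    unit = wn.lexical_unit("pies", plwn.PoS.noun_pl, 2)
    print(unit.definition)
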
diff --git a/plwn/enums.py b/plwn/enums.py
index 246fc3c8290958889f6d55fa5bf0d1098045bc2f..f99afb6198bc3b667683f6886c61aea9354b310d 100644
--- a/plwn/enums.py
+++ b/plwn/enums.py
@@ -104,10 +104,10 @@ class PoS(Enum):
         __order__ = 'verb noun adverb adjective ' \
                     'verb_en noun_en adverb_en adjective_en'
 
-    verb = u'verb'
-    noun = u'noun'
-    adverb = u'adverb'
-    adjective = u'adjective'
+    verb = u'czasownik'
+    noun = u'rzeczownik'
+    adverb = u'przysłówek'
+    adjective = u'przymiotnik'
 
     # English (PWN) PoSes
     verb_en = u'verb_en'
@@ -170,8 +170,9 @@ class VerbAspect(Enum):
     """Defines verb aspect values used in plWordNet."""
 
     if six.PY2:
-        __order__ = 'perfective imperfective predicative two_aspect'
+        __order__ = 'perfective imperfective predicative two_aspect no'
 
+    no = u'no'
     perfective = u'perf'
     imperfective = u'imperf'
     predicative = u'pred'
@@ -296,7 +297,7 @@ class Domain(Enum):
     bhp = u'najwyższe w hierarchii'
 
     czy = u'czynności (nazwy)'
-    wytw = u'wytwory ludzkie (nazwy)'
+    wytw = u'wytwory ludzkie(nazwy)'
     cech = u'cechy ludzi i zwierząt'
     czc = u'części ciała'
     umy = u'związane z myśleniem'
@@ -346,8 +347,8 @@ class Domain(Enum):
     sys = u'systematyka, klasyfikacja'
 
-    adj = u'PWN: all adjective clusters'
-    adv = u'PWN: all adverbs'
+    adj = u'all adjective clusters'
+    adv = u'all adverbs'
 
     mat = u'przymiotniki materiałowe'
diff --git a/plwn/readers/nodes.py b/plwn/readers/nodes.py
index b1bf8d3cafafd96ae67ee3c0012489bd71d6fb7e..45fb60123bc992ecb43f0f3b50be2adc98ec0c0d 100644
--- a/plwn/readers/nodes.py
+++ b/plwn/readers/nodes.py
@@ -31,19 +31,19 @@ __all__ = (
 SynsetNode = namedtuple(
     "SynsetNode",
-    ["id", "definition", "related", "is_artificial"],
+    ["id", "legacy_id", "definition", "related", "is_artificial"],
 )
 LexicalUnitNode = namedtuple(
     "LexicalUnitNode",
-    ["id", "lemma", "pos", "variant", "synset", "unit_index", "definition",
-     "usage_notes", "external_links", "examples", "examples_sources",
-     "domain", "related", "verb_aspect", "is_emotional", "emotion_markedness",
-     "emotion_names", "emotion_valuations", "emotion_example_1",
-     "emotion_example_2"]
+    ["id", "legacy_id", "lemma", "pos", "variant", "synset", "unit_index",
+     "definition", "usage_notes", "external_links", "examples",
+     "examples_sources", "domain", "related", "verb_aspect", "is_emotional",
+     "emotion_markedness", "emotion_names", "emotion_valuations",
+     "emotion_example_1", "emotion_example_2"]
 )
 RelationTypeNode = namedtuple(
     "RelationTypeNode",
-    ["kind", "name", "parent", "aliases"],
+    ["kind", "legacy_id", "name", "parent", "aliases"],
 )
@@ -58,6 +58,7 @@ def make_synset_node(**props):
     """
     syn = SynsetNode(
         id=props.pop('id'),
+        legacy_id=props.pop('legacy_id', None),
         definition=props.pop('definition', None),
         related=props.pop('related', ()),
         is_artificial=props.pop('is_artificial', False),
@@ -80,6 +81,7 @@ def make_lexical_unit_node(**props):
     """
     lex = LexicalUnitNode(
         id=props.pop('id'),
+        legacy_id=props.pop('legacy_id', None),
         lemma=props.pop('lemma'),
         pos=props.pop('pos'),
         variant=props.pop('variant'),
@@ -120,6 +122,7 @@ def make_relation_type_node(**props):
     """
     rel = RelationTypeNode(
         name=props.pop('name'),
+        legacy_id=props.pop('legacy_id', None),
         kind=props.pop('kind'),
         parent=props.pop('parent', None),
         aliases=props.pop('aliases', frozenset()),
diff --git a/plwn/readers/ubylmf.py b/plwn/readers/ubylmf.py
index a3859ef5bc91737206c05d4c442103d09ad10a00..f91d2d3210c0d9a81565d8c2cd86432b2d434bdd 100644
--- a/plwn/readers/ubylmf.py
+++ b/plwn/readers/ubylmf.py
@@ -136,6 +136,7 @@ def _make_lexicalunit(xml_lexicalentry, xml_sense):
     )
     return make_lexical_unit_node(
         id=lu_id,
+        legacy_id=None,
         lemma=lu_lemma,
         pos=PoS(lu_pos),
         synset=lu_synset,
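The reader-level change above just widens the node tuples: every SynsetNode, LexicalUnitNode and RelationTypeNode now carries a legacy_id that defaults to None, so existing readers (such as the UBY-LMF one, which passes legacy_id=None explicitly) keep working. A minimal sketch of building nodes with the new field follows; the import paths mirror the aliases used in the test suite (nd, en), the id values are illustrative, and the set of required keyword arguments is assumed rather than taken from the patch.

    from plwn.readers import nodes as nd
    from plwn import enums as en

    # legacy_id is optional; omitting it falls back to None.
    syn = nd.make_synset_node(id=1, legacy_id=100, definition=u'example')

    lex = nd.make_lexical_unit_node(
        id=11,
        legacy_id=110,        # new field; None when only UUIDs are known
        lemma=u'aaa',
        pos=en.PoS.n,
        variant=1,
        synset=1,
        unit_index=0,
        domain=en.Domain.bhp,
    )
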
diff --git a/plwn/storages/sqlite.py b/plwn/storages/sqlite.py
index 3983068dc6058ad0343ff03e6660e8a4bcb078a6..7d411384f158c0a55dffe09b2cbbdacee2091ffa 100644
--- a/plwn/storages/sqlite.py
+++ b/plwn/storages/sqlite.py
@@ -63,121 +63,116 @@ _DB_SCHEMA_SCRIPT = u"""
 PRAGMA foreign_keys = ON;
 
 -- Metadata table. Used for version number, currently
-CREATE TABLE plwn_meta (
-    name TEXT UNIQUE NOT NULL,
+CREATE TABLE tbl_plwn_meta (
+    name TEXT UNIQUE NOT NULL ,
     value BLOB
 );
 
 -- Tables for constant values
-CREATE TABLE pos (
+CREATE TABLE tbl_pos (
     id INTEGER PRIMARY KEY,
-    value TEXT UNIQUE NOT NULL
+    value TEXT NOT NULL
 );
-CREATE TABLE verbaspect (
+CREATE TABLE tbl_verbaspect (
     id INTEGER PRIMARY KEY,
     value TEXT UNIQUE NOT NULL
 );
-CREATE TABLE emotionmark (
+CREATE TABLE tbl_emotionmark (
     id INTEGER PRIMARY KEY,
     value TEXT UNIQUE NOT NULL
 );
-CREATE TABLE emotionname (
+CREATE TABLE tbl_emotionname (
     id INTEGER PRIMARY KEY,
     value TEXT UNIQUE NOT NULL COLLATE locale
 );
-CREATE TABLE emotionvaluation (
+CREATE TABLE tbl_emotionvaluation (
     id INTEGER PRIMARY KEY,
-    value TEXT UNIQUE NOT NULL COLLATE locale
+    value TEXT NOT NULL COLLATE locale
 );
-CREATE TABLE domain (
+CREATE TABLE tbl_domain (
     id INTEGER PRIMARY KEY,
-    value TEXT UNIQUE NOT NULL COLLATE locale
+    value TEXT NOT NULL COLLATE locale
 );
+CREATE UNIQUE INDEX value ON tbl_domain (value);
 
 -- Synset only gets one simple table
-CREATE TABLE synset (
+CREATE TABLE tbl_synset (
     id INTEGER PRIMARY KEY,
+    legacy_id INTEGER NULL ,
     definition TEXT COLLATE locale,
     isartificial INTEGER NOT NULL DEFAULT 0
 );
 
 -- Lexical units have several tables, since they have several list-like
 -- properties. They also need indexes for lookup.
-CREATE TABLE lexicalunit (
+
+CREATE TABLE tbl_lexicalunit (
     id INTEGER PRIMARY KEY,
+    legacy_id INTEGER NULL ,
     lemma TEXT NOT NULL COLLATE locale,
     pos INTEGER NOT NULL
-        REFERENCES pos (id),
-    variant INTEGER NOT NULL,
+        REFERENCES tbl_pos (id),
+    variant INTEGER NOT NULL ,
     synset INTEGER NOT NULL
-        REFERENCES synset (id),
-    unitindex INTEGER NOT NULL,
+        REFERENCES tbl_synset (id),
+    unitindex INTEGER NOT NULL ,
     definition TEXT COLLATE locale,
     domain INTEGER NOT NULL
-        REFERENCES domain (id),
+        REFERENCES tbl_domain (id),
     verbaspect INTEGER
-        REFERENCES verbaspect (id),
+        REFERENCES tbl_verbaspect (id),
     isemotional INTEGER,
-    emotionmark INTEGER
-        REFERENCES emotionmark (id),
+    emotionmark INTEGER,
     emotionexample1 TEXT COLLATE locale,
-    emotionexample2 TEXT COLLATE locale,
-
-    UNIQUE (lemma, pos, variant),
-    -- Also, each unit needs its of place in synset
-    UNIQUE (synset, unitindex)
+    emotionexample2 TEXT COLLATE locale
 );
 
--- lem-pos-var and synset-unitindex indexes (and partial ones) are
--- automatically made because of UNIQUE constraint, but additional indexes
--- need to be created.
-CREATE INDEX lex_i_lem_var ON lexicalunit (lemma, variant);
-CREATE INDEX lex_i_pos ON lexicalunit (pos);
--- No index for variant itself - it's not an useful use case
+CREATE UNIQUE INDEX lemma ON tbl_lexicalunit (lemma, pos, variant);
+CREATE INDEX lex_i_lem_var ON tbl_lexicalunit (lemma, variant);
+CREATE INDEX lex_i_pos ON tbl_lexicalunit (pos);
+CREATE UNIQUE INDEX synset ON tbl_lexicalunit (synset, unitindex);
 
 -- Tables dependant on lexicalunit
-CREATE TABLE senseexample (
+CREATE TABLE tbl_senseexample (
     unitid INTEGER NOT NULL
-        REFERENCES lexicalunit (id),
+        REFERENCES tbl_lexicalunit (id),
     example TEXT NOT NULL COLLATE locale,
     source TEXT NOT NULL COLLATE locale
 );
-CREATE INDEX sen_i ON senseexample (unitid);
+CREATE INDEX sen_i ON tbl_senseexample (unitid);
 
-CREATE TABLE externallink (
+CREATE TABLE tbl_externallink (
     unitid INTEGER NOT NULL
-        REFERENCES lexicalunit (id),
+        REFERENCES tbl_lexicalunit (id),
     link TEXT NOT NULL COLLATE locale
 );
-CREATE INDEX link_i ON externallink (unitid);
+CREATE INDEX link_i ON tbl_externallink (unitid);
 
-CREATE TABLE usagenote (
+CREATE TABLE tbl_usagenote (
     unitid INTEGER NOT NULL
-        REFERENCES lexicalunit (id),
+        REFERENCES tbl_lexicalunit (id),
     note TEXT NOT NULL COLLATE locale
 );
-CREATE INDEX note_i ON usagenote (unitid);
+CREATE INDEX note_i ON tbl_usagenote (unitid);
 
-CREATE TABLE unitemotionname (
+CREATE TABLE tbl_unitemotionname (
     unitid INTEGER NOT NULL
-        REFERENCES lexicalunit (id),
+        REFERENCES tbl_lexicalunit (id),
     nameid INTEGER NOT NULL
-        REFERENCES emotionname (id),
-
+        REFERENCES tbl_emotionname (id),
     PRIMARY KEY (unitid, nameid)
 );
 
-CREATE TABLE unitemotionvaluation (
+CREATE TABLE tbl_unitemotionvaluation (
     unitid INTEGER NOT NULL
-        REFERENCES lexicalunit (id),
+        REFERENCES tbl_lexicalunit (id),
     valuationid INTEGER NOT NULL
-        REFERENCES emotionvaluation (id),
-
+        REFERENCES tbl_emotionvaluation (id),
     PRIMARY KEY (unitid, valuationid)
 );
 
@@ -185,113 +180,123 @@ CREATE TABLE unitemotionvaluation (
 -- The for below are used to gather combinations of parent / child relation
 -- names.
-CREATE TABLE synsetrelationparentpart (
+CREATE TABLE tbl_synsetrelationparentpart (
     id INTEGER PRIMARY KEY,
     name TEXT UNIQUE NOT NULL COLLATE locale
 );
-CREATE TABLE synsetrelationchildpart (
+CREATE TABLE tbl_synsetrelationchildpart (
     id INTEGER PRIMARY KEY,
     name TEXT UNIQUE NOT NULL COLLATE locale
 );
-CREATE TABLE lexicalrelationparentpart (
+CREATE TABLE tbl_lexicalrelationparentpart (
     id INTEGER PRIMARY KEY,
     name TEXT UNIQUE NOT NULL COLLATE locale
 );
-CREATE TABLE lexicalrelationchildpart (
+CREATE TABLE tbl_lexicalrelationchildpart (
     id INTEGER PRIMARY KEY,
     name TEXT UNIQUE NOT NULL COLLATE locale
 );
+CREATE UNIQUE INDEX name ON tbl_lexicalrelationchildpart (name);
+
 -- Next, gather these parts into relation types themselves.
 -- Parent can't be NULL - the no-parent case will be handled by a special empty
 -- string parent. This is so that UNIQUE works correctly.
-CREATE TABLE synsetrelationtype (
+CREATE TABLE tbl_synsetrelationtype (
     id INTEGER PRIMARY KEY,
+    legacy_id INTEGER NULL ,
     parentpart INTEGER NOT NULL
-        REFERENCES synsetrelationparentpart (id),
+        REFERENCES tbl_synsetrelationparentpart (id),
     childpart INTEGER NOT NULL
-        REFERENCES synsetrelationchildpart (id),
+        REFERENCES tbl_synsetrelationchildpart (id),
 
     UNIQUE (parentpart, childpart)
 );
 
-CREATE TABLE lexicalrelationtype (
-    id INTEGER PRIMARY KEY,
+CREATE TABLE tbl_lexicalrelationtype (
+    id INTEGER PRIMARY KEY ,
+    legacy_id INTEGER NULL ,
     parentpart INTEGER NOT NULL
-        REFERENCES lexicalrelationparentpart (id),
+        REFERENCES tbl_lexicalrelationparentpart (id),
     childpart INTEGER NOT NULL
-        REFERENCES lexicalrelationchildpart (id),
-
-    UNIQUE (parentpart, childpart)
+        REFERENCES tbl_lexicalrelationchildpart (id)
+);
+CREATE UNIQUE INDEX parentpart ON tbl_lexicalrelationtype (
+    parentpart,
+    childpart
 );
 
 -- The below tables are simply maps of relation aliases to their main IDs.
 -- Reverse indexes are needed, too.
-CREATE TABLE synsetrelationalias (
+CREATE TABLE tbl_synsetrelationalias (
     name TEXT PRIMARY KEY NOT NULL COLLATE locale,
     relationid INTEGER NOT NULL
-        REFERENCES synsetrelationtype (id)
+        REFERENCES tbl_synsetrelationtype (id)
+);
+CREATE INDEX synsetrelationalias_irev ON tbl_synsetrelationalias (
+    relationid
 );
-CREATE INDEX synsetrelationalias_irev ON synsetrelationalias (relationid);
 
-CREATE TABLE lexicalrelationalias (
+CREATE TABLE tbl_lexicalrelationalias (
     name TEXT PRIMARY KEY NOT NULL COLLATE locale,
     relationid INTEGER NOT NULL
-        REFERENCES lexicalrelationtype (id)
+        REFERENCES tbl_lexicalrelationtype (id)
+);
+CREATE INDEX lexicalrelationalias_irev ON tbl_lexicalrelationalias (
+    relationid
 );
-CREATE INDEX lexicalrelationalias_irev ON lexicalrelationalias (relationid);
 
 -- Next are finally the relation instances
-CREATE TABLE synsetrelation (
+CREATE TABLE tbl_synsetrelation (
     source INTEGER NOT NULL
-        REFERENCES synset (id),
+        REFERENCES tbl_synset (id),
     relationtype INTEGER NOT NULL
-        REFERENCES synsetrelationtype (id),
+        REFERENCES tbl_synsetrelationtype (id),
     target INTEGER NOT NULL
-        REFERENCES synset (id),
+        REFERENCES tbl_synset (id),
 
     PRIMARY KEY (source, relationtype, target)
 );
-CREATE TABLE lexicalrelation (
+CREATE TABLE tbl_lexicalrelation (
     source INTEGER NOT NULL
-        REFERENCES lexicalunit (id),
+        REFERENCES tbl_lexicalunit (id),
     relationtype INTEGER NOT NULL
-        REFERENCES lexicalrelationtype (id),
+        REFERENCES tbl_lexicalrelationtype (id),
     target INTEGER NOT NULL
-        REFERENCES lexicalunit (id),
+        REFERENCES tbl_lexicalunit (id),
 
     PRIMARY KEY (source, relationtype, target)
 );
 
 -- Insert the special empty values for the parent part tables
-INSERT INTO synsetrelationparentpart (name) VALUES ('');
-INSERT INTO lexicalrelationparentpart (name) VALUES ('');
+INSERT INTO tbl_synsetrelationparentpart (name) VALUES ('');
+INSERT INTO tbl_lexicalrelationparentpart (name) VALUES ('');
 """
 # }}}
 
 _RELTYPE_TABLES = {
-    en.RelationKind.synset: u'synsetrelationtype',
-    en.RelationKind.lexical: u'lexicalrelationtype',
+    en.RelationKind.synset: u'tbl_synsetrelationtype',
+    en.RelationKind.lexical: u'tbl_lexicalrelationtype',
 }
 _RELALIAS_TABLES = {
-    en.RelationKind.synset: u'synsetrelationalias',
-    en.RelationKind.lexical: u'lexicalrelationalias',
+    en.RelationKind.synset: u'tbl_synsetrelationalias',
+    en.RelationKind.lexical: u'tbl_lexicalrelationalias',
 }
 _RELPARENTPART_TABLES = {
-    en.RelationKind.synset: u'synsetrelationparentpart',
-    en.RelationKind.lexical: u'lexicalrelationparentpart',
+    en.RelationKind.synset: u'tbl_synsetrelationparentpart',
+    en.RelationKind.lexical: u'tbl_lexicalrelationparentpart',
 }
 _RELCHILDPART_TABLES = {
-    en.RelationKind.synset: u'synsetrelationchildpart',
-    en.RelationKind.lexical: u'lexicalrelationchildpart',
+    en.RelationKind.synset: u'tbl_synsetrelationchildpart',
+    en.RelationKind.lexical: u'tbl_lexicalrelationchildpart',
 }
 _RELINST_TABLES = {
-    en.RelationKind.synset: u'synsetrelation',
-    en.RelationKind.lexical: u'lexicalrelation',
+    en.RelationKind.synset: u'tbl_synsetrelation',
+    en.RelationKind.lexical: u'tbl_lexicalrelation',
 }
 
 
 class PLWordNet(bs.PLWordNetBase):
     _STORAGE_NAME = 'sqlite3'
-    _SCHEMA_VERSION = 4
+    _SCHEMA_VERSION = '4'
 
     @classmethod
     def from_reader(cls, reader, dump_to=None):
@@ -370,12 +375,31 @@ class PLWordNet(bs.PLWordNetBase):
             )
 
     def lexical_unit_by_id(self, id_):
+
+        if isinstance(id_, str):
+            uuid = "X'" + id_.replace('-', '').upper() + "'"
+            with closing(self._db.cursor()) as cur:
+                cur.execute(
+                    u"""
+                    SELECT id
+                    FROM tbl_lexicalunit
+                    WHERE quote(tbl_lexicalunit.id) = ?
+                    """,
+                    (uuid,)
+                )
+                temp_id = cur.fetchone()
+                if temp_id is None:
+                    raise exc.LexicalUnitNotFound('id=' + repr(id_))
+                else:
+                    id_ = temp_id[0]
+
         with closing(self._db.cursor()) as cur:
             cur.execute(
                 u"""
-                SELECT lemma, pos.value, variant, synset
-                FROM lexicalunit JOIN pos ON lexicalunit.pos = pos.id
-                WHERE lexicalunit.id = ?
+                SELECT lemma, tbl_pos.value, variant, synset
+                FROM tbl_lexicalunit
+                JOIN tbl_pos ON tbl_lexicalunit.pos = tbl_pos.id
+                WHERE tbl_lexicalunit.id = ?
                 """,
                 (id_,)
            )
@@ -403,16 +427,16 @@ class PLWordNet(bs.PLWordNetBase):
             cur.execute(
                 u"""
                 SELECT source, target, relationtype
-                FROM lexicalrelation
+                FROM tbl_lexicalrelation
                 """ + where_clause,
                 param_tuple,
             )
 
             lu_q = u"""
-                SELECT lemma, pos.value, variant, synset
-                FROM lexicalunit
-                JOIN pos ON pos.id = lexicalunit.pos
-                WHERE lexicalunit.id = ?
+                SELECT lemma, tbl_pos.value, variant, synset
+                FROM tbl_lexicalunit
+                JOIN tbl_pos ON tbl_pos.id = tbl_lexicalunit.pos
+                WHERE tbl_lexicalunit.id = ?
             """
 
             edges = []
@@ -467,9 +491,26 @@ class PLWordNet(bs.PLWordNetBase):
             )
 
     def synset_by_id(self, id_):
+        if isinstance(id_, str):
+            uuid = "X'" + id_.replace('-', '').upper() + "'"
+            with closing(self._db.cursor()) as cur:
+                cur.execute(
+                    u"""
+                    SELECT id
+                    FROM tbl_synset
+                    WHERE quote(tbl_synset.id) = ?
+ """, + (uuid,) + ) + temp_id = cur.fetchone() + if temp_id is None: + raise exc.LexicalUnitNotFound('id=' + repr(id_)) + else: + id_ = temp_id[0] + with closing(self._db.cursor()) as cur: cur.execute( - u"SELECT EXISTS (SELECT 1 FROM synset WHERE id = ?)", + u"SELECT EXISTS (SELECT 1 FROM tbl_synset WHERE id = ?)", (id_,), ) if not cur.fetchone()[0]: @@ -495,14 +536,14 @@ class PLWordNet(bs.PLWordNetBase): ) select_clause = u"SELECT source, target, relationtype" - from_clause = u"FROM synsetrelation" + from_clause = u"FROM tbl_synsetrelation" # Pre-fetch artificial status if skipping is necessary if skip_artificial: select_clause += u", parentsyn.isartificial, childsyn.isartificial" from_clause += ( - u" JOIN synset AS parentsyn ON parentsyn.id = source" - u" JOIN synset AS childsyn ON childsyn.id = target" + u" JOIN tbl_synset AS parentsyn ON parentsyn.id = source" + u" JOIN tbl_synset AS childsyn ON childsyn.id = target" ) yield_edges = self.__syn_edges_withskip else: @@ -552,11 +593,12 @@ class PLWordNet(bs.PLWordNetBase): with closing(self._db.cursor()) as cur: cur.execute( u""" - SELECT lexicalunit.id, lemma, pos.value, variant, synset - FROM lexicalunit - JOIN pos ON lexicalunit.pos = pos.id + SELECT tbl_lexicalunit.id, lemma, tbl_pos.value, + variant, synset + FROM tbl_lexicalunit + JOIN tbl_pos ON tbl_lexicalunit.pos = tbl_pos.id WHERE COALESCE(lemma = :lem, :defval) - AND COALESCE(pos.value = :pos, :defval) + AND COALESCE(tbl_pos.value = :pos, :defval) AND COALESCE(variant = :var, :defval) """, { @@ -589,34 +631,37 @@ class PLWordNet(bs.PLWordNetBase): with self._db: self._db.executemany( - u"INSERT OR IGNORE INTO pos (value) VALUES (?)", + u"INSERT OR IGNORE INTO tbl_pos (value) VALUES (?)", ((p.value,) for p in en.PoS), ).close() self._db.executemany( - u"INSERT OR IGNORE INTO verbaspect (value) VALUES (?)", + u"INSERT OR IGNORE INTO tbl_verbaspect (value) VALUES (?)", ((va.value,) for va in en.VerbAspect), ).close() self._db.executemany( - u"INSERT OR IGNORE INTO emotionmark (value) VALUES (?)", + u"INSERT OR IGNORE INTO tbl_emotionmark (value) VALUES (?)", ((em.value,) for em in en.EmotionMarkedness), ).close() self._db.executemany( - u"INSERT OR IGNORE INTO emotionname (value) VALUES (?)", + u"INSERT OR IGNORE INTO tbl_emotionname (value) VALUES (?)", ((en.value,) for en in en.EmotionName), ).close() self._db.executemany( - u"INSERT OR IGNORE INTO emotionvaluation (value) VALUES (?)", + u""" + INSERT OR IGNORE INTO tbl_emotionvaluation (value) + VALUES (?) + """, ((ev.value,) for ev in en.EmotionValuation), ).close() self._db.executemany( - u"INSERT OR IGNORE INTO domain (value) VALUES (?)", + u"INSERT OR IGNORE INTO tbl_domain (value) VALUES (?)", ((dm.value,) for dm in en.Domain), ).close() # Insert version if the database is new self._db.execute( u""" - INSERT OR IGNORE INTO plwn_meta (name, value) + INSERT OR IGNORE INTO tbl_plwn_meta (name, value) VALUES ('version', ?) 
""", (self._SCHEMA_VERSION,), @@ -626,7 +671,7 @@ class PLWordNet(bs.PLWordNetBase): with closing(self._db.cursor()) as cur: try: cur.execute( - u"SELECT value FROM plwn_meta WHERE name = 'version'", + u"SELECT value FROM tbl_plwn_meta WHERE name = 'version'", ) except sqlite3.OperationalError: raise exc.LoadException( @@ -684,6 +729,8 @@ class LexicalUnit(bs.LexicalUnitBase): self._var = variant self._synid = synid # Rest is unitialized + self._uuid = _UNFETCHED + self._leg_id = _UNFETCHED self._syn = _UNFETCHED self._def = _UNFETCHED self._usn = _UNFETCHED @@ -723,6 +770,35 @@ class LexicalUnit(bs.LexicalUnitBase): def is_english(self): return self._pos.is_english + @property + def legacy_id(self): + if self._leg_id is _UNFETCHED: + with closing(self._db.cursor()) as cur: + cur.execute( + u"SELECT legacy_id FROM tbl_lexicalunit WHERE id = ?", + (self._id,), + ) + self._leg_id = cur.fetchone()[0] + return self._leg_id + + @property + def uuid(self): + if self._uuid is _UNFETCHED: + with closing(self._db.cursor()) as cur: + cur.execute( + u"SELECT quote(id) from tbl_lexicalunit WHERE id = ?", + (self._id,), + ) + temp = cur.fetchone()[0] + if isinstance(temp, str): + indices = [2, 10, 14, 18, 22] + self._uuid = ('-').join([temp[i:j] for i, j in + zip(indices, indices[1:] + [None]) + ]).lower()[:-1] + else: + self._uuid = temp + return self._uuid + @property def synset(self): if self._syn is _UNFETCHED or self._syn() is None: @@ -737,7 +813,7 @@ class LexicalUnit(bs.LexicalUnitBase): if self._def is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( - u"SELECT definition FROM lexicalunit WHERE id = ?", + u"SELECT definition FROM tbl_lexicalunit WHERE id = ?", (self._id,), ) self._def = cur.fetchone()[0] @@ -748,7 +824,7 @@ class LexicalUnit(bs.LexicalUnitBase): if self._exms is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( - u"SELECT example FROM senseexample WHERE unitid = ?", + u"SELECT example FROM tbl_senseexample WHERE unitid = ?", (self._id,), ) self._exms = tuple(row[0] for row in cur) @@ -759,7 +835,7 @@ class LexicalUnit(bs.LexicalUnitBase): if self._exms_srcs is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( - u"SELECT source FROM senseexample WHERE unitid = ?", + u"SELECT source FROM tbl_senseexample WHERE unitid = ?", (self._id,), ) self._exms_srcs = tuple(row[0] for row in cur) @@ -770,7 +846,7 @@ class LexicalUnit(bs.LexicalUnitBase): if self._extl is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( - u"SELECT link FROM externallink WHERE unitid = ?", + u"SELECT link FROM tbl_externallink WHERE unitid = ?", (self._id,), ) self._extl = tuple(row[0] for row in cur) @@ -781,7 +857,7 @@ class LexicalUnit(bs.LexicalUnitBase): if self._usn is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( - u"SELECT note FROM usagenote WHERE unitid = ?", + u"SELECT note FROM tbl_usagenote WHERE unitid = ?", (self._id,), ) self._usn = tuple(row[0] for row in cur) @@ -793,10 +869,10 @@ class LexicalUnit(bs.LexicalUnitBase): with closing(self._db.cursor()) as cur: cur.execute( u""" - SELECT domain.value - FROM lexicalunit JOIN domain - ON lexicalunit.domain = domain.id - WHERE lexicalunit.id = ? + SELECT tbl_domain.value + FROM tbl_lexicalunit JOIN tbl_domain + ON tbl_lexicalunit.domain = tbl_domain.id + WHERE tbl_lexicalunit.id = ? 
""", (self._id,), ) @@ -809,10 +885,10 @@ class LexicalUnit(bs.LexicalUnitBase): with closing(self._db.cursor()) as cur: cur.execute( u""" - SELECT verbaspect.value - FROM lexicalunit JOIN verbaspect - ON lexicalunit.verbaspect = verbaspect.id - WHERE lexicalunit.id = ? + SELECT tbl_verbaspect.value + FROM tbl_lexicalunit JOIN tbl_verbaspect + ON tbl_lexicalunit.verbaspect = tbl_verbaspect.id + WHERE tbl_lexicalunit.id = ? """, (self._id,), ) @@ -825,7 +901,7 @@ class LexicalUnit(bs.LexicalUnitBase): if self._is_emo is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( - u"SELECT isemotional FROM lexicalunit WHERE id = ?", + u"SELECT isemotional FROM tbl_lexicalunit WHERE id = ?", (self._id,), ) rowval = cur.fetchone()[0] @@ -838,10 +914,10 @@ class LexicalUnit(bs.LexicalUnitBase): with closing(self._db.cursor()) as cur: cur.execute( u""" - SELECT emotionmark.value - FROM lexicalunit JOIN emotionmark - ON lexicalunit.emotionmark = emotionmark.id - WHERE lexicalunit.id = ? + SELECT tbl_emotionmark.value + FROM tbl_lexicalunit JOIN tbl_emotionmark + ON tbl_lexicalunit.emotionmark = tbl_emotionmark.id + WHERE tbl_lexicalunit.id = ? """, (self._id,), ) @@ -859,11 +935,11 @@ class LexicalUnit(bs.LexicalUnitBase): with closing(self._db.cursor()) as cur: cur.execute( u""" - SELECT emotionname.value - FROM emotionname JOIN unitemotionname - ON emotionname.id = unitemotionname.nameid - WHERE unitemotionname.unitid = ? - ORDER BY emotionname.value + SELECT tbl_emotionname.value + FROM tbl_emotionname JOIN tbl_unitemotionname + ON tbl_emotionname.id = tbl_unitemotionname.nameid + WHERE tbl_unitemotionname.unitid = ? + ORDER BY tbl_emotionname.value """, (self._id,), ) @@ -876,12 +952,12 @@ class LexicalUnit(bs.LexicalUnitBase): with closing(self._db.cursor()) as cur: cur.execute( u""" - SELECT emotionvaluation.value - FROM emotionvaluation JOIN unitemotionvaluation - ON emotionvaluation.id = - unitemotionvaluation.valuationid - WHERE unitemotionvaluation.unitid = ? - ORDER BY emotionvaluation.value + SELECT tbl_emotionvaluation.value + FROM tbl_emotionvaluation JOIN tbl_unitemotionvaluation + ON tbl_emotionvaluation.id = + tbl_unitemotionvaluation.valuationid + WHERE tbl_unitemotionvaluation.unitid = ? + ORDER BY tbl_emotionvaluation.value """, (self._id,), ) @@ -896,7 +972,7 @@ class LexicalUnit(bs.LexicalUnitBase): if self._emo_ex1 is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( - u"SELECT emotionexample1 FROM lexicalunit WHERE id = ?", + u"SELECT emotionexample1 FROM tbl_lexicalunit WHERE id = ?", (self._id,), ) self._emo_ex1 = cur.fetchone()[0] @@ -907,7 +983,7 @@ class LexicalUnit(bs.LexicalUnitBase): if self._emo_ex2 is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( - u"SELECT emotionexample2 FROM lexicalunit WHERE id = ?", + u"SELECT emotionexample2 FROM tbl_lexicalunit WHERE id = ?", (self._id,), ) self._emo_ex2 = cur.fetchone()[0] @@ -921,7 +997,7 @@ class LexicalUnit(bs.LexicalUnitBase): cur.execute( u""" SELECT DISTINCT relationtype - FROM lexicalrelation + FROM tbl_lexicalrelation WHERE source = ? 
""", (self._id,), @@ -940,10 +1016,11 @@ class LexicalUnit(bs.LexicalUnitBase): with closing(self._db.cursor()) as cur: cur.execute( u""" - SELECT lexicalunit.id, lemma, pos.value, variant, synset - FROM lexicalrelation - JOIN lexicalunit ON lexicalunit.id = target - JOIN pos ON lexicalunit.pos = pos.id + SELECT tbl_lexicalunit.id, + lemma, tbl_pos.value, variant, synset + FROM tbl_lexicalrelation + JOIN tbl_lexicalunit ON tbl_lexicalunit.id = target + JOIN tbl_pos ON tbl_lexicalunit.pos = tbl_pos.id WHERE source = ? {} """.format(_make_relationtype_where(relinfos)), tuple(itt.chain( @@ -966,10 +1043,10 @@ class LexicalUnit(bs.LexicalUnitBase): cur.execute( u""" SELECT relationtype, - lexicalunit.id, lemma, pos.value, variant, synset - FROM lexicalrelation - JOIN lexicalunit ON lexicalunit.id = target - JOIN pos ON lexicalunit.pos = pos.id + tbl_lexicalunit.id, lemma, tbl_pos.value, variant, synset + FROM tbl_lexicalrelation + JOIN tbl_lexicalunit ON tbl_lexicalunit.id = target + JOIN tbl_pos ON tbl_lexicalunit.pos = tbl_pos.id WHERE source = ? {} """.format(_make_relationtype_where(relinfos)), tuple(itt.chain( @@ -1001,8 +1078,10 @@ class Synset(bs.SynsetBase): self._id = syn_id self._isart = syn_art + self._uuid = _UNFETCHED self._units = _UNFETCHED self._def = _UNFETCHED + self._leg_id = _UNFETCHED self._pos = _UNFETCHED self._is_polish = _UNFETCHED @@ -1038,8 +1117,9 @@ class Synset(bs.SynsetBase): with closing(self._db.cursor()) as cur: cur.execute( u""" - SELECT lexicalunit.id, lemma, pos.value, variant - FROM lexicalunit JOIN pos ON lexicalunit.pos = pos.id + SELECT tbl_lexicalunit.id, lemma, tbl_pos.value, variant + FROM tbl_lexicalunit + JOIN tbl_pos ON tbl_lexicalunit.pos = tbl_pos.id WHERE synset = ? ORDER BY unitindex """, @@ -1060,12 +1140,41 @@ class Synset(bs.SynsetBase): assert self._units return self._units + @property + def legacy_id(self): + if self._leg_id is _UNFETCHED: + with closing(self._db.cursor()) as cur: + cur.execute( + u"SELECT legacy_id FROM tbl_synset WHERE id = ?", + (self._id,), + ) + self._leg_id = cur.fetchone()[0] + return self._leg_id + + @property + def uuid(self): + if self._uuid is _UNFETCHED: + with closing(self._db.cursor()) as cur: + cur.execute( + u"SELECT quote(id) from tbl_synset WHERE id = ?", + (self._id,), + ) + temp = cur.fetchone()[0] + if isinstance(temp, str): + indices = [2, 10, 14, 18, 22] + self._uuid = ('-').join([temp[i:j] for i, j in + zip(indices, indices[1:] + [None]) + ]).lower()[:-1] + else: + self._uuid = temp + return self._uuid + @property def definition(self): if self._def is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( - u"SELECT definition FROM synset WHERE id = ?", + u"SELECT definition FROM tbl_synset WHERE id = ?", (self._id,), ) row = cur.fetchone() @@ -1078,7 +1187,7 @@ class Synset(bs.SynsetBase): if self._isart is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( - u"SELECT isartificial FROM synset WHERE id = ?", + u"SELECT isartificial FROM tbl_synset WHERE id = ?", (self._id,), ) row = cur.fetchone() @@ -1094,7 +1203,7 @@ class Synset(bs.SynsetBase): cur.execute( u""" SELECT DISTINCT relationtype - FROM synsetrelation + FROM tbl_synsetrelation WHERE source = ? 
""", (self._id,), @@ -1116,11 +1225,11 @@ class Synset(bs.SynsetBase): en.RelationKind.synset, ) select_clause = u"SELECT target" - from_clause = u"FROM synsetrelation" + from_clause = u"FROM tbl_synsetrelation" if skip_artificial: - select_clause += u", synset.isartificial, relationtype" - from_clause += u" JOIN synset ON target = synset.id" + select_clause += u", tbl_synset.isartificial, relationtype" + from_clause += u" JOIN tbl_synset ON target = tbl_synset.id" yield_related = self.__related_withskip else: yield_related = self.__related_noskip @@ -1153,11 +1262,11 @@ class Synset(bs.SynsetBase): en.RelationKind.synset, ) select_clause = u"SELECT relationtype, target" - from_clause = u"FROM synsetrelation" + from_clause = u"FROM tbl_synsetrelation" if skip_artificial: - select_clause += u", synset.isartificial" - from_clause += u" JOIN synset ON target = synset.id" + select_clause += u", tbl_synset.isartificial" + from_clause += u" JOIN tbl_synset ON target = tbl_synset.id" yield_related = self.__related_withskip_pairs else: yield_related = self.__related_noskip_pairs @@ -1240,6 +1349,19 @@ class RelationInfo(bs.RelationInfoBase): self._par = _UNFETCHED self._name = _UNFETCHED self._aliases = _UNFETCHED + self._leg_id = _UNFETCHED + + @property + def legacy_id(self): + if self._leg_id is _UNFETCHED: + with closing(self._db.cursor()) as cur: + cur.execute( + u"SELECT legacy_id FROM {} WHERE id = ?" + .format(_RELTYPE_TABLES[self._kind]), + (self._id,), + ) + self._leg_id = cur.fetchone()[0] + return self._leg_id @property def kind(self): @@ -1319,18 +1441,23 @@ class _DBBuilder(object): # Synset to lexical units relations also need to be deferred. self._synid2lexids = coll.defaultdict(list) # Cache IDs of constant values - with closing(db.execute(u"SELECT value, id FROM pos")) as cur: + with closing(db.execute(u"SELECT value, id FROM tbl_pos")) as cur: self._posids = dict(cur) - with closing(db.execute(u"SELECT value, id FROM verbaspect")) as cur: + with closing(db.execute(u"SELECT value, id FROM tbl_verbaspect")) \ + as cur: self._vaids = dict(cur) - with closing(db.execute(u"SELECT value, id FROM emotionmark")) as cur: + with closing(db.execute(u"SELECT value, id FROM tbl_emotionmark")) \ + as cur: self._emids = dict(cur) - with closing(db.execute(u"SELECT value, id FROM emotionname")) as cur: - self._enids = dict(cur) - with closing(db.execute(u"SELECT value, id FROM emotionvaluation")) \ + with closing(db.execute(u"SELECT value, id FROM tbl_emotionname")) \ as cur: + self._enids = dict(cur) + with closing(db.execute(u""" + SELECT value, id + FROM tbl_emotionvaluation + """)) as cur: self._evids = dict(cur) - with closing(db.execute(u"SELECT value, id FROM domain")) as cur: + with closing(db.execute(u"SELECT value, id FROM tbl_domain")) as cur: self._dmids = dict(cur) def __call__(self, reader): @@ -1357,10 +1484,11 @@ class _DBBuilder(object): def _insert_synset(self, syn_node): self._db.execute( u""" - INSERT INTO synset (id, definition, isartificial) - VALUES (?, ?, ?) + INSERT INTO tbl_synset (id, legacy_id, definition, isartificial) + VALUES (?, ?, ?, ?) 
""", - (syn_node.id, syn_node.definition, syn_node.is_artificial), + (syn_node.id, syn_node.legacy_id, syn_node.definition, + syn_node.is_artificial), ).close() # Related go into temp storage self._adhoc_synrels[syn_node.id] = syn_node.related @@ -1386,9 +1514,11 @@ class _DBBuilder(object): childname_id = self._ensure_rel_part_name(child_tbl, rel_node.name) # And now the relation itself cur.execute( - u"INSERT INTO {} (parentpart, childpart) VALUES (?, ?)" - .format(type_tbl), - (parname_id, childname_id), + u""" + INSERT INTO {} (legacy_id, parentpart, childpart) + VALUES (?, ?, ?) + """.format(type_tbl), + (rel_node.legacy_id, parname_id, childname_id), ) # Do aliases if present if rel_node.aliases: @@ -1408,15 +1538,15 @@ class _DBBuilder(object): try: cur.execute( u""" - INSERT INTO lexicalunit ( - id, lemma, pos, variant, + INSERT INTO tbl_lexicalunit ( + id, legacy_id, lemma, pos, variant, synset, unitindex, definition, domain, verbaspect, isemotional, emotionmark, emotionexample1, emotionexample2 ) VALUES ( - :id, :lemma, :pos, :var, + :id, :legacy_id, :lemma, :pos, :var, :syn, :uidx, :def, :dom, :va, :emo_is, :emo_m, @@ -1425,6 +1555,7 @@ class _DBBuilder(object): """, { u'id': lu_node.id, + u'legacy_id': lu_node.legacy_id, u'lemma': lu_node.lemma, u'pos': self._posids[lu_node.pos.value], u'var': lu_node.variant, @@ -1459,7 +1590,7 @@ class _DBBuilder(object): cur.executemany( u""" - INSERT INTO senseexample (unitid, example, source) + INSERT INTO tbl_senseexample (unitid, example, source) VALUES (?, ?, ?) """, ( @@ -1470,14 +1601,14 @@ class _DBBuilder(object): ) cur.executemany( u""" - INSERT INTO usagenote (unitid, note) + INSERT INTO tbl_usagenote (unitid, note) VALUES (?, ?) """, ((lu_node.id, note) for note in lu_node.usage_notes), ) cur.executemany( u""" - INSERT INTO externallink (unitid, link) + INSERT INTO tbl_externallink (unitid, link) VALUES (?, ?) """, ((lu_node.id, link) @@ -1485,7 +1616,7 @@ class _DBBuilder(object): ) cur.executemany( u""" - INSERT INTO unitemotionname (unitid, nameid) + INSERT INTO tbl_unitemotionname (unitid, nameid) VALUES (?, ?) """, ( @@ -1495,7 +1626,8 @@ class _DBBuilder(object): ) cur.executemany( u""" - INSERT INTO unitemotionvaluation (unitid, valuationid) + INSERT INTO tbl_unitemotionvaluation + (unitid, valuationid) VALUES (?, ?) """, ( @@ -1576,7 +1708,10 @@ class _DBBuilder(object): return row[0] cur.execute( - u"INSERT INTO {} (parentpart, childpart) VALUES (?, ?)" + u""" + INSERT INTO {} (parentpart, childpart) + VALUES (?, ?) 
+ """ .format(type_tbl), (empty_parent_id, child_id), ) @@ -1632,10 +1767,11 @@ class _DBBuilder(object): with closing(self._db.cursor()) as cur: cur.execute( u""" - SELECT synset.id - FROM synset - LEFT JOIN lexicalunit ON synset.id = lexicalunit.synset - WHERE lexicalunit.synset IS NULL + SELECT tbl_synset.id + FROM tbl_synset + LEFT JOIN tbl_lexicalunit + ON tbl_synset.id = tbl_lexicalunit.synset + WHERE tbl_lexicalunit.synset IS NULL """, ) empties = tuple(row[0] for row in cur) diff --git a/tests/abstract_cases/test_unit_and_synset.py b/tests/abstract_cases/test_unit_and_synset.py index 626e50f2161d79f5026a2152a3becc2392617fd9..4ac8513a1c366532e4b362d375dbb97783733728 100644 --- a/tests/abstract_cases/test_unit_and_synset.py +++ b/tests/abstract_cases/test_unit_and_synset.py @@ -29,9 +29,10 @@ class SynsetPropertiesTest(ut.TestCase): def setUp(self): self.__plwn = self._PLWNClass.from_reader(( - nd.make_synset_node(id=1, definition=u'foobar'), + nd.make_synset_node(id=1, legacy_id=1, definition=u'foobar'), nd.make_lexical_unit_node( id=11, + legacy_id=11, lemma=u'aaa', pos=en.PoS.n, variant=1, @@ -93,6 +94,7 @@ class SynsetRelationsTest(ut.TestCase): nd.make_synset_node(id=3), nd.make_lexical_unit_node( id=11, + legacy_id=11, lemma=u'aaa', pos=en.PoS.n, variant=1, @@ -102,6 +104,7 @@ class SynsetRelationsTest(ut.TestCase): ), nd.make_lexical_unit_node( id=21, + legacy_id=21, lemma=u'aaa', pos=en.PoS.n, variant=2, @@ -111,6 +114,7 @@ class SynsetRelationsTest(ut.TestCase): ), nd.make_lexical_unit_node( id=31, + legacy_id=31, lemma=u'aaa', pos=en.PoS.n, variant=3, @@ -257,6 +261,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase): nd.make_synset_node(id=8), nd.make_lexical_unit_node( id=11, + legacy_id=11, lemma=u'aaa', pos=en.PoS.n, variant=1, @@ -266,6 +271,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase): ), nd.make_lexical_unit_node( id=21, + legacy_id=21, lemma=u'aaa', pos=en.PoS.n, variant=2, @@ -275,6 +281,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase): ), nd.make_lexical_unit_node( id=31, + legacy_id=31, lemma=u'aaa', pos=en.PoS.n, variant=3, @@ -284,6 +291,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase): ), nd.make_lexical_unit_node( id=41, + legacy_id=41, lemma=u'aaa', pos=en.PoS.n, variant=4, @@ -293,6 +301,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase): ), nd.make_lexical_unit_node( id=51, + legacy_id=51, lemma=u'aaa', pos=en.PoS.n, variant=5, @@ -302,6 +311,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase): ), nd.make_lexical_unit_node( id=61, + legacy_id=61, lemma=u'aaa', pos=en.PoS.n, variant=6, @@ -311,6 +321,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase): ), nd.make_lexical_unit_node( id=71, + legacy_id=71, lemma=u'aaa', pos=en.PoS.n, variant=7, @@ -320,6 +331,7 @@ class SynsetRelationsWithArtificialTest(ut.TestCase): ), nd.make_lexical_unit_node( id=81, + legacy_id=81, lemma=u'aaa', pos=en.PoS.n, variant=8, @@ -445,6 +457,7 @@ class SynsetRelationsWithArtificialLoopTest(ut.TestCase): nd.make_synset_node(id=4), nd.make_lexical_unit_node( id=11, + legacy_id=11, lemma=u'aaa', pos=en.PoS.n, variant=1, @@ -454,6 +467,7 @@ class SynsetRelationsWithArtificialLoopTest(ut.TestCase): ), nd.make_lexical_unit_node( id=21, + legacy_id=21, lemma=u'aaa', pos=en.PoS.n, variant=2, @@ -463,6 +477,7 @@ class SynsetRelationsWithArtificialLoopTest(ut.TestCase): ), nd.make_lexical_unit_node( id=31, + legacy_id=31, lemma=u'aaa', pos=en.PoS.n, variant=3, @@ -472,6 +487,7 @@ class 
@@ -472,6 +487,7 @@ class SynsetRelationsWithArtificialLoopTest(ut.TestCase):
            ),
            nd.make_lexical_unit_node(
                id=41,
+                legacy_id=41,
                lemma=u'aaa',
                pos=en.PoS.n,
                variant=4,
@@ -528,6 +544,7 @@ class LexicalUnitPropertiesTest(ut.TestCase):
            nd.make_synset_node(id=1),
            nd.make_lexical_unit_node(
                id=11,
+                legacy_id=11,
                lemma=u'aaa',
                pos=en.PoS.n,
                variant=1,
@@ -644,6 +661,7 @@ class LexicalUnitRelationsTest(ut.TestCase):
            nd.make_synset_node(id=1),
            nd.make_lexical_unit_node(
                id=11,
+                legacy_id=11,
                lemma=u'aaa',
                pos=en.PoS.n,
                variant=1,
@@ -657,6 +675,7 @@ class LexicalUnitRelationsTest(ut.TestCase):
            ),
            nd.make_lexical_unit_node(
                id=12,
+                legacy_id=12,
                lemma=u'bbb',
                pos=en.PoS.n,
                variant=1,
@@ -667,6 +686,7 @@ class LexicalUnitRelationsTest(ut.TestCase):
            ),
            nd.make_lexical_unit_node(
                id=13,
+                legacy_id=13,
                lemma=u'ccc',
                pos=en.PoS.n,
                variant=1,
@@ -766,6 +786,7 @@ class ItemOrderingTest(ut.TestCase):
            nd.make_synset_node(id=3),
            nd.make_lexical_unit_node(
                id=11,
+                legacy_id=11,
                lemma=u'aaa',
                pos=en.PoS.n,
                variant=1,
@@ -775,6 +796,7 @@ class ItemOrderingTest(ut.TestCase):
            ),
            nd.make_lexical_unit_node(
                id=21,
+                legacy_id=21,
                lemma=u'bbb',
                pos=en.PoS.n,
                variant=1,
@@ -784,6 +806,7 @@ class ItemOrderingTest(ut.TestCase):
            ),
            nd.make_lexical_unit_node(
                id=22,
+                legacy_id=22,
                lemma=u'ąąą',
                pos=en.PoS.n,
                variant=2,
@@ -793,6 +816,7 @@ class ItemOrderingTest(ut.TestCase):
            ),
            nd.make_lexical_unit_node(
                id=31,
+                legacy_id=31,
                lemma=u'ąąą',
                pos=en.PoS.n,
                variant=1,
@@ -843,6 +867,7 @@ class ToDictTest(ut.TestCase):
            nd.make_synset_node(id=3),
            nd.make_lexical_unit_node(
                id=11,
+                legacy_id=11,
                lemma=u'aaa',
                pos=en.PoS.v,
                variant=1,
@@ -858,14 +883,15 @@ class ToDictTest(ut.TestCase):
                verb_aspect=en.VerbAspect.pred,
                emotion_markedness=en.EmotionMarkedness.strong_negative,
                emotion_names=(en.EmotionName.surprise,),
-                emotion_valuations=(
-                    en.EmotionValuation.ugliness,
+                emotion_valuations=[
                    en.EmotionValuation.error,
-                ),
+                    en.EmotionValuation.ugliness,
+                ],
                emotion_example_1=u'Bad thing.',
            ),
            nd.make_lexical_unit_node(
                id=21,
+                legacy_id=21,
                lemma=u'bbb',
                pos=en.PoS.n,
                variant=1,
@@ -875,6 +901,7 @@ class ToDictTest(ut.TestCase):
            ),
            nd.make_lexical_unit_node(
                id=31,
+                legacy_id=31,
                lemma=u'ccc',
                pos=en.PoS.n,
                variant=1,
@@ -885,8 +912,9 @@ class ToDictTest(ut.TestCase):
        ))
        self.__lex11_dict = {
            u'id': 11,
+            u'legacy_id': 11,
            u'lemma': u'aaa',
-            u'pos': u'verb',
+            u'pos': u'czasownik',
            u'variant': 1,
            u'synset': 1,
            u'definition': u'bar',
diff --git a/tests/cases/test_ubylmf_reader.py b/tests/cases/test_ubylmf_reader.py
index a8d7aba9944d7b84dab9e87c67291e7d6211ca1b..735c91c832890214572ba8aa2cd8ceadbdd2e7ef 100644
--- a/tests/cases/test_ubylmf_reader.py
+++ b/tests/cases/test_ubylmf_reader.py
@@ -28,7 +28,7 @@ test_xml = u"""<?xml version="1.0" encoding="UTF-8" ?>
 <LexicalResource dtdVersion="ubyDTD_1_0.dtd" name="plWordnet">
 <Lexicon languageIdentifier="pl" id="1" name="Słowosieć 2.2">
-<LexicalEntry id="15" partOfSpeech="noun">
+<LexicalEntry id="15" partOfSpeech="rzeczownik">
 <Lemma>
 <FormRepresentation writtenForm="'patafizyka"/>
 </Lemma>
@@ -173,7 +173,7 @@ class UBYLMFReaderTest(unittest.TestCase):
        # Missing <Lemma>
        xml_lu = et.fromstring(
            u"""
-            <LexicalEntry id="15" partOfSpeech="noun">
+            <LexicalEntry id="15" partOfSpeech="rzeczownik">
            </LexicalEntry>
            """.encode(ENCODING)
        )
@@ -192,7 +192,7 @@ class UBYLMFReaderTest(unittest.TestCase):
        # Empty <Lemma>
        xml_lu = et.fromstring(
            u"""
-            <LexicalEntry id="15" partOfSpeech="noun">
+            <LexicalEntry id="15" partOfSpeech="rzeczownik">
            <Lemma>
            <FormRepresentation writtenForm=""/>
            </Lemma>
@@ -227,7 +227,7 @@ class UBYLMFReaderTest(unittest.TestCase):
        # Incorrect unit index
        xml_lu = et.fromstring(
            u"""
-            <LexicalEntry id="15" partOfSpeech="noun">
+            <LexicalEntry id="15" partOfSpeech="rzeczownik">
            <Lemma>
            <FormRepresentation writtenForm="'patafizyka"/>
            </Lemma>
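Taken together, the storage changes mean a unit or synset is now primarily identified by a UUID stored as a BLOB primary key, with the old numeric identifier kept in legacy_id, and synset_by_id() / lexical_unit_by_id() accept either the internal integer key or a UUID string. The new uuid properties rebuild the dashed form from SQLite's quote(id) literal; a standalone sketch of that conversion (hypothetical helper name and value, the slicing logic copied from the patch):

    def blob_literal_to_uuid(quoted):
        # quote(id) on a 16-byte BLOB yields a literal like X'0A1B...F9'.
        # Slicing at [2, 10, 14, 18, 22] carves the hex into the 8-4-4-4-12
        # UUID groups; the final [:-1] drops the trailing quote character.
        indices = [2, 10, 14, 18, 22]
        return '-'.join(
            quoted[i:j] for i, j in zip(indices, indices[1:] + [None])
        ).lower()[:-1]

    print(blob_literal_to_uuid("X'0A1B2C3D4E5F60718293A4B5C6D7E8F9'"))
    # 0a1b2c3d-4e5f-6071-8293-a4b5c6d7e8f9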