# coding: utf8

# Copyright (C) 2017 Michał Kaliński
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Implementation that stores data from plWordNet in a sqlite database file.

With an impromptu schema.
"""

from __future__ import absolute_import, division
try:
    from future_builtins import zip
except ImportError:
    pass

import sqlite3
import collections as coll
from contextlib import closing
import errno
import itertools as itt
import locale
import logging
import os
import shutil
import tempfile
import weakref

import six

from ..readers import nodes as nd
from ..utils.artifilter import (
    filter_artificial_related_synsets,
    filter_artificial_synset_edges,
)
from ..utils.relinfotuple import RelationInfoTuple
from .. import bases as bs, exceptions as exc, enums as en


# Names exported by ``from <module> import *``.
__all__ = 'PLWordNet', 'Synset', 'LexicalUnit', 'RelationInfo'


_LOG = logging.getLogger(__name__)

# Marker object for data that has not been fetched from the database.
# Lazily loaded attributes are compared against it by identity, which lets
# ``None`` be cached as a legitimately fetched value.
_UNFETCHED = object()

# SQL script used to initialize the database. {{{
# It is run with ``executescript`` by ``PLWordNet.__init_db`` and creates all
# tables and indexes, plus the special empty-name rows of the parent-part
# tables.
# "locale" collation must be defined on the connection before this is executed,
# because several columns are declared with ``COLLATE locale``.
_DB_SCHEMA_SCRIPT = u"""
PRAGMA foreign_keys = ON;

-- Metadata table. Used for version number, currently
CREATE TABLE tbl_plwn_meta (
    name TEXT UNIQUE NOT NULL  ,
    value BLOB
);

-- Tables for constant values
CREATE TABLE tbl_pos (
    id INTEGER PRIMARY KEY,
    value TEXT NOT NULL
);

CREATE TABLE tbl_verbaspect (
    id INTEGER PRIMARY KEY,
    value TEXT UNIQUE NOT NULL
);

CREATE TABLE tbl_emotionmark (
    id INTEGER PRIMARY KEY,
    value TEXT UNIQUE NOT NULL
);

CREATE TABLE tbl_emotionname (
    id INTEGER PRIMARY KEY,
    value TEXT UNIQUE NOT NULL COLLATE locale
);

CREATE TABLE tbl_emotionvaluation (
    id INTEGER PRIMARY KEY,
    value TEXT NOT NULL COLLATE locale
);

CREATE TABLE tbl_domain (
    id INTEGER PRIMARY KEY,
    value TEXT NOT NULL COLLATE locale
);
CREATE UNIQUE INDEX value ON tbl_domain (value);

-- Synset only gets one simple table
CREATE TABLE tbl_synset (
    id INTEGER PRIMARY KEY,
    legacy_id INTEGER NULL  ,
    definition TEXT COLLATE locale,
    isartificial INTEGER NOT NULL DEFAULT 0
);

-- Lexical units have several tables, since they have several list-like
-- properties. They also need indexes for lookup.

CREATE TABLE tbl_lexicalunit (
    id INTEGER PRIMARY KEY,
    legacy_id INTEGER NULL  ,
    lemma TEXT NOT NULL COLLATE locale,
    pos INTEGER NOT NULL
        REFERENCES tbl_pos (id),
    variant INTEGER NOT NULL  ,
    synset INTEGER NOT NULL
        REFERENCES tbl_synset (id),
    unitindex INTEGER NOT NULL  ,
    definition TEXT COLLATE locale,
    domain INTEGER NOT NULL
        REFERENCES tbl_domain (id),
    verbaspect INTEGER
        REFERENCES tbl_verbaspect (id),
    isemotional INTEGER,
    emotionmark INTEGER,
    emotionexample1 TEXT COLLATE locale,
    emotionexample2 TEXT COLLATE locale
);

CREATE UNIQUE INDEX lemma ON tbl_lexicalunit (lemma, pos, variant);
CREATE INDEX lex_i_lem_var ON tbl_lexicalunit (lemma, variant);
CREATE INDEX lex_i_pos ON tbl_lexicalunit (pos);
CREATE UNIQUE INDEX synset ON tbl_lexicalunit (synset, unitindex);

-- Tables dependant on lexicalunit
CREATE TABLE tbl_senseexample (
    unitid INTEGER NOT NULL
        REFERENCES tbl_lexicalunit (id),
    example TEXT NOT NULL COLLATE locale,
    source TEXT NOT NULL COLLATE locale
);
CREATE INDEX sen_i ON tbl_senseexample (unitid);

CREATE TABLE tbl_externallink (
    unitid INTEGER NOT NULL
        REFERENCES tbl_lexicalunit (id),
    link TEXT NOT NULL COLLATE locale
);
CREATE INDEX link_i ON tbl_externallink (unitid);

CREATE TABLE tbl_usagenote (
    unitid INTEGER NOT NULL
        REFERENCES tbl_lexicalunit (id),
    note TEXT NOT NULL COLLATE locale
);
CREATE INDEX note_i ON tbl_usagenote (unitid);

CREATE TABLE tbl_unitemotionname (
    unitid INTEGER NOT NULL
        REFERENCES tbl_lexicalunit (id),
    nameid INTEGER NOT NULL
        REFERENCES tbl_emotionname (id),
    PRIMARY KEY (unitid, nameid)
);

CREATE TABLE tbl_unitemotionvaluation (
    unitid INTEGER NOT NULL
        REFERENCES tbl_lexicalunit (id),
    valuationid INTEGER NOT NULL
        REFERENCES tbl_emotionvaluation (id),
    PRIMARY KEY (unitid, valuationid)
);

-- Relation tables --

-- The for below are used to gather combinations of parent / child relation
-- names.
CREATE TABLE tbl_synsetrelationparentpart (
    id INTEGER PRIMARY KEY,
    name TEXT UNIQUE NOT NULL COLLATE locale
);
CREATE TABLE tbl_synsetrelationchildpart (
    id INTEGER PRIMARY KEY,
    name TEXT UNIQUE NOT NULL COLLATE locale
);
CREATE TABLE tbl_lexicalrelationparentpart (
    id INTEGER PRIMARY KEY,
    name TEXT UNIQUE NOT NULL COLLATE locale
);
CREATE TABLE tbl_lexicalrelationchildpart (
    id INTEGER PRIMARY KEY,
    name TEXT UNIQUE NOT NULL COLLATE locale
);
CREATE UNIQUE INDEX name ON tbl_lexicalrelationchildpart (name);


-- Next, gather these parts into relation types themselves.
-- Parent can't be NULL - the no-parent case will be handled by a special empty
-- string parent. This is so that UNIQUE works correctly.
CREATE TABLE tbl_synsetrelationtype (
    id INTEGER PRIMARY KEY,
    legacy_id INTEGER NULL  ,
    parentpart INTEGER NOT NULL
        REFERENCES tbl_synsetrelationparentpart (id),
    childpart INTEGER NOT NULL
        REFERENCES tbl_synsetrelationchildpart (id),

    UNIQUE (parentpart, childpart)
);
CREATE TABLE tbl_lexicalrelationtype (
    id INTEGER PRIMARY KEY ,
    legacy_id INTEGER NULL  ,
    parentpart INTEGER NOT NULL
        REFERENCES tbl_lexicalrelationparentpart (id),
    childpart INTEGER NOT NULL
        REFERENCES tbl_lexicalrelationchildpart (id)
);
CREATE UNIQUE INDEX parentpart ON tbl_lexicalrelationtype (
        parentpart,
        childpart
);

-- The below tables are simply maps of relation aliases to their main IDs.
-- Reverse indexes are needed, too.
CREATE TABLE tbl_synsetrelationalias (
    name TEXT PRIMARY KEY NOT NULL COLLATE locale,
    relationid INTEGER NOT NULL
        REFERENCES tbl_synsetrelationtype (id)
);
CREATE INDEX synsetrelationalias_irev ON tbl_synsetrelationalias (
        relationid
);
CREATE TABLE tbl_lexicalrelationalias (
    name TEXT PRIMARY KEY NOT NULL COLLATE locale,
    relationid INTEGER NOT NULL
        REFERENCES tbl_lexicalrelationtype (id)
);
CREATE INDEX lexicalrelationalias_irev ON tbl_lexicalrelationalias (
        relationid
);

-- Next are finally the relation instances
CREATE TABLE tbl_synsetrelation (
    source INTEGER NOT NULL
        REFERENCES tbl_synset (id),
    relationtype INTEGER NOT NULL
        REFERENCES tbl_synsetrelationtype (id),
    target INTEGER NOT NULL
        REFERENCES tbl_synset (id),

    PRIMARY KEY (source, relationtype, target)
);
CREATE TABLE tbl_lexicalrelation (
    source INTEGER NOT NULL
        REFERENCES tbl_lexicalunit (id),
    relationtype INTEGER NOT NULL
        REFERENCES tbl_lexicalrelationtype (id),
    target INTEGER NOT NULL
        REFERENCES tbl_lexicalunit (id),

    PRIMARY KEY (source, relationtype, target)
);

-- Insert the special empty values for the parent part tables
INSERT INTO tbl_synsetrelationparentpart (name) VALUES ('');
INSERT INTO tbl_lexicalrelationparentpart (name) VALUES ('');
"""  # }}}

# Table names for each relation kind, keyed by ``en.RelationKind``. Each of
# the maps below names one group of tables created by the schema script.

# Relation type tables (parent part / child part combinations).
_RELTYPE_TABLES = {
    en.RelationKind.synset: u'tbl_synsetrelationtype',
    en.RelationKind.lexical: u'tbl_lexicalrelationtype',
}
# Alias tables, mapping alias names to relation type IDs.
_RELALIAS_TABLES = {
    en.RelationKind.synset: u'tbl_synsetrelationalias',
    en.RelationKind.lexical: u'tbl_lexicalrelationalias',
}
# Tables holding the parent parts of relation names.
_RELPARENTPART_TABLES = {
    en.RelationKind.synset: u'tbl_synsetrelationparentpart',
    en.RelationKind.lexical: u'tbl_lexicalrelationparentpart',
}
# Tables holding the child parts of relation names.
_RELCHILDPART_TABLES = {
    en.RelationKind.synset: u'tbl_synsetrelationchildpart',
    en.RelationKind.lexical: u'tbl_lexicalrelationchildpart',
}
# Relation instance tables (source, relation type, target).
_RELINST_TABLES = {
    en.RelationKind.synset: u'tbl_synsetrelation',
    en.RelationKind.lexical: u'tbl_lexicalrelation',
}


class PLWordNet(bs.PLWordNetBase):

    # Name of this storage backend.
    # NOTE(review): presumably consumed by the base class / package loader to
    # identify the implementation -- confirm against ``bases``.
    _STORAGE_NAME = 'sqlite3'
    # Schema version: written to ``tbl_plwn_meta`` by ``__init_db`` and
    # compared with the stored value by ``__check_db``.
    _SCHEMA_VERSION = '4'

    @classmethod
    def from_reader(cls, reader, dump_to=None):
        plwn = cls(dump_to)

        try:
            plwn.__init_db()
            plwn.__read_data(reader)
        except BaseException:
            plwn.close()
            raise

        return plwn

    @classmethod
    def from_dump(cls, dump):
        plwn = cls(dump)

        try:
            plwn.__check_db()
        except BaseException:
            plwn.close()
            raise

        return plwn

    def __init__(self, db_file=None):
        """**NOTE:** This constructor should not be invoked directly.

        Use one of the standard methods: ``from_dump`` or ``from_reader``.

        :param db_file: path of the sqlite database file. With ``None``, a
            fresh temporary directory is created and the database is placed
            inside it; that directory is removed again by :meth:`close`.
        """
        self._tmp_dir = None
        if db_file is None:
            # Only the directory is created here; sqlite creates the file
            # itself when connecting.
            self._tmp_dir = tempfile.mkdtemp(prefix='plwn_api-')
            db_file = os.path.join(self._tmp_dir, 'db')

        try:
            self._db = sqlite3.connect(db_file)
        except BaseException:
            self.__drop_tmpdir()
            raise

        # The "locale" collation is referenced by the schema and has to be
        # registered anew on every connection.
        try:
            self._db.create_collation('locale', locale.strcoll)
        except BaseException:
            self._db.close()
            self.__drop_tmpdir()
            raise

        self._relcache = _RelCache(self._db)

    def close(self):
        self._db.close()
        self.__drop_tmpdir()

    def lexical_units(self, lemma=None, pos=None, variant=None):
        return tuple(
            LexicalUnit(self._db, self._relcache, *row)
            for row in self._select_lexical_units(lemma, pos, variant, True)
        )

    def lexical_unit(self, lemma, pos, variant):
        """Return the single lexical unit identified by all three values.

        :raises LexicalUnitNotFound: if no unit matches. Since a ``None``
            argument never matches here, it also ends in this exception.
        """
        row = self._get_one_lexical_unit(
            lemma,
            pos,
            variant,
            exc.LexicalUnitNotFound,
        )
        return LexicalUnit(self._db, self._relcache, *row)

    def lexical_unit_by_id(self, id_):
        """Return the lexical unit with the given ID.

        :param id_: the ID as stored in ``tbl_lexicalunit.id``, or a ``str``
            holding a (possibly dash-separated) hexadecimal rendering of the
            ID, which is first resolved to the stored value.

        :raises LexicalUnitNotFound: if no row matches.
        """
        if isinstance(id_, str):
            # Build the hexadecimal literal form (X'...') that quote() gives
            # for a BLOB and look the stored ID up by it.
            blob_literal = "X'" + id_.replace('-', '').upper() + "'"
            with closing(self._db.cursor()) as lookup:
                lookup.execute(
                    u"""
                    SELECT id
                    FROM tbl_lexicalunit
                    WHERE quote(tbl_lexicalunit.id) = ?
                    """,
                    (blob_literal,)
                )
                resolved = lookup.fetchone()
            if resolved is None:
                raise exc.LexicalUnitNotFound('id=' + repr(id_))
            id_ = resolved[0]

        with closing(self._db.cursor()) as cursor:
            cursor.execute(
                u"""
                SELECT lemma, tbl_pos.value, variant, synset
                FROM tbl_lexicalunit
                    JOIN tbl_pos ON tbl_lexicalunit.pos = tbl_pos.id
                WHERE tbl_lexicalunit.id = ?
                """,
                (id_,)
            )
            unit_row = cursor.fetchone()

        if unit_row is None:
            raise exc.LexicalUnitNotFound('id=' + repr(id_))
        return LexicalUnit(self._db, self._relcache, id_, *unit_row)

    def lexical_relation_edges(self, include=None, exclude=None):
        """Return a tuple of ``RelationEdge`` for lexical relation instances.

        :param include: iterable of relations to restrict the result to, or
            ``None`` for no restriction.
        :param exclude: iterable of relations to leave out, or ``None``.

        Both arguments are resolved to relation type IDs through the relation
        cache. Every edge holds the source unit, the relation info and the
        target unit.
        """
        lexical = en.RelationKind.lexical

        def collect_ids(relations):
            # ``None`` means "no filter"; otherwise flatten the IDs of all
            # given relations into one set.
            if relations is None:
                return None
            return frozenset(itt.chain.from_iterable(
                self._relcache.get_ids(rel, lexical)
                for rel in relations
            ))

        where_clause, param_tuple = _make_include_exclude(
            collect_ids(include),
            collect_ids(exclude),
            u'relationtype',
        )

        def load_unit(cursor, unit_id):
            # Fetch the constructor data of one unit and wrap it.
            cursor.execute(lu_q, (unit_id,))
            return LexicalUnit(
                self._db,
                self._relcache,
                unit_id,
                *cursor.fetchone()
            )

        with closing(self._db.cursor()) as cur:
            cur.execute(
                u"""
                SELECT source, target, relationtype
                FROM tbl_lexicalrelation
                """ + where_clause,
                param_tuple,
            )

            lu_q = u"""
            SELECT lemma, tbl_pos.value, variant, synset
            FROM tbl_lexicalunit
                JOIN tbl_pos ON tbl_pos.id = tbl_lexicalunit.pos
            WHERE tbl_lexicalunit.id = ?
            """

            edges = []
            for parent_id, child_id, rel_id in cur:
                # A second cursor is needed, as ``cur`` is still iterated.
                with closing(self._db.cursor()) as cur2:
                    par_lu = load_unit(cur2, parent_id)
                    chl_lu = load_unit(cur2, child_id)
                rel_info = self._relcache.get_info_by_id(rel_id, lexical)
                edges.append(bs.RelationEdge(par_lu, rel_info, chl_lu))

            return tuple(edges)

    def synsets(self, lemma=None, pos=None, variant=None):
        synids = frozenset(
            row[-1]
            for row in self._select_lexical_units(lemma, pos, variant, True)
        )
        return tuple(
            Synset(self._db, self._relcache, synid)
            for synid in synids
        )

    def synset(self, lemma, pos, variant):
        """Return the synset of the unit identified by all three values.

        :raises SynsetNotFound: if no lexical unit matches.
        """
        unit_row = self._get_one_lexical_unit(
            lemma,
            pos,
            variant,
            exc.SynsetNotFound,
        )
        # The synset ID is the last column of the unit row.
        return Synset(self._db, self._relcache, unit_row[-1])

    def synset_by_id(self, id_):
        """Return the synset with the given ID.

        :param id_: the ID as stored in ``tbl_synset.id``, or a ``str``
            holding a (possibly dash-separated) hexadecimal rendering of the
            ID, which is first resolved to the stored value.

        :raises SynsetNotFound: if no synset has this ID, whichever form the
            ID was given in.
        """
        if isinstance(id_, str):
            # Build the hexadecimal literal form (X'...') that quote() gives
            # for a BLOB and look the stored ID up by it.
            uuid = "X'" + id_.replace('-', '').upper() + "'"
            with closing(self._db.cursor()) as cur:
                cur.execute(
                    u"""
                    SELECT id
                    FROM tbl_synset
                    WHERE quote(tbl_synset.id) = ?
                    """,
                    (uuid,)
                )
                temp_id = cur.fetchone()
            if temp_id is None:
                # A missing synset must be reported as such, not as a
                # missing lexical unit.
                raise exc.SynsetNotFound('id=' + repr(id_))
            id_ = temp_id[0]

        with closing(self._db.cursor()) as cur:
            cur.execute(
                u"SELECT EXISTS (SELECT 1 FROM tbl_synset WHERE id = ?)",
                (id_,),
            )
            if not cur.fetchone()[0]:
                raise exc.SynsetNotFound('id=' + repr(id_))
        return Synset(self._db, self._relcache, id_)

    def synset_relation_edges(self,
                              include=None,
                              exclude=None,
                              skip_artificial=True):
        """Return a tuple of ``RelationEdge`` for synset relation instances.

        :param include: iterable of relations to restrict the result to, or
            ``None`` for no restriction.
        :param exclude: iterable of relations to leave out, or ``None``.
        :param skip_artificial: when true, the artificial flag of both ends
            is selected along with each edge and the edges are passed through
            ``filter_artificial_synset_edges``.
        """
        synset_kind = en.RelationKind.synset

        def collect_ids(relations):
            # ``None`` means "no filter"; otherwise flatten the IDs of all
            # given relations into one set.
            if relations is None:
                return None
            return frozenset(itt.chain.from_iterable(
                self._relcache.get_ids(rel, synset_kind)
                for rel in relations
            ))

        where_clause, param_tuple = _make_include_exclude(
            collect_ids(include),
            collect_ids(exclude),
            u'relationtype',
        )

        query_parts = [
            u"SELECT source, target, relationtype",
            u"FROM tbl_synsetrelation",
        ]

        if skip_artificial:
            # Pre-fetch the artificial status of both ends of every edge.
            query_parts[0] += u", parentsyn.isartificial, childsyn.isartificial"
            query_parts[1] += (
                u" JOIN tbl_synset AS parentsyn ON parentsyn.id = source"
                u" JOIN tbl_synset AS childsyn ON childsyn.id = target"
            )
            make_edges = self.__syn_edges_withskip
        else:
            make_edges = self.__syn_edges_noskip

        query_parts.append(where_clause)

        with closing(self._db.cursor()) as cur:
            cur.execute(u'\n'.join(query_parts), param_tuple)
            return tuple(make_edges(cur))

    def relations_info(self, name=None, kind=None):
        """Return an iterable of relation infos from the relation cache.

        :param name: relation name to look up; ``None`` selects all.
        :param kind: value convertible to ``RelationKind``; with ``None``
            both synset and lexical relations are chained (synset first).

        When ``kind`` is ``None`` and a name is given, the lookups are made
        with ``allow_nonexistent=True``; with an explicit kind they are not.
        """
        cache = self._relcache

        if kind is not None:
            wanted_kind = en.RelationKind(kind)
            if name is None:
                return cache.get_all_of_kind(wanted_kind)
            return cache.get_infos_by_name(name, wanted_kind)

        if name is None:
            return itt.chain(
                cache.get_all_of_kind(en.RelationKind.synset),
                cache.get_all_of_kind(en.RelationKind.lexical),
            )

        return itt.chain(
            cache.get_infos_by_name(
                name,
                en.RelationKind.synset,
                allow_nonexistent=True,
            ),
            cache.get_infos_by_name(
                name,
                en.RelationKind.lexical,
                allow_nonexistent=True,
            ),
        )

    def _select_lexical_units(self, lemma, pos, variant, defval):
        with closing(self._db.cursor()) as cur:
            cur.execute(
                u"""
                SELECT tbl_lexicalunit.id, lemma, tbl_pos.value,
                    variant, synset
                FROM tbl_lexicalunit
                    JOIN tbl_pos ON tbl_lexicalunit.pos = tbl_pos.id
                WHERE COALESCE(lemma = :lem, :defval)
                    AND COALESCE(tbl_pos.value = :pos, :defval)
                    AND COALESCE(variant = :var, :defval)
                """,
                {
                    u'lem': lemma,
                    u'pos': en.PoS(pos).value if pos else None,
                    u'var': variant,
                    u'defval': defval,
                },
            )
            return cur.fetchall()

    def _get_one_lexical_unit(self, lemma, pos, variant, exc_class):
        # False by default will force-return nothing if any is None
        lu_rows = self._select_lexical_units(lemma, pos, variant, False)
        try:
            lu_row = lu_rows[0]
        except IndexError:
            raise exc_class(
                'lemma={!r}, pos={!r}, variant={!r}'.format(
                    lemma,
                    pos,
                    variant,
                ),
            )
        assert len(lu_rows) == 1
        return lu_row

    def __init_db(self):
        """Create the schema and fill the constant and metadata tables.

        Runs the schema script, then (in one transaction) inserts the value
        of every member of the constant enums into its table and records the
        schema version in ``tbl_plwn_meta``.
        """
        self._db.executescript(_DB_SCHEMA_SCRIPT).close()

        # Insert statement for each constant table, with the enum providing
        # its values.
        constant_inserts = (
            (
                u"INSERT OR IGNORE INTO tbl_pos (value) VALUES (?)",
                en.PoS,
            ),
            (
                u"INSERT OR IGNORE INTO tbl_verbaspect (value) VALUES (?)",
                en.VerbAspect,
            ),
            (
                u"INSERT OR IGNORE INTO tbl_emotionmark (value) VALUES (?)",
                en.EmotionMarkedness,
            ),
            (
                u"INSERT OR IGNORE INTO tbl_emotionname (value) VALUES (?)",
                en.EmotionName,
            ),
            (
                u"""
                INSERT OR IGNORE INTO tbl_emotionvaluation (value)
                VALUES (?)
                """,
                en.EmotionValuation,
            ),
            (
                u"INSERT OR IGNORE INTO tbl_domain (value) VALUES (?)",
                en.Domain,
            ),
        )

        with self._db:
            for statement, enum_class in constant_inserts:
                self._db.executemany(
                    statement,
                    ((member.value,) for member in enum_class),
                ).close()

            # Insert version if the database is new
            self._db.execute(
                u"""
                INSERT OR IGNORE INTO tbl_plwn_meta (name, value)
                VALUES ('version', ?)
                """,
                (self._SCHEMA_VERSION,),
            ).close()

    def __check_db(self):
        with closing(self._db.cursor()) as cur:
            try:
                cur.execute(
                    u"SELECT value FROM tbl_plwn_meta WHERE name = 'version'",
                )
            except sqlite3.OperationalError:
                raise exc.LoadException(
                    'Connected database seems not to be a PLWN database',
                )

            row = cur.fetchone()

        verval = row[0] if row is not None else None
        if verval != self._SCHEMA_VERSION:
            raise exc.DumpVersionException(verval, self._SCHEMA_VERSION)

    def __read_data(self, reader):
        """Fill the database from ``reader`` using a ``_DBBuilder``."""
        build = _DBBuilder(self._db)
        build(reader)

    def __drop_tmpdir(self):
        if self._tmp_dir is not None:
            try:
                shutil.rmtree(self._tmp_dir)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise

    def __syn_edges_noskip(self, rowiter):
        """Lazily yield a ``RelationEdge`` for every row of ``rowiter``.

        :param rowiter: iterable of ``(source id, target id, relation type
            id)`` rows.
        """
        for source_id, target_id, reltype_id in rowiter:
            source = Synset(self._db, self._relcache, source_id)
            relation = self._relcache.get_info_by_id(
                reltype_id,
                en.RelationKind.synset,
            )
            target = Synset(self._db, self._relcache, target_id)
            yield bs.RelationEdge(source, relation, target)

    def __syn_edges_withskip(self, rowiter):
        """Build edges with known artificial flags and filter them.

        :param rowiter: iterable of ``(source id, target id, relation type
            id, source artificial flag, target artificial flag)`` rows.

        :return: the result of ``filter_artificial_synset_edges`` applied to
            the lazily built edges.
        """
        def build_edge(row):
            parent_id, child_id, rel_id, parent_art, child_art = row
            return bs.RelationEdge(
                Synset(self._db, self._relcache, parent_id, bool(parent_art)),
                self._relcache.get_info_by_id(rel_id, en.RelationKind.synset),
                Synset(self._db, self._relcache, child_id, bool(child_art)),
            )

        return filter_artificial_synset_edges(
            build_edge(row) for row in rowiter
        )


class LexicalUnit(bs.LexicalUnitBase):

    def __init__(self, conn, relcache, id_, lemma, pos, variant, synid):
        """**NOTE:** This constructor should not be called directly.

        Use :class:`PLWordNet` methods to obtain lexical units.

        Only the values passed in are stored right away; every other
        attribute starts as the ``_UNFETCHED`` marker and is loaded from the
        database when the matching property is first read.
        """
        # Handles used for fetching the rest of the data
        self._db, self._relcache = conn, relcache

        # Data known at construction time
        self._id, self._lemma, self._var = id_, lemma, variant
        self._pos = en.PoS(pos)
        self._synid = synid

        # Everything below is fetched lazily.
        self._uuid = self._leg_id = self._syn = _UNFETCHED
        self._def = self._dom = self._va = _UNFETCHED
        self._usn = self._extl = _UNFETCHED
        self._exms = self._exms_srcs = _UNFETCHED
        self._is_emo = self._emo_mark = _UNFETCHED
        self._emo_names = self._emo_valuations = _UNFETCHED
        self._emo_ex1 = self._emo_ex2 = _UNFETCHED

    @property
    def id(self):
        return self._id

    @property
    def lemma(self):
        return self._lemma

    @property
    def pos(self):
        return self._pos

    @property
    def variant(self):
        return self._var

    @property
    def is_polish(self):
        return self._pos.is_polish

    @property
    def is_english(self):
        return self._pos.is_english

    @property
    def legacy_id(self):
        """Value of the ``legacy_id`` column; fetched once, then cached."""
        if self._leg_id is not _UNFETCHED:
            return self._leg_id
        with closing(self._db.cursor()) as cursor:
            cursor.execute(
                u"SELECT legacy_id FROM tbl_lexicalunit WHERE id = ?",
                (self._id,),
            )
            self._leg_id = cursor.fetchone()[0]
        return self._leg_id

    @property
    def uuid(self):
        """The ID of this unit rendered as text; fetched once, then cached.

        The ID is read through SQLite's ``quote()``. When that yields a BLOB
        literal (``X'<hex>'``), the hexadecimal digits are lower-cased and
        split with dashes into groups of 8-4-4-4 and the remainder, i.e. the
        usual UUID layout for a 16-byte ID. Any other result (for example an
        integer ID) is returned exactly as ``quote()`` gave it.
        """
        if self._uuid is _UNFETCHED:
            with closing(self._db.cursor()) as cur:
                cur.execute(
                    u"SELECT quote(id) from tbl_lexicalunit WHERE id = ?",
                    (self._id,),
                )
                quoted = cur.fetchone()[0]
            # quote() always returns text, so the type of the result says
            # nothing; recognize a BLOB by the form of the literal instead.
            if quoted.startswith(u"X'") and quoted.endswith(u"'"):
                hexdigits = quoted[2:-1].lower()
                self._uuid = u'-'.join((
                    hexdigits[:8],
                    hexdigits[8:12],
                    hexdigits[12:16],
                    hexdigits[16:20],
                    hexdigits[20:],
                ))
            else:
                self._uuid = quoted
        return self._uuid

    @property
    def synset(self):
        """The synset this unit belongs to.

        Only a weak reference to the synset is kept, to avoid a circular
        reference; a new ``Synset`` is created when none was made yet or the
        previous one is gone.
        """
        # Dereference once and hold the strong reference: checking and then
        # dereferencing again could return None if the synset dies between
        # the two calls.
        syn = None if self._syn is _UNFETCHED else self._syn()
        if syn is None:
            syn = Synset(self._db, self._relcache, self._synid)
            self._syn = weakref.ref(syn)
        return syn

    @property
    def definition(self):
        """Value of the ``definition`` column; fetched once, then cached."""
        if self._def is not _UNFETCHED:
            return self._def
        with closing(self._db.cursor()) as cursor:
            cursor.execute(
                u"SELECT definition FROM tbl_lexicalunit WHERE id = ?",
                (self._id,),
            )
            self._def = cursor.fetchone()[0]
        return self._def

    @property
    def sense_examples(self):
        """Tuple of the sense example texts of this unit; cached."""
        if self._exms is not _UNFETCHED:
            return self._exms
        with closing(self._db.cursor()) as cursor:
            cursor.execute(
                u"SELECT example FROM tbl_senseexample WHERE unitid = ?",
                (self._id,),
            )
            self._exms = tuple(example for (example,) in cursor)
        return self._exms

    @property
    def sense_examples_sources(self):
        """Tuple of the ``source`` values of this unit's examples; cached."""
        if self._exms_srcs is not _UNFETCHED:
            return self._exms_srcs
        with closing(self._db.cursor()) as cursor:
            cursor.execute(
                u"SELECT source FROM tbl_senseexample WHERE unitid = ?",
                (self._id,),
            )
            self._exms_srcs = tuple(source for (source,) in cursor)
        return self._exms_srcs

    @property
    def external_links(self):
        """Tuple of the external links of this unit; cached."""
        if self._extl is not _UNFETCHED:
            return self._extl
        with closing(self._db.cursor()) as cursor:
            cursor.execute(
                u"SELECT link FROM tbl_externallink WHERE unitid = ?",
                (self._id,),
            )
            self._extl = tuple(link for (link,) in cursor)
        return self._extl

    @property
    def usage_notes(self):
        """Tuple of the usage notes of this unit; cached."""
        if self._usn is not _UNFETCHED:
            return self._usn
        with closing(self._db.cursor()) as cursor:
            cursor.execute(
                u"SELECT note FROM tbl_usagenote WHERE unitid = ?",
                (self._id,),
            )
            self._usn = tuple(note for (note,) in cursor)
        return self._usn

    @property
    def domain(self):
        """Domain of this unit as ``en.Domain``; fetched once, then cached."""
        if self._dom is _UNFETCHED:
            with closing(self._db.cursor()) as cursor:
                cursor.execute(
                    u"""
                    SELECT tbl_domain.value
                    FROM tbl_lexicalunit JOIN tbl_domain
                        ON tbl_lexicalunit.domain = tbl_domain.id
                    WHERE tbl_lexicalunit.id = ?
                    """,
                    (self._id,),
                )
                domain_row = cursor.fetchone()
                self._dom = en.Domain(domain_row[0])
        return self._dom

    @property
    def verb_aspect(self):
        """Verb aspect as ``en.VerbAspect``, or ``None``; cached.

        ``None`` results when the join with ``tbl_verbaspect`` yields no row
        for this unit.
        """
        if self._va is _UNFETCHED:
            with closing(self._db.cursor()) as cursor:
                cursor.execute(
                    u"""
                    SELECT tbl_verbaspect.value
                    FROM tbl_lexicalunit JOIN tbl_verbaspect
                        ON tbl_lexicalunit.verbaspect = tbl_verbaspect.id
                    WHERE tbl_lexicalunit.id = ?
                    """,
                    (self._id,),
                )
                aspect_row = cursor.fetchone()
            if aspect_row is None:
                self._va = None
            else:
                self._va = en.VerbAspect(aspect_row[0])
        return self._va

    @property
    def is_emotional(self):
        """The ``isemotional`` column as ``bool``, or ``None`` if NULL."""
        if self._is_emo is not _UNFETCHED:
            return self._is_emo
        with closing(self._db.cursor()) as cursor:
            cursor.execute(
                u"SELECT isemotional FROM tbl_lexicalunit WHERE id = ?",
                (self._id,),
            )
            flag = cursor.fetchone()[0]
        # NULL stays distinguishable from a stored false value.
        self._is_emo = bool(flag) if flag is not None else None
        return self._is_emo

    @property
    def emotion_markedness(self):
        """Emotion markedness as ``en.EmotionMarkedness``, or ``None``.

        ``None`` results when the join with ``tbl_emotionmark`` yields no row
        for this unit. The value is fetched once, then cached.
        """
        if self._emo_mark is _UNFETCHED:
            with closing(self._db.cursor()) as cursor:
                cursor.execute(
                    u"""
                    SELECT tbl_emotionmark.value
                    FROM tbl_lexicalunit JOIN tbl_emotionmark
                        ON tbl_lexicalunit.emotionmark = tbl_emotionmark.id
                    WHERE tbl_lexicalunit.id = ?
                    """,
                    (self._id,),
                )
                mark_row = cursor.fetchone()
            if mark_row is None:
                self._emo_mark = None
            else:
                self._emo_mark = en.EmotionMarkedness(mark_row[0])
        return self._emo_mark

    @property
    def emotion_names(self):
        """Tuple of ``en.EmotionName`` of this unit, ordered by value."""
        if self._emo_names is _UNFETCHED:
            with closing(self._db.cursor()) as cursor:
                cursor.execute(
                    u"""
                    SELECT tbl_emotionname.value
                    FROM tbl_emotionname JOIN tbl_unitemotionname
                        ON tbl_emotionname.id = tbl_unitemotionname.nameid
                    WHERE tbl_unitemotionname.unitid = ?
                    ORDER BY tbl_emotionname.value
                    """,
                    (self._id,),
                )
                raw_names = [row[0] for row in cursor]
            self._emo_names = tuple(
                en.EmotionName(raw) for raw in raw_names
            )
        return self._emo_names

    @property
    def emotion_valuations(self):
        """Tuple of ``en.EmotionValuation`` of this unit, ordered by value."""
        if self._emo_valuations is _UNFETCHED:
            with closing(self._db.cursor()) as cursor:
                cursor.execute(
                    u"""
                    SELECT tbl_emotionvaluation.value
                    FROM tbl_emotionvaluation JOIN tbl_unitemotionvaluation
                        ON tbl_emotionvaluation.id =
                            tbl_unitemotionvaluation.valuationid
                    WHERE tbl_unitemotionvaluation.unitid = ?
                    ORDER BY tbl_emotionvaluation.value
                    """,
                    (self._id,),
                )
                raw_valuations = [row[0] for row in cursor]
            self._emo_valuations = tuple(
                en.EmotionValuation(raw) for raw in raw_valuations
            )
        return self._emo_valuations

    @property
    def emotion_example(self):
        """Value of the ``emotionexample1`` column; fetched once, cached."""
        if self._emo_ex1 is not _UNFETCHED:
            return self._emo_ex1
        with closing(self._db.cursor()) as cursor:
            cursor.execute(
                u"SELECT emotionexample1 FROM tbl_lexicalunit WHERE id = ?",
                (self._id,),
            )
            self._emo_ex1 = cursor.fetchone()[0]
        return self._emo_ex1

    @property
    def emotion_example_secondary(self):
        """Value of the ``emotionexample2`` column; fetched once, cached."""
        if self._emo_ex2 is not _UNFETCHED:
            return self._emo_ex2
        with closing(self._db.cursor()) as cursor:
            cursor.execute(
                u"SELECT emotionexample2 FROM tbl_lexicalunit WHERE id = ?",
                (self._id,),
            )
            self._emo_ex2 = cursor.fetchone()[0]
        return self._emo_ex2

    @property
    def relations(self):
        """Sorted ``RelationInfoTuple`` of relations this unit is source of.

        The database is queried on every access; nothing is cached, as this
        is an informative property that is not expected to be read often.
        """
        with closing(self._db.cursor()) as cursor:
            cursor.execute(
                u"""
                SELECT DISTINCT relationtype
                FROM tbl_lexicalrelation
                WHERE source = ?
                """,
                (self._id,),
            )
            infos = [
                self._relcache.get_info_by_id(
                    reltype_id,
                    en.RelationKind.lexical,
                )
                for (reltype_id,) in cursor
            ]
            infos.sort()
            return RelationInfoTuple(infos)

    def related(self, relation_id=None):
        """Return a tuple of units that are targets of this unit's relations.

        :param relation_id: relation specification, parsed by
            ``_parse_related_relid`` into the relation infos that restrict
            the result.
        """
        relinfos = _parse_related_relid(
            relation_id,
            self._relcache,
            en.RelationKind.lexical,
        )
        with closing(self._db.cursor()) as cursor:
            query = u"""
                SELECT tbl_lexicalunit.id,
                    lemma, tbl_pos.value, variant, synset
                FROM tbl_lexicalrelation
                    JOIN tbl_lexicalunit ON tbl_lexicalunit.id = target
                    JOIN tbl_pos ON tbl_lexicalunit.pos = tbl_pos.id
                WHERE source = ? {}
                """.format(_make_relationtype_where(relinfos))
            # The source ID comes first, then the IDs of the relations.
            params = (self._id,) + tuple(ri._id for ri in (relinfos or ()))
            cursor.execute(query, params)
            return tuple(
                LexicalUnit(self._db, self._relcache, *row)
                for row in cursor
            )

    def related_pairs(self, relation_id=None):
        """Return a tuple of ``(relation info, target unit)`` pairs.

        :param relation_id: relation specification, parsed by
            ``_parse_related_relid`` into the relation infos that restrict
            the result.
        """
        relinfos = _parse_related_relid(
            relation_id,
            self._relcache,
            en.RelationKind.lexical,
        )
        with closing(self._db.cursor()) as cursor:
            query = u"""
                SELECT relationtype,
                    tbl_lexicalunit.id, lemma, tbl_pos.value, variant, synset
                FROM tbl_lexicalrelation
                    JOIN tbl_lexicalunit ON tbl_lexicalunit.id = target
                    JOIN tbl_pos ON tbl_lexicalunit.pos = tbl_pos.id
                WHERE source = ? {}
                """.format(_make_relationtype_where(relinfos))
            # The source ID comes first, then the IDs of the relations.
            params = (self._id,) + tuple(ri._id for ri in (relinfos or ()))
            cursor.execute(query, params)

            pairs = []
            for row in cursor:
                # First column is the relation type, the rest is unit data.
                rel_info = self._relcache.get_info_by_id(
                    row[0],
                    en.RelationKind.lexical,
                )
                unit = LexicalUnit(self._db, self._relcache, *row[1:])
                pairs.append((rel_info, unit))
            return tuple(pairs)


class Synset(bs.SynsetBase):
    # Synset backed by one row of ``tbl_synset``.
    #
    # Only the ID (and optionally the "artificial" flag) is known when the
    # object is created. Every other attribute starts out as the
    # ``_UNFETCHED`` sentinel, is loaded the first time its property is read
    # and is then cached on the instance.

    def __init__(self, conn, relcache, syn_id, syn_art=_UNFETCHED):
        """**NOTE:** This constructor should not be called directly.

        Use :class:`PLWordNet` methods to obtain synsets.
        """
        self._db = conn
        self._relcache = relcache
        self._id = syn_id
        # May be passed in by queries that already selected the flag.
        self._isart = syn_art

        self._uuid = _UNFETCHED
        self._units = _UNFETCHED
        self._def = _UNFETCHED
        self._leg_id = _UNFETCHED

        self._pos = _UNFETCHED
        self._is_polish = _UNFETCHED
        self._is_english = _UNFETCHED

    @property
    def id(self):
        return self._id

    @property
    def pos(self):
        # ``_UNFETCHED`` is a sentinel object, so test identity (like every
        # other property here) rather than going through ``__eq__`` of
        # whatever value is cached.
        if self._pos is _UNFETCHED:
            # Single-element unpacking: raises ValueError unless all units
            # of this synset share exactly one part of speech.
            (self._pos,) = {unit.pos for unit in self.lexical_units}
        return self._pos

    @property
    def is_polish(self):
        # True if at least one unit of the synset is Polish.
        if self._is_polish is _UNFETCHED:
            self._is_polish = any(unit.is_polish
                                  for unit in self.lexical_units)
        return self._is_polish

    @property
    def is_english(self):
        # True if at least one unit of the synset is English.
        if self._is_english is _UNFETCHED:
            self._is_english = any(unit.is_english
                                   for unit in self.lexical_units)
        return self._is_english

    @property
    def lexical_units(self):
        # Tuple of this synset's units, ordered by their ``unitindex``.
        if self._units is _UNFETCHED:
            with closing(self._db.cursor()) as cur:
                cur.execute(
                    u"""
                    SELECT tbl_lexicalunit.id, lemma, tbl_pos.value, variant
                    FROM tbl_lexicalunit
                        JOIN tbl_pos ON tbl_lexicalunit.pos = tbl_pos.id
                    WHERE synset = ?
                    ORDER BY unitindex
                    """,
                    (self._id,),
                )
                # The synset column is not selected: it is this synset's
                # own ID, so pass that as the last constructor argument.
                self._units = tuple(
                    LexicalUnit(
                        self._db,
                        self._relcache,
                        row[0],
                        row[1],
                        row[2],
                        row[3],
                        self._id,
                    )
                    for row in cur
                )
            # A synset is expected to have at least one unit.
            assert self._units
        return self._units

    @property
    def legacy_id(self):
        if self._leg_id is _UNFETCHED:
            with closing(self._db.cursor()) as cur:
                cur.execute(
                    u"SELECT legacy_id FROM tbl_synset WHERE id = ?",
                    (self._id,),
                )
                self._leg_id = cur.fetchone()[0]
        return self._leg_id

    @property
    def uuid(self):
        if self._uuid is _UNFETCHED:
            with closing(self._db.cursor()) as cur:
                cur.execute(
                    u"SELECT quote(id) from tbl_synset WHERE id = ?",
                    (self._id,),
                )
                temp = cur.fetchone()[0]
                if isinstance(temp, str):
                    # ``quote()`` renders a BLOB as ``X'<hex>'``. The slices
                    # skip the ``X'`` prefix and cut the rest into
                    # 8-4-4-4-<remainder> groups joined with dashes; the
                    # final ``[:-1]`` drops the closing quote that is still
                    # attached to the last group.
                    # NOTE(review): assumes the id is stored as a 16-byte
                    # BLOB - confirm against the schema.
                    indices = [2, 10, 14, 18, 22]
                    self._uuid = ('-').join([temp[i:j] for i, j in
                                             zip(indices, indices[1:] + [None])
                                             ]).lower()[:-1]
                else:
                    self._uuid = temp
        return self._uuid

    @property
    def definition(self):
        if self._def is _UNFETCHED:
            with closing(self._db.cursor()) as cur:
                cur.execute(
                    u"SELECT definition FROM tbl_synset WHERE id = ?",
                    (self._id,),
                )
                row = cur.fetchone()
            assert row is not None
            # A NULL definition comes back as ``None`` and is kept as such.
            self._def = row[0]
        return self._def

    @property
    def is_artificial(self):
        if self._isart is _UNFETCHED:
            with closing(self._db.cursor()) as cur:
                cur.execute(
                    u"SELECT isartificial FROM tbl_synset WHERE id = ?",
                    (self._id,),
                )
                row = cur.fetchone()
            assert row is not None
            self._isart = bool(row[0])
        return self._isart

    @property
    def relations(self):
        # Not caching, since this is an informational method that will probably
        # not be called very often.
        with closing(self._db.cursor()) as cur:
            cur.execute(
                u"""
                SELECT DISTINCT relationtype
                FROM tbl_synsetrelation
                WHERE source = ?
                """,
                (self._id,),
            )
            return RelationInfoTuple(sorted(
                self._relcache.get_info_by_id(row[0], en.RelationKind.synset)
                for row in cur
            ))

    def related(self,
                relation_id=None,
                skip_artificial=True,
                _forbidden=None):
        # Return a frozenset of synsets that are targets of this synset's
        # outgoing relation edges, optionally limited to ``relation_id``.
        # With ``skip_artificial`` the rows are passed through
        # ``filter_artificial_related_synsets`` together with ``_forbidden``.

        _forbidden = _forbidden or set()
        relinfos = _parse_related_relid(
            relation_id,
            self._relcache,
            en.RelationKind.synset,
        )
        select_clause = u"SELECT target"
        from_clause = u"FROM tbl_synsetrelation"

        if skip_artificial:
            # The filter needs the artificial flag and the relation type of
            # every edge, so select them along with the target.
            select_clause += u", tbl_synset.isartificial, relationtype"
            from_clause += u" JOIN tbl_synset ON target = tbl_synset.id"
            yield_related = self.__related_withskip
        else:
            yield_related = self.__related_noskip

        with closing(self._db.cursor()) as cur:
            cur.execute(
                u'\n'.join((
                    select_clause,
                    from_clause,
                    u"WHERE source = ? {}".format(
                        _make_relationtype_where(relinfos),
                    ),
                )),
                tuple(itt.chain(
                    (self._id,),
                    (ri._id for ri in (relinfos or ())),
                )),
            )
            # Materialize before the cursor is closed: the helpers return
            # lazy generators over it.
            return frozenset(yield_related(cur, _forbidden))

    def related_pairs(self,
                      relation_id=None,
                      skip_artificial=True,
                      _forbidden=None):
        # Like ``related``, but the frozenset holds
        # ``(relation info, synset)`` pairs instead of bare synsets.

        _forbidden = _forbidden or set()
        relinfos = _parse_related_relid(
            relation_id,
            self._relcache,
            en.RelationKind.synset,
        )
        select_clause = u"SELECT relationtype, target"
        from_clause = u"FROM tbl_synsetrelation"

        if skip_artificial:
            select_clause += u", tbl_synset.isartificial"
            from_clause += u" JOIN tbl_synset ON target = tbl_synset.id"
            yield_related = self.__related_withskip_pairs
        else:
            yield_related = self.__related_noskip_pairs

        with closing(self._db.cursor()) as cur:
            cur.execute(
                u'\n'.join((
                    select_clause,
                    from_clause,
                    u"WHERE source = ? {}".format(
                        _make_relationtype_where(relinfos),
                    ),
                )),
                tuple(itt.chain(
                    (self._id,),
                    (ri._id for ri in (relinfos or ())),
                )),
            )
            return frozenset(yield_related(cur, _forbidden))

    def __related_noskip(self, rowiter, forbidden):
        # Rows are 1-tuples ``(target,)``; ``forbidden`` is unused here and
        # kept only so all row adapters share one signature.
        return (Synset(self._db, self._relcache, synid) for synid, in rowiter)

    def __related_noskip_pairs(self, rowiter, forbidden):
        # Rows are ``(relationtype, target)``.
        return (
            (
                self._relcache.get_info_by_id(relid, en.RelationKind.synset),
                Synset(self._db, self._relcache, synid),
            )
            for relid, synid in rowiter
        )

    def __related_withskip(self, rowiter, forbidden):
        # Rows are ``(target, isartificial, relationtype)``; keep only the
        # synset of each filtered pair.
        return (
            fil_pair[0]
            for fil_pair in self.__inner_related_withskip(rowiter, forbidden)
        )

    def __related_withskip_pairs(self, rowiter, forbidden):
        # Rows arrive as ``(relationtype, target, isartificial)``; reorder
        # them into the column order the shared inner helper expects.
        re_rowiter = (
            (synid, isart, relid)
            for relid, synid, isart in rowiter
        )
        return (
            (relinfo, fil_syn)
            for fil_syn, relinfo in self.__inner_related_withskip(
                re_rowiter,
                forbidden,
            )
        )

    def __inner_related_withskip(self, rowiter, forbidden):
        # Turn ``(target, isartificial, relationtype)`` rows into
        # ``(Synset, relation info)`` pairs and hand them, with
        # ``forbidden``, to the artificial-synset filter.
        return filter_artificial_related_synsets(
            (
                (
                    Synset(self._db, self._relcache, synid, isart),
                    self._relcache.get_info_by_id(
                        relid,
                        en.RelationKind.synset,
                    ),
                )
                for synid, isart, relid in rowiter
            ),
            forbidden,
        )


class RelationInfo(bs.RelationInfoBase):
    # Description of one relation type, identified by an internal ID that is
    # only meaningful together with ``kind``. Name parts, aliases and the
    # legacy ID are read from the database on first access and then cached.

    def __init__(self, db, id_, kind):
        """**NOTE:** This constructor should not be called directly.

        Use :class:`PLWordNet` methods to obtain relation info.
        """
        self._db = db
        # The ID is internal only, and can be used only with ``kind``
        self._id = id_
        self._kind = kind

        # Nothing else is known until a property asks for it.
        self._par = self._name = self._aliases = self._leg_id = _UNFETCHED

    @property
    def legacy_id(self):
        if self._leg_id is not _UNFETCHED:
            return self._leg_id

        with closing(self._db.cursor()) as cur:
            query = u"SELECT legacy_id FROM {} WHERE id = ?".format(
                _RELTYPE_TABLES[self._kind],
            )
            cur.execute(query, (self._id,))
            self._leg_id = cur.fetchone()[0]
        return self._leg_id

    @property
    def kind(self):
        return self._kind

    @property
    def parent(self):
        if self._par is not _UNFETCHED:
            return self._par

        with closing(self._db.cursor()) as cur:
            tables = dict(
                parpart=_RELPARENTPART_TABLES[self._kind],
                reltype=_RELTYPE_TABLES[self._kind],
            )
            cur.execute(
                u"""
                    SELECT name
                    FROM {parpart} JOIN {reltype} ON {parpart}.id = parentpart
                    WHERE {reltype}.id = ?
                    """.format(**tables),
                (self._id,),
            )
            row = cur.fetchone()
        assert row is not None
        # Convert the bogus '' value back to proper None
        self._par = row[0] or None
        return self._par

    @property
    def name(self):
        if self._name is not _UNFETCHED:
            return self._name

        with closing(self._db.cursor()) as cur:
            tables = dict(
                chlpart=_RELCHILDPART_TABLES[self._kind],
                reltype=_RELTYPE_TABLES[self._kind],
            )
            cur.execute(
                u"""
                    SELECT name
                    FROM {chlpart} JOIN {reltype} ON {chlpart}.id = childpart
                    WHERE {reltype}.id = ?
                    """.format(**tables),
                (self._id,),
            )
            row = cur.fetchone()
        assert row is not None
        self._name = row[0]
        return self._name

    @property
    def aliases(self):
        if self._aliases is not _UNFETCHED:
            return self._aliases

        with closing(self._db.cursor()) as cur:
            cur.execute(
                u"""
                    SELECT name FROM {}
                    WHERE relationid = ?
                    ORDER BY name
                    """.format(_RELALIAS_TABLES[self._kind]),
                (self._id,),
            )
            # One alias per row, already sorted by the query.
            self._aliases = tuple([row[0] for row in cur.fetchall()])
        return self._aliases


class _DBBuilder(object):

    def __init__(self, db):
        self._db = db
        self._node_handlers = {
            nd.SynsetNode: self._insert_synset,
            nd.LexicalUnitNode: self._insert_unit,
            nd.RelationTypeNode: self._insert_relation_type,
        }
        # Ad-hoc relations (for cases where we don't have relation type nodes)
        # need to be added later to weed out nonexistent ones targets and
        # avoid foreign key failures (which are a bit obtuse in sqlite3).
        self._adhoc_synrels = {}
        self._adhoc_lexrels = {}
        # Synset to lexical units relations also need to be deferred.
        self._synid2lexids = coll.defaultdict(list)
        # Cache IDs of constant values
        with closing(db.execute(u"SELECT value, id FROM tbl_pos")) as cur:
            self._posids = dict(cur)
        with closing(db.execute(u"SELECT value, id FROM tbl_verbaspect")) \
                as cur:
            self._vaids = dict(cur)
        with closing(db.execute(u"SELECT value, id FROM tbl_emotionmark")) \
                as cur:
            self._emids = dict(cur)
        with closing(db.execute(u"SELECT value, id FROM tbl_emotionname")) \
                as cur:
            self._enids = dict(cur)
        with closing(db.execute(u"""
                SELECT value, id
                FROM tbl_emotionvaluation
                """)) as cur:
            self._evids = dict(cur)
        with closing(db.execute(u"SELECT value, id FROM tbl_domain")) as cur:
            self._dmids = dict(cur)

    def __call__(self, reader):
        with self._db:
            for node in reader:
                self._node_handlers[type(node)](node)

        with self._db:
            self._finalize_units()

        with self._db:
            self._prune_empty_synsets()

        with self._db:
            self._finalize_related(
                self._adhoc_synrels,
                en.RelationKind.synset,
            )
            self._finalize_related(
                self._adhoc_lexrels,
                en.RelationKind.lexical,
            )

    def _insert_synset(self, syn_node):
        self._db.execute(
            u"""
            INSERT INTO tbl_synset (id, legacy_id, definition, isartificial)
            VALUES (?, ?, ?, ?)
            """,
            (syn_node.id, syn_node.legacy_id, syn_node.definition,
             syn_node.is_artificial),
        ).close()
        # Related go into temp storage
        self._adhoc_synrels[syn_node.id] = syn_node.related

    def _insert_unit(self, lu_node):
        # Unfortunately, we can't insert into DB until we have all synsets. So
        # save nodes in temp dict.
        self._synid2lexids[lu_node.synset].append(lu_node)
        # But deal with relations
        self._adhoc_lexrels[lu_node.id] = lu_node.related

    def _insert_relation_type(self, rel_node):
        type_tbl = _RELTYPE_TABLES[rel_node.kind]
        parent_tbl = _RELPARENTPART_TABLES[rel_node.kind]
        child_tbl = _RELCHILDPART_TABLES[rel_node.kind]

        with closing(self._db.cursor()) as cur:
            # Ensure the name is there
            parname_id = self._ensure_rel_part_name(
                parent_tbl,
                rel_node.parent or u'',
            )
            childname_id = self._ensure_rel_part_name(child_tbl, rel_node.name)
            # And now the relation itself
            cur.execute(
                u"""
                INSERT INTO {} (legacy_id, parentpart, childpart)
                VALUES (?, ?, ?)
                """.format(type_tbl),
                (rel_node.legacy_id, parname_id, childname_id),
            )
            # Do aliases if present
            if rel_node.aliases:
                rel_id = cur.lastrowid
                alias_tbl = _RELALIAS_TABLES[rel_node.kind]
                cur.executemany(
                    u"INSERT INTO {} (name, relationid) VALUES (?, ?)"
                    .format(alias_tbl),
                    ((nam, rel_id) for nam in rel_node.aliases),
                )

    def _finalize_units(self):
        # All synsets are in, can add units now.
        with closing(self._db.cursor()) as cur:
            for synid, lu_nodes in six.iteritems(self._synid2lexids):
                for lu_node in lu_nodes:
                    try:
                        cur.execute(
                            u"""
                            INSERT INTO tbl_lexicalunit (
                                id, legacy_id, lemma, pos, variant,
                                synset, unitindex,
                                definition, domain, verbaspect,
                                isemotional, emotionmark,
                                emotionexample1, emotionexample2
                            )
                            VALUES (
                                :id, :legacy_id, :lemma, :pos, :var,
                                :syn, :uidx,
                                :def, :dom, :va,
                                :emo_is, :emo_m,
                                :emo_ex1, :emo_ex2
                            )
                            """,
                            {
                                u'id': lu_node.id,
                                u'legacy_id': lu_node.legacy_id,
                                u'lemma': lu_node.lemma,
                                u'pos': self._posids[lu_node.pos.value],
                                u'var': lu_node.variant,
                                u'syn': lu_node.synset,
                                u'uidx': lu_node.unit_index,
                                u'def': lu_node.definition,
                                u'dom': self._dmids[lu_node.domain.value],
                                u'va': None
                                if lu_node.verb_aspect is None
                                else self._vaids[lu_node.verb_aspect.value],
                                u'emo_is': lu_node.is_emotional,
                                u'emo_m': None
                                if lu_node.emotion_markedness is None
                                else self._emids[
                                    lu_node.emotion_markedness.value
                                ],
                                u'emo_ex1': lu_node.emotion_example_1,
                                u'emo_ex2': lu_node.emotion_example_2,
                            },
                        )
                    except sqlite3.IntegrityError:
                        _LOG.exception(
                            'Pair (synset=%d, unitindex=%d) of unit %d '
                            'causes integrity error',
                            lu_node.synset,
                            lu_node.unit_index,
                            lu_node.id,
                        )
                        # Drop relations for this unit, if any
                        self._adhoc_lexrels.pop(lu_node.id, None)
                        continue

                    cur.executemany(
                        u"""
                        INSERT INTO tbl_senseexample (unitid, example, source)
                        VALUES (?, ?, ?)
                        """,
                        (
                            (lu_node.id, exm, exm_src)
                            for exm, exm_src in zip(lu_node.examples,
                                                    lu_node.examples_sources)
                        ),
                    )
                    cur.executemany(
                        u"""
                        INSERT INTO tbl_usagenote (unitid, note)
                        VALUES (?, ?)
                        """,
                        ((lu_node.id, note) for note in lu_node.usage_notes),
                    )
                    cur.executemany(
                        u"""
                        INSERT INTO tbl_externallink (unitid, link)
                        VALUES (?, ?)
                        """,
                        ((lu_node.id, link)
                         for link in lu_node.external_links),
                    )
                    cur.executemany(
                        u"""
                        INSERT INTO tbl_unitemotionname (unitid, nameid)
                        VALUES (?, ?)
                        """,
                        (
                            (lu_node.id, self._enids[emo_name.value])
                            for emo_name in lu_node.emotion_names
                        ),
                    )
                    cur.executemany(
                        u"""
                        INSERT INTO tbl_unitemotionvaluation
                            (unitid, valuationid)
                        VALUES (?, ?)
                        """,
                        (
                            (lu_node.id, self._evids[emo_val.value])
                            for emo_val in lu_node.emotion_valuations
                        ),
                    )

    def _finalize_related(self, related, kind):
        # Insert all relation names from the related dict as global-level
        # relations, if they have no SEP in them. If such relations are not
        # defined, define them. If relation names do have SEP in them, don't
        # try defining them, just assume the types are known and try getting ad
        # their IDs.
        with closing(self._db.cursor()) as cur:
            for source_id, related_pairs in six.iteritems(related):
                for relation_name, target_id in related_pairs:
                    relname_parent, relname_child = RelationInfo.split_name(
                        relation_name,
                    )
                    try:
                        rel_id = (
                            self._get_child_relation(relname_child, kind)
                            if relname_parent is None
                            else self._get_full_relation(
                                relname_parent,
                                relname_child,
                                kind,
                            )
                        )
                    except exc.InvalidRelationTypeException:
                        _LOG.exception(
                            'Relation "%s" (between %d --> %d) unknown, '
                            'dropped',
                            relation_name,
                            source_id,
                            target_id,
                        )
                        continue

                    try:
                        cur.execute(
                            u"""
                            INSERT INTO {} (source, relationtype, target)
                            VALUES (?, ?, ?)
                            """.format(_RELINST_TABLES[kind]),
                            (source_id, rel_id, target_id),
                        )
                    except sqlite3.IntegrityError:
                        _LOG.exception(
                            'Relation "%s" between %d --> %d causes error, '
                            'dropped',
                            relation_name,
                            source_id,
                            target_id,
                        )

    def _get_child_relation(self, relation_name, kind):
        type_tbl = _RELTYPE_TABLES[kind]
        parent_tbl = _RELPARENTPART_TABLES[kind]
        child_tbl = _RELCHILDPART_TABLES[kind]

        with closing(self._db.cursor()) as cur:
            # Get the special empty string parent, since it will be used
            # several times.
            empty_parent_id = self._ensure_rel_part_name(parent_tbl, u'')
            child_id = self._ensure_rel_part_name(child_tbl, relation_name)
            # Now, try selecting the relation with empty parent. Otherwise,
            # just add it.
            cur.execute(
                u"SELECT id FROM {} WHERE parentpart = ? AND childpart = ?"
                .format(type_tbl),
                (empty_parent_id, child_id),
            )
            row = cur.fetchone()

            if row is not None:
                return row[0]

            cur.execute(
                u"""
                INSERT INTO {} (parentpart, childpart)
                VALUES (?, ?)
                """
                .format(type_tbl),
                (empty_parent_id, child_id),
            )
            return cur.lastrowid

    def _get_full_relation(self, parent_name, child_name, kind):
        # For full relation names, only try selecting them, not adding the
        # types, to reduce complexity.
        with closing(self._db.cursor()) as cur:
            cur.execute(
                u"""
                SELECT {reltype}.id
                FROM {reltype}
                    JOIN {parpart} ON parentpart = {parpart}.id
                    JOIN {chlpart} ON childpart = {chlpart}.id
                WHERE {parpart}.name = ? AND {chlpart}.name = ?
                """.format(
                    reltype=_RELTYPE_TABLES[kind],
                    parpart=_RELPARENTPART_TABLES[kind],
                    chlpart=_RELCHILDPART_TABLES[kind],
                ),
                (parent_name, child_name),
            )
            row = cur.fetchone()
            if row is None:
                raise exc.InvalidRelationTypeException(
                    kind,
                    (parent_name, child_name),
                )
            return row[0]

    def _ensure_rel_part_name(self, tbl_name, rel_name):
        with closing(self._db.cursor()) as cur:
            # Is the name in already?
            cur.execute(
                u"SELECT id FROM {} WHERE name = ?".format(tbl_name),
                (rel_name,),
            )
            row = cur.fetchone()

            if row is not None:
                return row[0]

            # Insert it then
            cur.execute(
                u"INSERT INTO {} (name) VALUES (?)".format(tbl_name),
                (rel_name,),
            )

            return cur.lastrowid

    def _prune_empty_synsets(self):
        with closing(self._db.cursor()) as cur:
            cur.execute(
                u"""
                SELECT tbl_synset.id
                FROM tbl_synset
                    LEFT JOIN tbl_lexicalunit
                    ON tbl_synset.id = tbl_lexicalunit.synset
                WHERE tbl_lexicalunit.synset IS NULL
                """,
            )
            empties = tuple(row[0] for row in cur)

        if not empties:
            # All clear!
            return

        for synid in empties:
            _LOG.warning('Synset %d is empty', synid)

        self._db.execute(
            u"DELETE FROM synset WHERE id IN ({})".format(
                u','.join(u'?' * len(empties))
            ),
            empties,
        ).close()


class _RelCache(object):
    """Per-connection cache of relation type IDs and info objects.

    ``_ids`` maps, per relation kind, a relation name to the tuple of IDs it
    resolved to; ``_infos`` maps, per kind, an ID to its info object.
    """

    def __init__(self, db):
        kinds = (en.RelationKind.synset, en.RelationKind.lexical)
        self._db = db
        self._ids = {kind: {} for kind in kinds}
        self._infos = {kind: _RelCacheInfoDict(db, kind) for kind in kinds}

    def get_ids(self, relname, kind, allow_nonexistent=False):
        """Resolve ``relname`` to a tuple of relation type IDs.

        Unknown names raise ``InvalidRelationTypeException``, unless
        ``allow_nonexistent`` is set, in which case ``()`` is returned (and
        not cached).
        """
        known = self._ids[kind]
        if relname in known:
            return known[relname]

        # If this is a full name (with parent and child), get that.
        # Otherwise, check alias, childname and parentname - in that order.
        # For bare parentname, return not one ID, but a set of all children
        # IDs.
        # Finally, if that fails, just raise an exception.

        try:
            parent, name = RelationInfo.split_name(relname)
        except ValueError:
            raise exc.InvalidRelationTypeException(kind, relname)

        if parent is not None:
            ids = self._find_by_fullname(parent, name, kind)
        else:
            ids = None
            finders = (
                self._find_by_alias,
                self._find_by_childname,
                self._find_by_parentname,
            )
            for finder in finders:
                ids = finder(name, kind)
                if ids is not None:
                    break

        if ids is not None:
            known[relname] = ids
            return ids

        if allow_nonexistent:
            return ()
        raise exc.InvalidRelationTypeException(kind, relname)

    def get_infos_by_name(self, relname, kind, allow_nonexistent=False):
        """Return the info objects of every ID that ``relname`` resolves to."""
        by_id = self._infos[kind]
        resolved = self.get_ids(relname, kind, allow_nonexistent)
        return tuple([by_id[one_id] for one_id in resolved])

    def get_info_by_id(self, id_, kind):
        """Return the info object for one relation type ID."""
        infos_of_kind = self._infos[kind]
        return infos_of_kind[id_]

    def get_all_of_kind(self, kind):
        """Return info objects for all stored relation types of ``kind``."""
        with closing(self._db.cursor()) as cur:
            cur.execute(u"SELECT id FROM {}".format(_RELTYPE_TABLES[kind]))
            collected = []
            for row in cur:
                collected.append(self._infos[kind][row[0]])
            return tuple(collected)

    def ensure_infos(self, item, kind):
        """Normalize an info object, an integer ID or a name to a tuple of
        info objects.

        Raises ``TypeError`` for any other type of ``item``.
        """
        if isinstance(item, RelationInfo):
            return (item,)
        if isinstance(item, six.integer_types):
            return (self.get_info_by_id(item, kind),)
        if isinstance(item, six.string_types):
            return self.get_infos_by_name(item, kind)
        raise TypeError(
            repr(item) + ' is not an integer, string or RelationInfo',
        )

    def _find_by_fullname(self, parent, child, kind):
        """Look up one type by parent and child name; ``None`` if absent."""
        with closing(self._db.cursor()) as cur:
            names = dict(
                reltype=_RELTYPE_TABLES[kind],
                parpart=_RELPARENTPART_TABLES[kind],
                chlpart=_RELCHILDPART_TABLES[kind],
            )
            cur.execute(
                u"""
                SELECT {reltype}.id
                FROM {reltype}
                    JOIN {parpart} ON parentpart = {parpart}.id
                    JOIN {chlpart} ON childpart = {chlpart}.id
                WHERE {parpart}.name = ? AND {chlpart}.name = ?
                """.format(**names),
                (parent or u'', child),
            )
            row = cur.fetchone()
        if row is None:
            return None
        return tuple(row)

    def _find_by_alias(self, name, kind):
        """Look up one type by alias; ``None`` if there is no such alias."""
        with closing(self._db.cursor()) as cur:
            query = u"SELECT relationid FROM {} WHERE name = ?".format(
                _RELALIAS_TABLES[kind],
            )
            cur.execute(query, (name,))
            row = cur.fetchone()
        if row is None:
            return None
        return tuple(row)

    def _find_by_childname(self, name, kind):
        """Look up one type by bare child name; ``None`` if absent.

        Raises ``AmbiguousRelationTypeException`` if several types share the
        child name.
        """
        with closing(self._db.cursor()) as cur:
            names = dict(
                reltype=_RELTYPE_TABLES[kind],
                chlpart=_RELCHILDPART_TABLES[kind],
            )
            cur.execute(
                u"""
                SELECT {reltype}.id
                FROM {reltype} JOIN {chlpart} ON childpart = {chlpart}.id
                WHERE name = ?
                """.format(**names),
                (name,),
            )
            rows = cur.fetchall()
        if len(rows) > 1:
            raise exc.AmbiguousRelationTypeException(name)
        if not rows:
            return None
        return tuple(rows[0])

    def _find_by_parentname(self, name, kind):
        """Look up all types sharing a parent name; ``None`` if none do."""
        # This one can by design return a set of values: all children of a
        # relation.
        with closing(self._db.cursor()) as cur:
            names = dict(
                reltype=_RELTYPE_TABLES[kind],
                parpart=_RELPARENTPART_TABLES[kind],
            )
            cur.execute(
                u"""
                SELECT {reltype}.id
                FROM {reltype} JOIN {parpart} ON parentpart = {parpart}.id
                WHERE name = ?
                """.format(**names),
                (name,),
            )
            children = tuple([row[0] for row in cur])
            return children or None


class _RelCacheInfoDict(dict):
    """Dict of relation ID to ``RelationInfo`` that fills itself on demand.

    Looking up an ID that is not stored yet creates the info object for it
    (bound to this dict's connection and relation kind) and remembers it.
    """

    def __init__(self, db, kind):
        super(_RelCacheInfoDict, self).__init__()
        self.__kind = kind
        self.__db = db

    def __missing__(self, id_):
        # Only reached when ``id_`` is absent, so ``setdefault`` always
        # stores and returns the freshly created object.
        return self.setdefault(id_, RelationInfo(self.__db, id_, self.__kind))


def _make_include_exclude(include, exclude, fieldname):
    """Build a ``WHERE`` clause and its parameters for ``IN`` / ``NOT IN``.

    ``include`` and ``exclude`` are sized collections of values (or ``None``
    to leave that side out). Returns ``(clause, params)``: the clause is an
    empty string when both are ``None``, and ``params`` lists the include
    values followed by the exclude values.
    """
    clause = u''
    params = ()

    if include is not None:
        clause = u"WHERE {} IN ({})".format(fieldname, _qmarks(len(include)))
        params += tuple(include)

    if exclude is not None:
        # Extend the clause built so far, or start one if there is none.
        template = (
            clause + u" AND {} NOT IN ({})"
            if clause
            else u"WHERE {} NOT IN ({})"
        )
        clause = template.format(fieldname, _qmarks(len(exclude)))
        params += tuple(exclude)

    return clause, params


def _parse_related_relid(relid, relcache, relkind):
    """Normalize a ``relation_id`` argument to relation info objects.

    :param relid: ``None``, a single relation identifier, or a non-string
        iterable of identifiers. Each identifier is anything
        ``relcache.ensure_infos`` accepts.
    :param relcache: the relation cache used to resolve identifiers.
    :param relkind: the relation kind the identifiers belong to.

    :return: ``None`` if ``relid`` is ``None`` (meaning "no restriction");
        a ``frozenset`` of all resolved infos for an iterable; otherwise
        whatever ``relcache.ensure_infos`` returns for the single identifier.
    """
    if relid is None:
        return None

    # The ABC aliases in ``collections`` were removed in Python 3.10; they
    # live in ``collections.abc``, which does not exist on Python 2.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    # Strings are iterable too, but name a single relation.
    if (isinstance(relid, Iterable) and
            not isinstance(relid, six.string_types)):
        return frozenset(itt.chain.from_iterable(
            relcache.ensure_infos(r, relkind)
            for r in relid
        ))

    return relcache.ensure_infos(relid, relkind)


def _make_relationtype_where(relinfos):
    """Build the relation type filter appended to ``related`` queries.

    Returns an empty string when ``relinfos`` is empty or ``None``;
    otherwise an ``AND relationtype IN (...)`` fragment with one placeholder
    per element of ``relinfos``.
    """
    if not relinfos:
        return u''

    placeholders = _qmarks(len(relinfos))
    return u'AND relationtype IN ({})'.format(placeholders)


def _qmarks(length):
    """Return ``length`` comma-separated ``?`` placeholders for a query."""
    placeholders = [u'?'] * length
    return u','.join(placeholders)


# Module-level alias for this module's storage class.
# NOTE(review): presumably looked up by name by whatever loads storage
# backends - confirm against the loader.
_this_storage_ = PLWordNet
