# coding: utf8

# Copyright (C) 2017 Michał Kaliński
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""Implementation of wndb reader."""

from __future__ import absolute_import, division


from contextlib import closing
import io
import logging

import sqlalchemy as sa

from .. import enums as en
from .wnschema import WNSchemaProcessor


# Names exported by ``from ... import *``.
__all__ = ('WNDBReader',)


# Module-level logger; the code below uses it to report database values that
# cannot be converted.
_LOG = logging.getLogger(__name__)

# ``objecttype`` value of the synonymy relation types. Their role is unclear,
# but the relation tables do not seem to use them at all, so the simplest
# thing to do is to leave them out.
_SYNO_REL_OBJTYPE = 2


class WNDBReader(object):
    """Generate UBY-LMF format compatible records from a plWordNet database.

    The database is accessed through SQLAlchemy, which is therefore required
    for this reader to work.
    """

    def __init__(self, wordnet_db_url_file):
        """Create the engine and load the definitions of the needed tables.

        ``wordnet_db_url_file`` is the path of a text file. Its first line,
        with surrounding whitespace stripped, is used as the database URL
        passed to SQLAlchemy.
        """
        with io.open(wordnet_db_url_file) as url_file:
            first_line = url_file.readline()
        self._db_url = first_line.strip()

        self._db_eng = sa.create_engine(self._db_url)
        self._db_meta = sa.MetaData(self._db_eng)

        # One table object for every table queried by the ``_extract_*``
        # methods.
        load_table = self.__mktable
        self._dbt_synset = load_table(u'synset')
        self._dbt_synrel = load_table(u'synsetrelation')
        self._dbt_reltype = load_table(u'relationtype')
        self._dbt_lexunit = load_table(u'lexicalunit')
        self._dbt_lexrel = load_table(u'lexicalrelation')
        self._dbt_uns = load_table(u'unitandsynset')
        self._dbt_emo = load_table(u'emotion')

        self._schema = WNSchemaProcessor()

    def __iter__(self):
        """Feed all tables to the schema processor and yield its nodes.

        This is a generator, so no query is executed until the first node is
        requested.
        """
        extraction_steps = (
            # Relation types depend on nothing, and the other kinds of nodes
            # need the full relation names, so they are read first.
            self._extract_relation_types,
            self._extract_emotions,
            # Lexical units are read before synsets, since they show which
            # synsets are needed.
            self._extract_units,
            self._extract_unit_rels,
            self._extract_uns,
            self._extract_syns,
            self._extract_syn_rels,
        )
        for run_step in extraction_steps:
            run_step()

        for node in self._schema.finalize():
            yield node

    def _extract_relation_types(self):
        """Pass the rows of the ``relationtype`` table to the schema.

        Rows whose ``objecttype`` equals ``_SYNO_REL_OBJTYPE`` are skipped.
        """
        cols = self._dbt_reltype.c
        query = sa.select((
            cols.ID,
            cols.PARENT_ID,
            cols.objecttype,
            cols.name,
            cols.shortcut,
        ))
        with closing(self._db_eng.execute(query)) as rows:
            for row in rows:
                object_type = row[cols.objecttype]
                if object_type == _SYNO_REL_OBJTYPE:
                    continue

                parent_id = row[cols.PARENT_ID]
                if parent_id is None:
                    kind = en.RelationKind.by_db_number(object_type)
                else:
                    # The kind of a relation type that has a parent is
                    # ignored; it will be inherited.
                    kind = None

                self._schema.take_relation_type(
                    row[cols.ID],
                    kind,
                    row[cols.name],
                    row[cols.shortcut],
                    parent_id,
                )

    def _extract_emotions(self):
        """Pass the rows of the ``emotion`` table to the schema.

        A row with a markedness value that cannot be normalized is logged as
        an error and skipped.
        """
        cols = self._dbt_emo.c
        query = sa.select((
            cols.lexicalunit_id,
            cols.emotions,
            cols.valuations,
            cols.markedness,
            cols.unitStatus,
            cols.example1,
            cols.example2,
            cols.super_anotation,
        ))
        with closing(self._db_eng.execute(query)) as rows:
            for row in rows:
                unit_status = bool(row[cols.unitStatus])
                super_annotation = bool(row[cols.super_anotation])
                raw_mark = row[cols.markedness]

                # A missing markedness is passed on as ``None``.
                mark = None
                if raw_mark is not None:
                    try:
                        mark = en.EmotionMarkedness.normalized(raw_mark)
                    except (ValueError, TypeError):
                        _LOG.error(
                            'Value %r is not valid as emotion markedness; '
                            'skipping record %r',
                            raw_mark,
                            row,
                        )
                        continue

                names = _make_emo_tuple(en.EmotionName, row[cols.emotions])
                valuations = _make_emo_tuple(
                    en.EmotionValuation,
                    row[cols.valuations],
                )
                self._schema.take_emotion(
                    row[cols.lexicalunit_id],
                    mark,
                    names,
                    valuations,
                    row[cols.example1],
                    row[cols.example2],
                    unit_status,
                    super_annotation,
                )

    def _extract_units(self):
        """Pass the rows of the ``lexicalunit`` table to the schema.

        The numeric ``pos``, ``domain`` and ``verb_aspect`` columns are
        converted to enum values first.
        """
        cols = self._dbt_lexunit.c
        query = sa.select((
            cols.ID,
            cols.lemma,
            cols.pos,
            cols.variant,
            cols.domain,
            cols.comment,
            cols.verb_aspect,
        ))
        with closing(self._db_eng.execute(query)) as rows:
            for row in rows:
                pos = en.PoS.by_db_number(row[cols.pos])
                domain = en.Domain.by_db_number(row[cols.domain])
                # NOTE(review): the extra ``True`` is passed only for the
                # verb aspect; presumably it makes the lookup tolerant of
                # missing values -- confirm in the enums module.
                aspect = en.VerbAspect.by_db_number(
                    row[cols.verb_aspect],
                    True,
                )
                self._schema.take_lexical_unit(
                    row[cols.ID],
                    row[cols.lemma],
                    pos,
                    row[cols.variant],
                    domain,
                    row[cols.comment],
                    aspect,
                )

    def _extract_uns(self):
        """Pass the rows of the ``unitandsynset`` table to the schema."""
        cols = self._dbt_uns.c
        query = sa.select((cols.SYN_ID, cols.LEX_ID, cols.unitindex))
        with closing(self._db_eng.execute(query)) as rows:
            for row in rows:
                # The schema takes the unit id first, then the synset id.
                lex_id = row[cols.LEX_ID]
                syn_id = row[cols.SYN_ID]
                self._schema.take_unit_to_synset(
                    lex_id,
                    syn_id,
                    row[cols.unitindex],
                )

    def _extract_unit_rels(self):
        """Pass the rows of the ``lexicalrelation`` table to the schema."""
        cols = self._dbt_lexrel.c
        query = sa.select((cols.PARENT_ID, cols.CHILD_ID, cols.REL_ID))
        with closing(self._db_eng.execute(query)) as rows:
            for row in rows:
                parent_id = row[cols.PARENT_ID]
                child_id = row[cols.CHILD_ID]
                self._schema.take_lexical_relation(
                    parent_id,
                    child_id,
                    row[cols.REL_ID],
                )

    def _extract_syns(self):
        """Pass the rows of the ``synset`` table to the schema."""
        cols = self._dbt_synset.c
        query = sa.select((cols.ID, cols.isabstract, cols.definition))
        with closing(self._db_eng.execute(query)) as rows:
            for row in rows:
                synset_id = row[cols.ID]
                definition = row[cols.definition]
                # The abstractness flag is handed over as a real boolean.
                is_abstract = bool(row[cols.isabstract])
                self._schema.take_synset(synset_id, definition, is_abstract)

    def _extract_syn_rels(self):
        """Pass the rows of the ``synsetrelation`` table to the schema."""
        cols = self._dbt_synrel.c
        query = sa.select((cols.PARENT_ID, cols.CHILD_ID, cols.REL_ID))
        with closing(self._db_eng.execute(query)) as rows:
            for row in rows:
                parent_id = row[cols.PARENT_ID]
                child_id = row[cols.CHILD_ID]
                self._schema.take_synset_relation(
                    parent_id,
                    child_id,
                    row[cols.REL_ID],
                )

    def __mktable(self, table_name):
        """Return the named table, autoloaded through the reader's metadata."""
        table = sa.Table(table_name, self._db_meta, autoload=True)
        return table


def _make_enums_from_values(enclass, valiter):
    """Lazily convert the items of ``valiter`` by calling ``enclass``.

    Each successfully converted item is yielded in input order. An item for
    which ``enclass`` raises ``ValueError`` is logged as an error and left
    out of the output.
    """
    for raw_value in valiter:
        try:
            member = enclass(raw_value)
        except ValueError:
            _LOG.error('Value %r is not valid for %r', raw_value, enclass)
            continue
        yield member


def _make_emo_tuple(enclass, emoval):
    """Turn a ``;``-separated string into a tuple of ``enclass`` values.

    ``None`` gives an empty tuple. Otherwise the string is split on ``;``
    and the pieces are converted with ``_make_enums_from_values``. The
    converted values pass through a ``frozenset``, so duplicates are dropped
    and the order of the resulting tuple is arbitrary.
    """
    if emoval is None:
        return ()

    # Skip empty elements in the values sequence (some people just append a
    # ";").
    nonempty_items = (part for part in emoval.split(u';') if part)
    unique_members = frozenset(
        _make_enums_from_values(enclass, nonempty_items)
    )
    return tuple(unique_members)


# Module-level alias for the reader class defined in this module.
# NOTE(review): presumably looked up by name by whatever code loads reader
# modules -- confirm against the package's reader discovery logic.
_this_reader_ = WNDBReader
