# coding: utf8

# Copyright (C) 2017 Michał Kaliński
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import absolute_import, division


import xml.etree.ElementTree as et

from .wnschema import WNSchemaProcessor
from .. import enums as en


__all__ = 'WNXMLReader',


# Lookup from the "pos" attribute of <lexical-unit> nodes to part-of-speech
# enum members.  Every plain label is listed next to its " pwn"-suffixed
# variant, and both map to the same member.
_POSES = {
    u'rzeczownik': en.PoS.n,
    u'rzeczownik pwn': en.PoS.n,
    u'czasownik': en.PoS.v,
    u'czasownik pwn': en.PoS.v,
    u'przymiotnik': en.PoS.adj,
    u'przymiotnik pwn': en.PoS.adj,
    u'przysłówek': en.PoS.adv,
    u'przysłówek pwn': en.PoS.adv,
}

# Lookup from the "type" attribute of <relationtypes> nodes to relation
# kinds.  Two distinct labels are treated as lexical relations.
_RELKINDS = {
    u'relacja leksykalna': en.RelationKind.lexical,
    u'relacja synonimii': en.RelationKind.lexical,
    u'relacja pomiędzy synsetami': en.RelationKind.synset,
}

# Textual booleans as they are spelled in node attributes
# (for example abstract="false" on <synset>).
_BOOLVALUES = {
    u'false': False,
    u'true': True,
}


# Examples of nodes that this reader is supposed to parse:
# <lexical-unit id="478387" name=".22" pos="rzeczownik pwn"
#     tagcount="0" domain="wytw" desc="" workstate="Nie przetworzone"
#     source="użytkownika" variant="1"/>
# <lexicalrelations parent="107360" child="61999" relation="104"
#     valid="true" owner=""/>
# <relationtypes id="242" type="relacja leksykalna"
#     name="rola: materiał"
#     description="Relacja roli: materiału jest wyjątkową relacją roli,
# łączącą przymiotniki materiałowe z ich podstawami rzeczownikowymi nazwami
# substancji i materiałów."
#     posstr="rzeczownik,przymiotnik"
#     display="&lt;x#> jest zrobione z &lt;y#>" shortcut="mat"
#     autoreverse="false" pwn="">
# Child relation types have the additional "parent" attribute.
# <relationtypes id="35" type="relacja leksykalna" parent="32"
#     name="pacjens|obiekt" description="(dziedziczone)"
#     posstr="(dziedziczone)"
#     display="&lt;x#> jest pacjensem dla czynności wyrażanej przez &lt;y#>"
#     shortcut="rol:pacj" autoreverse="false" pwn="p_rp">
# <synset id="12" workstate="Nie przetworzone" split="1" owner=""
#     definition="" desc="" abstract="false">
#     <unit-id>12</unit-id>
#     <unit-id>10191</unit-id>
# </synset>
# <synsetrelations parent="1366" child="551" relation="10"
#     valid="true" owner=""/>

class WNXMLReader(object):
    """Reader of wordnet XML dumps.

    The XML file is parsed incrementally.  Attribute values of every
    recognised node are converted and handed to a ``WNSchemaProcessor``;
    after the whole file has been read, iterating the reader yields whatever
    the processor's ``finalize()`` produces.
    """

    def __init__(self, wordnet_xml_file):
        """Set up the reader.

        :param wordnet_xml_file: source of the XML data; it is passed
            unchanged to ``xml.etree.ElementTree.iterparse``.
        """
        self._wnxml_file = wordnet_xml_file
        self._schema = WNSchemaProcessor()

        # Tag name -> callable taking the finished element.  Tags missing
        # from this mapping are ignored while parsing.
        self._dispatch = {
            u'lexical-unit': self._proc_lexunit,
            u'synset': self._proc_synset,
            u'relationtypes': self._proc_reltype,
            u'synsetrelations': _make_proc_relinst(
                self._schema.take_synset_relation,
            ),
            u'lexicalrelations': _make_proc_relinst(
                self._schema.take_lexical_relation,
            ),
        }

    def __iter__(self):
        """Parse the file and yield the nodes produced by the schema.

        Nothing is yielded before the whole document has been consumed: all
        elements are first fed to the schema processor, and only then are
        the results of its ``finalize()`` passed on one by one.
        """
        for _, elem in et.iterparse(self._wnxml_file):
            elem_proc = self._dispatch.get(elem.tag)
            if elem_proc is not None:
                elem_proc(elem)
                # iterparse still builds the complete tree behind the
                # scenes.  The processors only pass extracted values on, so
                # the element's attributes and children can be dropped now
                # to keep memory use from growing with the size of the dump.
                # Only dispatched elements are cleared: <unit-id> children
                # must stay intact until their parent <synset> has ended.
                elem.clear()

        for node in self._schema.finalize():
            yield node

    def _proc_reltype(self, elem):
        """Register a relation type described by a <relationtypes> element.

        The numeric id, relation kind, name, shortcut and parent id are
        passed to the schema; the parent id is ``None`` when the element has
        no "parent" attribute.

        :raises KeyError: if the "type" attribute is not a known kind label.
        """
        id_ = int(elem.get('id'))
        kind = _RELKINDS[elem.get('type')]
        parent = elem.get('parent')
        if parent is not None:
            parent = int(parent)

        self._schema.take_relation_type(
            id_,
            kind,
            elem.get('name'),
            elem.get('shortcut'),
            parent,
        )

    def _proc_lexunit(self, elem):
        """Register a lexical unit described by a <lexical-unit> element.

        :raises KeyError: if the "pos" attribute is not a known label.
        :raises ValueError: if the "domain" attribute matches neither a
            value nor an attribute name of ``en.Domain``.
        """
        id_ = int(elem.get('id'))
        var = int(elem.get('variant'))
        pos = _POSES[elem.get('pos')]
        dom = elem.get('domain')
        try:
            dom = en.Domain(dom)
        except ValueError:
            # Not accepted by the constructor; fall back to looking the
            # string up as an attribute name on the domain class.
            try:
                dom = getattr(en.Domain, dom)
            except AttributeError:
                raise ValueError(
                    "The domains found in the dump are incompatible with "
                    "current library version."
                )

        self._schema.take_lexical_unit(
            id_,
            elem.get('name'),
            pos,
            var,
            dom,
            elem.get('desc'),
            None,  # No verb aspect at present
        )

    def _proc_synset(self, elem):
        """Register a synset and its units from a <synset> element.

        The <unit-id> children are registered first, then the synset itself
        with its definition and its "abstract" flag.

        :raises KeyError: if "abstract" is neither "true" nor "false".
        """
        id_ = int(elem.get('id'))
        isart = _BOOLVALUES[elem.get('abstract')]

        self._proc_synset_units(
            id_,
            (uelem for uelem in elem if uelem.tag == u'unit-id'),
        )
        self._schema.take_synset(id_, elem.get('definition'), isart)

    def _proc_synset_units(self, synid, unit_elems):
        """Attach units to synset ``synid`` in document order.

        :param synid: integer id of the synset.
        :param unit_elems: iterable of elements whose text is a unit id.
            Each is passed on with its 1-based position in the iterable.
        """
        for uidx, uelem in enumerate(unit_elems, 1):
            self._schema.take_unit_to_synset(
                int(uelem.text),
                synid,
                uidx,
            )


def _make_proc_relinst(taker):
    """Build an element processor for relation-instance nodes.

    The returned callable reads the "parent", "child" and "relation"
    attributes of an element, converts each to ``int`` and passes them, in
    that order, as positional arguments to ``taker``.

    :param taker: callable accepting ``(parent, child, relation_id)``.
    :returns: a one-argument callable that returns ``None``.
    """
    attr_names = ('parent', 'child', 'relation')

    def elem_proc(elem):
        # All three values are converted before ``taker`` is invoked.
        taker(*[int(elem.get(attr)) for attr in attr_names])

    return elem_proc


# Module-level alias of the reader class defined in this module.
# NOTE(review): presumably looked up by name by the code that loads reader
# modules -- confirm against the importing code.
_this_reader_ = WNXMLReader
