# coding: utf8

# Copyright (C) 2017 Michał Kaliński
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Wrappers around synset-yielding generators.

They handle exclusion of artificial synsets.

The algorithm used here is pretty simple. When a relation edge reaches an
artificial synset, a matching relation is searched in edges originating from
the artificial synset. If found, the node it leads to is treated as target of
the relation from the source node.

If there is no matching relation, the edge to the artificial synset is treated
as nonexistent.
"""

# XXX Both functions maintain a set of the items that have already been
# yielded, to ensure that the wrappers will not yield the same item more than
# once.
# This makes them slower and uses up more memory, but in practice plWordNet
# structure is very unreliable with regard to not having loops and multiple
# paths.
# Look into removing these set objects only if speed / memory somehow becomes
# a concern.
# XXX At the same time, an assumption about plWordNet structure is made: that
# no lexical unit belonging to an artificial synset is connected to any other
# lexical unit by lexical relations. Surely, that should be easy to maintain?

from __future__ import absolute_import, division


# Names exported by a star import of this module: only the two filtering
# wrappers. The ``_inner_filter`` helper they share is deliberately left out.
__all__ = (
    'filter_artificial_related_synsets',
    'filter_artificial_synset_edges',
)


def filter_artificial_related_synsets(syn_and_relation_iter, forbidden=None):
    """Filter a related synsets iterable, skipping over artificial synsets.

    Every target synset is passed through ``_inner_filter`` together with its
    relation; whatever the filter produces is yielded paired with that same
    relation.

    :param syn_and_relation_iter: Iterable of pairs of
        ``(target_synset, relation)``. The relation is needed to generate
        edges skipping over the artificial synset.
    :type syn_and_relation_iter: Iterable[Tuple[SynsetBase, RelationInfoBase]]
    :param forbidden: Set of IDs of synsets that have already been visited
        and must not be yielded again. It is updated in place, so a caller
        can share one set between several calls. If ``None`` (the default),
        a new empty set is used.
    :type forbidden: Optional[Set]

    :return: Pairs of ``(synset, relation)`` with artificial synsets dealt
        with according to the algorithm described in the module docstring.
    :rtype: Generator[Tuple[SynsetBase, RelationInfoBase]]
    """
    # Compare against None instead of relying on truthiness: an empty set
    # passed by the caller is falsy, and ``forbidden or set()`` would silently
    # swap it for a private one, so the caller's set would never be updated.
    if forbidden is None:
        forbidden = set()

    for target_syn, relation in syn_and_relation_iter:
        for filtered_syn in _inner_filter(target_syn, relation, forbidden):
            yield filtered_syn, relation


def filter_artificial_synset_edges(syn_rel_edges_iter):
    """Filter an iterable of synset relation edges.

    Edges whose *source* is an artificial synset are dropped outright. For
    every other edge, the target is passed through ``_inner_filter`` using
    the relation of the edge, and one copy of the edge is yielded for each
    synset the filter produces, with that synset as the new target.

    The set of visited synsets is not shared between edges: each edge is
    filtered starting from an empty one.

    :param syn_rel_edges_iter: Iterable of relationship edges between
        synsets. Each edge must expose ``source``, ``target`` and
        ``relation`` attributes and a ``_replace`` method.
    :type syn_rel_edges_iter: Iterable[RelationEdge]

    :return: The synset edges iterable with artificial synsets dealt with
        according to the algorithm.
    :rtype: Generator[RelationEdge]
    """
    # Lazily discard every edge that originates in an artificial synset.
    edges_from_real_sources = (
        candidate
        for candidate in syn_rel_edges_iter
        if not candidate.source.is_artificial
    )

    for candidate in edges_from_real_sources:
        # A brand new "already visited" set is handed over for each edge.
        replacement_targets = _inner_filter(
            candidate.target,
            candidate.relation,
            set(),
        )
        for new_target in replacement_targets:
            yield candidate._replace(target=new_target)


def _inner_filter(target_syn, relation, forbidden):
    """Yield the non-artificial replacement(s) for a single target synset.

    Nothing is yielded when the ID of ``target_syn`` is already in
    ``forbidden``. Otherwise the ID is added to ``forbidden`` and:

    * a non-artificial synset is yielded as it is;
    * for an artificial synset, everything produced by
      ``target_syn.related(relation, True, forbidden)`` is yielded instead.

    :param target_syn: The synset an edge or relation points to.
    :param relation: The relation used to step over an artificial synset.
    :param forbidden: Set of IDs of synsets already visited; updated in place.

    :return: Generator of the synsets standing in for ``target_syn``.
    """
    syn_id = target_syn.id
    if syn_id in forbidden:
        # Already visited: do not yield the same item more than once.
        return
    forbidden.add(syn_id)

    if not target_syn.is_artificial:
        yield target_syn
        return

    # NOTE(review): presumably the ``True`` flag makes ``related`` apply this
    # same filtering to its own results, with ``forbidden`` shared so nested
    # calls see what has been visited -- confirm against ``related``.
    # This should not cause recursion more than two-three levels deep.
    for replacement in target_syn.related(relation, True, forbidden):
        yield replacement
