# _loading.py
"""Defines user-facing functions that allow simple construction of
:class:`PLWordnetBase` instances, with selected storages and readers.
"""

from __future__ import absolute_import, division, print_function


from collections import namedtuple
from importlib import import_module
import textwrap as tw

import six


# Public names exported by a star-import of this module.
__all__ = (
    'read',
    'load',
    'show_source_formats',
    'show_storage_formats',
)


# Registry entry: ``desc`` is the human-readable description printed by the
# ``show_*`` functions, ``modname`` is the name of the submodule (inside
# ``plwn.readers`` or ``plwn.storages``) that implements the format.
_Info = namedtuple('_Info', 'desc modname')

# Source formats, keyed by the name accepted as ``source_format``.
_READERS = {
    'uby-lmf': _Info(
        desc='Discontinued XML-based format',
        modname='ubylmf',
    ),
    'database': _Info(
        desc=(
            'MySQL database of plWordNet. Only works on python 2 and '
            'requires certain additional libraries. This is meant for '
            'internal use only and will not work for most users. The file '
            'should contain one line with SQLAlchemy URL to the database.'
        ),
        modname='wndb',
    ),
    'xml': _Info(
        desc='The official PLWN XML format',
        modname='wnxml',
    ),
}

# Storage formats, keyed by the name accepted as ``storage_format``.
_STORAGES = {
    'sqlite3': _Info(
        desc=(
            'File database format, with a compact schema (compared to '
            'internal PLWN database).'
        ),
        modname='sqlite',
    ),
    'objects': _Info(
        desc=(
            'Stores data in plain python objects, dumping them in pickle '
            'format. Quick to construct, but querying and memory efficiency '
            'is not guaranteed.'
        ),
        modname='objects',
    ),
}

# Defaults for this version: the ``None`` key aliases the entry that is used
# when the caller does not name a format explicitly.
_READERS[None] = _READERS['xml']
_STORAGES[None] = _STORAGES['sqlite3']


def _imp_reader(modname):
    """Import ``plwn.readers.<modname>`` and return its ``_this_reader_``.

    :param str modname: Name of the submodule inside ``plwn.readers``.

    :return: The ``_this_reader_`` attribute of the imported submodule.
    """
    # The root package is imported explicitly first; this may be required
    # on python 3 (unconfirmed).
    import plwn.readers  # noqa
    reader_module = import_module('plwn.readers.' + modname)
    return reader_module._this_reader_


def _imp_storage(modname):
    """Import ``plwn.storages.<modname>`` and return its ``_this_storage_``.

    :param str modname: Name of the submodule inside ``plwn.storages``.

    :return: The ``_this_storage_`` attribute of the imported submodule.
    """
    # The root package is imported explicitly first; this may be required
    # on python 3 (unconfirmed).
    import plwn.storages  # noqa
    storage_module = import_module('plwn.storages.' + modname)
    return storage_module._this_storage_


def read(source_file,
         source_format=None,
         storage_file=None,
         storage_format=None):
    """Build a :class:`PLWordnetBase` instance from a plWordNet source file.

    The storage implementation is selected by ``storage_format`` and the
    reader by ``source_format``; the reader is applied to ``source_file`` and
    its result is handed to the storage class' ``from_reader`` constructor
    together with ``storage_file``.

    Defaults (used when a format is ``None``) may change with each minor
    version of PLWN API. A long running program that depends on a particular
    format should name it explicitly.

    :param str source_file: Path to the file the plWordNet data is read
        from. What the file must contain depends on ``source_format``.

    :param str source_format: Name of the format of ``source_file``. ``None``
        selects the default for the current version.

    :param str storage_file: Path to the file where the internal
        representation of the storage will be dumped, so that it can later
        be opened with :func:`load`. With ``None`` nothing is dumped.

    :param str storage_format: Name of the format in which PLWN API keeps the
        data in memory. The access methods should be the same for every
        format, but their efficiency may differ. ``None`` selects the default
        for the current version.

    :raises KeyError: If ``storage_format`` or ``source_format`` is not a
        known format name.

    :rtype: PLWordnetBase
    """
    # Storage is resolved before the reader, so an unknown storage format is
    # reported first.
    storage_info = _STORAGES[storage_format]
    storage_class = _imp_storage(storage_info.modname)

    reader_info = _READERS[source_format]
    make_reader = _imp_reader(reader_info.modname)

    build_from_reader = storage_class.from_reader
    return build_from_reader(make_reader(source_file), storage_file)


def load(storage_file, storage_format=None):
    """Open a previously dumped internal PLWN API representation.

    When such a cached file is available, this is much faster than calling
    :func:`read`.

    :param str storage_file: Path to the file holding the cached data.

    :param str storage_format: Name of the format the data is stored in. It
        has to match the actual format and schema version of the file.
        ``None`` selects the default for the current version.

    :raises KeyError: If ``storage_format`` is not a known format name.

    :rtype: PLWordnetBase
    """
    storage_info = _STORAGES[storage_format]
    storage_class = _imp_storage(storage_info.modname)
    return storage_class.from_dump(storage_file)


def show_source_formats():
    """Write the name and a short description of every available source
    file format to ``stdout``.

    Mostly useful as a quick reference when working in an interactive shell.
    """
    _show(_READERS)


def show_storage_formats():
    """Write the name and a short description of every available storage
    format to ``stdout``.

    Mostly useful as a quick reference when working in an interactive shell.
    """
    _show(_STORAGES)


def _show(dict_):
    """Print every named entry of a format registry to ``stdout``.

    Each entry is printed as its name, a dashed underline of the same
    length, and the wrapped description followed by a blank line.

    :param dict dict_: Mapping of format name to an object with a ``desc``
        attribute. The ``None`` key is skipped.
    """
    for label, entry in six.iteritems(dict_):
        # ``None`` has no printable name, so it is left out of the listing.
        if label is not None:
            print(label)
            print(len(label) * '-')
            wrapped_desc = tw.fill(entry.desc)
            print(wrapped_desc, end='\n\n')