# leszeks authored f492369b
# _loading.py 4.44 KiB
"""Defines user-facing functions that allow simple construction of
:class:`PLWordnetBase` instances, with selected storages and readers.
"""
from __future__ import absolute_import, division, print_function
from collections import namedtuple
from importlib import import_module
import textwrap as tw
import six
# Names exported by ``from <this module> import *``.
__all__ = (
    'read',
    'load',
    'show_source_formats',
    'show_storage_formats',
)
_Info = namedtuple('_Info', ('desc', 'modname'))
_READERS = {
'uby-lmf': _Info('Discontinued XML-based format', 'ubylmf'),
'database': _Info(
'MySQL database of plWordNet. Only works on python 2 and requires '
'certain additional libraries. This is meant for internal use only '
'and will not work for most users. The file should contain one line '
'with SQLAlchemy URL to the database.',
'wndb',
),
'xml': _Info('The official PLWN XML format', 'wnxml'),
}
_STORAGES = {
'sqlite3': _Info(
'File database format, with a compact schema (compared to internal '
'PLWN database).',
'sqlite',
),
'objects': _Info(
'Stores data in plain python objects, dumping them in pickle format. '
'Quick to construct, but querying and memory efficiency is not '
'guaranteed.',
'objects',
),
}
# Defaults for this version
_READERS[None] = _READERS['xml']
_STORAGES[None] = _STORAGES['sqlite3']
def _imp_reader(modname):
    """Import a reader submodule and return its ``_this_reader_`` attribute.

    :param str modname: Name of the submodule inside ``plwn.readers``.

    :return: Whatever the submodule exposes as ``_this_reader_``.
    """
    # The root package is imported up front; the original author suspected
    # (but did not confirm) that python 3 requires this.
    import plwn.readers  # noqa

    relative_name = '.' + modname
    reader_module = import_module(relative_name, 'plwn.readers')
    return reader_module._this_reader_
def _imp_storage(modname):
    """Import a storage submodule and return its ``_this_storage_`` attribute.

    :param str modname: Name of the submodule inside ``plwn.storages``.

    :return: Whatever the submodule exposes as ``_this_storage_``.
    """
    # The root package is imported up front; the original author suspected
    # (but did not confirm) that python 3 requires this.
    import plwn.storages  # noqa

    relative_name = '.' + modname
    storage_module = import_module(relative_name, 'plwn.storages')
    return storage_module._this_storage_
def read(source_file,
         source_format=None,
         storage_file=None,
         storage_format=None):
    """Build a :class:`PLWordnetBase` instance from a plWordNet source file.

    The concrete subclass returned depends on the selected storage format.

    Any default mentioned below may change with each minor version of PLWN
    API. A long running program that depends on a particular format should
    name it explicitly.

    :param str source_file: Path to the file the plWordNet data is read from.
        What the file must contain depends on ``source_format``.

    :param str source_format: Name of the format of the data in
        ``source_file``. ``None`` selects the default for the current version.

    :param str storage_file: Path to the file where the internal
        representation of the storage is dumped, so that it can later be
        opened with :func:`load`. ``None`` means nothing is dumped.

    :param str storage_format: Name of the format in which PLWN API keeps the
        data in memory. The access methods should be the same for every
        format, but their efficiency may differ. ``None`` selects the default
        for the current version.

    :rtype: PLWordnetBase
    """
    # The storage is resolved before the reader, so a bad storage format is
    # reported first.
    storage_info = _STORAGES[storage_format]
    storage_cls = _imp_storage(storage_info.modname)

    reader_info = _READERS[source_format]
    reader_factory = _imp_reader(reader_info.modname)

    return storage_cls.from_reader(reader_factory(source_file), storage_file)
def load(storage_file, storage_format=None):
    """Build a :class:`PLWordnetBase` instance from a cached storage file.

    The file must hold the internal PLWN API representation. When such a
    file is available, this function is much faster than :func:`read`.

    :param str storage_file: Path to the file the cached data is read from.

    :param str storage_format: Name of the format the data is stored in. It
        must match the actual format and schema version of the file's
        contents.

    :rtype: PLWordnetBase
    """
    storage_info = _STORAGES[storage_format]
    storage_cls = _imp_storage(storage_info.modname)
    return storage_cls.from_dump(storage_file)
def show_source_formats():
    """Write the name and a short description of every available source
    file format to ``stdout``.

    Intended mainly as an informative helper for interactive shell sessions.
    """
    _show(_READERS)
def show_storage_formats():
    """Write the name and a short description of every available storage
    format to ``stdout``.

    Intended mainly as an informative helper for interactive shell sessions.
    """
    _show(_STORAGES)
def _show(dict_):
for name, info in six.iteritems(dict_):
if name is None:
continue
print(name)
print('-' * len(name))
print(tw.fill(info.desc), end='\n\n')