# coding: utf8

# Copyright (C) 2017 Michał Kaliński
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""User-facing functions for constructing :class:`PLWordnetBase` instances.

The functions defined here select a reader (source file format) and a storage
(internal representation) by name, import the matching implementation module
lazily, and return the resulting object.
"""

from __future__ import absolute_import, division, print_function

from collections import namedtuple
from importlib import import_module
import textwrap as tw

import six

from . import exceptions as exc
try:
    from .default import get_default_load_args
except ImportError:
    # The bundled default storage is optional; ``load_default`` checks for
    # this ``None`` marker and reports the absence to the caller.
    get_default_load_args = None


__all__ = (
    'read',
    'load',
    'load_default',
    'show_source_formats',
    'show_storage_formats',
)


# ``desc`` is the human readable description printed by the ``show_*``
# functions; ``modname`` is the name of the implementing submodule.
_Info = namedtuple('_Info', ('desc', 'modname'))

_READERS = {
    'uby-lmf': _Info('Discontinued XML-based format', 'ubylmf'),
    'database': _Info(
        'MySQL database of plWordNet. Only works on python 2 and requires '
        'certain additional libraries. This is meant for internal use only '
        'and will not work for most users. The file should contain one line '
        'with SQLAlchemy URL to the database.',
        'wndb',
    ),
    'xml': _Info('The official PLWN XML format', 'wnxml'),
}
_STORAGES = {
    'sqlite3': _Info(
        'File database format, with a compact schema (compared to internal '
        'PLWN database).',
        'sqlite',
    ),
}

# Defaults for this version: the ``None`` key is what a caller gets when no
# format name is passed explicitly.
_READERS[None] = _READERS['database']
_STORAGES[None] = _STORAGES['sqlite3']


def _format_info(registry, name, kind):
    """Return the ``_Info`` registered under ``name`` in ``registry``.

    :param dict registry: Either ``_READERS`` or ``_STORAGES``.
    :param name: Format name, or ``None`` for the default entry.
    :param str kind: Word used in the error message (``'source'`` or
        ``'storage'``).

    :raises KeyError: If ``name`` is not registered. The message lists the
        available format names, instead of only echoing the bad key.
    """
    info = registry.get(name)
    if info is None:
        # Exclude the ``None`` alias: it is not a name users can type, and
        # it cannot be ordered against strings on python 3.
        known = sorted(key for key in registry if key is not None)
        raise KeyError(
            'Unknown {} format {!r}; available formats: {}'.format(
                kind,
                name,
                ', '.join(known),
            )
        )
    return info


def _imp_reader(modname):
    """Import ``plwn.readers.<modname>`` and return its ``_this_reader_``."""
    # Pre-import the root package - py3 needs this?
    import plwn.readers  # noqa
    return import_module('.' + modname, 'plwn.readers')._this_reader_


def _imp_storage(modname):
    """Import ``plwn.storages.<modname>`` and return its ``_this_storage_``."""
    # Pre-import the root package - py3 needs this?
    import plwn.storages  # noqa
    return import_module('.' + modname, 'plwn.storages')._this_storage_


def read(source_file,
         source_format=None,
         storage_file=None,
         storage_format=None):
    """Read plWordNet data from a file.

    Return the right :class:`PLWordnetBase` subclass instance for the
    selected parameters.

    Where defaults are mentioned, those values may change with each minor
    version of PLWN API. If you depend on some particular format for a long
    running program, state it explicitly.

    :param str source_file: Path to the file from which the plWordNet data
        will be read. The required contents of the file depend on selected
        ``source_format``.

    :param str source_format: Name of the format of data that's contained in
        ``source_file``. If ``None``, then the default for the current
        version will be chosen.

    :param str storage_file: Path to the file where the internal
        representation of the storage will be dumped. It will be possible to
        load this file using :func:`load`. If ``None``, then the
        representation will not be dumped.

    :param str storage_format: Name of the format in which PLWN API will
        store data in memory. Access methods provided should be the same, but
        their efficiency may differ. If ``None``, then the default for the
        current version will be chosen.

    :rtype: PLWordnetBase

    :raises KeyError: If ``source_format`` or ``storage_format`` is not a
        known format name.
    """
    # Keep the original resolution order: storage first, then reader.
    stor_cls = _imp_storage(
        _format_info(_STORAGES, storage_format, 'storage').modname
    )
    rdr = _imp_reader(_format_info(_READERS, source_format, 'source').modname)
    return stor_cls.from_reader(rdr(source_file), storage_file)


def load(storage_file, storage_format=None):
    """Read plWordNet data from a cached file.

    The file holds the internal PLWN API representation. This function is
    much faster than :func:`read` if such file is available.

    :param str storage_file: Path to the file from which the cached data will
        be read.

    :param str storage_format: Name of the format the data is stored in. It
        must match the actual format and version of schema contained in the
        file. If ``None``, the default for the current version is used.

    :rtype: PLWordnetBase

    :raises KeyError: If ``storage_format`` is not a known format name.
    """
    stor_cls = _imp_storage(
        _format_info(_STORAGES, storage_format, 'storage').modname
    )
    return stor_cls.from_dump(storage_file)


def load_default():
    """Load and return the default, bundled version of plWordNet data.

    This function will fail if the bundled version is not present in the
    package; this may occur in some builds where specialized plWordNet
    versions are needed and disk space is a concern.

    :rtype: PLWordnetBase

    :raises PLWNAPIException: If no default data is bundled.
    """
    # Test the import-time marker explicitly. Calling it and catching
    # ``TypeError`` would also hide a real ``TypeError`` raised inside
    # ``get_default_load_args`` behind a misleading "not bundled" message.
    if get_default_load_args is None:
        raise exc.PLWNAPIException(
            'No default storage bundled with this PLWN API package',
        )
    return load(*get_default_load_args())


def show_source_formats():
    """Print names and short descriptions of source formats to ``stdout``.

    This function is primarily meant to be informative in interactive shell
    mode.
    """
    _show(_READERS)


def show_storage_formats():
    """Print names and short descriptions of storage formats to ``stdout``.

    This function is primarily meant to be informative in interactive shell
    mode.
    """
    _show(_STORAGES)


def _show(dict_):
    """Print every named entry of ``dict_`` as an underlined heading.

    Each heading is followed by the entry's description wrapped by
    ``textwrap.fill`` and a blank line.
    """
    for name, info in six.iteritems(dict_):
        # The ``None`` key is only an alias for the default entry, which is
        # also listed under its real name.
        if name is None:
            continue

        print(name)
        print('-' * len(name))
        print(tw.fill(info.desc), end='\n\n')