diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..03ddfb79af466854b50db957d7d84f7340319304 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,37 @@ +image: clarinpl/python:3.6 + +before_script: + - pip install tox==2.9.1 + +cache: + paths: + - .tox + +stages: + - check_style + - push_wheel + +pep8: + stage: check_style + script: + - tox -v -e pep8 + +docstyle: + stage: check_style + script: + - tox -v -e docstyle + +push_wheel: + before_script: + - pip install twine + only: + - master + stage: push_wheel + when: on_success + script: + - python setup.py sdist bdist_wheel + - python -m twine upload + --repository-url https://pypi.clarin-pl.eu/ + -u $PIPY_USER -p $PIPY_PASS dist/plwn_api*.whl + + diff --git a/plwn/__init__.py b/plwn/__init__.py index 69262d7dbff725b6934dbcbbedb0ff02daef9cad..fefc9a10e254fa4ea7c73918d4cfc176650ada63 100644 --- a/plwn/__init__.py +++ b/plwn/__init__.py @@ -1,6 +1,17 @@ -from ._loading import * +from ._loading import read +from ._loading import load +from ._loading import show_source_formats +from ._loading import show_storage_formats from .enums import PoS # Setup logging for the package (not) import logging as _logging _logging.getLogger('plwn').addHandler(_logging.NullHandler()) + +__all__ = [ + "PoS", + "read", + "load", + "show_storage_formats", + "show_source_formats", +] diff --git a/plwn/_loading.py b/plwn/_loading.py index 709680eaa8ed015e3c66c466c0ab4ce12701b2f8..6571f47fb35453487a441d8304b9d537d491c0ac 100644 --- a/plwn/_loading.py +++ b/plwn/_loading.py @@ -1,5 +1,7 @@ -"""Defines user-facing functions that allow simple construction of -:class:`PLWordnetBase` instances, with selected storages and readers. +"""Defines user-facing functions. + +That allow simple construction of :class:`PLWordnetBase` instances, +with selected storages and readers. 
""" from __future__ import absolute_import, division, print_function @@ -63,8 +65,10 @@ def read(source_file, source_format=None, storage_file=None, storage_format=None): - """Read plWordNet data from a file and return the right - :class:`PLWordnetBase` subclass instance for the selected parameters. + """Read plWordNet data from a file. + + Return the right :class:`PLWordnetBase` subclass instance for + the selected parameters. Where defaults are mentioned, those values may change with each minor version of PLWN API. If you depend on some particular format for a long @@ -89,15 +93,15 @@ def read(source_file, :rtype: PLWordnetBase """ - stor_cls = _imp_storage(_STORAGES[storage_format].modname) rdr = _imp_reader(_READERS[source_format].modname) return stor_cls.from_reader(rdr(source_file), storage_file) def load(storage_file, storage_format=None): - """Read plWordNet data from a cached file with internal PLWN API - representation. + """Read plWordNet data from a cached file. + + With internal PLWN API representation. This function is much faster than :func:`read` if such file is available. @@ -109,30 +113,29 @@ def load(storage_file, storage_format=None): :rtype: PLWordnetBase """ - stor_cls = _imp_storage(_STORAGES[storage_format].modname) return stor_cls.from_dump(storage_file) def show_source_formats(): - """Print names and short descriptions of available source file formats to - ``stdout``. + """Print names and short descriptions. + + Of available source file formats to ``stdout``. This function is primarily meant to be informative in interactive shell mode. """ - _show(_READERS) def show_storage_formats(): - """Print names and short descriptions of available storage formats to - ``stdout``. + """Print names and short descriptions. + + Of available storage formats to ``stdout``. This function is primarily meant to be informative in interactive shell mode. 
""" - _show(_STORAGES) diff --git a/plwn/bases.py b/plwn/bases.py index dae5f90cd2bdb745f0d29a89492f8b7cfd66c7bc..b104c5d77f15e691b97ccef6d622c777234792db 100644 --- a/plwn/bases.py +++ b/plwn/bases.py @@ -1,5 +1,7 @@ -"""Base, abstract classes for plWordNet objects, implementing common -functionality independent of structures holding wordnet data. +"""Base, abstract classes for plWordNet objects. + +Implementing common functionality independent of structures holding +wordnet data. """ from __future__ import absolute_import, division @@ -40,8 +42,10 @@ class PLWordNetBase(object): @classmethod def from_reader(cls, reader, dump_to=None): - """Create a new instance from a source reader, optionally saving it in - an internal representation format in another file. + """Create a new instance from a source reader. + + Optionally saving it in an internal representation format + in another file. :param reader: Generator that yields :class:`SynsetNone` and :class:`LexicalUnitNode` from a source representation. @@ -54,7 +58,6 @@ class PLWordNetBase(object): :returns: New instance of PLWN API entry point. :rtype: PLWordNetBase """ - raise NotImplementedError() @classmethod @@ -70,16 +73,17 @@ class PLWordNetBase(object): :returns: New instance of PLWN API entry point. :rtype: PLWordNetBase """ - return NotImplementedError() def __init__(self): + """Initialize PLWordNetBase.""" self._rel_resolver = get_default_relation_resolver() @abc.abstractmethod def synsets(self, lemma=None, pos=None, variant=None): - """Iterate over synsets form plWordNet, filtered by lemma, part of - speech and variant. + """Iterate over synsets form plWordNet. + + Filtered by lemma, part ofspeech and variant. If a parameter is omitted, then any value is accepted (so ``synsets()`` iterates over all synsets). @@ -101,13 +105,13 @@ class PLWordNetBase(object): :raises InvalidPoSException: If a query is made for a PoS that is not one of the valid constants. 
""" - pass @abc.abstractmethod def synset(self, lemma, pos, variant): - """Get the synset containing the unit with the lemma, part of speech - and variant. + """Get the synset. + + Containing the unit with the lemma, part of speech and variant. Unlike :meth:`.synsets`, all parameters of this method are mandatory. It either returns a single synset, or raises and exception if no @@ -129,7 +133,6 @@ class PLWordNetBase(object): :raises InvalidPoSException: If a query is made for a PoS that is not one of the valid constants. """ - pass @abc.abstractmethod @@ -147,13 +150,13 @@ class PLWordNetBase(object): :raises InvalidSynsetIdentifierException: If there's no synset with the ID in plWordnet. """ - pass @abc.abstractmethod def lexical_units(self, lemma=None, pos=None, variant=None): - """Iterate over lexical units form plWordNet, filtered by lemma, part - of speech and variant. + """Iterate over lexical units form plWordNet. + + Filtered by lemma, part of speech and variant. If a parameter is omitted, then any value is accepted (so ``lexical_units()`` iterates over all units). @@ -171,7 +174,6 @@ class PLWordNetBase(object): :raises InvalidPoSException: If a query is made for a PoS that is not one of the valid constants. """ - pass @abc.abstractmethod @@ -196,7 +198,6 @@ class PLWordNetBase(object): :raises InvalidPoSException: If a query is made for a PoS that is not one of the valid constants. """ - pass @abc.abstractmethod @@ -213,13 +214,13 @@ class PLWordNetBase(object): :raises InvalidLexicalUnitIdentifierException: If there's no lexical unit with the ID in plWordnet. """ - pass @abc.abstractmethod def synset_relation_edges(self, include=None, exclude=None): - """Iterate over all synset relation instances in plWordnet, yielding - them as tuples. + """Iterate over all synset relation instances in plWordnet. + + Yielding them as tuples. Named tuples in format ``(source, relation, target)`` (:data:`RelationEdge`) are yielded by this method. 
@@ -241,25 +242,23 @@ class PLWordNetBase(object): :returns: Generator of tuples representing synset relation edges. :rtype: Iterable[Tuple[SynsetBase,str,SynsetBase]] """ - pass @abc.abstractmethod def lexical_relation_edges(self, include=None, exclude=None): - """Iterate over all lexical relation instances in plWordnet, yielding - them as tuples. + """Iterate over all lexical relation instances in plWordnet. + + Yielding them as tuples. This method behaves very closely to :meth:`.synset_relation_edges`, but for lexical relations. :rtype: Iterable[Tuple[LexicalUnitBase,str,LexicalUnitBase]] """ - pass def close(self): - """Perform necessary cleanup operations and close this PLWordNet - instance. + """Perform necessary cleanup operations, close this PLWordNet instance. Often, temporary files are created when reading and parsing plWordNet, and non-temporary files may be opened. Call this method to properly @@ -271,7 +270,6 @@ class PLWordNetBase(object): It's legal to call this method several times. It's not legal to call any other methods after :meth:`.close` has been called. """ - pass def to_graphml(self, @@ -291,8 +289,9 @@ class PLWordNetBase(object): excluded_synset_nodes=None, included_lexical_unit_nodes=None, excluded_lexical_unit_nodes=None): - """Export the wordnet as graph in `GraphML - <http://graphml.graphdrawing.org/>`_ format. + """Export the wordnet as graph. + + In `GraphML <http://graphml.graphdrawing.org/>`_ format. Normally, nodes of the graph are synsets, and edges are relations between synsets. It's possible to make the graph made of lexical units @@ -416,7 +415,6 @@ class PLWordNetBase(object): :raises ValueError: If ``graph_type`` is not one of the allowed values. 
""" - gwn = go.GraphMLWordNet() gb = go.GraphMLBuilder(self, gwn) @@ -447,16 +445,16 @@ class PLWordNetBase(object): include_attributes=include_attributes, included_synset_attributes=included_synset_attributes, excluded_synset_attributes=excluded_synset_attributes, - included_lexical_unit_attributes= - included_lexical_unit_attributes, - excluded_lexical_unit_attributes= - excluded_lexical_unit_attributes, + included_lexical_unit_attributes=( + included_lexical_unit_attributes), + excluded_lexical_unit_attributes=( + excluded_lexical_unit_attributes), included_synset_relations=included_synset_relations, excluded_synset_relations=excluded_synset_relations, - included_lexical_unit_relations= - included_lexical_unit_relations, - excluded_lexical_unit_relations= - excluded_lexical_unit_relations, + included_lexical_unit_relations=( + included_lexical_unit_relations), + excluded_lexical_unit_relations=( + excluded_lexical_unit_relations), included_synset_nodes=included_synset_nodes, excluded_synset_nodes=excluded_synset_nodes, included_lexical_unit_nodes=included_lexical_unit_nodes, @@ -491,17 +489,16 @@ class SynsetBase(object): @abc.abstractproperty def id(self): - """``int`` + """``int``. The internal identifier of the synset in plWordnet. It is unique among all synsets. """ - pass @abc.abstractproperty def lexical_units(self): - """``Tuple[LexicalUnitBase]`` + """``Tuple[LexicalUnitBase]``. Lexical units contained in the synsets. Ordering of units within the tuple is arbitrary, but constant. The first unit is the synset's head, @@ -509,27 +506,24 @@ class SynsetBase(object): At least one lexical unit is always present in every synset. """ - pass @abc.abstractproperty def definition(self): - """``str`` + """``str``. Textual description of the synset's meaning. Will be an empty string if the definition is not present in plWordNet. """ - pass @abc.abstractproperty def relations(self): - """``Tuple[str]`` + """``Tuple[str]``. 
Tuple of all outward relations that lead from this synset. """ - pass @abc.abstractmethod @@ -544,13 +538,12 @@ class SynsetBase(object): :raises InvalidRelationNameException: If ``relation_name`` is not a valid name of a synset relation in plWordNet. """ - pass def to_dict(self, include_related=True, include_units_data=True): - """ - Create a JSON-compatible dictionary with all the public properties of - the synset. + """Create a JSON-compatible dictionary. + + With all the public properties of the synset. Enums are converted to their values and all collections are converted to tuples. @@ -574,7 +567,6 @@ class SynsetBase(object): :returns: Dictionary contain data of the synset. :rtype: Mapping[str, Any] """ - syn_dict = { u'id': self.id, u'definition': self.definition, @@ -598,10 +590,7 @@ class SynsetBase(object): return syn_dict def short_str(self): - """ - Shorter version of ``str`` cast that displays only the first unit. - """ - + """Shorter version of ``str`` cast that displays only the first unit.""" sstr = [u'{', six.text_type(self.lexical_units[0])] if len(self.lexical_units) > 1: sstr.append( @@ -664,70 +653,64 @@ class LexicalUnitBase(object): @abc.abstractproperty def id(self): - """``int`` + """``int``. The internal identifier of the lexical unit in plWordnet. It is unique among all units. """ - pass @abc.abstractproperty def lemma(self): - """``str`` + """``str``. Lemma of the unit, basic form of the word(s) the unit represents. """ - pass @abc.abstractproperty def pos(self): - """``PoS`` + """``PoS``. Part of speech of the unit. This will be one of enumeration constants from :class:`PoS`. To get the textual value, use ``pos.value``. """ - pass @abc.abstractproperty def variant(self): - """``int`` + """``int``. If the same lemma has different meanings as the same part of speech, this number will be used to tell them apart. The first meaning has the number 1. """ - pass @abc.abstractproperty def definition(self): - """``str`` + """``str``. 
Textual description of the lexical unit's meaning. Will be an empty string if the definition is not present in plWordNet. """ - pass @abc.abstractproperty def sense_examples(self): - """``Tuple[str]`` + """``Tuple[str]``. Fragments of text that show how the lexical unit is used in the language. May be an empty collection, if no examples are present. """ - pass @abc.abstractproperty def sense_examples_sources(self): - """``Tuple[str]`` + """``Tuple[str]``. Symbolic representations of sources from which the sense examples were taken. @@ -739,24 +722,22 @@ class LexicalUnitBase(object): To get pairs of of examples with their sources, use ``zip(sense_examples, sense_examples_sources)`` """ - # TODO List of source symbols, link to? pass @abc.abstractproperty def external_links(self): - """``Tuple[str]`` + """``Tuple[str]``. URLs to webpages describing the meaning of the lexical unit. May be an empty collection, if no examples are present. """ - pass @abc.abstractproperty def usage_notes(self): - """``Tuple[str]`` + """``Tuple[str]``. Symbols denoting certain properties of how the lexical unit is used. @@ -764,31 +745,28 @@ class LexicalUnitBase(object): May be an empty collection. """ - pass @abc.abstractproperty def domain(self): - """``Domain`` + """``Domain``. Wordnet domain the lexical unit belongs to. """ - pass @abc.abstractproperty def verb_aspect(self): - """``Optional[VerbAspect]`` + """``Optional[VerbAspect]``. Aspect of a verb. This will be one of the constants from :class:`VerbAspect`, or ``None``, if the lexical unit is not a verb. """ - pass @abc.abstractproperty def emotion_markedness(self): - """``Optional[EmotionMarkedness]`` + """``Optional[EmotionMarkedness]``. Markedness of emotional connotations of the lexical unit. May be ``None``, if the unit has no emotional markedness. @@ -796,61 +774,54 @@ class LexicalUnitBase(object): If this property is ``None``, then all other ``emotion_*`` properties will be ``None`` or empty. 
""" - pass @abc.abstractproperty def emotion_names(self): - """``Tuple[str, ...]`` + """``Tuple[str, ...]``. Names of emotions associated with this lexical unit. """ - pass @abc.abstractproperty def emotion_valuations(self): - """``Tuple[str, ...]`` + """``Tuple[str, ...]``. Valuations of emotions associated with this lexical unit. """ - pass @abc.abstractproperty def emotion_example(self): - """``Optional[str]`` + """``Optional[str]``. An example of an emotionally loaded sentence using the lexical unit. """ - pass @abc.abstractproperty def emotion_example_secondary(self): - """``Optional[str]`` + """``Optional[str]``. This property is not ``None`` only if ``emotion_markedness`` is ``amb``. In such case, :attr:`.emotion_example` will be an example of a positive sentence, and this one will be a negative sentence. """ - pass @abc.abstractproperty def synset(self): - """``SynsetBase`` + """``SynsetBase``. The synset the unit belongs to. """ - pass @abc.abstractmethod def related(self, relation_name): - """Iterate over lexical units to whom this unit has a - certain relation. + """Iterate over lexical units to whom this unit has a certain relation. :param str relation_name: The name of the relation to follow. @@ -860,22 +831,20 @@ class LexicalUnitBase(object): :raises InvalidRelationNameException: If ``relation_name`` is not a valid name of a lexical relation in plWordNet. """ - pass @abc.abstractproperty def relations(self): - """``Tuple[str]`` + """``Tuple[str]``. Tuple of all outward relations that lead from this lexical unit. """ - pass def to_dict(self, include_related=True): - """ - Create a JSON-compatible dictionary with all the public properties of - the lexical unit. + """Create a JSON-compatible dictionary. + + With all the public properties of the lexical unit. Enums are converted to their values and all collections are converted to tuples. @@ -894,7 +863,6 @@ class LexicalUnitBase(object): :returns: Dictionary contain data of the lexical unit. 
:rtype: Mapping[str, Any] """ - lu_dict = { u'id': self.id, u'lemma': self.lemma, diff --git a/plwn/enums.py b/plwn/enums.py index 67ad34a9697c7b76eb8078c8c4d672325a85e01e..e14f67ac2a5d7083b744361cddef07152c1b7137 100644 --- a/plwn/enums.py +++ b/plwn/enums.py @@ -1,7 +1,5 @@ # coding: utf8 -""" -Enumerated values used in plWordNet -""" +"""Enumerated values used in plWordNet.""" from __future__ import absolute_import, division @@ -47,9 +45,7 @@ _POS_ENUM2NUM = {} class PoS(Enum): - """ - Defines **Part of Speech** values used by plWN. - """ + """Defines **Part of Speech** values used by plWN.""" if six.PY2: __order__ = _POS_ORDER @@ -82,9 +78,7 @@ _VA_ENUM2NUM = {} class VerbAspect(Enum): - """ - Defines aspect values used by verbs in plWN. - """ + """Defines aspect values used by verbs in plWN.""" if six.PY2: __order__ = _VA_ORDER @@ -116,9 +110,7 @@ _fill_numtrans(VerbAspect, _VA_NUM2ENUM, _VA_ENUM2NUM) class EmotionMarkedness(Enum): - """ - Defines markedness of emotions associated with some lexical units. - """ + """Defines markedness of emotions associated with some lexical units.""" strong_positive = u'+ m' strong_negative = u'- m' @@ -134,11 +126,10 @@ class EmotionMarkedness(Enum): @classmethod def normalized(cls, strvalue): - """ - Return an instance of this enum with string value normalized with - regards to whitespace. - """ + """Return an instance of this enum. + With string value normalized with regards to whitespace. + """ strvalue = strvalue.strip() # Try the one value value that won't require matching @@ -156,9 +147,7 @@ class EmotionMarkedness(Enum): class EmotionName(Enum): - """ - Possible names of emotions associated with some lexical units. - """ + """Possible names of emotions associated with some lexical units.""" joy = u'radość' trust = u'zaufanie' @@ -180,9 +169,7 @@ class EmotionName(Enum): class EmotionValuation(Enum): - """ - Possible valuations of emotions associated with some lexical units. 
- """ + """Possible valuations of emotions associated with some lexical units.""" usefulness = u'użyteczność' good = u'dobro' @@ -220,9 +207,7 @@ _DOM_ENUM2NUM = {} class Domain(Enum): - """ - Wordnet domains of lexical units. - """ + """Wordnet domains of lexical units.""" if six.PY2: __order__ = _DOM_ORDER @@ -301,9 +286,8 @@ _fill_numtrans(Domain, _DOM_NUM2ENUM, _DOM_ENUM2NUM) def make_values_tuple(enum_seq): - """ - Auxiliary function that converts a sequence of enums to a tuple of enum - values. - """ + """Auxiliary function. + That converts a sequence of enums to a tuple of enum values. + """ return tuple(en.value for en in enum_seq) diff --git a/plwn/exceptions.py b/plwn/exceptions.py index ebc2c9ad9a87cfbdd18dacc531e2dfdc87d8035b..ccb23844fee9154559f3e919ad2b5a81cab1f427 100644 --- a/plwn/exceptions.py +++ b/plwn/exceptions.py @@ -29,6 +29,7 @@ class NotFound(PLWNAPIException): """Base for exceptions raised when an object is not found.""" def __init__(self, lemma, pos, variant, *args): + """Initialize NotFound.""" super(NotFound, self).__init__(*args) self.args = ('lemma={!r} pos={!r} variant={!r}'.format( @@ -57,11 +58,13 @@ class ReaderException(PLWNAPIException): class MalformedIdentifierException(ReaderException): - """Raised during UBY-LMF parsing, when a malformed identifier is - encountered. + """Raised during UBY-LMF parsing. + + When a malformed identifier is encountered. """ def __init__(self, id_): + """Initialize MalformedIdentifierException.""" super(MalformedIdentifierException, self).__init__( "Malformed identifier, expected digits at the end of the original" " id instead got {!r}" @@ -76,11 +79,13 @@ class LoadException(PLWNAPIException): class DumpVersionException(LoadException): - """Raised when a dumped storage has wrong version (suggesting incompatible - format). + """Raised when a dumped storage has wrong version. + + Suggesting incompatible format. 
""" def __init__(self, version_is, version_required): + """Initialize DumpVersionException.""" super(DumpVersionException, self).__init__(version_is, version_required) self.version_is = version_is @@ -106,16 +111,18 @@ class InvalidLexicalUnitIdentifierException(PLWNAPIException): class InvalidRelationNameException(PLWNAPIException): - """Raised when attempting to select synsets or units related by a relation - that does not exist. + """Raised when attempting to select synsets or units. + + Related by a relation that does not exist. """ pass class InvalidPoSException(PLWNAPIException): - """Raised when a query for PoS is made, which is not one of the valid - constants. + """Raised when a query for PoS is made. + + Which is not one of the valid constants. """ pass diff --git a/plwn/readers/comments.py b/plwn/readers/comments.py index 9aaa329920fec9b7acd5aa9347afffb27538b7ad..4a0f6efeb0c9702fb7af9e2742b95569fd06b0c0 100644 --- a/plwn/readers/comments.py +++ b/plwn/readers/comments.py @@ -1,5 +1,6 @@ -"""Parsing strings in wordnet comment format, for readers that need to deal -with them. +"""Parsing strings in wordnet comment format. + +For readers that need to deal with them. Importing this module introduces dependency on wncomments. """ @@ -51,15 +52,15 @@ CommentData = namedtuple( def parse_comment_string(cmt_str): - """Parse a comment string and extract all data required by PLWN API packed - in a named tuple. + """Parse a comment string. + + Extract all data required by PLWN API packed in a named tuple. :param str cmt_str: String in PLWN comment format. :returns: Extracted and ordered items needed by PLWN API. 
:rtype: CommentData """ - try: cmt = plwnc.Comment.parse(cmt_str, WN_TAGS) except plwnce.PLWNCommentsException: diff --git a/plwn/readers/ubylmf.py b/plwn/readers/ubylmf.py index 642155285ee8df64f6faecd040adb3b7d1eed466..d28b361fa2ff46d34e3e1633c65040f931978677 100644 --- a/plwn/readers/ubylmf.py +++ b/plwn/readers/ubylmf.py @@ -1,6 +1,8 @@ # FIXME Some assert statements should be converted to regular raises (asserts # should not be used for anything other than checking for errors in the code # itself). +"""Implementation of ubylmf reader.""" + from xml.etree import ElementTree import re import logging diff --git a/plwn/readers/wndb.py b/plwn/readers/wndb.py index 1677f3ff629de48e09201e39d2df4be517a02834..47a1f547e86da7bae089d602b9e3769569a12a3b 100644 --- a/plwn/readers/wndb.py +++ b/plwn/readers/wndb.py @@ -1,4 +1,6 @@ # coding: utf8 +"""Implementation of wndb reader.""" + from __future__ import absolute_import, division import collections as coll @@ -32,8 +34,9 @@ _EmotionData = coll.namedtuple( def wndb_reader(wordnet_db_url): - """Generate UBY-LMF format compatible records directly from plWordNet - database. + """Generate UBY-LMF format compatible records. + + Directly from plWordNet database. sqlalchemy is required for this method to work. @@ -43,7 +46,6 @@ def wndb_reader(wordnet_db_url): :return: a generator over PLwordnet entities. :rtype: generator """ - db_eng = sa.create_engine(wordnet_db_url) db_meta = sa.MetaData(db_eng) visited_synsets = set() diff --git a/plwn/readers/wnxml.py b/plwn/readers/wnxml.py index acfbc494906922ca282600197e3ab770bf669160..18b40bf98b196e7061027cd149496eca86f3303f 100644 --- a/plwn/readers/wnxml.py +++ b/plwn/readers/wnxml.py @@ -1,4 +1,6 @@ # coding: utf8 +"""Implementation of wnxml reader.""" + from __future__ import absolute_import, division @@ -39,7 +41,6 @@ def wnxml_reader(wnxml_file): :return: a generator over PLwordnet entities. 
:rtype: generator """ - # The regrettably huge global storage for yielding synsets = {} lexunits = {} diff --git a/plwn/relresolver.py b/plwn/relresolver.py index 92eec11aef02bc9747f1d8c73f938ef779c61123..940a529e6a3281acfba93a12cc4110edceb1f865 100644 --- a/plwn/relresolver.py +++ b/plwn/relresolver.py @@ -1,3 +1,4 @@ +"""Implementation of Relation Resolver.""" from __future__ import absolute_import, division @@ -18,14 +19,11 @@ _log = logging.getLogger(__name__) class RelationResolver(object): - """ - Stores dictionary of relation name aliases to full names. - """ + """Stores dictionary of relation name aliases to full names.""" @classmethod def from_tsv(cls, tsv_stream): - """ - Creates an instance from a TSV file. + """Create an instance from a TSV file. The first item of each line should be the full name, and every other should be an alias (similar to ``from_reverse_dict``). @@ -35,7 +33,6 @@ class RelationResolver(object): :rtype: RelationResolver """ - adict = {} for line in tsv_stream: @@ -48,15 +45,14 @@ class RelationResolver(object): @classmethod def from_reverse_dict(cls, rdict): - """ - Creates an instance from a dictionary mapping full names to lists of - aliases that should resolve to them. + """Create an instance from a dictionary. + + Mapping full names to lists of aliases that should resolve to them. :type rdict: Mapping[str, List[str]] :rtype: RelationResolver """ - adict = {} for full, aliases in six.iteritems(rdict): @@ -66,29 +62,27 @@ class RelationResolver(object): return cls(adict) def __init__(self, aliases): - """ + """Initialize RelationResolver. + :param aliases: Dictionary (or pairs sequence) mapping relation aliases to full names. :type aliases: Mapping[str, str] """ - self._aliases = dict(aliases) def add_alias(self, alias, fullname): - """ - Add a new alias to the dictionary: + """Add a new alias to the dictionary. :param str alias: The alias. :param str fullname: The name the alias will resolve to. 
""" - self._aliases[alias] = fullname def resolve_name(self, relname): - """ - Resolve a possible alias to a full name. If ``relname`` is not a known - alias, it's returned unchanged. + """Resolve a possible alias to a full name. + + If ``relname`` is not a known alias, it's returned unchanged. :param str relname: The relation name that may be an alias that needs to be resolved. @@ -97,14 +91,13 @@ class RelationResolver(object): to. :rtype: str """ - return self._aliases.get(relname, relname) def get_default_relation_resolver(): - """ - Create an instance of ``RelationResolver`` that loads a file with all - default relation name aliases. + """Create an instance of ``RelationResolver``. + + That loads a file with all default relation name aliases. The default aliases TSV file is located in ``plwn`` package root, as ``relation_aliases.tsv``. @@ -113,7 +106,6 @@ def get_default_relation_resolver(): first call. :rtype: RelationResolver """ - global _default_resolver_obj if _default_resolver_obj is None: diff --git a/plwn/storages/objects.py b/plwn/storages/objects.py index 37fa7f3cf7c6debf1ed4232458e21a0b2edb801d..9618a75d9bbea17f3b450c9fa6b0f3f754bfe897 100644 --- a/plwn/storages/objects.py +++ b/plwn/storages/objects.py @@ -1,5 +1,7 @@ -"""Implementation which stores data in plain python objects. Should be fairly -fast to construct, but querying and memory efficiencies may not be too great. +"""Implementation which stores data in plain python objects. + +Should be fairly fast to construct, but querying and memory +efficiencies may not be too great. """ from __future__ import absolute_import, absolute_import @@ -105,10 +107,10 @@ class PLWordNet(bases.PLWordNetBase): item_rel_dict[src_id] = irel_dict def __init__(self): - """**NOTE:** This constructor should not be invoked directly. Use one - of the standard methods: ``from_dump`` or ``from_reader``. - """ + """**NOTE:** This constructor should not be invoked directly. 
+ Use one of the standard methods: ``from_dump`` or ``from_reader``. + """ super(PLWordNet, self).__init__() # Remember the version for unpickling check @@ -362,10 +364,10 @@ class LexicalUnit(bases.LexicalUnitBase): emo_valuations, emo_ex1, emo_ex2): - """**NOTE:** This constructor should not be called directly. Use - :class:`PLWordNet` methods to obtain lexical units. - """ + """**NOTE:** This constructor should not be called directly. + Use :class:`PLWordNet` methods to obtain lexical units. + """ self._relr = get_default_relation_resolver() self._wn = wn @@ -476,10 +478,10 @@ class Synset(bases.SynsetBase): __slots__ = '_relr', '_wn', '_id', '_units', '_def' def __init__(self, wn, synid, unit_ids, def_): - """**NOTE:** This constructor should not be called directly. Use - :class:`PLWordNet` methods to obtain synsets. - """ + """**NOTE:** This constructor should not be called directly. + Use :class:`PLWordNet` methods to obtain synsets. + """ self._relr = get_default_relation_resolver() self._wn = wn diff --git a/plwn/storages/sqlite.py b/plwn/storages/sqlite.py index 17d0691673df46916e24da29090d8235eb52307a..4c01856e8dba214d5ead277419950d39aea361bd 100644 --- a/plwn/storages/sqlite.py +++ b/plwn/storages/sqlite.py @@ -1,5 +1,6 @@ -"""Implementation that stores data from plWordNet in a sqlite databse file, -with an impromptu schema. +"""Implementation that stores data from plWordNet in a sqlite database file. + +With an impromptu schema. """ from __future__ import absolute_import, division @@ -229,10 +230,11 @@ class PLWordNet(bases.PLWordNetBase): @staticmethod def _make_include_exclude(include, exclude): - """Creates ``WHERE`` clause and the parameter tuple for simple ``IN`` + """Build a ``WHERE`` clause from include/exclude lists. + + Creates ``WHERE`` clause and the parameter tuple for simple ``IN`` and ``NOT IN`` case. 
""" - if include is not None: whereclause = u"WHERE name IN ({})".format( u','.join(itt.repeat(u'?', len(include))) @@ -258,10 +260,10 @@ class PLWordNet(bases.PLWordNetBase): return whereclause, includetuple + excludetuple def __init__(self, db_file=None): - """**NOTE:** This constructor should not be invoked directly. Use one - of the standard methods: ``from_dump`` or ``from_reader``. - """ + """**NOTE:** This constructor should not be invoked directly. + Use one of the standard methods: ``from_dump`` or ``from_reader``. + """ super(PLWordNet, self).__init__() if db_file is None: @@ -555,10 +557,10 @@ class LexicalUnit(bases.LexicalUnitBase): _NO_VAL = object() def __init__(self, conn, id_, lemma, pos, variant, synid): - """**NOTE:** This constructor should not be called directly. Use - :class:`PLWordNet` methods to obtain lexical units. - """ + """**NOTE:** This constructor should not be called directly. + Use :class:`PLWordNet` methods to obtain lexical units. + """ self._relr = get_default_relation_resolver() self._db = conn @@ -825,10 +827,10 @@ class Synset(bases.SynsetBase): __slots__ = '_relr', '_db', '_id', '_units', '_def' def __init__(self, conn, syn_id): - """**NOTE:** This constructor should not be called directly. Use - :class:`PLWordNet` methods to obtain synsets. - """ + """**NOTE:** This constructor should not be called directly. + Use :class:`PLWordNet` methods to obtain synsets. 
+ """ self._relr = get_default_relation_resolver() self._db = conn @@ -1176,4 +1178,5 @@ class _DBBuilder(object): empties, ).close() + _this_storage_ = PLWordNet diff --git a/plwn/utils/graphmlout.py b/plwn/utils/graphmlout.py index 53ecac14bbc9b448e3a46fbfc3d4717bb34e96df..910b545ddf85ac99b6a6e9af23f2f0eb292bf546 100644 --- a/plwn/utils/graphmlout.py +++ b/plwn/utils/graphmlout.py @@ -1,3 +1,5 @@ +"""Implementation that stores data from plWordNet as a GraphML tree.""" + from __future__ import absolute_import, division try: str = unicode @@ -65,6 +67,7 @@ class GraphMLWordNet(object): ) def __init__(self): + """Initialize GraphMLWordNet.""" self._root = et.Element( u'graphml', # The commented out xmlns declaration is correct, but inserting @@ -82,8 +85,9 @@ class GraphMLWordNet(object): self._attr_types = {} def add_attribute_type(self, id_, name, type_, for_=u'node'): - """Adds an attribute which can be then assigned to node or edge - instances. + """Adds an attribute. + + Which can be then assigned to node or edge instances. :param str id_: Unique (in the whole XML) identifier of the attribute type. @@ -96,7 +100,6 @@ class GraphMLWordNet(object): :raises ValueError: If ``type_`` or ``for_`` were passed an illegal value. """ - if not isinstance(type_, self._DataType): raise ValueError('type_={!r}'.format(type_)) if for_ != u'node' and for_ != u'edge': @@ -130,7 +133,6 @@ class GraphMLWordNet(object): :raises KeyError: If any of the names in ``attributes`` was not previously defined. """ - node = et.SubElement( self._graph, u'node', @@ -157,7 +159,6 @@ class GraphMLWordNet(object): :raises KeyError: If any of the names in ``attributes`` was not previously defined. """ - edge = et.SubElement( self._graph, u'edge', @@ -171,7 +172,6 @@ class GraphMLWordNet(object): :param Union[str,TextIO] file_: Stream or name of the file to which the graph should be written. 
""" - self._tree.write(file_, 'utf-8') def _add_attributes_to(self, element, attributes): @@ -186,9 +186,11 @@ class GraphMLWordNet(object): class GraphMLBuilder(object): - """Class that bridges :class:`plwn.bases.PLWordNetBase` and - :class:`GraphMLWordNet`, extracting data from the former and putting it - into the latter in the appropriate format. + """Class that bridges. + + :class:`plwn.bases.PLWordNetBase` and :class:`GraphMLWordNet`, + extracting data from the former and putting it into the latter + in the appropriate format. This is an auxiliary class which usually shouldn't be constructed directly. Use an appropriate method from :class:`plwn.bases.PLWordNet`. @@ -199,7 +201,8 @@ class GraphMLBuilder(object): _EDGE_UNS_TEMPLATE = u'uns--{}--{}--{}' def __init__(self, plwn, gmlwn): - """ + """. + :param plwn: The plWordNet instance from which the data will be extracted. :type plwn: plwn.bases.PLWordNetBase @@ -207,7 +210,6 @@ class GraphMLBuilder(object): ``plwn``. :type gmlwn: GraphMLWordNet """ - self._plwn = plwn self._graphout = gmlwn @@ -236,7 +238,6 @@ class GraphMLBuilder(object): included_relations, excluded_relations): """See :meth:`plwn.bases.PLWordNetBase.to_graphml` for description.""" - added_attributes = ( self._add_synset_attrs(included_attributes, excluded_attributes) if (include_attributes or @@ -740,17 +741,16 @@ class GraphMLBuilder(object): @staticmethod def _check_include_exclude(item, include_set, exclude_set): - """``True`` if item is in include and not in exclude. If the set is - ``None``, the check for the set is ``True``. - """ + """``True`` if item is in include and not in exclude. + If the set is ``None``, the check for the set is ``True``. + """ return ((include_set is None or item in include_set) and (exclude_set is None or item not in exclude_set)) @staticmethod def _check_include_exclude_2(item1, item2, include_set, exclude_set): """Check for two items in include/exclude (ex. 
for edges).""" - return ((include_set is None or (item1 in include_set and item2 in include_set)) and (exclude_set is None or @@ -768,13 +768,15 @@ class GraphMLBuilder(object): class _AttrIncluder(object): - """ - Aux class for the repetitive "check if attribute should be included" -> + """Aux class for including attributes. + + Handles the repetitive "check if attribute should be included" -> "store it in all required places" cycle. """ def __init__(self, graphout, type_prefix, checkfunc): - """ + """Initialize _AttrIncluder. + :param GraphMLWordNet graphout: The output graph instance. :param str type_prefix: Unique names of attributes will be prefixed @@ -784,7 +786,6 @@ class _AttrIncluder(object): return ``True`` if it should be included and ``False`` otherwise. :type checkfunc: Callable[[str], bool] """ - self._graphout = graphout self._prefix = type_prefix self._check = checkfunc diff --git a/plwn/utils/sorting.py b/plwn/utils/sorting.py index 6f4a6878ce75ae1b547854e4da75bce8b4116449..bd37a82ec17a9f4fd5c995774399fc22daa70dbb 100644 --- a/plwn/utils/sorting.py +++ b/plwn/utils/sorting.py @@ -1,6 +1,4 @@ -""" -Sorting keys that provide locale-dependant alphabetical sorting. -""" +"""Sorting keys that provide locale-dependent alphabetical sorting.""" from __future__ import absolute_import, division diff --git a/plwn/utils/tupwrap.py b/plwn/utils/tupwrap.py index 49c2ca512967e785f59c92101b5c32f5980ce8cb..7e94abc3e65c9dc5cb1205e70fcbe81fd904e4db 100644 --- a/plwn/utils/tupwrap.py +++ b/plwn/utils/tupwrap.py @@ -1,6 +1,7 @@ -"""Wrapper for all functions that return generators, calling the wrapped -generator will wrap the contents in a tuple (as a faster, chaining way or -``tuple(generator)``). +"""Wrapper for all functions that return generators. + +Calling the wrapped generator will wrap the contents in a tuple +(as a faster, chaining way of ``tuple(generator)``). 
""" from __future__ import absolute_import, unicode_literals, division @@ -22,6 +23,7 @@ class TupWrapper(object): __slots__ = '_gen', def __init__(self, generator): + """Initialize TupWrapper.""" self._gen = generator def __iter__(self): diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000000000000000000000000000000000000..66929ec087524845e2f13bc3f48c2656aad11adc --- /dev/null +++ b/tox.ini @@ -0,0 +1,48 @@ +[tox] +envlist = pep8,docstyle +skipsdist = True + +[testenv:pep8] +deps = + flake8 +basepython = python3 +commands = + flake8 {posargs} + +[testenv:docstyle] +deps = + pydocstyle +basepython = python3 +commands = + pydocstyle --verbose {posargs} + +[flake8] +# W504 skipped because it is overeager and unnecessary +ignore = W504 +show-source = True +exclude = .git,.venv,.tox,dist,doc,*egg,build,venv +import-order-style = pep8 +max-line-length = 80 + + +[pydocstyle] +# D101 Missing docstring in public class +# D102 Missing docstring in public method +# D103 Missing docstring in public function +# D104 Missing docstring in public package +# D105 Missing docstring in magic method +# D203 1 blank line required before class docstring +# D213 Multi-line docstring summary should start at the second line +# D214 Section is over-indented +# D215 Section underline is over-indented +# D401 First line should be in imperative mood; try rephrasing +# D405 Section name should be properly capitalized +# D406 Section name should end with a newline +# D407 Missing dashed underline after section +# D408 Section underline should be in the line following the section’s name +# D409 Section underline should match the length of its name +# D410 Missing blank line after section +# D411 Missing blank line before section +ignore = D101,D102,D103,D104,D105,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411 +match-dir = ^(?!\.tox|venv).* +match = ^(?!setup).*\.py