Commit f492369b authored by leszeks

Initial commit

parent bf7d8539
Showing 2553 additions and 0 deletions
include README-pl-beta.txt
PKG-INFO 0 → 100644
Metadata-Version: 1.0
Name: PLWN_API
Version: 0.9
Summary: Python API to access plWordNet lexicon
Home-page: UNKNOWN
Author: Michał Kaliński
Author-email: michal.kalinski@pwr.edu.pl
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
Metadata-Version: 1.0
Name: PLWN-API
Version: 0.9
Summary: Python API to access plWordNet lexicon
Home-page: UNKNOWN
Author: Michał Kaliński
Author-email: michal.kalinski@pwr.edu.pl
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
MANIFEST.in
README-pl-beta.txt
setup.py
PLWN_API.egg-info/PKG-INFO
PLWN_API.egg-info/SOURCES.txt
PLWN_API.egg-info/dependency_links.txt
PLWN_API.egg-info/requires.txt
PLWN_API.egg-info/top_level.txt
plwn/__init__.py
plwn/_loading.py
plwn/bases.py
plwn/enums.py
plwn/exceptions.py
plwn/relation_aliases.tsv
plwn/relresolver.py
plwn/readers/__init__.py
plwn/readers/comments.py
plwn/readers/nodes.py
plwn/readers/ubylmf.py
plwn/readers/wndb.py
plwn/readers/wnxml.py
plwn/storages/__init__.py
plwn/storages/objects.py
plwn/storages/sqlite.py
plwn/utils/__init__.py
plwn/utils/graphmlout.py
plwn/utils/sorting.py
plwn/utils/tupwrap.py
\ No newline at end of file
six>=1.10
enum34>=1.1.2
plwn
******************
**** PlWN API ****
******************
PlWN API makes it possible to:
- search for synsets and lexical units in plWordNet;
- access the properties of synsets and lexical units, as well as their relations;
- export the whole of plWordNet, or a part of it, to a graph.
This README is a short, provisional guide to the beta version of PlWN API.
The interface and the functionality may change.
==============
Initialization
==============
>>> import plwn
>>> wn = plwn.load('plwn-3.0.db', 'sqlite3')
==============
Database dumps
==============
For now, the recommended way of accessing the plWordNet database is through
dumps to SQLite files, "plwn-X.db" (where X is the plWordNet version). The
dumps should have been made available together with the API.
=============
Functionality
=============
The functionality is described by the docstrings of the plwn/bases.py module:
$ pydoc plwn.bases
Additionally, plwn/_pos.py contains the list of constant part-of-speech
values used by the API.
Following the convention adopted by Python 3, most methods that return
collections of objects return them as generators.
>>> wn.lexical_units(lemma=u'pies')
TupWrapper(<generator object <genexpr> at 0x7f1048583410>)
If the goal of a query is just to iterate over the results, nothing more is needed.
>>> for lu in wn.lexical_units(lemma=u'pies'):
...     print(lu.id)
5563
52245
...
To obtain a list (or another collection), cast the returned object.
>>> list(wn.lexical_units(lemma=u'pies'))
[<LexicalUnit id=5563 lemma=u'pies' pos=u'noun' variant=1>,
<LexicalUnit id=52245 lemma=u'pies' pos=u'noun' variant=2>,
...
]
For convenience in Python's interactive mode, the generators are wrapped in
"TupWrapper" objects, which allow casting a generator to a tuple by
"calling" the object.
>>> wn.lexical_units(lemma=u'pies')()
(<LexicalUnit id=5563 lemma=u'pies' pos=u'noun' variant=1>,
<LexicalUnit id=52245 lemma=u'pies' pos=u'noun' variant=2>,
...
)
However, when writing programs against the API, explicit casting of the
returned generators is recommended. "Explicit is better than implicit."
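For example, a program might cast the results once and then work with the
list (a minimal sketch, using only the attributes shown above):
>>> units = list(wn.lexical_units(lemma=u'pies'))
>>> ids = [lu.id for lu in units]
>>> variants = [lu.variant for lu in units]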
from ._loading import *
from .enums import PoS
# Set up logging for the package: a NullHandler keeps it silent by default
import logging as _logging
_logging.getLogger('plwn').addHandler(_logging.NullHandler())
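# Sketch: applications that do want to see plwn's log messages can attach
# their own handler to the standard logging machinery, e.g.:
#     import logging
#     logging.getLogger('plwn').addHandler(logging.StreamHandler())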
"""Defines user-facing functions that allow simple construction of
:class:`PLWordnetBase` instances, with selected storages and readers.
"""
from __future__ import absolute_import, division, print_function
from collections import namedtuple
from importlib import import_module
import textwrap as tw
import six
__all__ = 'read', 'load', 'show_source_formats', 'show_storage_formats'
_Info = namedtuple('_Info', ('desc', 'modname'))
_READERS = {
'uby-lmf': _Info('Discontinued XML-based format', 'ubylmf'),
'database': _Info(
'MySQL database of plWordNet. Only works on python 2 and requires '
'certain additional libraries. This is meant for internal use only '
'and will not work for most users. The file should contain one line '
'with SQLAlchemy URL to the database.',
'wndb',
),
'xml': _Info('The official PLWN XML format', 'wnxml'),
}
_STORAGES = {
'sqlite3': _Info(
'File database format, with a compact schema (compared to internal '
'PLWN database).',
'sqlite',
),
'objects': _Info(
'Stores data in plain python objects, dumping them in pickle format. '
'Quick to construct, but querying and memory efficiency is not '
'guaranteed.',
'objects',
),
}
# Defaults for this version
_READERS[None] = _READERS['xml']
_STORAGES[None] = _STORAGES['sqlite3']
def _imp_reader(modname):
# Pre-import the root package - py3 needs this?
import plwn.readers # noqa
return import_module('.' + modname, 'plwn.readers')._this_reader_
def _imp_storage(modname):
# Pre-import the root package - py3 needs this?
import plwn.storages # noqa
return import_module('.' + modname, 'plwn.storages')._this_storage_
def read(source_file,
source_format=None,
storage_file=None,
storage_format=None):
"""Read plWordNet data from a file and return the right
:class:`PLWordnetBase` subclass instance for the selected parameters.
Where defaults are mentioned, those values may change with each minor
version of PLWN API. If you depend on a particular format in a long-running
program, state it explicitly.
:param str source_file: Path to the file from which the plWordNet data will
be read. The required contents of the file depend on selected
``source_format``.
:param str source_format: Name of the format of data that's contained in
``source_file``. If ``None``, then the default for the current version will
be chosen.
:param str storage_file: Path to the file where the internal representation
of the storage will be dumped. It will be possible to load this file using
:func:`load`. If ``None``, then the representation will not be dumped.
:param str storage_format: Name of the format in which PLWN API will store
data in memory. Access methods provided should be the same, but their
efficiency may differ. If ``None``, then the default for the current
version will be chosen.
:rtype: PLWordnetBase
"""
stor_cls = _imp_storage(_STORAGES[storage_format].modname)
rdr = _imp_reader(_READERS[source_format].modname)
return stor_cls.from_reader(rdr(source_file), storage_file)
def load(storage_file, storage_format=None):
"""Read plWordNet data from a cached file with internal PLWN API
representation.
This function is much faster than :func:`read` if such a file is available.
:param str storage_file: Path to the file from which the cached data will
be read.
:param str storage_format: Name of the format the data is stored in. It
must match the actual format and version of schema contained in the file.
:rtype: PLWordnetBase
"""
stor_cls = _imp_storage(_STORAGES[storage_format].modname)
return stor_cls.from_dump(storage_file)
def show_source_formats():
"""Print names and short descriptions of available source file formats to
``stdout``.
This function is primarily meant to be informative in interactive shell
mode.
"""
_show(_READERS)
def show_storage_formats():
"""Print names and short descriptions of available storage formats to
``stdout``.
This function is primarily meant to be informative in interactive shell
mode.
"""
_show(_STORAGES)
def _show(dict_):
for name, info in six.iteritems(dict_):
if name is None:
continue
print(name)
print('-' * len(name))
print(tw.fill(info.desc), end='\n\n')
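# Usage sketch (the file names are hypothetical): convert a source dump into
# a storage file once, then load the faster cached form afterwards.
#
#     import plwn
#     wn = plwn.read('plwn.xml', 'xml',
#                    storage_file='plwn.db', storage_format='sqlite3')
#     wn = plwn.load('plwn.db', 'sqlite3')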
# coding: utf8
"""
Enumerated values used in plWordNet
"""
from __future__ import absolute_import, division
import re
from enum import Enum
import six
__all__ = (
'PoS',
'VerbAspect',
'EmotionMarkedness',
'EmotionName',
'EmotionValuation',
'Domain',
'make_values_tuple',
)
# Helper function for making dictionaries that translate enum instances into
# the numbers used to denote them in the plWN database.
def _fill_numtrans(enumclass, num2enum, enum2num):
for num, enuminst in enumerate(enumclass, 1):
num2enum[num] = enuminst
enum2num[enuminst] = num
def _get_from_numtrans(numtrans, num, optional):
try:
return numtrans[num]
except KeyError:
if optional:
return None
raise
# Explicit ordering is needed only in python 2.
_POS_ORDER = 'verb noun adverb adjective'
_POS_NUM2ENUM = {}
_POS_ENUM2NUM = {}
class PoS(Enum):
"""
Defines **Part of Speech** values used by plWN.
"""
if six.PY2:
__order__ = _POS_ORDER
verb = u'verb'
noun = u'noun'
adverb = u'adverb'
adjective = u'adjective'
v = verb
n = noun
adv = adverb
adj = adjective
@staticmethod
def by_db_number(number, optional=False):
return _get_from_numtrans(_POS_NUM2ENUM, number, optional)
@property
def db_number(self):
return _POS_ENUM2NUM[self]
_fill_numtrans(PoS, _POS_NUM2ENUM, _POS_ENUM2NUM)
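# A quick sketch of the resulting numbering (it follows definition order,
# starting from 1):
#     PoS.by_db_number(1) is PoS.verb
#     PoS.noun.db_number == 2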
_VA_ORDER = 'perfective imperfective predicative two_aspect'
_VA_NUM2ENUM = {}
_VA_ENUM2NUM = {}
class VerbAspect(Enum):
"""
Defines aspect values used by verbs in plWN.
"""
if six.PY2:
__order__ = _VA_ORDER
perfective = u'perf'
imperfective = u'imperf'
predicative = u'pred'
two_aspect = u'imperf.perf'
perf = perfective
imperf = imperfective
pred = predicative
two = two_aspect
# Additionally, some Polish abbreviations
dk = perfective
ndk = imperfective
@staticmethod
def by_db_number(number, optional=False):
return _get_from_numtrans(_VA_NUM2ENUM, number, optional)
@property
def db_number(self):
return _VA_ENUM2NUM[self]
_fill_numtrans(VerbAspect, _VA_NUM2ENUM, _VA_ENUM2NUM)
class EmotionMarkedness(Enum):
"""
Defines markedness of emotions associated with some lexical units.
"""
strong_positive = u'+ m'
strong_negative = u'- m'
weak_positive = u'+ s'
weak_negative = u'- s'
ambiguous = u'amb'
plus_m = strong_positive
minus_m = strong_negative
plus_s = weak_positive
minus_s = weak_negative
amb = ambiguous
@classmethod
def normalized(cls, strvalue):
"""
Return an instance of this enum, with the string value normalized with
respect to whitespace.
"""
strvalue = strvalue.strip()
# Try the one value that doesn't require regex matching
if strvalue == cls.ambiguous.value:
return cls.ambiguous
match = re.match(r'([+-])\s*([sm])', strvalue, re.U)
if not match:
# This can't be a valid string, so let the built-in exception
# raise.
return cls(strvalue)
return cls(match.group(1) + u' ' + match.group(2))
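# For example (a sketch), these all normalize to enum members:
#     EmotionMarkedness.normalized(u'+ m')
#     EmotionMarkedness.normalized(u'+m')
#     EmotionMarkedness.normalized(u'- s ')
# The first two give strong_positive, the last one weak_negative.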
class EmotionName(Enum):
"""
Possible names of emotions associated with some lexical units.
"""
joy = u'radość'
trust = u'zaufanie'
anticipation = u'cieszenie się na coś oczekiwanego'
surprise = u'zaskoczenie czymś nieprzewidywanym'
sadness = u'smutek'
anger = u'złość'
fear = u'strach'
disgust = u'wstręt'
radosc = joy
zaufanie = trust
cieszenie_sie_na = anticipation
zaskoczenie = surprise
smutek = sadness
zlosc = anger
strach = fear
wstret = disgust
class EmotionValuation(Enum):
"""
Possible valuations of emotions associated with some lexical units.
"""
usefulness = u'użyteczność'
good = u'dobro'
truth = u'prawda'
knowledge = u'wiedza'
beauty = u'piękno'
happiness = u'szczęście'
uselessness = u'nieużyteczność'
harm = u'krzywda'
ignorance = u'niewiedza'
error = u'błąd'
ugliness = u'brzydota'
unhappiness = u'nieszczęście'
uzytecznosc = usefulness
dobro = good
prawda = truth
wiedza = knowledge
piekno = beauty
szczescie = happiness
nieuzytecznosc = uselessness
krzywda = harm
niewiedza = ignorance
blad = error
brzydota = ugliness
nieszczescie = unhappiness
_DOM_ORDER = 'bhp czy wytw cech czc umy por zdarz czuj jedz grp msc cel rz ' \
'os zj rsl pos prc il zw ksz st sbst czas zwz hig zmn cumy cpor wal ' \
'cjedz dtk cwytw cczuj ruch pst cpos sp cst pog jak rel odcz grad sys ' \
'adj adv mat cdystr caku cper cdel'
_DOM_NUM2ENUM = {}
_DOM_ENUM2NUM = {}
class Domain(Enum):
"""
Wordnet domains of lexical units.
"""
if six.PY2:
__order__ = _DOM_ORDER
bhp = u'najwyższe w hierarchii'
czy = u'czynności (nazwy)'
wytw = u'wytwory ludzkie (nazwy)'
cech = u'cechy ludzi i zwierząt'
czc = u'części ciała'
umy = u'związane z myśleniem'
por = u'związane z porozumiewaniem się'
zdarz = u'zdarzenia'
czuj = u'uczucia, odczucia i emocje'
jedz = u'jedzenie'
grp = u'grupy ludzi i rzeczy'
msc = u'miejsca i umiejscowienie'
cel = u'cel działania'
rz = u'obiekty naturalne'
os = u'ludzie'
zj = u'zjawiska naturalne'
rsl = u'nazwy roślin'
pos = u'posiadanie i jego zmiana'
prc = u'procesy naturalne'
il = u'ilość, liczebność, jednoski miary'
zw = u'zwierzęta'
ksz = u'kształty'
st = u'sytuacje statyczne (stany)'
sbst = u'substancje'
czas = u'czas i stosunki czasowe'
zwz = u'związek miedzy ludźmi, rzeczami lub ideami'
hig = u'pielęgnacja ciała'
zmn = u'zmiana wielkości, temeraturym natężenia, itp.'
cumy = u'czasowniki myślenia (szeroko rozumianego)'
cpor = u'czasowniki mówienia, śpiewania itp.'
wal = u'czasowniki rywalizacji fizycznej'
cjedz = u'czasowniki jedzenia'
dtk = u'czasowniki oznacz. kontakt fizyczny ' \
u'(dotykanie, uderzenie, rycie itp.)'
cwytw = u'czasowniki oznacz. wytwarzanie czegoś'
cczuj = u'czasowniki wyrażające uczucia'
ruch = u'czasowniki ruchu'
pst = u'czasowniki postrzegania (percepcji)'
cpos = u'czasowniki posiadania i zmiany posiadania'
sp = u'czasowniki oznacz. wydarzenie i działania społeczne i polityczne'
cst = u'czasowniki stanowe'
pog = u'czasowniki oznacz. zjawiska pogodowe'
jak = u'przymiotniki jakościowe'
rel = u'przymiotniki relacyjne (rzeczownikowe)'
odcz = u'przymiotniki odczasownikowe'
grad = u'przymiotniki odprzymiotnikowe (natężenie cechy)'
sys = u'systematyka, klasyfikacja'
adj = u'PWN: all adjective clusters'
adv = u'PWN: all adverbs'
mat = u'przymiotniki materiałowe'
cdystr = u'czasownki dystrybutywne'
caku = u'czasowniki akumulatywne'
cper = u'czasowniki perduratywne'
cdel = u'czasowniki delimitatywne'
@staticmethod
def by_db_number(number, optional=False):
return _get_from_numtrans(_DOM_NUM2ENUM, number, optional)
@property
def db_number(self):
return _DOM_ENUM2NUM[self]
_fill_numtrans(Domain, _DOM_NUM2ENUM, _DOM_ENUM2NUM)
def make_values_tuple(enum_seq):
"""
Auxiliary function that converts a sequence of enums to a tuple of enum
values.
"""
return tuple(en.value for en in enum_seq)
"""Custom exceptions raised by PLWN API."""
from __future__ import absolute_import, division
__all__ = (
'PLWNAPIException',
'NotFound',
'LexicalUnitNotFound',
'SynsetNotFound',
'ReaderException',
'MalformedIdentifierException',
'LoadException',
'DumpVersionException',
'InvalidSynsetIdentifierException',
'InvalidLexicalUnitIdentifierException',
'InvalidRelationNameException',
'InvalidPoSException',
)
class PLWNAPIException(Exception):
"""Base for all exceptions in the module."""
pass
class NotFound(PLWNAPIException):
"""Base for exceptions raised when an object is not found."""
def __init__(self, lemma, pos, variant, *args):
super(NotFound, self).__init__(*args)
self.args = ('lemma={!r} pos={!r} variant={!r}'.format(
lemma,
pos,
variant,
),) + self.args
class LexicalUnitNotFound(NotFound):
"""Raised when a lexical unit is not found during lookup."""
pass
class SynsetNotFound(NotFound):
"""Raised when a synset is not found during lookup."""
pass
class ReaderException(PLWNAPIException):
"""Raised when there's an error in the format expected by a reader."""
pass
class MalformedIdentifierException(ReaderException):
"""Raised during UBY-LMF parsing, when a malformed identifier is
encountered.
"""
def __init__(self, id_):
super(MalformedIdentifierException, self).__init__(
"Malformed identifier, expected digits at the end of the original"
" id instead got {!r}"
.format(id_)
)
class LoadException(PLWNAPIException):
"""Raised when a storage can't be loaded from file."""
pass
class DumpVersionException(LoadException):
"""Raised when a dumped storage has wrong version (suggesting incompatible
format).
"""
def __init__(self, version_is, version_required):
super(DumpVersionException, self).__init__(version_is,
version_required)
self.version_is = version_is
self.version_required = version_required
def __str__(self):
return (
'Invalid schema version of dumped storage: {!r} (should be {!r})'
.format(self.version_is, self.version_required)
)
class InvalidSynsetIdentifierException(PLWNAPIException):
"""Raised when a query for a nonexistent synset ID is made."""
pass
class InvalidLexicalUnitIdentifierException(PLWNAPIException):
"""Raised when a query for a nonexistent lexical unit ID is made."""
pass
class InvalidRelationNameException(PLWNAPIException):
"""Raised when attempting to select synsets or units related by a relation
that does not exist.
"""
pass
class InvalidPoSException(PLWNAPIException):
"""Raised when a query for PoS is made, which is not one of the valid
constants.
"""
pass
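# Usage sketch: all exceptions derive from PLWNAPIException, so callers can
# catch the whole family at once. The lookup call below is hypothetical.
#
#     try:
#         unit = wn.lexical_unit(u'pies', PoS.noun, 1)
#     except NotFound:
#         unit = None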
"""Parsing strings in wordnet comment format, for readers that need to deal
with them.
Importing this module introduces dependency on wncomments.
"""
from __future__ import absolute_import, division
from collections import namedtuple
import itertools as itt
import plwn_comments as plwnc
import plwn_comments.exceptions as plwnce
import plwn_comments.utils.usage_tags as plwncu
__all__ = (
'WN_TAGS',
'NON_EXAMPLE_TAG_NAMES',
'CommentData',
'parse_comment_string',
)
#: :class:`plwn_comments.TagBank` structure that defines all kinds of comment
#: tags which are needed by PLWN API.
WN_TAGS = plwnc.TagBank()
# Usage notes
WN_TAGS.define(u'K')
# External links
WN_TAGS.define(u'L', u'{')
# Definition
WN_TAGS.define(u'D')
#: The distinction for these tags is useful, since all examples go to one
#: place.
NON_EXAMPLE_TAG_NAMES = frozenset((u'K', u'L', u'D'))
# And define those example tags
WN_TAGS.define_from(
plwncu.iter_usage_tags(),
plwncu.DEFAULT_USAGE_TAG_SURROUND,
)
#: Data tuple returned from :func:`parse_comment_string`.
CommentData = namedtuple(
'CommentData',
('examples', 'examples_sources', 'definition', 'usage', 'links'),
)
def parse_comment_string(cmt_str):
"""Parse a comment string and extract all data required by PLWN API packed
in a named tuple.
:param str cmt_str: String in PLWN comment format.
:returns: Extracted and ordered items needed by PLWN API.
:rtype: CommentData
"""
try:
cmt = plwnc.Comment.parse(cmt_str, WN_TAGS)
except plwnce.PLWNCommentsException:
# For now just make an empty comment which will make all fields unset
cmt = plwnc.Comment(WN_TAGS)
# Get all examples
examples = []
examples_src = []
for tagname, tagcontents in cmt.items():
if tagname not in NON_EXAMPLE_TAG_NAMES:
examples.extend(tagcontents)
examples_src.extend(itt.repeat(tagname, len(tagcontents)))
return CommentData(
tuple(examples),
tuple(examples_src),
cmt.get_first(u'D'),
tuple(cmt[u'K']),
tuple(cmt[u'L']),
)
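# Sketch of the returned CommentData fields (cmt is a hypothetical comment
# string in the plWordNet format):
#
#     data = parse_comment_string(cmt)
#     data.definition        # first D tag: the definition
#     data.usage             # tuple of K tags: usage notes
#     data.links             # tuple of L tags: external links
#     data.examples          # tuple of texts from all example tags
#     data.examples_sources  # parallel tuple of source tag names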
"""Those tuples are returned by readers and absorbed by storages."""
from collections import namedtuple
__all__ = 'SynsetNode', 'LexicalUnitNode'
SynsetNode = namedtuple("SynsetNode", ["id", "definition", "related"])
LexicalUnitNode = namedtuple(
"LexicalUnitNode",
["id", "lemma", "pos", "variant", "synset", "unit_index", "definition",
"usage_notes", "external_links", "examples", "examples_sources",
"domain", "related", "verb_aspect", "emotion_markedness", "emotion_names",
"emotion_valuations", "emotion_example_1", "emotion_example_2"]
)
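# A minimal sketch of a reader-produced record; the values are made up.
# Entries of "related" are (relation name, target id) pairs.
#
#     SynsetNode(id=1, definition=u'', related=((u'hiponimia', 2),))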
# FIXME Some assert statements should be converted to regular raises (asserts
# should not be used for anything other than checking for errors in the code
# itself).
from xml.etree import ElementTree
import re
import logging
from .nodes import SynsetNode, LexicalUnitNode
from .. import exceptions as exc
from ..enums import PoS, Domain
__all__ = 'ubylmf_reader',
ENCODING = 'utf-8'
_logger = logging.getLogger(__name__)
def ubylmf_reader(ubylmf_file):
"""Read PLwordnet iteratively, element by element.
:param ubylmf_file: the name of UMY-LMF file or an opened file itself.
:type ubylmf_file: str or file
:return: a generator over PLwordnet entities.
:rtype: generator
"""
context = ElementTree.iterparse(ubylmf_file) # catch only end events
context = iter(context)
# Get root elem in order to clear it after reading each elem
try:
_, root = next(context)
except StopIteration:
raise exc.ReaderException('The xml file is empty')
# Generate wordnet's elements
for _, elem in context:
entities = []
# Parse entities
if elem.tag == "LexicalEntry":
for xml_sense in elem.findall("Sense"):
# Don't move this before the if - we still want to generate tuples
# even if one sense is broken.
try:
entities.append(_make_lexicalunit(elem, xml_sense))
except Exception:
_logger.exception(
'\n%s\nIN ELEMENT\n%s',
ElementTree.tostring(xml_sense, ENCODING),
ElementTree.tostring(elem, ENCODING)
)
elif elem.tag == "Synset":
try:
entities.append(_make_synset(elem))
except Exception:
_logger.exception('\n%s', ElementTree.tostring(elem, ENCODING))
# Return entities
if entities:
root.clear()
for entity in entities:
yield entity
def _make_lexicalunit(xml_lexicalentry, xml_sense):
"""Return a lexical unit built from an xml element.
:param xml_lexicalentry: an xml element of LexicalUnit read from a file.
:type xml_lexicalentry: xml.etree.ElementTree.Element
:param xml_sense: <Sense> element that belongs to the LexicalUnit
:type xml_sense: xml.etree.ElementTree.Element
:return: a named tuple LexicalUnitNode
:rtype: LexicalUnitNode
"""
# Get id, synset and variant
lu_id = _extract_id(xml_sense.get("id"))
lu_synset = _extract_id(xml_sense.get("synset"))
lu_variant = int(xml_sense.get("index"))
# Get lemma
xml_lemma = xml_lexicalentry.find("Lemma").find("FormRepresentation")
lu_lemma = xml_lemma.get("writtenForm")
assert lu_lemma, "Lemma is empty"
# Get PoS
lu_pos = xml_lexicalentry.get("partOfSpeech")
assert lu_pos, "PoS is empty"
# Get definition - can be empty! At most 2
lu_definition, lu_usage_notes, lu_external_links = \
_extract_definitions(xml_sense)
# Get usage examples
lu_examples = []
lu_examples_sources = []
for xe in xml_sense.findall("SenseExample"):
example = xe.find("TextRepresentation").get("writtenText").strip()
if example:
exm_src_match = re.search(r'\[##([-\w]+):?\]$', example, re.U)
if exm_src_match is not None:
lu_examples.append(example[:exm_src_match.start(0)])
lu_examples_sources.append(exm_src_match.group(1))
else:
_logger.warning("Malformed sense example: %s", example)
# Get semantic labels
lu_domain = _get_domain(xml_sense)
# Get related
lu_related = []
for xsr in xml_sense.findall("SenseRelation"):
try:
lu_related.append(
(xsr.get("relName"), _extract_id(xsr.get("target")))
)
except exc.MalformedIdentifierException:
_logger.exception(
'\n%s\nIN ELEMENT\n%s\nThis relation is skipped.',
ElementTree.tostring(xsr, ENCODING),
ElementTree.tostring(xml_sense, ENCODING)
)
# Get unit index
lu_unit_index = int(_extract_id(
xml_sense.find("MonolingualExternalRef").get("externalReference"))
)
return LexicalUnitNode(
id=lu_id,
lemma=lu_lemma,
pos=PoS(lu_pos),
synset=lu_synset,
variant=lu_variant,
unit_index=lu_unit_index,
definition=lu_definition,
usage_notes=tuple(lu_usage_notes),
external_links=tuple(lu_external_links),
examples=tuple(lu_examples),
examples_sources=tuple(lu_examples_sources),
# The domain label is in format <pos>.<lang>_<name>; the last one is
# the only one we care about.
domain=Domain[lu_domain.rsplit('_', 1)[-1]],
related=tuple(lu_related),
# The below properties are never stored in uby files (at present at
# least).
verb_aspect=None,
emotion_markedness=None,
emotion_names=(),
emotion_valuations=(),
emotion_example_1=None,
emotion_example_2=None,
)
def _extract_definitions(xml_sense):
"""Extract a definition, notes and links of a LU from <Definition> tags.
:param xml_sense: <Sense> element read from an xml file.
:type xml_sense: xml.etree.ElementTree.Element
:return: the definition, usage notes and external links of the LU.
:rtype: (str or unicode, tuple, tuple)
:raises AssertionError: if there are more than 2 <Definition> tags.
"""
# Get definition - can be empty! At most 2
xml_definitions = xml_sense.findall("Definition")
lu_definition = ""
lu_usage_notes = []
lu_external_links = []
assert len(xml_definitions) <= 2, \
"Too many definitions ({:})".format(len(xml_definitions))
# There is at least one <Definition>
if xml_definitions:
children = list(xml_definitions[0])
# Check whether the first child is the real definition
if children[0].tag == "TextRepresentation":
lu_definition = children[0].get("writtenText")
# <Statement> - the rest of children
children = list(xml_definitions[1]) \
if len(xml_definitions) == 2 else []
# Get additional info
for child in children:
if child.get("statementType", "") == "usageNote":
lu_usage_notes.append(
child.find("TextRepresentation").get("writtenText")
)
if child.get("statementType", "") == "externalReference":
lu_external_links.append(
child.find("TextRepresentation").get("writtenText")
)
return lu_definition, lu_usage_notes, lu_external_links
def _get_domain(xml_sense):
"""Extract a domain of a LU from <SemanticLabel> tags.
:param xml_sense: <Sense> element read from an xml file.
:type xml_sense: xml.etree.ElementTree.Element
:return: the domain of the LU.
:rtype: str or unicode
:raises AssertionError: if there is more than one <SemanticLabel> tag,
no tag at all, or its type is different from domain.
"""
xml_semantic_labels = xml_sense.findall("SemanticLabel")
assert len(xml_semantic_labels) == 1, \
"{:} SemanticLabel found, should be 1".format(len(xml_semantic_labels))
assert xml_semantic_labels[0].get("type", "") == "domain", \
("SemanticLabel has type {:} instead of domain"
"").format(xml_semantic_labels[0].get("type").encode(ENCODING))
return xml_semantic_labels[0].get("label")
def _make_synset(xml_synset):
"""Return a synset built from an xml element.
:param xml_synset: an xml element of Synset read from a file.
:type xml_synset: xml.etree.Element
:return: a named tuple SynsetNode
:rtype: SynsetNode
"""
s_id = _extract_id(xml_synset.get("id"))
xml_def = xml_synset.find("Definition")
s_def = xml_def.find("TextRepresentation").get("writtenText") \
if xml_def is not None else ""
s_related = []
for xsr in xml_synset.findall("SynsetRelation"):
try:
s_related.append(
(xsr.get("relName"), _extract_id(xsr.get("target")))
)
except exc.MalformedIdentifierException:
_logger.exception(
'\n%s\nIN ELEMENT\n%s\nThis relation is skipped.',
ElementTree.tostring(xsr, ENCODING),
ElementTree.tostring(xml_synset, ENCODING)
)
return SynsetNode(
id=s_id,
definition=s_def,
related=tuple(s_related)
)
def _extract_id(full_id):
"""Extract only numerical identifier from the end of a full id.
:param full_id: a full identifier that has a prefix before the real id.
:type full_id: str|unicode
:return: a real, numerical id.
:rtype: int
:raises MalformedIdentifierException: if the original id doesn't end with
digits.
"""
try:
return int(re.findall(r"\d+$", full_id)[0])
except IndexError:
raise exc.MalformedIdentifierException(full_id)
_this_reader_ = ubylmf_reader
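# A sketch of _extract_id behaviour (the prefixes are made up):
#
#     _extract_id(u'plWN_Sense_12345')   -> 12345
#     _extract_id(u'no-trailing-digits') -> raises MalformedIdentifierException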
# coding: utf8
from __future__ import absolute_import, division
import collections as coll
import contextlib as ctxl
import logging
import sqlalchemy as sa
from .nodes import SynsetNode, LexicalUnitNode
from .comments import parse_comment_string
from ..enums import (
PoS,
VerbAspect,
EmotionMarkedness,
EmotionName,
EmotionValuation,
Domain,
)
from ..utils.sorting import text_key
__all__ = 'wndb_reader',
_log = logging.getLogger(__name__)
_EmotionData = coll.namedtuple(
'_EmotionData',
('mark', 'names', 'valuations', 'example_1', 'example_2'),
)
def wndb_reader(wordnet_db_url):
"""Generate UBY-LMF format compatible records directly from plWordNet
database.
sqlalchemy is required for this method to work.
:param str wordnet_db_url: URL in sqlalchemy format, pointing to a
plWordNet database.
:return: a generator over plWordNet entities.
:rtype: generator
"""
db_eng = sa.create_engine(wordnet_db_url)
db_meta = sa.MetaData(db_eng)
visited_synsets = set()
nonexistent_synsets = set()
# Define required tables
dbt_synset = sa.Table(u'synset', db_meta, autoload=True)
dbt_synrel = sa.Table(u'synsetrelation', db_meta, autoload=True)
dbt_reltype = sa.Table(u'relationtype', db_meta, autoload=True)
dbt_lexunit = sa.Table(u'lexicalunit', db_meta, autoload=True)
dbt_lexrel = sa.Table(u'lexicalrelation', db_meta, autoload=True)
dbt_uns = sa.Table(u'unitandsynset', db_meta, autoload=True)
dbt_emo = sa.Table(u'emotion', db_meta, autoload=True)
q = sa.select((
dbt_lexunit.c.ID,
dbt_lexunit.c.lemma,
dbt_lexunit.c.pos,
dbt_lexunit.c.variant,
dbt_uns.c.SYN_ID,
dbt_uns.c.unitindex,
dbt_lexunit.c.domain,
dbt_lexunit.c.comment,
dbt_lexunit.c.verb_aspect,
)).select_from(
dbt_lexunit.join(
dbt_uns,
dbt_uns.c.LEX_ID == dbt_lexunit.c.ID,
)
).where(dbt_lexunit.c.pos.between(1, 4))
with ctxl.closing(db_eng.execute(q)) as result:
for lexid, lemma, pos, variant, synid, uidx, domain, comment,\
verb_aspect in result:
if synid in nonexistent_synsets:
continue
# Select all relation children of the unit
q = sa.select(
(dbt_lexrel.c.CHILD_ID, dbt_reltype.c.name)
).select_from(
dbt_lexrel.join(
dbt_reltype,
dbt_reltype.c.ID == dbt_lexrel.c.REL_ID,
)
).where(dbt_lexrel.c.PARENT_ID == lexid)
with ctxl.closing(db_eng.execute(q)) as lex_rel_result:
# Ensure relations targets exist
lex_related = []
for lex_child_id, lex_rel_name in lex_rel_result:
q = sa.select((
sa.exists().select_from(
# This join ensures the unit belongs to
# some synset.
dbt_lexunit.join(
dbt_uns,
dbt_uns.c.LEX_ID == dbt_lexunit.c.ID,
)
).where(sa.and_(
dbt_lexunit.c.ID == lex_child_id,
dbt_lexunit.c.pos.between(1, 4),
)),
))
if db_eng.execute(q).scalar():
lex_related.append((lex_rel_name, lex_child_id))
# Now, select the unit's synset, but only once
if synid not in visited_synsets:
visited_synsets.add(synid)
q = sa.select(
(dbt_synset.c.ID, dbt_synset.c.definition)
).where(dbt_synset.c.ID == synid)
synrow = db_eng.execute(q).first()
if synrow is None:
nonexistent_synsets.add(synid)
continue
# Select all relation children of the synset
q = sa.select(
(dbt_synrel.c.CHILD_ID, dbt_reltype.c.name)
).select_from(
dbt_synrel.join(
dbt_reltype,
dbt_reltype.c.ID == dbt_synrel.c.REL_ID,
)
).where(dbt_synrel.c.PARENT_ID == synid)
with ctxl.closing(db_eng.execute(q)) as syn_rel_result:
syn_related = []
for syn_child_id, syn_rel_name in syn_rel_result:
# Ensure the child exists
q = sa.select((
sa.exists().select_from(
dbt_synset.join(
dbt_uns,
dbt_uns.c.SYN_ID == dbt_synset.c.ID,
).join(
dbt_lexunit,
dbt_lexunit.c.ID == dbt_uns.c.LEX_ID,
)
).where(sa.and_(
dbt_synset.c.ID == syn_child_id,
dbt_lexunit.c.pos.between(1, 4),
)),
))
if db_eng.execute(q).scalar():
syn_related.append((syn_rel_name, syn_child_id))
yield SynsetNode(
synid,
synrow[1] if synrow[1] is not None else u'',
tuple(syn_related),
)
# Try getting emotion annotations for the unit
emo_data = _extract_emotion_data(db_eng, dbt_emo, lexid)
# Now, parse the comment string to get some last pieces of data
cmt_data = parse_comment_string(comment
if comment is not None
else u'')
yield LexicalUnitNode(
id=lexid,
lemma=lemma,
pos=PoS.by_db_number(pos),
variant=variant,
synset=synid,
unit_index=uidx,
definition=cmt_data.definition,
usage_notes=cmt_data.usage,
external_links=cmt_data.links,
examples=cmt_data.examples,
examples_sources=cmt_data.examples_sources,
# XXX Since domains are defined as strings, the int is cast
# to unicode. It's possible, in the future to add a
# translation dict to textual representations.
domain=Domain.by_db_number(domain),
related=tuple(lex_related),
verb_aspect=VerbAspect.by_db_number(verb_aspect, True),
emotion_markedness=EmotionMarkedness.normalized(emo_data.mark)
if emo_data.mark is not None else None,
emotion_names=_make_enum_tuple(
EmotionName,
sorted(emo_data.names, key=text_key),
),
emotion_valuations=_make_enum_tuple(
EmotionValuation,
sorted(emo_data.valuations, key=text_key),
),
emotion_example_1=emo_data.example_1,
emotion_example_2=emo_data.example_2,
)
def _extract_emotion_data(db_eng, db_t_emo, unit_id):
q_emo = sa.select((
db_t_emo.c.markedness, # XXX Typo in schema
db_t_emo.c.emotions,
db_t_emo.c.valuations,
db_t_emo.c.example1,
db_t_emo.c.example2,
db_t_emo.c.unitStatus,
)).where(db_t_emo.c.lexicalunit_id == unit_id).order_by(
# "super_anotation" is a boolean 0 or 1, so descending sort will put
# the super annotation first.
db_t_emo.c.super_anotation.desc() # XXX Typo in schema
)
mark = None
names = set()
valuations = set()
example_1 = None
example_2 = None
with ctxl.closing(db_eng.execute(q_emo)) as result:
for row in result:
if not row[db_t_emo.c.unitStatus]:
return _EmotionData(
mark=None,
names=(),
valuations=(),
example_1=None,
example_2=None,
)
if mark is None:
mark = row[db_t_emo.c.markedness]
if example_1 is None:
example_1 = row[db_t_emo.c.example1]
if example_2 is None:
example_2 = row[db_t_emo.c.example2]
row_names = row[db_t_emo.c.emotions]
if row_names is not None:
names.update(
word.strip()
for word in row_names.split(u';')
)
row_valuations = row[db_t_emo.c.valuations]
if row_valuations is not None:
valuations.update(
word.strip()
for word in row_valuations.split(u';')
)
return _EmotionData(
mark=mark,
names=names,
valuations=valuations,
example_1=example_1,
example_2=example_2,
)
def _make_enum_tuple(enumtype, source):
result = []
for item in source:
try:
val = enumtype(item)
except ValueError:
_log.warning('Omitting bad value %r of enum %r', item, enumtype)
else:
result.append(val)
return tuple(result)
_this_reader_ = wndb_reader
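# Usage sketch (the database URL is hypothetical; normally _loading plumbs
# this reader into a storage):
#
#     for node in wndb_reader(u'mysql://user:password@host/plwordnet'):
#         ...  # interleaved SynsetNode and LexicalUnitNode records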
# coding: utf8
from __future__ import absolute_import, division
from collections import defaultdict
import itertools as itt
import logging
import xml.etree.ElementTree as et
import six
from .comments import parse_comment_string
from .nodes import SynsetNode, LexicalUnitNode
from ..enums import PoS, Domain
__all__ = 'wnxml_reader',
_log = logging.getLogger(__name__)
_POSES = {
u'rzeczownik': PoS.n,
u'czasownik': PoS.v,
u'przymiotnik': PoS.adj,
u'przysłówek': PoS.adv,
}
# Since etree may return either unicode or byte strings, all strings returned
# by its interfaces are wrapped with six.text_type
def wnxml_reader(wnxml_file):
"""Generate plWordNet records from the official XML file.
:param str wnxml_file: Path to the plWordNet XML file to read from.
:return: a generator over plWordNet entities.
:rtype: generator
"""
# The regrettably huge global storage for yielding
synsets = {}
lexunits = {}
synid_n_lexids = []
reltypes_syn = {}
reltypes_lex = {}
# These need defaults to add instances to parent syn / lex
synrels = defaultdict(list)
lexrels = defaultdict(list)
# Now, parse everything
for _, elem in et.iterparse(wnxml_file):
if elem.tag == u'lexical-unit':
_make_lexunit(elem, lexunits)
elif elem.tag == u'synset':
_make_synset(elem, synsets, synid_n_lexids)
elif elem.tag == u'relationtypes':
_make_reltype(elem, reltypes_syn, reltypes_lex)
elif elem.tag == u'synsetrelations':
_make_rel(elem, synrels)
elif elem.tag == u'lexicalrelations':
_make_rel(elem, lexrels)
# Finalize units to synsets mapping
_make_units2synsets(lexunits, synid_n_lexids)
# Now complete synsets and lexunits with relations and yield
for node in itt.chain(
_make_gen(synsets, synrels, reltypes_syn),
_filter_nosynset(_make_gen(lexunits, lexrels, reltypes_lex)),
):
yield node
_this_reader_ = wnxml_reader
def _make_lexunit(lu_node, lu_dict):
# Only words with Polish PoSes will be remembered
xmlpos = six.text_type(lu_node.get(u'pos'))
if xmlpos not in _POSES:
return
lu_id = int(lu_node.get(u'id'))
cmt_data = parse_comment_string(six.text_type(lu_node.get(u'desc')))
# Create a temporary object which will be filled in later
lu_dict[lu_id] = LexicalUnitNode(
id=lu_id,
lemma=six.text_type(lu_node.get(u'name')),
pos=_POSES[xmlpos],
variant=int(lu_node.get(u'variant')),
synset=None,
unit_index=None,
definition=cmt_data.definition,
usage_notes=cmt_data.usage,
external_links=cmt_data.links,
examples=cmt_data.examples,
examples_sources=cmt_data.examples_sources,
domain=Domain[lu_node.get(u'domain')],
related=None,
# The below properties are not stored in wnxml (at least at present)
verb_aspect=None,
emotion_markedness=None,
emotion_names=(),
emotion_valuations=(),
emotion_example_1=None,
emotion_example_2=None,
)
def _make_synset(syn_node, syn_dict, snu_list):
# Only take non-abstract synsets
if six.text_type(syn_node.get(u'abstract')) != u'false':
return
synid = int(syn_node.get(u'id'))
# Assign lexical units to synsets they belong to.
snu_list.append((synid, [int(uid_node.text)
for uid_node in syn_node.iter(u'unit-id')]))
# As with lexunits, the related field is not yet filled
syn_dict[synid] = SynsetNode(
synid,
six.text_type(syn_node.get(u'definition')),
None,
)
def _make_units2synsets(lu_dict, snu_list):
for synid, lexids in snu_list:
for uidx, uid in enumerate(lexids):
try:
lu = lu_dict[uid]
except KeyError:
_log.warning(
'Unit %d from synset %d does not exist',
uid,
synid,
)
else:
lu_dict[uid] = lu._replace(synset=synid, unit_index=uidx)
# Relation types are referred to by descriptive names
_RELTYPE_SYN = u'relacja pomiędzy synsetami'
_RELTYPE_LEX = u'relacja leksykalna'
def _make_reltype(reltype_node, synreltype_dict, lureltype_dict):
relid = int(reltype_node.get(u'id'))
typestr = reltype_node.get(u'type')
if typestr == _RELTYPE_SYN:
the_dict = synreltype_dict
elif typestr == _RELTYPE_LEX:
the_dict = lureltype_dict
else:
# There is one more relation type, synonymy, but it's artificial
return
# Remember the name, so that it will be inserted into the reltype storages
the_dict[relid] = six.text_type(reltype_node.get(u'name'))
# Relations are put into dicts indexed by parent IDs, to be later put into
# nodes. One function can handle both types.
def _make_rel(node, reldict):
# Get reltype - drop if unknown
reldict[int(node.get(u'parent'))].append((
int(node.get(u'child')),
# Reltypes should be returned by names, not IDs
int(node.get(u'relation')),
))
# As with relations, yielding is generic for synsets / lexunits.
# Related IDs need to be added, and unknown ones purged.
def _make_gen(node_dict, rels_dict, reltype_dict):
for node in six.itervalues(node_dict):
related = []
for child_id, rel_id in rels_dict.get(node.id, ()):
try:
relname = reltype_dict[rel_id]
except KeyError:
_log.warning(
'Unknown relation %d (of %s), from %d to %d',
rel_id,
node.__class__.__name__,
node.id,
child_id,
)
continue
# Only remember from the related dict the items whose IDs are in
# the node dict; related pairs are (relation name, target id).
if child_id in node_dict:
related.append((relname, child_id))
yield node._replace(related=related)
# Addendum to _make_gen for lexical units to filter synsetless ones
def _filter_nosynset(lu_node_gen):
for lu_node in lu_node_gen:
if lu_node.synset is None:
_log.warning('Unit %d belongs to no synset', lu_node.id)
else:
yield lu_node
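# Usage sketch (the file name is hypothetical):
#
#     for node in wnxml_reader('plwn_dump.xml'):
#         ...  # SynsetNode records first, then LexicalUnitNode records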