diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000000000000000000000000000000000000..fa46a48d7d3e6cd3ac080a146b9a0f160794c2c9 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,16 @@ +root = true + +[*] +indent_style = space +indent_size = 4 + +max_line_length = 79 + +end_of_line = lf +charset = utf-8 + +trim_trailing_whitespace = true +insert_final_newline = true + +[*.rst] +indent_size = 2 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..a2ed4fe904f3d3e7bb840b9f01ad04b346263721 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +*.swp + +# Sometimes backup files are used +*.bak + +*.pyc +__pycache__/ +build/ +dist/ +*.egg-info + +*.mo diff --git a/COPYING b/COPYING new file mode 100644 index 0000000000000000000000000000000000000000..9cecc1d4669ee8af2ca727a5d8cde10cd8b2d7cc --- /dev/null +++ b/COPYING @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. 
This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. 
+ + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. 
If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). 
To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. 
+ + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + {one line to give the program's name and a brief idea of what it does.} + Copyright (C) {year} {name of author} + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + {project} Copyright (C) {year} {fullname} + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>. diff --git a/COPYING.LESSER b/COPYING.LESSER new file mode 100644 index 0000000000000000000000000000000000000000..65c5ca88a67c30becee01c5a8816d964b03862f9 --- /dev/null +++ b/COPYING.LESSER @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. 
+ + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. 
+ + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
diff --git a/LICENSE-PWN.txt b/LICENSE-PWN.txt new file mode 100644 index 0000000000000000000000000000000000000000..ece943d72a38f425a71cd1e887a113625cab36bd --- /dev/null +++ b/LICENSE-PWN.txt @@ -0,0 +1,26 @@ +This software and database is being provided to you, the LICENSEE, +by Princeton University under the following license. By obtaining, using +and/or copying this software and database, you agree that you have read, +understood, and will comply with these terms and conditions.: + +Permission to use, copy, modify and distribute this software and database and +its documentation for any purpose and without fee or royalty is hereby granted, +provided that you agree to comply with the following copyright notice and +statements, including the disclaimer, and that the same appear on ALL copies +of the software, database and documentation, including modifications that you +make for internal use or for distribution. + +WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved. + +THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON UNIVERSITY MAKES +NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, +BUT NOT LIMITATION, PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES +OF MERCHANT- ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF +THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT INFRINGE ANY +THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. + +The name of Princeton University or Princeton may not be used in advertising or +publicity pertaining to distribution of the software and/or database. +Title to copyright in this software, database and any associated documentation +shall at all times remain with Princeton University and LICENSEE agrees +to preserve same. diff --git a/LICENSE-plWN.txt b/LICENSE-plWN.txt new file mode 100644 index 0000000000000000000000000000000000000000..d43c374a371b4a4048f3f87d9223fe5343c6c0c8 --- /dev/null +++ b/LICENSE-plWN.txt @@ -0,0 +1,29 @@ +This software and database are being provided to you, the LICENSEE, +by Wrocław University of Technology under the following license. By obtaining, +using and/or copying this software and database, you agree that you have read, +understood, and will comply with these terms and conditions: + +Permission to use, copy, modify, distribute, and public expose this software +and database (including data getting) and its documentation for any purpose and +without fee or royalty is hereby granted, provided that you agree to comply +with the following copyright notice and statements, including the disclaimer, +and that the same appear on ALL copies of the software, database and +documentation, including modifications which you make for internal use or for +wider distribution. + +plWordNet 3.0 © 2016 by Wrocław University of Technology. All rights reserved. + +THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND WROCŁAW UNIVERSITY OF +TECHNOLOGY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY +OF EXAMPLE, BUT NOT LIMITATION, WROCŁAW UNIVERSITY OF TECHNOLOGY MAKES NO +REPRESENTATIONS OR WARRANTIES OF MERCHANT-ABILITY OR MERCHANT-FITNESS FOR ANY +PARTICULAR PURPOSE; NOR DOES WROCŁAW UNIVERSITY OF TECHNOLOGY MAKE ANY +REPRESENTATIONS OR WARRANTIES THAT THE USE OF THE LICENSED SOFTWARE, DATABASE +OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, +TRADEMARKS OR OTHER RIGHTS.
+ +The name of Wrocław University of Technology may not be used in advertising or +publicity pertaining to distribution of the software, the database or both. +Title to copyright in this software, database and any associated documentation +shall at all times remain with Wrocław University of Technology, and LICENSEE +agrees to preserve this copyright. diff --git a/MANIFEST.in b/MANIFEST.in index f696a4d46332c4de5cab5879241d87a50e46a18c..4ca82463a88e84208d6e89571736f6aaea665c70 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,4 @@ -include README-pl-beta.txt +include COPYING +include COPYING.LESSER +include LICENSE-plWN.txt +include LICENSE-PWN.txt diff --git a/PKG-INFO b/PKG-INFO deleted file mode 100644 index 0598421f994dc94d19923aae90f5c83fc1bd390a..0000000000000000000000000000000000000000 --- a/PKG-INFO +++ /dev/null @@ -1,10 +0,0 @@ -Metadata-Version: 1.0 -Name: PLWN_API -Version: 0.9 -Summary: Python API to access plWordNet lexicon -Home-page: UNKNOWN -Author: Michał Kaliński -Author-email: michal.kalinski@pwr.edu.pl -License: UNKNOWN -Description: UNKNOWN -Platform: UNKNOWN diff --git a/PLWN_API.egg-info/PKG-INFO b/PLWN_API.egg-info/PKG-INFO deleted file mode 100644 index c0d5f643e3b319e5a9d96b0858b954b60441156f..0000000000000000000000000000000000000000 --- a/PLWN_API.egg-info/PKG-INFO +++ /dev/null @@ -1,10 +0,0 @@ -Metadata-Version: 1.0 -Name: PLWN-API -Version: 0.9 -Summary: Python API to access plWordNet lexicon -Home-page: UNKNOWN -Author: Michał Kaliński -Author-email: michal.kalinski@pwr.edu.pl -License: UNKNOWN -Description: UNKNOWN -Platform: UNKNOWN diff --git a/PLWN_API.egg-info/SOURCES.txt b/PLWN_API.egg-info/SOURCES.txt deleted file mode 100644 index 9d68e4cea354d10ac0f67a12afc6328ad8417a01..0000000000000000000000000000000000000000 --- a/PLWN_API.egg-info/SOURCES.txt +++ /dev/null @@ -1,28 +0,0 @@ -MANIFEST.in -README-pl-beta.txt -setup.py -PLWN_API.egg-info/PKG-INFO -PLWN_API.egg-info/SOURCES.txt -PLWN_API.egg-info/dependency_links.txt -PLWN_API.egg-info/requires.txt -PLWN_API.egg-info/top_level.txt -plwn/__init__.py -plwn/_loading.py -plwn/bases.py -plwn/enums.py -plwn/exceptions.py -plwn/relation_aliases.tsv -plwn/relresolver.py -plwn/readers/__init__.py -plwn/readers/comments.py -plwn/readers/nodes.py -plwn/readers/ubylmf.py -plwn/readers/wndb.py -plwn/readers/wnxml.py -plwn/storages/__init__.py -plwn/storages/objects.py -plwn/storages/sqlite.py -plwn/utils/__init__.py -plwn/utils/graphmlout.py -plwn/utils/sorting.py -plwn/utils/tupwrap.py \ No newline at end of file diff --git a/PLWN_API.egg-info/dependency_links.txt b/PLWN_API.egg-info/dependency_links.txt deleted file mode 100644 index 8b137891791fe96927ad78e64b0aad7bded08bdc..0000000000000000000000000000000000000000 --- a/PLWN_API.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/PLWN_API.egg-info/requires.txt b/PLWN_API.egg-info/requires.txt deleted file mode 100644 index 0cc144e55cb1e22cea3e852a4b1d07ca5988c7ea..0000000000000000000000000000000000000000 --- a/PLWN_API.egg-info/requires.txt +++ /dev/null @@ -1,2 +0,0 @@ -six>=1.10 -enum34>=1.1.2 diff --git a/PLWN_API.egg-info/top_level.txt b/PLWN_API.egg-info/top_level.txt deleted file mode 100644 index c72d30de0531ec401e7d72fd2f63a069d248db85..0000000000000000000000000000000000000000 --- a/PLWN_API.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -plwn diff --git a/README.md b/README.md index 298cac62387ee50058dc88af9ed5854ec5c440f3..fed3cba90cb0e90c278c8c1ab80d5a13e0387ea3 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,80 @@
-# PLWN_API +======== +PLWN API +======== +PLWN API is a library for accessing the plWordNet lexicon in a Python program. + + +Usage +===== + +Access is provided using a PLWordNet object, with data loaded from the database +dump. + + >>> import plwn + >>> wn = plwn.load_default() + +Using that object, it's possible to obtain synset and lexical unit data. + + >>> lex = wn.lexical_unit('pies', plwn.PoS.noun_pl, 2) + >>> print(lex) + pies.2(21:zw) + >>> print(lex.definition) + pies domowy - popularne zwierzę domowe, przyjaciel człowieka. + + +Full documentation +================== + +For a description of loading plWordNet data: + + $ pydoc plwn._loading + +For a description of the PLWordNet class and others: + + $ pydoc plwn.bases + + +Creating API dumps from wordnet SQL +=================================== + +The latest wordnet database dump can be obtained from +http://ws.clarin-pl.eu/public/wordnet-work.LATEST.sql.gz + +It can be loaded using the following shell commands: + + $ mysql -e 'CREATE SCHEMA wordnet_new' # For maintaining multiple versions. + $ zcat wordnet-work.LATEST.sql.gz | mysql -D wordnet_new + +It is then recommended to run the `clean_wndb.sql` script to remove any mistakes, +in the unlikely case that the dump contains some, such as invalid enum values +or invalid foreign keys. + + $ mysql -D wordnet_new < clean_wndb.sql + +Then, if necessary, edit the connection string in storage-dumps so that it follows the SQLAlchemy URL format. +The default values are all set to "wordnet"; in this example, the DATABASE part will be "wordnet_new". + + mysql+mysqldb://wordnet:wordnet@localhost/wordnet_new?charset=utf8 + +After that, the database can be read and saved in the API format. Note that this step only works in Python 2! + + >>> import sys; print(sys.version) + 2.7.12 + >>> import plwn + >>> api = plwn.read("connection.txt", "database", "plwn-new.db", "sqlite3") + +To load this version at a later date, use `plwn.load(path)` instead of `plwn.load_default()`: + + >>> api = plwn.load("storage-dumps/plwn-new.db") + + +Licenses +======== + +The Python software is provided under the terms of the LGPL 3.0 license (see COPYING +and COPYING.LESSER). + +Lexicon data is provided under the terms of the WordNet license (see LICENSE-PWN.txt) +for the original Princeton WordNet synsets and relations, and the plWordNet +license (see LICENSE-plWN.txt) for other entities. diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..a5fcdfdb2b1b17bd1404765eef7dfe2a2c949b79 --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,193 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +# SPHINXOPTS = -D language=pl +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = a4 +BUILDDIR = build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables.
+PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext + +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " applehelp to make an Apple Help Book" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + @echo " coverage to run coverage check of the documentation (if enabled)" + +clean: + rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/PLWN_API.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/PLWN_API.qhc" + +applehelp: + $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp + @echo + @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." + @echo "N.B. 
You won't be able to view it unless you put it in" \ + "~/Library/Documentation/Help or install it in your application" \ + "bundle." + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/PLWN_API" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/PLWN_API" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +coverage: + $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage + @echo "Testing of coverage in the sources finished, look at the " \ + "results in $(BUILDDIR)/coverage/python.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
diff --git a/doc/asciio/skip-artificial-docstring.asciio b/doc/asciio/skip-artificial-docstring.asciio
new file mode 100644
index 0000000000000000000000000000000000000000..086e71c75abf3b23eb1d54398bf49e3661838416
Binary files /dev/null and b/doc/asciio/skip-artificial-docstring.asciio differ
diff --git a/doc/source/conf.py b/doc/source/conf.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e05b4dbf0737e3fd078f804c49171ae0f976be4
--- /dev/null
+++ b/doc/source/conf.py
@@ -0,0 +1,301 @@
+# -*- coding: utf-8 -*-
+#
+# PLWN_API documentation build configuration file, created by
+# sphinx-quickstart on Fri Jun 2 14:19:03 2017.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys
+import os
+import shlex
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.insert(0, os.path.abspath('.'))
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    'sphinx.ext.autodoc',
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffixes as a list of strings:
+# source_suffix = ['.rst', '.md']
+source_suffix = '.rst'
+
+# The encoding of source files.
+source_encoding = 'utf-8'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'PLWN_API'
+copyright = u'2017, Michał Kaliński'
+author = u'Michał Kaliński'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '0.22'
+# The full version, including alpha/beta/rc tags.
+release = '0.22'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+# language = 'en'
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = []
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output.
They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + +locale_dirs = ['locale'] + +nitpicky = True + + +# -- Autodoc -------------------------------------------------------------- + +autoclass_content = 'both' +autodoc_default_flags = ['members', 'show-inheritance'] + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'alabaster' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. 
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Language to be used for generating the HTML full-text search index.
+# Sphinx supports the following languages:
+#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
+#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
+#html_search_language = 'en'
+
+# A dictionary with options for the search language support, empty by default.
+# Now only 'ja' uses this config value
+#html_search_options = {'type': 'default'}
+
+# The name of a javascript file (relative to the configuration directory) that
+# implements a search results scorer. If empty, the default will be used.
+#html_search_scorer = 'scorer.js'
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'PLWN_APIdoc'
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+# The paper size ('letterpaper' or 'a4paper').
+'papersize': 'a4paper',
+
+# The font size ('10pt', '11pt' or '12pt').
+#'pointsize': '10pt',
+
+# Additional stuff for the LaTeX preamble.
+#'preamble': '',
+
+# Latex figure (float) alignment
+#'figure_align': 'htbp',
+
+'extraclassoptions': ',openany,oneside',
+'babel': r'\usepackage[polish]{babel}',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+    (master_doc, 'PLWN_API.tex', u'PLWN\\_API Documentation',
+     u'Michał Kaliński', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+latex_engine = 'lualatex'
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    (master_doc, 'plwn_api', u'PLWN_API Documentation',
+     [author], 1)
+]
+
+# If true, show URL addresses after external links.
+#man_show_urls = False
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (master_doc, 'PLWN_API', u'PLWN_API Documentation',
+     author, 'PLWN_API', 'One line description of project.',
+     'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#texinfo_appendices = []
+
+# If false, no module index is generated.
+#texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+#texinfo_no_detailmenu = False
diff --git a/doc/source/enums.rst b/doc/source/enums.rst
new file mode 100644
index 0000000000000000000000000000000000000000..7c9fc0cac762a0bbe8f0af2131c1e219d19e67c2
--- /dev/null
+++ b/doc/source/enums.rst
@@ -0,0 +1,6 @@
+Public enums
+============
+
+.. automodule:: plwn.enums
+   :undoc-members:
+   :member-order: groupwise
diff --git a/doc/source/exceptions.rst b/doc/source/exceptions.rst
new file mode 100644
index 0000000000000000000000000000000000000000..6964d0faade327940ee5edb6f452e35fed2d8537
--- /dev/null
+++ b/doc/source/exceptions.rst
@@ -0,0 +1,4 @@
+Public exceptions
+=================
+
+.. automodule:: plwn.exceptions
diff --git a/doc/source/index.rst b/doc/source/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..47ae9634d19b3b3885fbf176077420c0c2718001
--- /dev/null
+++ b/doc/source/index.rst
@@ -0,0 +1,22 @@
+.. PLWN_API documentation master file, created by
+   sphinx-quickstart on Fri Jun 2 14:19:03 2017.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to PLWN_API's documentation!
+====================================
+
+.. toctree::
+   :maxdepth: 2
+
+   introduction
+   interface
+   exceptions
+   enums
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/doc/source/interface.rst b/doc/source/interface.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f83ae7aab1950836e628cb94e36f1704584cd961
--- /dev/null
+++ b/doc/source/interface.rst
@@ -0,0 +1,4 @@
+Public interface
+================
+
+.. automodule:: plwn.bases
diff --git a/doc/source/introduction.rst b/doc/source/introduction.rst
new file mode 100644
index 0000000000000000000000000000000000000000..03510e4e08b152a9f6844d7acceaeb0d2f806c56
--- /dev/null
+++ b/doc/source/introduction.rst
@@ -0,0 +1,258 @@
+Introduction
+============
+
+Loading
+-------
+
+Access to plWordNet is provided via a single
+:class:`~plwn.bases.PLWordNetBase` object, which requires a source from which
+to load the lexicon data. In normal distributions, the storage file is bundled
+with the python package, so the only thing required to get an instance is::
+
+    import plwn
+    wn = plwn.load_default()
+
+
+Getting synsets and lexical units
+---------------------------------
+
+The basic building blocks of plWordNet are synsets and lexical units,
+represented by :class:`~plwn.bases.SynsetBase` and
+:class:`~plwn.bases.LexicalUnitBase` objects. Every single synset and lexical
+unit can be identified either by a unique ID number, or by a combination of
+three properties: lemma, :abbr:`pos (part of speech)` and variant.
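+
+Both identification schemes can be read back from any obtained object,
+through the ``id``, ``lemma``, ``pos`` and ``variant`` properties used
+throughout this document. A minimal sketch (``wn`` comes from the section
+above; the ID belongs to a unit that appears in later examples)::
+
+    >>> lex = wn.lexical_unit_by_id(52245)
+    >>> (lex.lemma, lex.variant) == ('pies', 2)
+    True
+    >>> lex.pos is plwn.PoS.noun
+    True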
+
+There are three primary methods on
+:class:`~plwn.bases.PLWordNetBase` for each of these two types of entities
+that allow selecting them from the lexicon:
+
+* Many entities by matching one or more of the three identifying properties:
+
+  * :meth:`~plwn.bases.PLWordNetBase.synsets`
+  * :meth:`~plwn.bases.PLWordNetBase.lexical_units`
+
+* A single entity by matching all three identifying properties:
+
+  * :meth:`~plwn.bases.PLWordNetBase.synset`
+  * :meth:`~plwn.bases.PLWordNetBase.lexical_unit`
+
+* A single entity by matching the unique numeric ID:
+
+  * :meth:`~plwn.bases.PLWordNetBase.synset_by_id`
+  * :meth:`~plwn.bases.PLWordNetBase.lexical_unit_by_id`
+
+
+Selecting by ID
+^^^^^^^^^^^^^^^
+
+Using the ``*_by_id`` methods is the fastest and most straightforward way of
+getting :class:`~plwn.bases.SynsetBase` and
+:class:`~plwn.bases.LexicalUnitBase` objects, provided that ID values of
+synsets and / or units for the correct version of plWordNet have been obtained
+from an outside source or by storing the ``id`` property::
+
+    >>> s = wn.synset_by_id(34160)
+    >>> print(s)
+    {pies.2(21:zw), pies_domowy.1(21:zw)}
+    >>> print(s.id)
+    34160
+
+
+Selecting by all three identifying properties
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The "singular" methods require all three properties. Lemma is the basic form
+of a word, variant is an ordinal number differentiating between different
+meanings of the same word, and :abbr:`pos (part of speech)` is an enumerated
+value.
+
+There are eight :abbr:`pos (part of speech)` constants, four for Polish
+synsets and units, and four for English. The enum class is provided as a
+member of the base module of the library:
+
+* ``plwn.PoS.verb``, ``plwn.PoS.noun``, ``plwn.PoS.adv``, ``plwn.PoS.adj``
+* ``plwn.PoS.verb_en``, ``plwn.PoS.noun_en``, ``plwn.PoS.adv_en``,
+  ``plwn.PoS.adj_en``
+
+There are a few cases where all three properties would be known, but not the
+ID. Selecting like this is still possible::
+
+    >>> lx = wn.lexical_unit(lemma='pies', pos=plwn.PoS.noun, variant=2)
+    >>> print(lx)
+    pies.2(21:zw)
+    >>> print(lx == wn.lexical_unit_by_id(lx.id))
+    True
+
+It's not legal to omit any one of the three properties::
+
+    >>> lx = wn.lexical_unit(lemma='pies', pos=plwn.PoS.noun)
+    TypeError: lexical_unit() missing 1 required positional argument: 'variant'
+
+If there's no synset / unit that fits the query, a
+:exc:`~plwn.exceptions.NotFound` subclass is raised (a defensive way of
+handling this is sketched below)::
+
+    >>> lx = wn.lexical_unit(lemma='pies', pos=plwn.PoS.noun, variant=99)
+    LexicalUnitNotFound: lemma='pies', pos=<PoS.noun: 'noun'>, variant=99
+
+    >>> lx = wn.synset(lemma='pies', pos=plwn.PoS.noun, variant=99)
+    SynsetNotFound: lemma='pies', pos=<PoS.noun: 'noun'>, variant=99
+
+
+Selecting by some of the identifying properties
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The "plural" methods always return an iterable of synsets / lexical units.
+Unlike the "singular" methods, they allow omitting one or more of the
+arguments, so a query may match more than one entity.
+
+It's safer to wrap the invocation in the ``tuple`` constructor, since the
+interface only guarantees that the return value is iterable.
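+
+Since only the "singular" methods raise :exc:`~plwn.exceptions.NotFound`
+subclasses, a non-raising lookup can be built by wrapping them. This is only
+a sketch; ``try_lexical_unit`` is a hypothetical helper, not part of the
+API::
+
+    >>> from plwn.exceptions import LexicalUnitNotFound
+    >>> def try_lexical_unit(wn, lemma, pos, variant):
+    ...     # Return None instead of raising when nothing matches.
+    ...     try:
+    ...         return wn.lexical_unit(lemma=lemma, pos=pos, variant=variant)
+    ...     except LexicalUnitNotFound:
+    ...         return None
+    >>> print(try_lexical_unit(wn, 'pies', plwn.PoS.noun, 99))
+    None
+
+For example, a query for all noun units of the lemma ``pies`` matches
+several entities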
+
+::
+
+    >>> lxs = tuple(wn.lexical_units(lemma='pies', pos=plwn.PoS.noun))
+    >>> print(lxs)
+    (<LexicalUnit id=5563 lemma='pies' pos=<PoS.noun: 'noun'> variant=1>,
+     <LexicalUnit id=52245 lemma='pies' pos=<PoS.noun: 'noun'> variant=2>,
+     <LexicalUnit id=69245 lemma='pies' pos=<PoS.noun: 'noun'> variant=3>,
+     <LexicalUnit id=626100 lemma='pies' pos=<PoS.noun: 'noun'> variant=4>,
+     <LexicalUnit id=626108 lemma='pies' pos=<PoS.noun: 'noun'> variant=5>,
+     <LexicalUnit id=626115 lemma='pies' pos=<PoS.noun: 'noun'> variant=6>,
+     <LexicalUnit id=710443 lemma='pies' pos=<PoS.noun: 'noun'> variant=7>)
+
+It's also possible that a query matches zero entities. Unlike the "singular"
+methods, this will not raise an exception, but simply return an empty
+iterable.
+
+::
+
+    >>> lxs = tuple(wn.lexical_units(lemma='pies', pos=plwn.PoS.verb))
+    >>> print(lxs)
+    ()
+
+
+Synset and lexical unit properties
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Data associated with plWordNet synsets and lexical units is provided as
+public properties of synset and lexical unit objects. They are described in
+the documentation of the respective classes: :class:`~plwn.bases.SynsetBase`
+and :class:`~plwn.bases.LexicalUnitBase`.
+
+
+Getting relations
+-----------------
+
+The other elementary kind of entity in plWordNet, aside from synsets and
+lexical units, is the relation.
+
+Relation instances can connect two synsets or two lexical units. These
+instances are selected using identifiers of their types.
+
+A detailed explanation of how relation types can be referred to is in
+:class:`~plwn.bases.RelationInfoBase`; the short version is:
+
+* Full name, for example: ``hiperonimia`` for relations that have no parent
+  type; ``meronimia/część`` for relations that do.
+* Short alias, for example: ``hiper``.
+* Parent name, for example: ``meronimia``; this refers to all the children of
+  the relation.
+
+To see names and aliases for all relations, in alphabetical order, do
+``sorted(wn.relations_info())``.
+
+
+Related synsets / units
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Having a :class:`~plwn.bases.SynsetBase` or a
+:class:`~plwn.bases.LexicalUnitBase` object, it's possible to select all
+objects related to it using the ``related`` method, which accepts one of the
+relation type identifiers described above. The ``relations`` property can be
+used to check what relation types have outbound instances from the synset /
+unit::
+
+    >>> lx = wn.lexical_unit_by_id(132)
+    >>> print(lx.relations)
+    [<RelationInfo name='antonimia komplementarna' parent='antonimia'
+      kind=<RelationKind.lexical: 'lexical'> aliases=('ant_kom',)>,
+     <RelationInfo name='derywacyjność' parent=None
+      kind=<RelationKind.lexical: 'lexical'> aliases=('der',)>]
+    >>> print(tuple(lx.related('der')))
+    (<LexicalUnit id=133 lemma='apetyt' pos=<PoS.noun: 'noun'> variant=2>,)
+
+If a relation of the right kind (synset or lexical) is passed to the method,
+but it has no instances for the particular entity, an empty iterable is
+returned::
+
+    >>> print(tuple(lx.related('rol:ag')))
+    ()
+
+In contrast, if a relation is of the wrong kind or does not exist, this
+raises an error::
+
+    >>> lx.related('hiper')
+    InvalidRelationTypeException: (<RelationKind.lexical: 'lexical'>, 'hiper')
+
+When passing a parent relation type to ``related``, the distinction between
+the actual child relation types is lost.
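+
+For instance, with the meronymy query used in the next example, plain
+``related`` yields only the related synset, without the child relation that
+connects it (the repr here is abbreviated)::
+
+    >>> s = wn.synset_by_id(7337)
+    >>> print(tuple(s.related('meronimia')))
+    (<Synset id=22085 lemma='pociąg drogowy' ...>,)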
+
+A second method, ``related_pairs``, can be used to annotate each related
+entity with the relation instance connecting to it::
+
+    >>> s = wn.synset_by_id(7337)
+    >>> print(tuple(s.related_pairs('meronimia')))
+    ((<RelationInfo name='część' parent='meronimia'
+       kind=<RelationKind.synset: 'synset'> aliases=('mero:cz',)>,
+      <Synset id=22085 lemma='pociąg drogowy' pos=<PoS.noun: 'noun'>
+       variant=1>),)
+
+Synset's :meth:`~plwn.bases.SynsetBase.related` and
+:meth:`~plwn.bases.SynsetBase.related_pairs` also have an additional boolean
+``skip_artificial`` argument. See the methods' documentation for more
+details; the default value should be correct for most uses.
+
+
+Relation edges
+^^^^^^^^^^^^^^
+
+Relation instances can also be selected using the
+:meth:`~plwn.bases.PLWordNetBase.synset_relation_edges` and
+:meth:`~plwn.bases.PLWordNetBase.lexical_relation_edges` methods. Unlike the
+``related`` methods, these two are not anchored to a starting point and
+select all relation instances of given types in plWordNet; they return
+iterables of :class:`~plwn.bases.RelationEdge` instances, each having
+``source``, ``relation`` and ``target`` properties.
+
+Without arguments, all synset or lexical relation instances are yielded.
+Filtering can be done using an ``include`` or ``exclude`` argument. Both
+expect the values to be sets of relation type identifiers (the same as those
+accepted by the ``related`` methods). When ``include`` is not ``None``, only
+instances of relations mentioned in the set are yielded. For example, to
+select all hypernymy instances::
+
+    >>> sr = tuple(wn.synset_relation_edges(include=('hiper',)))
+    >>> print(sr)
+    (RelationEdge(source=<Synset id=10 lemma='samoistny'
+                          pos=<PoS.adjective: 'adjective'>
+                          variant=2 [+ 1 unit(s)]>,
+                  relation=<RelationInfo name='hiperonimia'
+                            parent=None kind=<RelationKind.synset: 'synset'>
+                            aliases=('hiper',)>,
+                  target=<Synset id=9139 lemma='bezwiedny'
+                          pos=<PoS.adjective: 'adjective'>
+                          variant=1 [+ 7 unit(s)]>),
+     RelationEdge(source=<Synset id=10 lemma='samoistny'
+                          pos=<PoS.adjective: 'adjective'>
+                          variant=2 [+ 1 unit(s)]>,
+                  relation=<RelationInfo name='hiperonimia'
+                            parent=None kind=<RelationKind.synset: 'synset'>
+                            aliases=('hiper',)>,
+                  target=<Synset id=104191 lemma='automatyczny'
+                          pos=<PoS.adjective: 'adjective'> variant=4>),
+     ...)
+
+When ``exclude`` is not ``None``, instances of mentioned relation types are
+removed from the iterable; either from the set of all relations, or from
+those in ``include``.
+
+The method :meth:`~plwn.bases.PLWordNetBase.synset_relation_edges` also takes
+a boolean ``skip_artificial`` argument that defaults to ``True``. Like with
+:meth:`~plwn.bases.SynsetBase.related`, see the method's documentation for
+details.
diff --git a/doc/source/locale/pl/LC_MESSAGES/enums.po b/doc/source/locale/pl/LC_MESSAGES/enums.po
new file mode 100644
index 0000000000000000000000000000000000000000..df1dabff56da22789a363fec03a88515477a393d
--- /dev/null
+++ b/doc/source/locale/pl/LC_MESSAGES/enums.po
@@ -0,0 +1,110 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) 2017, Michał Kaliński
+# This file is distributed under the same license as the PLWN_API package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, 2017.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PLWN_API 0.21\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2017-08-18 14:42+0200\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: Babel 2.1.1\n"
+
+#: ../../source/enums.rst:2
+msgid "Public enums"
+msgstr "Publiczne obiekty wyliczeniowe"
+
+#: plwn.enums:1
+msgid "All enumerated values used in plWordNet."
+msgstr "Wszystkie wyliczeniowe wartości wykorzystywane w Słowosieci."
+
+#: plwn.enums.RelationKind:1 plwn.enums.PoS:1 plwn.enums.VerbAspect:1
+#: plwn.enums.EmotionMarkedness:1 plwn.enums.EmotionName:1
+#: plwn.enums.EmotionValuation:1 plwn.enums.Domain:1
+msgid "Bases: :class:`enum.Enum`"
+msgstr "Klasy bazowe: :class:`enum.Enum`"
+
+#: plwn.enums.RelationKind:1
+msgid "Whether a relation connects synsets or lexical units."
+msgstr "Określa, czy relacja łączy synsety, czy jednostki leksykalne."
+
+#: plwn.enums.RelationKind.by_db_number:1 plwn.enums.PoS.by_db_number:1
+#: plwn.enums.VerbAspect.by_db_number:1 plwn.enums.Domain.by_db_number:1
+msgid ""
+"Return the enum value associated with ``number`` value stored in the "
+"plWordNet database."
+msgstr ""
+"Zwróć wartość wyliczeniową skojarzoną z wartością numeryczną ``number`` "
+"przechowywaną w bazie danych Słowosieci."
+
+#: plwn.enums.RelationKind.by_db_number:4 plwn.enums.PoS.by_db_number:4
+#: plwn.enums.VerbAspect.by_db_number:4 plwn.enums.Domain.by_db_number:4
+msgid ""
+"Raises ``KeyError`` if ``number`` is not in the range valid for the "
+"database field, unless ``optional`` is ``True``; then, ``None`` is "
+"returned instead of an enum value."
+msgstr ""
+"Rzuca ``KeyError``, jeśli ``number`` nie jest w zakresie właściwym dla "
+"danego pola bazy danych, chyba że ``optional`` ma prawdziwą wartość; "
+"wtedy zamiast wartości wyliczeniowej zwracane jest ``None``."
+
+#: plwn.enums.RelationKind.db_number:1 plwn.enums.PoS.db_number:1
+#: plwn.enums.VerbAspect.db_number:1 plwn.enums.Domain.db_number:1
+msgid "The number associated with the enum value in plWordNet database."
+msgstr ""
+"Wartość numeryczna skojarzona w bazie danych Słowosieci z wartością "
+"wyliczeniową."
+
+#: plwn.enums.PoS:1
+msgid "Defines part of speech values used in plWordNet."
+msgstr "Definiuje wartości odpowiadające częściom mowy w Słowosieci."
+
+#: plwn.enums.VerbAspect:1
+msgid "Defines verb aspect values used in plWordNet."
+msgstr "Definiuje wartości odpowiadające aspektom czasowników w Słowosieci."
+
+#: plwn.enums.EmotionMarkedness:1
+msgid "Defines markedness of emotions associated with some lexical units."
+msgstr ""
+"Definiuje oznaczenia sentymentu związanego z niektórymi jednostkami "
+"leksykalnymi."
+
+#: plwn.enums.EmotionMarkedness.normalized:1
+msgid ""
+"Return an instance of this enum corresponding to ``strvalue`` after "
+"normalizing it with regards to whitespace."
+msgstr ""
+"Zwróć instancję typu wyliczeniowego odpowiadającą wartości tekstowej "
+"``strvalue``, po jej znormalizowaniu pod względem spacji."
+
+#: plwn.enums.EmotionName:1
+msgid "Defines names of emotions that may be associated with lexical units."
+msgstr ""
+"Definiuje nazwy sentymentu, które mogą być powiązane z jednostkami "
+"leksykalnymi."
+
+#: plwn.enums.EmotionValuation:1
+msgid "Defines valuations of emotions that may be associated with lexical units."
+msgstr "" +"Definiuje wartoÅ›ciowania sentymentu, które mogÄ… być powiÄ…zane " +"z jednostkami leksykalnymi." + +#: plwn.enums.Domain:1 +msgid "Defines domains of lexical units occurring in plWordNet." +msgstr "Definiuje domeny jednostek leksykalnych SÅ‚owosieci." + +#: plwn.enums.make_values_tuple:1 +msgid "" +"Auxiliary function that converts a sequence of enums to a tuple of enum " +"string values." +msgstr "" +"Pomocnicza funkcja konwertujÄ…ca sekwencjÄ™ obiektów wyliczeniowych do " +"krotki ich wartoÅ›ci tekstowych." + diff --git a/doc/source/locale/pl/LC_MESSAGES/exceptions.po b/doc/source/locale/pl/LC_MESSAGES/exceptions.po new file mode 100644 index 0000000000000000000000000000000000000000..0898b0b37251c21cf3c27439520c24c4214b6dbe --- /dev/null +++ b/doc/source/locale/pl/LC_MESSAGES/exceptions.po @@ -0,0 +1,106 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2017, MichaÅ‚ KaliÅ„ski +# This file is distributed under the same license as the PLWN_API package. +# FIRST AUTHOR <EMAIL@ADDRESS>, 2017. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PLWN_API 0.21\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2017-08-18 14:42+0200\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.1.1\n" + +#: ../../source/exceptions.rst:2 +msgid "Public exceptions" +msgstr "Publiczne wyjÄ…tki" + +#: plwn.exceptions:1 +msgid "Custom exceptions raised by PLWN API." +msgstr "WyjÄ…tki rzucane przez PLWN API." + +#: plwn.exceptions.PLWNAPIException:1 +msgid "Bases: :class:`Exception`" +msgstr "Klasy bazowe: :class:`Exception`" + +#: plwn.exceptions.PLWNAPIException:1 +msgid "Base for all exceptions in the module." +msgstr "Baza dla wszystkich wyjÄ…tków w tym module." + +#: plwn.exceptions.NotFound:1 plwn.exceptions.ReaderException:1 +#: plwn.exceptions.LoadException:1 +#: plwn.exceptions.InvalidRelationTypeException:1 +msgid "Bases: :class:`plwn.exceptions.PLWNAPIException`" +msgstr "Klasy bazowe: :class:`plwn.exceptions.PLWNAPIException`" + +#: plwn.exceptions.NotFound:1 +msgid "Base for exceptions raised when an entity is not found." +msgstr "Baza dla wyjÄ…tków rzucanych przy nie znalezieniu szukanego obiektu." + +#: plwn.exceptions.LexicalUnitNotFound:1 plwn.exceptions.SynsetNotFound:1 +msgid "Bases: :class:`plwn.exceptions.NotFound`" +msgstr "Klasy bazowe: :class:`plwn.exceptions.NotFound`" + +#: plwn.exceptions.LexicalUnitNotFound:1 +msgid "Raised when a lexical unit is not found during lookup." +msgstr "Rzucany kiedy nie znaleziono szukanej jednostki leksykalnej." + +#: plwn.exceptions.SynsetNotFound:1 +msgid "Raised when a synset is not found during lookup." +msgstr "Rzucany kiedy nie znaleziono szukanego synsetu." + +#: plwn.exceptions.ReaderException:1 +msgid "Raised when there's an error in the format expected by a reader." +msgstr "Rzucany kiedy wystÄ…pi błąd w czytaniu formatu wejÅ›ciowego SÅ‚owosieci." + +#: plwn.exceptions.MalformedIdentifierException:1 +msgid "Bases: :class:`plwn.exceptions.ReaderException`" +msgstr "Klasy bazowe: :class:`plwn.exceptions.ReaderException`" + +#: plwn.exceptions.MalformedIdentifierException:1 +msgid "Raised during UBY-LMF parsing, when a malformed identifier is encountered." +msgstr "" +"Rzucany jeÅ›li podczas wczytywania UBY-LMF napotkany jest identyfikator " +"o zÅ‚ym formacie." 
+
+#: plwn.exceptions.LoadException:1
+msgid "Raised when a storage can't be loaded from file."
+msgstr "Rzucany, jeśli wystąpi błąd podczas wczytywania danych ze zrzutu."
+
+#: plwn.exceptions.DumpVersionException:1
+msgid "Bases: :class:`plwn.exceptions.LoadException`"
+msgstr "Klasy bazowe: :class:`plwn.exceptions.LoadException`"
+
+#: plwn.exceptions.DumpVersionException:1
+msgid ""
+"Raised when a dumped storage has wrong version (suggesting incompatible "
+"format)."
+msgstr ""
+"Rzucany, kiedy zrzut danych ma wersję formatu niezgodną z tą obsługiwaną "
+"przez aktualną bibliotekę."
+
+#: plwn.exceptions.InvalidRelationTypeException:1
+msgid ""
+"Raised when a relation identifier does not refer to any existing relation"
+" (or the relation exists for the other relation kind)."
+msgstr ""
+"Rzucany, kiedy identyfikator relacji nie odpowiada żadnej istniejącej "
+"relacji (albo relacja istnieje, ale dla drugiego rodzaju relacji)."
+
+#: plwn.exceptions.AmbiguousRelationTypeException:1
+msgid "Bases: :class:`plwn.exceptions.InvalidRelationTypeException`"
+msgstr "Klasy bazowe: :class:`plwn.exceptions.InvalidRelationTypeException`"
+
+#: plwn.exceptions.AmbiguousRelationTypeException:1
+msgid ""
+"Raised when a relation type identifier could refer to more than one "
+"relation, but only one is permitted in the context."
+msgstr ""
+"Rzucany, kiedy identyfikator typu relacji może odnosić się do więcej niż "
+"jednej relacji, ale w danym kontekście dozwolona jest tylko pojedyncza."
diff --git a/doc/source/locale/pl/LC_MESSAGES/index.po b/doc/source/locale/pl/LC_MESSAGES/index.po
new file mode 100644
index 0000000000000000000000000000000000000000..36e20e21b7a6aa2e512650d78d7d7a160eca34c5
--- /dev/null
+++ b/doc/source/locale/pl/LC_MESSAGES/index.po
@@ -0,0 +1,38 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) 2017, Michał Kaliński
+# This file is distributed under the same license as the PLWN_API package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, 2017.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PLWN_API 0.21\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2017-06-12 16:51+0200\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: Babel 2.1.1\n"
+
+#: ../../source/index.rst:7
+msgid "Welcome to PLWN_API's documentation!"
+msgstr "Dokumentacja PLWN_API"
+
+#: ../../source/index.rst:19
+msgid "Indices and tables"
+msgstr "Indeksy i tabele"
+
+#: ../../source/index.rst:20
+msgid ":ref:`genindex`"
+msgstr ""
+
+#: ../../source/index.rst:21
+msgid ":ref:`modindex`"
+msgstr ""
+
+#: ../../source/index.rst:22
+msgid ":ref:`search`"
+msgstr ""
diff --git a/doc/source/locale/pl/LC_MESSAGES/interface.po b/doc/source/locale/pl/LC_MESSAGES/interface.po
new file mode 100644
index 0000000000000000000000000000000000000000..230c22bd6c007d17e1b890b2c03dd84ea003297c
--- /dev/null
+++ b/doc/source/locale/pl/LC_MESSAGES/interface.po
@@ -0,0 +1,1265 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) 2017, Michał Kaliński
+# This file is distributed under the same license as the PLWN_API package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, 2017.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PLWN_API 0.21\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2017-08-18 14:42+0200\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: Babel 2.1.1\n"
+
+#: ../../source/interface.rst:2
+msgid "Public interface"
+msgstr "Publiczny interfejs"
+
+#: plwn.bases:1
+msgid ""
+"Base, abstract classes for plWordNet objects, implementing common "
+"functionality independent of structures holding the data itself."
+msgstr ""
+"Bazowe, abstrakcyjne klasy obiektów Słowosieci, implementujące "
+"funkcjonalność niezależną od struktur przechowujących same dane."
+
+#: plwn.bases.PLWordNetBase:1 plwn.bases.SynsetBase:1
+#: plwn.bases.LexicalUnitBase:1 plwn.bases.RelationInfoBase:1
+msgid "Bases: :class:`object`"
+msgstr "Klasy bazowe: :class:`object`"
+
+#: plwn.bases.PLWordNetBase:1
+msgid "The primary object providing data from plWordNet."
+msgstr "Podstawowy obiekt udostępniający dane ze Słowosieci."
+
+#: plwn.bases.PLWordNetBase:3
+msgid "Allows retrieving synsets, lexical units, and other informative objects."
+msgstr ""
+"Pozwala na wydobywanie synsetów, jednostek leksykalnych oraz innych "
+"obiektów informacyjnych."
+
+#: plwn.bases.PLWordNetBase.close:1
+msgid "Perform cleanup operations after using the :class:`PLWordNetBase` object."
+msgstr "Wykonaj operacje porządkujące po używaniu obiektu :class:`PLWordNetBase`."
+
+#: plwn.bases.PLWordNetBase.close:4
+msgid ""
+"By default, this method does nothing and should be overridden by a "
+"subclass if necessary. It should still always be called, since any "
+":class:`PLWordNetBase` subclass may create any kind of temporary "
+"resources."
+msgstr ""
+"Domyślnie ta metoda nie robi nic i powinna być zaimplementowana przez "
+"podklasę, jeśli jest to wymagane. Mimo tego powinna zawsze być "
+"wywoływana, ponieważ każda podklasa :class:`PLWordNetBase` może utworzyć "
+"dowolny rodzaj tymczasowych zasobów."
+
+#: plwn.bases.PLWordNetBase.close:9
+msgid ""
+"After calling this method, this instance and any ones linked with it "
+"(:class:`SynsetBase`, :class:`LexicalUnitBase`, etc.) may become invalid "
+"and should not be used."
+msgstr ""
+"Po wywołaniu tej metody ta instancja oraz wszystkie powiązane z nią "
+"(:class:`SynsetBase`, :class:`LexicalUnitBase`, itd.) mogą przestać "
+"funkcjonować i nie powinny być używane."
+
+#: plwn.bases.PLWordNetBase.from_dump:1
+msgid "Create new instance from a dump of cached internal representation."
+msgstr "Stwórz nową instancję ze zrzutu wewnętrznej reprezentacji."
+
+#: plwn.bases.PLWordNetBase.from_dump:3
+msgid ""
+"The dump file must have been created by :meth:`.from_reader` of the same "
+":class:`PLWordNetBase` subclass and schema version."
+msgstr ""
+"Plik zrzutu musi być wcześniej utworzony przez :meth:`.from_reader` tej "
+"samej podklasy :class:`PLWordNetBase` i o tej samej wersji schematu."
+
+#: plwn.bases.PLWordNetBase.from_reader:1
+msgid ""
+"Create new instance from a source reader, optionally saving it in an "
+"internal representation format in another file."
+msgstr ""
+"Stwórz nową instancję z czytnika formatu źródłowego, opcjonalnie "
+"zapisując zrzut wewnętrznej reprezentacji w innym pliku."
+
+#: plwn.bases.PLWordNetBase.from_reader:4
+msgid ""
+"``reader`` is any iterable that yields node instances: "
+":class:`~plwn.readers.nodes.SynsetNode`, "
+":class:`~plwn.readers.nodes.LexicalUnitNode` and "
+":class:`~plwn.readers.nodes.RelationTypeNode`."
+msgstr ""
+"``reader`` jest dowolną sekwencją, która zawiera obiekty typów: "
+":class:`~plwn.readers.nodes.SynsetNode`, "
+":class:`~plwn.readers.nodes.LexicalUnitNode` "
+"i :class:`~plwn.readers.nodes.RelationTypeNode`."
+
+#: plwn.bases.PLWordNetBase.from_reader:9
+msgid ""
+"``dump_to`` is a path to a (non-existing) file where data form ``reader``"
+" will be stored to be to be loaded later. If not passed, then the data "
+"won't be cached in any file, requiring to be read again using "
+":meth:`.from_reader`."
+msgstr ""
+"``dump_to`` jest ścieżką do (nieistniejącego) pliku, gdzie dane "
+"z ``reader`` zostaną zrzucone, umożliwiając późniejsze załadowanie. Jeśli"
+" argument nie jest podany, dane nie zostaną nigdzie zrzucone, wymagając "
+"ponownego użycia :meth:`.from_reader`."
+
+#: plwn.bases.PLWordNetBase.lexical_relation_edges:1
+msgid ""
+"Get an iterable of lexical unit relation instances from plWordNet, as "
+"represented by :class:`RelationEdge`."
+msgstr ""
+"Wydobądź sekwencję instancji relacji leksykalnych ze Słowosieci, "
+"reprezentowanych przez :class:`RelationEdge`."
+
+#: plwn.bases.PLWordNetBase.lexical_relation_edges:4
+msgid ""
+"This method works like :meth:`.synset_relation_edges`, but for lexical "
+"units and relation types. There is no ``skip_artificial``, since there "
+"are no artificial lexical units."
+msgstr ""
+"Ta metoda działa jak :meth:`.synset_relation_edges`, ale dla jednostek "
+"leksykalnych i typów relacji. Nie ma argumentu ``skip_artificial``, "
+"ponieważ nie istnieją sztuczne jednostki leksykalne."
+
+#: plwn.bases.PLWordNetBase.lexical_unit:1
+msgid ""
+"Like :meth:`.lexical_units` but either return a single lexical unit or "
+"raise :exc:`~plwn.exceptions.LexicalUnitNotFound`."
+msgstr ""
+"Ta metoda działa jak :meth:`.lexical_units`, ale zawsze albo zwraca "
+"pojedynczą jednostkę leksykalną, albo rzuca "
+":exc:`~plwn.exceptions.LexicalUnitNotFound`."
+
+#: plwn.bases.PLWordNetBase.lexical_unit:4
+msgid ""
+"All parameters are required, to ensure that the query could only match a "
+"single lexical unit."
+msgstr ""
+"Wszystkie parametry są wymagane, żeby zapytanie na pewno mogło pasować "
+"tylko do pojedynczej jednostki leksykalnej."
+
+#: plwn.bases.PLWordNetBase.lexical_unit_by_id:1
+msgid "Select a lexical unit using its internal, numeric ID."
+msgstr "Wybierz jednostkę leksykalną, używając jej wewnętrznego, numerycznego ID."
+
+#: plwn.bases.PLWordNetBase.lexical_unit_by_id:3
+msgid ""
+"If there is no lexical unit with the given ID, raise "
+":exc:`~plwn.exceptions.LexicalUnitNotFound`."
+msgstr ""
+"Jeśli nie ma jednostki leksykalnej o takim ID, rzuć "
+":exc:`~plwn.exceptions.LexicalUnitNotFound`."
+
+#: plwn.bases.PLWordNetBase.lexical_unit_by_id:6
+msgid ""
+"This is the fastest method to get a particular :class:`LexicalUnitBase` "
+"object."
+msgstr "To najszybszy sposób na wydobycie danego obiektu :class:`LexicalUnitBase`."
+
+#: plwn.bases.PLWordNetBase.lexical_units:1
+msgid "Select lexical units from plWordNet based on combination of criteria."
+msgstr ""
+"Wydobądź jednostki leksykalne ze Słowosieci na podstawie kombinacji "
+"kryteriów."
+
+#: plwn.bases.PLWordNetBase.lexical_units:3
+msgid ""
+"It's possible to specify the lemma, part of speech and variant of the "
+"units this method should yield. If a parameter value is omitted, any "
+"value matches. Conversely, a call of ``lexical_units()`` will return an "
+"iterable of all lexical units in plWordNet. If no lexical unit matches "
+"the query, returns an empty iterable."
+msgstr ""
+"Można podać lemat, część mowy albo wariant jednostek, które ta metoda "
+"powinna zwrócić. Jeśli wartość któregoś argumentu jest pominięta, każda "
+"wartość pasuje w to miejsce. W ten sposób wywołanie ``lexical_units()`` "
+"zwróci wszystkie jednostki leksykalne ze Słowosieci. Jeśli żadna "
+"jednostka leksykalna nie pasuje do zapytania, zwracana jest pusta "
+"sekwencja."
+
+#: plwn.bases.PLWordNetBase.lexical_units:9
+msgid ""
+"The parameter ``lemma`` is an unicode string, ``variant`` is an integer, "
+"and ``pos`` is an enumerated value of :class:`~plwn.enums.PoS`."
+msgstr ""
+"Parametr ``lemma`` jest unicode'owym stringiem, ``variant`` jest liczbą, "
+"a ``pos`` jest wartością wyliczeniową :class:`~plwn.enums.PoS`."
+
+#: plwn.bases.PLWordNetBase.relations_info:1
+msgid ""
+"Get an iterable of :class:`RelationInfoBase` instances, matching the "
+"query defined by parameters."
+msgstr ""
+"Wydobądź sekwencję instancji :class:`RelationInfoBase`, pasujących do "
+"zapytania definiowanego przez parametry."
+
+#: plwn.bases.PLWordNetBase.relations_info:4
+msgid ""
+"``name`` is a string naming a relation (see :class:`RelationInfoBase`). "
+"If it names a \"parent\", all its children are selected."
+msgstr ""
+"``name`` jest nazwą relacji (zob. :class:`RelationInfoBase`). Jeśli typ "
+"relacji posiada \"dzieci\", wszystkie są wybierane."
+
+#: plwn.bases.PLWordNetBase.relations_info:8
+msgid "``kind`` is an enumerated value of :class:`~plwn.enums.RelationKind`."
+msgstr "``kind`` jest wartością wyliczeniową :class:`~plwn.enums.RelationKind`."
+
+#: plwn.bases.PLWordNetBase.relations_info:11
+msgid ""
+"Any parameter that's not passed matches any relation type. As such, a "
+"call of ``relations_info()`` will select all relation types in plWordNet."
+msgstr ""
+"Jeśli któryś parametr nie został podany, pasuje do niego każdy typ "
+"relacji. Dlatego wywołanie ``relations_info()`` wybiera wszystkie typy "
+"relacji ze Słowosieci."
+
+#: plwn.bases.PLWordNetBase.synset:1
+msgid ""
+"Like :meth:`.synsets`, but either return a single synset or raise "
+":exc:`~plwn.exceptions.SynsetNotFound`."
+msgstr ""
+"Ta metoda działa jak :meth:`.synsets`, ale albo zwraca pojedynczy synset,"
+" albo rzuca :exc:`~plwn.exceptions.SynsetNotFound`."
+
+#: plwn.bases.PLWordNetBase.synset:4
+msgid ""
+"All parameters are required, to ensure that the query could only match a "
+"single synset."
+msgstr ""
+"Wszystkie parametry są wymagane, żeby zapytanie na pewno mogło pasować "
+"tylko do pojedynczego synsetu."
+
+#: plwn.bases.PLWordNetBase.synset_by_id:1
+msgid "Select a synset using its internal, numeric ID."
+msgstr "Wybierz synset, używając wewnętrznego, numerycznego ID."
+
+#: plwn.bases.PLWordNetBase.synset_by_id:3
+msgid ""
+"If there is no synset with the given ID, raise "
+":exc:`~plwn.exceptions.SynsetNotFound`."
+msgstr ""
+"Jeśli nie ma synsetu o takim ID, rzuć "
+":exc:`~plwn.exceptions.SynsetNotFound`."
+
+#: plwn.bases.PLWordNetBase.synset_by_id:6
+msgid "This is the fastest method to get a particular :class:`SynsetBase` object."
+msgstr "To najszybsza metoda wydobycia danego obiektu :class:`SynsetBase`." + +#: plwn.bases.PLWordNetBase.synset_relation_edges:1 +msgid "" +"Get an iterable of synset relation instances from plWordNet, as " +"represented by :class:`RelationEdge`." +msgstr "" +"WydobÄ…dź sekwencjÄ™ instancji relacji synsetów ze SÅ‚owosieci, " +"reprezentowanych przez :class:`RelationEdge`." + +#: plwn.bases.PLWordNetBase.synset_relation_edges:4 +msgid "" +"``include`` and ``exclude`` are containers of relation type identifiers " +"(see :class:`RelationInfoBase`). If ``include`` is not ``None``, then " +"only instances of relations in it are included in the result. If " +"``exclude`` is not ``None``, then all relations in it are omitted from " +"the result. If both are ``None``, all relations are selected." +msgstr "" +"``include`` i ``exclude`` sÄ… kolekcjami identyfikatorów typów relacji " +"(zob. :class:`RelationInfoBase`). JeÅ›li ``include`` nie jest ``None``, " +"tylko typy relacji zawarte w tej kolekcji sÄ… brane pod uwagÄ™ przy " +"wybieraniu instancji. JeÅ›li ``exclude`` nie jest ``None`` instancje typów" +" zawartych w tej kolekcji sÄ… pomijane w wynikowej sekwencji. JeÅ›li oba " +"parametry sÄ… ``None``, wszystkie relacje sÄ… wybierane." + +#: plwn.bases.PLWordNetBase.synset_relation_edges:11 +msgid "" +"If ``skip_artificial`` is ``True`` (the default), then artificial synsets" +" (see :attr:`SynsetBase.is_artificial`) are \"skipped over\": new " +"relation edges are created to replace ones ending or staring in an " +"artificial synset, and connecting neighbouring synsets if they have " +"relations directed like this::" +msgstr "" +"JeÅ›li ``skip_artificial`` jest ``True`` (domyÅ›lnie), sztuczne synsety " +"(zob. :attr:`SynsetBase.is_artificial`) sÄ… \"przeskakiwane\": nowe " +"krawÄ™dzie relacji sÄ… tworzone by zastÄ…pić te koÅ„czÄ…ce albo zaczynajÄ…cy " +"siÄ™ sztucznym synsetem i połączyć sÄ…siadujÄ…ce ze sobÄ… synsety jeÅ›li majÄ… " +"relacje skierowane w taki sposób::" + +#: plwn.bases.PLWordNetBase.synset_relation_edges:42 +msgid "" +"``Syn C`` is dropped, since there's no instance of ``Rel 1`` directed " +"outwards from the skipped artificial ``Syn B``." +msgstr "" +"``Syn C`` jest porzucany, ponieważ nie ma instancji ``Rel `` skierowanej " +"od przeskoczonego sztucznego ``Syn B``." + +#: plwn.bases.PLWordNetBase.synsets:1 +msgid "Select synsets from plWordNet based on combination of criteria." +msgstr "Wybierz synsety ze SÅ‚owosieci w oparciu o kombinacjÄ™ kryteriów." + +#: plwn.bases.PLWordNetBase.synsets:3 +msgid "" +"This method works just like :meth:`.lexical_units`, but returns an " +"iterable of distinct synsets that own the lexical units selected by the " +"query." +msgstr "" +"Ta metoda dziaÅ‚a jak :meth:`.lexical_units`, ale zwraca sekwencjÄ™ " +"unikalnych synsetów zawierajÄ…cych jednostki wybrane przez zapytanie." + +#: plwn.bases.PLWordNetBase.to_graphml:1 +msgid "" +"Export plWordNet as graph in `GraphML " +"<http://graphml.graphdrawing.org/>`_ format." +msgstr "" +"Eksportuj SÅ‚owosieć jako graf w formacie `GraphML " +"<http://graphml.graphdrawing.org/>`_." + +#: plwn.bases.PLWordNetBase.to_graphml:4 +msgid "" +"Nodes of the graph are synsets and / or lexical units, and edges are " +"relation instances." +msgstr "" +"WÄ™zÅ‚ami grafu sÄ… synsety i / lub jednostki leksykalne, a krawÄ™dziami sÄ… " +"instancje relacji." + +#: plwn.bases.PLWordNetBase.to_graphml:7 +msgid "For nodes, their numeric plWordNet IDs are set as their XML element IDs." 
+msgstr "" +"Numeryczne ID synsetów i jednostek w SÅ‚owosieci sÄ… ustawiane jako ID " +"elementów XML." + +#: plwn.bases.PLWordNetBase.to_graphml:10 +msgid "" +"**NOTE:** Nodes that have no inbound or outbound edges are dropped from " +"the graph." +msgstr "" +"**UWAGA:** WÄ™zÅ‚y które nie majÄ… żadnych krawÄ™dzi wychodzÄ…cych z nich ani " +"prowadzÄ…cych do nich sÄ… usuwane z grafu." + +#: plwn.bases.PLWordNetBase.to_graphml:13 +msgid "" +"Nodes and edges have attributes, as GraphML defines them. For nodes, " +"attributes are public properties of :class:`SynsetBase` or " +":class:`LexicalUnitBase` (aside from ``relations``, which would be " +"useless in a graph, and ``id``, which becomes the XML ID of a node). " +"Edges have two attributes:" +msgstr "" +"WÄ™zÅ‚y i krawÄ™dzie posiadajÄ… atrybuty, w takim sensie w jakim definiuje je" +" GraphML. Atrybutami wÄ™złów sÄ… publiczne atrybuty klas " +":class:`SynsetBase` lub :class:`LexicalUnitBase` (poza ``relations``, " +"który byÅ‚by nieprzydatny) w strukturze grafu, oraz ``id`` który zamiast " +"atrybutem jest ID elementu XML wÄ™zÅ‚a). KrawÄ™dzie majÄ… dwa atrybuty:" + +#: plwn.bases.PLWordNetBase.to_graphml:19 +msgid "" +"**type**: Either ``relation``, for edges that represent plWordNet " +"relation instances, or ``unit_and_synset`` for edges between synset nodes" +" and nodes of lexical units that belong to the synset. The latter appear " +"only in *mixed* graph." +msgstr "" +"**type**: Albo wartość ``relation``, dla krawÄ™dzi, które reprezentujÄ… " +"instancje relacji SÅ‚owosieci, albo ``unit_and_synset`` dla krawÄ™dzi " +"pomiÄ™dzy wÄ™zÅ‚ami synsetów i należących do nich wÄ™złów jednostek " +"leksykalnych. Te drugie pojawiajÄ… siÄ™ jedynie w grafie typu *mixed*." + +#: plwn.bases.PLWordNetBase.to_graphml:23 +msgid "" +"**name**: If **type** is ``relation``, then this is the full name of the " +"relation (see :class:`RelationInfoBase`). If **type** is " +"``unit_and_synset``, it is one of constant values: ``has_unit`` if the " +"edge is directed from synset to unit, or ``in_synset``, for edges " +"directed from unit to synset." +msgstr "" +"**name**: JeÅ›li **type** to ``relation``, wtedy jest to peÅ‚na nazwa " +"relacji (zob. :class:`RelationInfoBase`). JeÅ›li **type** to " +"``unit_and_synset``, wtedy jest jednÄ… ze staÅ‚ych wartoÅ›ci: ``has_unit`` " +"jeÅ›li krawÄ™dź jest skierowana od synsetu do jednostki, albo " +"``in_synset``, jeÅ›li krawÄ™dź jest skierowana od jednostki do synsetu." + +#: plwn.bases.PLWordNetBase.to_graphml:29 +msgid "" +"``out_file`` is a writable file-like object to which the GraphML output " +"will be written." +msgstr "" +"``out_file`` to obiekt plikowy z możliwoÅ›ciÄ… zapisu, do którego zostanie " +"zrzucone wyjÅ›cie w formacie GraphML." + +#: plwn.bases.PLWordNetBase.to_graphml:32 +msgid "" +"``graph_type`` is one of three constant string values: ``synset``, " +"``lexical_unit`` or ``mixed``. Synset graph contains only synset nodes " +"and relations, lexical unit graph contains only lexical unit nodes and " +"relations, and mixed graph contains all of the former, as well as " +"additional edges that map lexical units to synsets they belong to." +msgstr "" +"``graph_type`` jest jednÄ… ze staÅ‚ych wartoÅ›ci: ``synset``, " +"``lexical_unit`` albo ``mixed``. 
+"``lexical_unit`` albo ``mixed``. Graf ``synset`` zawiera jedynie węzły "
+"synsetów i relacje między synsetami, graf ``lexical_unit`` zawiera "
+"jedynie węzły i relacje jednostek leksykalnych, a ``mixed`` zawiera "
+"wszystkie powyższe oraz dodatkowe krawędzie łączące synsety z należącymi "
+"do nich jednostkami leksykalnymi."
+
+#: plwn.bases.PLWordNetBase.to_graphml:39
+msgid ""
+"If ``include_attributes`` is ``True``, then all synset and / or lexical "
+"unit attributes will be included. By default, attributes are not included"
+" to shrink the written file. Note, that if any of "
+"``(included/excluded)_(synset/lexical_unit)_attributes`` parameters is "
+"passed, inclusion of attributes will be controlled by them and the value "
+"of ``include_attributes`` is ignored."
+msgstr ""
+"Jeśli ``include_attributes`` ma prawdziwą wartość, wtedy wszystkie "
+"atrybuty synsetów i / lub jednostek leksykalnych będą włączone do grafu. "
+"Domyślnie żadne wartości atrybutów nie są przenoszone do pliku GraphML, "
+"by zmniejszyć jego rozmiar. Uwaga: jeśli został podany jakikolwiek "
+"z parametrów ``(included/excluded)_(synset/lexical_unit)_attributes``, "
+"atrybuty w pliku wyjściowym są kontrolowane przez te parametry, "
+"a ``include_attributes`` będzie zignorowany."
+
+#: plwn.bases.PLWordNetBase.to_graphml:46
+msgid ""
+"If ``prefix_ids`` is ``True``, then ID of each node will be prefixed with"
+" the type: ``synset-`` or ``lexical_unit-``. By default, it's not done, "
+"unless ``graph_type`` is ``mixed``, in which case this parameter is "
+"ignored and ID prefixes are enforced."
+msgstr ""
+"Jeśli ``prefix_ids`` ma prawdziwą wartość, wtedy przed ID każdego węzła "
+"będzie dopisany jego typ: ``synset-`` lub ``lexical_unit-``. Domyślnie "
+"typ węzła jest pomijany, chyba że ``graph_type`` to ``mixed``; wtedy ten "
+"parametr jest ignorowany, a typ węzłów zawsze jest dopisywany."
+
+#: plwn.bases.PLWordNetBase.to_graphml:51
+msgid ""
+"``included_synset_attributes`` and ``excluded_synset_attributes`` are "
+"containers of synset attribute names, selecting the values which should "
+"or should not be included with synset nodes."
+msgstr ""
+"``included_synset_attributes`` i ``excluded_synset_attributes`` są "
+"zbiorami nazw atrybutów synsetów, wyznaczającymi te, które powinny bądź "
+"nie powinny być dołączone do węzłów synsetów."
+
+#: plwn.bases.PLWordNetBase.to_graphml:55
+msgid ""
+"``included_lexical_unit_attributes`` and "
+"``excluded_lexical_unit_attributes`` are the same way as the above, but "
+"for attributes of lexical units."
+msgstr ""
+"``included_lexical_unit_attributes`` "
+"i ``excluded_lexical_unit_attributes`` działają w taki sam sposób, ale "
+"dla atrybutów jednostek leksykalnych."
+
+#: plwn.bases.PLWordNetBase.to_graphml:59
+msgid ""
+"``included_synset_relations`` and ``excluded_synset_relations`` are "
+"containers of synset relation type identifiers (see "
+":class:`RelationInfoBase`), selecting synset relation types whose "
+"instances should or should not be included in the graph. By default, all "
+"relation types are included."
+msgstr ""
+"``included_synset_relations`` i ``excluded_synset_relations`` są zbiorami "
+"identyfikatorów typów relacji (zob. :class:`RelationInfoBase`), "
+"wyznaczającymi relacje synsetów, których krawędzie powinny bądź nie "
+"powinny znaleźć się w grafie. Domyślnie graf zawiera krawędzie wszystkich "
+"relacji."
+
+#: plwn.bases.PLWordNetBase.to_graphml:65
+msgid ""
+"``included_lexical_unit_relations`` and "
+"``excluded_lexical_unit_relations`` work the same way as the above, but "
+"for lexical relation types."
+msgstr ""
+"``included_lexical_unit_relations`` i ``excluded_lexical_unit_relations``"
+" działają w taki sam sposób, ale dla typów relacji leksykalnych."
+
+#: plwn.bases.PLWordNetBase.to_graphml:69
+msgid ""
+"``included_synset_nodes`` and ``excluded_synset_nodes`` are containers "
+"for IDs of synsets that should or should not be included as nodes in the "
+"graph. If a node is not included, all edges that start or end in it are "
+"also excluded. By default, all non-artificial synsets are included."
+msgstr ""
+"``included_synset_nodes`` i ``excluded_synset_nodes`` są zbiorami ID "
+"synsetów, które powinny bądź nie powinny znaleźć się w grafie jako węzły."
+" Jeśli jakiś węzeł jest wyłączony z grafu, wszystkie krawędzie, które "
+"zaczynają się bądź kończą w nim, są również pomijane. Domyślnie wszystkie "
+"niesztuczne synsety są włączone."
+
+#: plwn.bases.PLWordNetBase.to_graphml:74
+msgid ""
+"``included_lexical_unit_nodes`` and ``excluded_lexical_unit_nodes`` work "
+"the same way as the above, but for lexical units."
+msgstr ""
+"``included_lexical_unit_nodes`` i ``excluded_lexical_unit_nodes`` "
+"działają jak powyżej, ale dla węzłów jednostek leksykalnych."
+
+#: plwn.bases.PLWordNetBase.to_graphml:77
+msgid ""
+"If ``skip_artificial_synsets`` is ``True`` (the default), then artificial"
+" synsets are excluded from the graph, and edges connecting to them are "
+"reconnected to \"skip over\" them, as described for "
+":meth:`.synset_relation_edges`."
+msgstr ""
+"Jeśli ``skip_artificial_synsets`` ma prawdziwą wartość (domyślnie), "
+"sztuczne synsety są wykluczane z grafu, a krawędzie są modyfikowane tak, "
+"by \"przeskakiwać\" je, tak jak opisano przy "
+":meth:`.synset_relation_edges`."
+
+#: plwn.bases.PLWordNetBase.to_graphml:82
+msgid ""
+"**Note:** while this method accepts all of the above parameters at all "
+"times, parameters relating to synsets are ignored if ``graph_type`` is "
+"``lexical_unit``, and parameters relating to lexical units are ignored if"
+" ``graph_type`` is ``synset``."
+msgstr ""
+"**Uwaga:** mimo że ta metoda przyjmuje wszystkie powyższe parametry "
+"naraz, te odnoszące się do synsetów są ignorowane, jeśli ``graph_type`` "
+"to ``lexical_unit``, a parametry odnoszące się do jednostek leksykalnych "
+"są ignorowane, jeśli ``graph_type`` to ``synset``."
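A minimal sketch of how the ``to_graphml`` parameters described above combine (``wn`` is assumed to be an already loaded :class:`PLWordNetBase` instance; the output path, the binary file mode and the ``hiperonimia`` filter are illustrative assumptions, not prescribed by the API)::

    import plwn

    wn = plwn.load_default()
    # Dump a synset-only graph with just hypernymy edges and no attribute
    # values, to keep the GraphML file small; adjust the file mode if your
    # version writes text rather than bytes.
    with open('synsets.graphml', 'wb') as out:
        wn.to_graphml(
            out,
            graph_type='synset',
            include_attributes=False,
            included_synset_relations={'hiperonimia'},
        )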
+
+#: plwn.bases.SynsetBase:1
+msgid "Encapsulates data associated with a plWordNet synset."
+msgstr "Przechowuje dane związane z synsetem Słowosieci."
+
+#: plwn.bases.SynsetBase:3
+msgid ""
+"Synset contains lexical units that have the same meaning (i.e. synonyms). "
+"Most of plWordNet relations are between meanings, hence the need to group"
+" lexical units into synsets."
+msgstr ""
+"Synsety zawierają jednostki leksykalne o takim samym znaczeniu "
+"(synonimy). Większość relacji w Słowosieci jest pomiędzy znaczeniami, "
+"dlatego potrzebne jest grupowanie jednostek leksykalnych w synsety."
+
+#: plwn.bases.SynsetBase:7
+msgid ""
+"For purposes of ordering, a :class:`SynsetBase` object is uniquely "
+"identified by its \"head\": the first of the lexical units it contains."
+msgstr ""
+"W kwestii porządkowania, obiekt :class:`SynsetBase` jest jednoznacznie "
+"identyfikowany przez swoją \"głowę\": pierwszą jednostkę leksykalną, "
+"którą zawiera."
+
+#: plwn.bases.SynsetBase.definition:1
+msgid "Textual description of the synset's meaning."
+msgstr "Tekstowy opis znaczenia synsetu."
+
+#: plwn.bases.SynsetBase.definition:3 plwn.bases.LexicalUnitBase.definition:3
+msgid "May be ``None``."
+msgstr "Może być ``None``."
+
+#: plwn.bases.SynsetBase.definition:5
+msgid ""
+"In plWordNet, most definitions are stored as "
+":attr:`LexicalUnitBase.definition`. Synset definitions are present mostly"
+" for English synsets."
+msgstr ""
+"W Słowosieci większość definicji jest trzymana jako "
+":attr:`LexicalUnitBase.definition`. Definicje synsetów są obecne głównie "
+"dla anglojęzycznych synsetów."
+
+#: plwn.bases.SynsetBase.id:1
+msgid ""
+"The internal, numeric identifier of the synset in plWordNet. It is unique"
+" among all synsets."
+msgstr "Wewnętrzny, unikalny, numeryczny identyfikator synsetu w Słowosieci."
+
+#: plwn.bases.SynsetBase.id:4
+msgid ""
+"If this identifier is passed to :meth:`PLWordNetBase.synset_by_id`, it "
+"would return this :class:`SynsetBase` object."
+msgstr ""
+"Przekazanie tego identyfikatora do :meth:`PLWordNetBase.synset_by_id` "
+"zwróciłoby ten obiekt :class:`SynsetBase`."
+
+#: plwn.bases.SynsetBase.is_artificial:1
+msgid "Boolean value informing if the synset is an artificial one."
+msgstr "Wartość boolowska informująca, czy synset jest sztuczny."
+
+#: plwn.bases.SynsetBase.is_artificial:3
+msgid ""
+"Artificial synsets carry no linguistic meaning; they are introduced as a "
+"method of grouping synsets within the structure of plWordNet."
+msgstr ""
+"Sztuczne synsety nie mają lingwistycznego znaczenia, ale są sposobem na "
+"grupowanie synsetów w strukturze Słowosieci."
+
+#: plwn.bases.SynsetBase.is_artificial:7
+msgid "For most uses, artificial synsets should be ignored."
+msgstr ""
+"W większości przypadków użycia Słowosieci sztuczne synsety powinny być "
+"ignorowane."
+
+#: plwn.bases.SynsetBase.lexical_units:1
+msgid ""
+"Tuple of :class:`LexicalUnitBase` objects, representing lexical units "
+"contained in the synset. Ordering of units within the tuple is arbitrary,"
+" but constant."
+msgstr ""
+"Krotka obiektów :class:`LexicalUnitBase`, reprezentujących jednostki "
+"leksykalne znajdujące się w synsecie. Kolejność jednostek w krotce jest "
+"dowolna, ale stała."
+
+#: plwn.bases.SynsetBase.lexical_units:5
+msgid ""
+"At least one lexical unit is always present in every synset, so "
+"``lexical_units[0]`` is always valid and selects the synset's \"head\"."
+msgstr ""
+"Co najmniej jedna jednostka leksykalna jest zawarta w każdym synsecie, "
+"więc ``lexical_units[0]`` jest zawsze poprawnym wyrażeniem, zwracającym "
+"\"głowę\" synsetu."
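A short sketch of the "head" convention described above (the lemma, part of speech and variant form a hypothetical example, not a guaranteed entry)::

    import plwn

    wn = plwn.load_default()
    syn = wn.synset(u'pies', plwn.PoS.noun, 2)  # hypothetical unit
    head = syn.lexical_units[0]                 # the synset's "head"
    print(head.lemma, head.variant)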
+
+#: plwn.bases.SynsetBase.related:1
+msgid ""
+"Get an iterable of :class:`SynsetBase` instances that are connected to "
+"this synset by outbound edges of synset relation type identified by "
+"``relation_id``."
+msgstr ""
+"Wydobądź sekwencję instancji :class:`SynsetBase`, do których prowadzą "
+"krawędzie relacji typu ``relation_id`` wychodzące z tego synsetu."
+
+#: plwn.bases.SynsetBase.related:5
+msgid ""
+"``relation_id`` can be any synset relation type identifier (see "
+":class:`RelationInfoBase`), a collection of relation type identifiers, "
+"or ``None``, in which case synsets related to this one by any relation "
+"are selected."
+msgstr ""
+"``relation_id`` może być identyfikatorem typu dowolnej relacji synsetowej "
+"(zob. :class:`RelationInfoBase`), zbiorem identyfikatorów typów relacji "
+"synsetowych, albo ``None``; w ostatnim przypadku wszystkie synsety będące "
+"w jakiejkolwiek relacji z danym synsetem są zwracane."
+
+#: plwn.bases.SynsetBase.related:10 plwn.bases.LexicalUnitBase.related:10
+msgid ""
+"Note that distinction between any relations that fit the ``relation_id``"
+" query is lost. Use :meth:`.related_pairs` if it's needed."
+msgstr ""
+"Rozróżnienie pomiędzy instancjami różnych relacji pasującymi do zapytania "
+"``relation_id`` jest tracone; należy użyć :meth:`.related_pairs`, gdy jest "
+"ono potrzebne."
+
+#: plwn.bases.SynsetBase.related:14
+msgid ""
+"Raises :exc:`~plwn.exceptions.InvalidRelationTypeException` if (any of) "
+"``relation_id`` does not refer to an existing synset relation type."
+msgstr ""
+"Wyjątek :exc:`~plwn.exceptions.InvalidRelationTypeException` jest rzucany, "
+"gdy (którykolwiek z) ``relation_id`` nie jest identyfikatorem istniejącej "
+"relacji synsetów."
+
+#: plwn.bases.SynsetBase.related:18
+msgid ""
+"If ``skip_artificial`` is ``True`` (the default) artificial synsets "
+"related to this one are \"skipped over\", as described for "
+":meth:`PLWordNetBase.synset_relation_edges`."
+msgstr ""
+"Jeśli ``skip_artificial`` ma prawdziwą wartość (domyślnie), sztuczne "
+"synsety w relacji z tym synsetem są \"przeskakiwane\", zgodnie z opisem "
+"dla :meth:`PLWordNetBase.synset_relation_edges`."
+
+#: plwn.bases.SynsetBase.related_pairs:1
+msgid ""
+"Like :meth:`.related`, but return an iterable of pairs ``(<relation "
+"info>, <relation target synset>)``."
+msgstr ""
+"Działa jak :meth:`.related`, ale zwraca sekwencję par ``(<info "
+"o relacji>, <synset kończący relację>)``."
+
+#: plwn.bases.SynsetBase.relations:1
+msgid ""
+"Tuple of :class:`RelationInfoBase` instances, containing types of "
+"distinct relations that have outbound edges from this synset."
+msgstr ""
+"Krotka instancji :class:`RelationInfoBase`, zawierająca typy relacji, "
+"które mają krawędzie wychodzące z tego synsetu."
+
+#: plwn.bases.SynsetBase.relations:4 plwn.bases.LexicalUnitBase.relations:4
+msgid "Relations are returned in an arbitrary order."
+msgstr "Relacje są zwracane w dowolnej kolejności."
+
+#: plwn.bases.SynsetBase.relations:6 plwn.bases.LexicalUnitBase.relations:6
+msgid ""
+"The tuple is special: methods for checking membership accept all possible"
+" representations of a relation type (see :meth:`RelationInfoBase.eqv`)."
+msgstr ""
+"Ta krotka jest specjalna: metody sprawdzające jej zawartość akceptują "
+"wszystkie możliwe reprezentacje typu relacji (zob. "
+":meth:`RelationInfoBase.eqv`)."
+
+#: plwn.bases.SynsetBase.short_str:1
+msgid ""
+"Shorter version of synset's string form (``__str__``) that displays only "
+"the first lexical unit."
+msgstr ""
+"Krótsza wersja tekstowej formy synsetu (``__str__``), która wyświetla "
+"tylko pierwszą jednostkę leksykalną."
+
+#: plwn.bases.SynsetBase.to_dict:1
+msgid ""
+"Create a JSON-compatible dictionary with all public properties of the "
+"synset."
+msgstr ""
+"Stwórz obiekt ``dict`` kompatybilny z formatem JSON, zawierający wartości"
+" wszystkich publicznych atrybutów synsetu."
+
+#: plwn.bases.SynsetBase.to_dict:4 plwn.bases.LexicalUnitBase.to_dict:4
+msgid ""
+"Enums are converted to their values and all collections are converted to "
+"tuples."
+msgstr ""
+"Wartości wyliczeniowe są konwertowane do swoich (tekstowych) wartości, "
+"a wszystkie kolekcje są konwertowane do krotek."
+
+#: plwn.bases.SynsetBase.to_dict:7
+msgid ""
+"Property :attr:`.relations` is omitted, as it would be redundant: all "
+"related synsets can be enumerated when ``include_related`` is ``True``. "
+"Some additional members are also present in the dictionary:"
+msgstr ""
+"Atrybut :attr:`.relations` jest pomijany, ponieważ byłby zbędny: "
+"wszystkie synsety będące w relacji mogą być wypisane, kiedy "
+"``include_related`` ma prawdziwą wartość. Kilka dodatkowych atrybutów "
+"jest dodanych do słownika:"
+
+#: plwn.bases.SynsetBase.to_dict:11
+msgid ""
+"``str``: The string representation of the synset (defined by ``__str__`` "
+"override on :class:`SynsetBase`)."
+msgstr ""
+"``str``: Tekstowa reprezentacja synsetu (określana przez metodę "
+"``__str__`` na :class:`SynsetBase`)."
+
+#: plwn.bases.SynsetBase.to_dict:13
+msgid ""
+"``units``: Listing (as a tuple) of units belonging to the synset (in the "
+"same ordering as :attr:`.lexical_units`), as pairs of ``(<unit id>, <unit"
+" string form>)``."
+msgstr ""
+"``units``: Listowanie (jako krotka) jednostek należących do synsetu "
+"(w takiej samej kolejności jak :attr:`.lexical_units`), jako pary ``(<id "
+"jednostki>, <forma tekstowa jednostki>)``."
+
+#: plwn.bases.SynsetBase.to_dict:17
+msgid ""
+"If ``include_related`` is ``True`` (the default), the dictionary will "
+"contain an additional ``related`` member, representing synsets related to"
+" this one, in the following format::"
+msgstr ""
+"Jeśli ``include_related`` ma prawdziwą wartość (domyślnie), słownik "
+"będzie zawierał dodatkowy atrybut ``related``, reprezentujący synsety "
+"będące w relacji z obecnym synsetem, w następującym formacie::"
+
+#: plwn.bases.SynsetBase.to_dict:29
+msgid ""
+"If ``include_units_data`` is ``True`` (the default), the ``units`` member"
+" will contain results of invocation of :meth:`LexicalUnitBase.to_dict` "
+"for the synset's units, instead of pairs described above. In this case, "
+"the value of ``include_related`` parameter is passed on to "
+":meth:`LexicalUnitBase.to_dict`."
+msgstr ""
+"Jeśli ``include_units_data`` ma prawdziwą wartość (domyślnie), atrybut "
+"``units`` będzie zawierał wyniki wywołania "
+":meth:`LexicalUnitBase.to_dict` dla jednostek synsetu, zamiast par "
+"opisanych powyżej. W takim wypadku wartość ``include_related`` jest "
+"przekazywana do :meth:`LexicalUnitBase.to_dict`."
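To make the shape of the ``to_dict`` output concrete, a small sketch (the synset ID is a made-up placeholder)::

    import json
    import plwn

    wn = plwn.load_default()
    syn = wn.synset_by_id(10424)  # hypothetical ID
    # Enums become plain values and collections become tuples, so the
    # dictionary serializes directly.
    print(json.dumps(syn.to_dict(include_related=False), ensure_ascii=False))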
+
+#: plwn.bases.LexicalUnitBase:1
+msgid "Encapsulates data associated with a plWordNet lexical unit."
+msgstr "Przechowuje dane związane z jednostką leksykalną Słowosieci."
+
+#: plwn.bases.LexicalUnitBase:3
+msgid ""
+"Lexical units represent terms in the language. Each lexical unit is "
+"uniquely identified by its lemma (base written form), part of speech "
+"(verb, noun, adjective or adverb) and variant (a number differentiating "
+"between homonyms)."
+msgstr ""
+"Jednostki leksykalne reprezentują terminy języka. Każda jednostka "
+"leksykalna jest identyfikowana przez swój lemat (bazową formę tekstową), "
+"część mowy (czasownik, rzeczownik, przymiotnik lub przysłówek) i wariant "
+"(numer rozróżniający homonimy)."
+
+#: plwn.bases.LexicalUnitBase.definition:1
+msgid "Textual description of the lexical unit's meaning."
+msgstr "Tekstowy opis znaczenia jednostki leksykalnej."
+
+#: plwn.bases.LexicalUnitBase.domain:1
+msgid ""
+"plWordNet domain the lexical unit belongs to; one of enumerated constants"
+" of :class:`~plwn.enums.Domain`."
+msgstr "" +"Domena SÅ‚owosieci do której jednostka leksykalna należy; jeden z obiektów" +" wyliczeniowych :class:`~plwn.enums.Domain`." + +#: plwn.bases.LexicalUnitBase.emotion_example:1 +msgid "An example of an emotionally charged sentence using the lexical unit." +msgstr "" +"PrzykÅ‚ad emocjonalnie nacechowanego zdania zawierajÄ…cego jednostkÄ™ " +"leksykalnÄ…." + +#: plwn.bases.LexicalUnitBase.emotion_example_secondary:1 +msgid "" +"This property is not ``None`` only if :attr:`.emotion_markedness` is " +":attr:`~plwn.enums.EmotionMarkedness.amb`. In such case, " +":attr:`.emotion_example` will be an example of a positively charged " +"sentence, and this one will be a negatively charged sentence." +msgstr "" +"Ten atrybut nie jest ``None`` tylko gdy :attr:`.emotion_markedness` ma " +"wartość :attr:`~plwn.enums.EmotionMarkedness.amb`. W tym wypadku, " +":attr:`.emotion_example` bÄ™dzie przykÅ‚adem pozytywnie nacechowanego " +"zdania, a ten atrybut bÄ™dzie przykÅ‚adem negatywnie nacechowanego zdania." + +#: plwn.bases.LexicalUnitBase.emotion_markedness:1 +msgid "" +"Markedness of emotions associated with the lexical unit. May be ``None`` " +"if the unit has no emotional markedness." +msgstr "" +"Wartość nacechowania emocjonalnego jednostki leksykalnej. Może być " +"``None`` jeÅ›li jednostka nie ma emocjonalnego nacechowania." + +#: plwn.bases.LexicalUnitBase.emotion_markedness:4 +msgid "" +"If this property is ``None`` then all other ``emotion_*`` properties will" +" be ``None`` or empty collections." +msgstr "" +"JeÅ›li ten atrybut ma wartość ``None``, wszystkie pozostaÅ‚e atrybuty " +"``emotion_*`` bÄ™dÄ… ``None`` albo pustymi kolekcjami." + +#: plwn.bases.LexicalUnitBase.emotion_names:1 +msgid "Tuple of names of emotions associated with this lexical unit." +msgstr "Krotka nazw emocji skojarzonych z tÄ… jednostkÄ… leksykalnÄ…." + +#: plwn.bases.LexicalUnitBase.emotion_valuations:1 +msgid "Tuple of valuations of emotions associated with this lexical unit." +msgstr "Krotka wartoÅ›ciowaÅ„ emocji skojarzonych z tÄ… jednostkÄ… leksykalnÄ…." + +#: plwn.bases.LexicalUnitBase.external_links:1 +msgid "URLs linking to web pages describing the meaning of the lexical unit." +msgstr "" +"OdnoÅ›niki URL do stron internetowych opisujÄ…cych znaczenie jednostki " +"leksykalnej." + +#: plwn.bases.LexicalUnitBase.external_links:3 +#: plwn.bases.LexicalUnitBase.usage_notes:7 +msgid "May be an empty collection." +msgstr "Może być pustÄ… kolekcjÄ…." + +#: plwn.bases.LexicalUnitBase.id:1 +msgid "" +"The internal, numeric identifier of the lexical units in plWordNet. It is" +" unique among all lexical units." +msgstr "" +"WewnÄ™trzny, unikalny, numeryczny identyfikator jednostki leksykalnej " +"w SÅ‚owosieci." + +#: plwn.bases.LexicalUnitBase.id:4 +msgid "" +"If this identifier is passed to :meth:`PLWordNetBase.lexical_unit_by_id`," +" it would return this :class:`LexicalUnitBase` object." +msgstr "" +"Przekazanie tego identyfikatora do " +":meth:`PLWordNetBase.lexical_unit_by_id` zwróciÅ‚oby ten obiekt " +":class:`LexicalUnitBase`." + +#: plwn.bases.LexicalUnitBase.lemma:1 +msgid "Lemma of the unit; its basic text form." +msgstr "Lemat jednostki; jej bazowa forma tekstowa." + +#: plwn.bases.LexicalUnitBase.pos:1 +msgid "" +"Part of speech of the unit; one of enumerated constants of " +":class:`~plwn.enums.PoS`." +msgstr "" +"Część mowy jednostki; jeden z obiektów wyliczeniowych " +":class:`~plwn.enums.PoS`." 
+
+#: plwn.bases.LexicalUnitBase.related:1
+msgid ""
+"Get an iterable of :class:`LexicalUnitBase` instances that are connected "
+"to this lexical unit by outbound edges of lexical relation type "
+"identified by ``relation_id``."
+msgstr ""
+"Wydobądź sekwencję instancji :class:`LexicalUnitBase`, do których prowadzą"
+" krawędzie relacji leksykalnej typu ``relation_id`` wychodzące z tej "
+"jednostki leksykalnej."
+
+#: plwn.bases.LexicalUnitBase.related:5
+msgid ""
+"``relation_id`` can be any lexical relation type identifier (see "
+":class:`RelationInfoBase`), a collection of relation type identifiers, "
+"or ``None``, in which case lexical units related to this one by any "
+"relation are selected."
+msgstr ""
+"``relation_id`` może być identyfikatorem typu dowolnej relacji leksykalnej "
+"(zob. :class:`RelationInfoBase`), zbiorem identyfikatorów typów relacji "
+"leksykalnych, bądź ``None``; w ostatnim przypadku wszystkie jednostki "
+"leksykalne będące w jakiejkolwiek relacji z daną jednostką są zwracane."
+
+#: plwn.bases.LexicalUnitBase.related:14
+msgid ""
+"Raises :exc:`~plwn.exceptions.InvalidRelationTypeException` if "
+"``relation_id`` does not refer to an existing lexical relation type."
+msgstr ""
+"Wyjątek :exc:`~plwn.exceptions.InvalidRelationTypeException` jest "
+"rzucany, jeśli ``relation_id`` nie jest identyfikatorem istniejącej "
+"relacji leksykalnej."
+
+#: plwn.bases.LexicalUnitBase.related_pairs:1
+msgid ""
+"Like :meth:`.related`, but return an iterable of pairs ``(<relation "
+"info>, <relation target unit>)``."
+msgstr ""
+"Działa jak :meth:`.related`, ale zwraca sekwencję par ``(<info "
+"o relacji>, <jednostka kończąca relację>)``."
+
+#: plwn.bases.LexicalUnitBase.relations:1
+msgid ""
+"Tuple of :class:`RelationInfoBase` instances, containing types of "
+"distinct relations that have outbound edges from this lexical unit."
+msgstr ""
+"Krotka instancji :class:`RelationInfoBase`, zawierająca typy relacji, "
+"które mają krawędzie wychodzące z tej jednostki leksykalnej."
+
+#: plwn.bases.LexicalUnitBase.sense_examples:1
+msgid "Text fragments that show how the lexical unit is used in the language."
+msgstr ""
+"Fragmenty tekstu pokazujące, jak jednostka leksykalna jest używana "
+"w języku."
+
+#: plwn.bases.LexicalUnitBase.sense_examples:4
+msgid "May be an empty tuple."
+msgstr "Może być pustą krotką."
+
+#: plwn.bases.LexicalUnitBase.sense_examples_sources:1
+msgid ""
+"Symbolic representations of sources from which the sense examples were "
+"taken."
+msgstr ""
+"Symboliczne reprezentacje źródeł, z których przykłady użycia zostały "
+"wzięte."
+
+#: plwn.bases.LexicalUnitBase.sense_examples_sources:4
+msgid "The symbols are short strings, defined by plWordNet."
+msgstr ""
+"Symbole są krótkimi wartościami tekstowymi, zdefiniowanymi przez "
+"Słowosieć."
+
+#: plwn.bases.LexicalUnitBase.sense_examples_sources:6
+msgid ""
+"This tuple has the same length as :attr:`.sense_examples`, and is "
+"aligned by index (for example, the source of ``sense_examples[3]`` is at "
+"``sense_examples_sources[3]``)."
+msgstr "" +"Ta krotka ma takÄ… samÄ… dÅ‚ugość jak :attr:`.sense_examples` i jest " +"uporzÄ…dkowana tak, by źródÅ‚a odpowiadaÅ‚y przykÅ‚adom o tym samym " +"indeksie.(na przykÅ‚ad, źródÅ‚o ``sense_examples[3]`` jest pod " +"``sense_examples_sources[3]``)" + +#: plwn.bases.LexicalUnitBase.sense_examples_sources:10 +msgid "" +"To get pairs of examples with their sources, use ``zip(sense_examples, " +"sense_examples_sources)``" +msgstr "" +"Aby otrzymać pary przykÅ‚adów i ich źródeÅ‚, należy użyć " +"``zip(sense_examples, sense_examples_sources)``" + +#: plwn.bases.LexicalUnitBase.synset:1 +msgid "" +"An instance of :class:`SynsetBase` representing the synset this unit " +"belongs to." +msgstr "" +"Instancja :class:`SynsetBase` reprezentujÄ…ca synset, do którego ta " +"jednostka należy." + +#: plwn.bases.LexicalUnitBase.to_dict:1 +msgid "" +"Create a JSON-compatible dictionary with all the public properties of the" +" lexical unit." +msgstr "" +"Stwórz obiekt ``dict`` kompatybilny z formatem JSON, zawierajÄ…cy " +"wszystkie publiczne atrybuty jednostki leksykalnej." + +#: plwn.bases.LexicalUnitBase.to_dict:7 +msgid "" +"Property :attr:`.relations` is omitted, as it would be redundant when all" +" related lexical units can be enumerated when ``include_related`` is " +"``True``." +msgstr "" +"Atrybut :attr:`.relations` jest pomijany, ponieważ byÅ‚by zbÄ™dny kiedy " +"wszystkie jednostki bÄ™dÄ…ce w relacji mogÄ… być wypisane kiedy " +"``include_related`` ma prawdziwÄ… wartość." + +#: plwn.bases.LexicalUnitBase.to_dict:11 +msgid "" +"An additional ``str`` member is present in the dictionary; its value is " +"the string representation of the lexical unit." +msgstr "" +"Dodatkowy atrybut ``str`` zawiera tekstowÄ… reprezentacjÄ™ jednostki " +"leksykalnej." + +#: plwn.bases.LexicalUnitBase.to_dict:14 +msgid "" +"If ``include_related`` is ``True`` (the default), the dictionary will " +"contain an additional ``related`` member, representing lexical units " +"related to this one, in the following format::" +msgstr "" +"JeÅ›li ``include_related`` ma prawdziwÄ… wartość (domyÅ›lnie), mapowanie " +"bÄ™dzie zawieraÅ‚a dodatkowy atrybut ``related``, reprezentujÄ…cy jednostki " +"leksykalne bÄ™dÄ…ce w relacji z obecnym synsetem, w nastÄ™pujÄ…cym formacie::" + +#: plwn.bases.LexicalUnitBase.usage_notes:1 +msgid "" +"Symbols denoting certain properties of how the lexical unit is used in " +"the language." +msgstr "" +"Fragmenty tekstu pokazujÄ…ce jak jednostka leksykalna jest używana " +"w jÄ™zyku." + +#: plwn.bases.LexicalUnitBase.usage_notes:4 +msgid "" +"The symbols are short strings, defined by plWordNet. For example, " +"``daw.`` means that the word is considered dated." +msgstr "" +"Te symbole sÄ… krótkimi wartoÅ›ciami tekstowymi, zdefiniowanymi przez " +"SÅ‚owosieć. Na przykÅ‚ad, ``daw.`` oznacza że sÅ‚owo jest uznawane za dawne." + +#: plwn.bases.LexicalUnitBase.variant:1 +msgid "Ordinal number to differentiate between meanings of homonyms." +msgstr "Numer porzÄ…dkowy rozróżniajÄ…cy znaczenia homonimów" + +#: plwn.bases.LexicalUnitBase.variant:3 +msgid "Numbering starts at 1." +msgstr "Numerowanie zaczyna siÄ™ od 1." + +#: plwn.bases.LexicalUnitBase.verb_aspect:1 +msgid "" +"Aspect of a verb; of the enumerated values of " +":class:`~plwn.enums.VerbAspect`." +msgstr "" +"Aspekt czasownika; jedna z wartoÅ›ci wyliczeniowych " +"z :class:`~plwn.enums.VerbAspect`." + +#: plwn.bases.LexicalUnitBase.verb_aspect:4 +msgid "May be ``None`` if the unit is not a verb, or had no aspect assigned." 
+msgstr "" +"Może być ``None``, jeÅ›li jednostka nie jest czasownikiem, albo aspekt nie" +" zostaÅ‚ jej przypisany." + +#: plwn.bases.RelationInfoBase:1 +msgid "Encapsulates information associated with a relation type." +msgstr "Zawiera informacje zwiÄ…zane z typem relacji." + +#: plwn.bases.RelationInfoBase:3 +msgid "" +"The primary purpose of this class is to serve as a single object " +"consolidating all possible ways a relation type can be referred to." +msgstr "" +"Głównym rolÄ… instancji tej klasy jest sÅ‚użenie jako pojedynczy obiekt " +"centralizujÄ…cy wszystkie sposoby na jakie może być identyfikowany typ " +"relacji." + +#: plwn.bases.RelationInfoBase:6 +msgid "" +"In general, plWordNet uses *parent* and *child* relation names. Child " +"relations are those that have actual instances between synsets and " +"lexical units. Parent relations only exist to group child relations " +"together; child relation names need to be only unique within the group of" +" their parent relation, while parent relations must be globally unique." +msgstr "" +"Ogólnie, SÅ‚owosieć dzieli nazwy relacji na *rodziców* i *dzieci*. " +"Relacje-dzieci majÄ… wÅ‚aÅ›ciwe instancje w SÅ‚owosieci, pomiÄ™dzy synsetami " +"oraz jednostkami. Relacje-rodzice istniejÄ… jedynie jako grupowania " +"dzieci;Nazwy relacji-dzieci muszÄ… być unikalne jedynie w ramach grupy " +"wyznaczanej przez rodzica, a relacje-rodzice muszÄ… być unikalne " +"globalnie." + +#: plwn.bases.RelationInfoBase:12 +msgid "" +"For example, there are two relations named \"część\" (\"part\"); one " +"being a child of \"meronimia\" (\"meronymy\"), and another a child of " +"\"holonimia\" (\"holonymy\")." +msgstr "" +"Na przykÅ‚ad, istniejÄ… dwie relacje nazwane \"część\"; jedna jest " +"dzieckiem relacji \"meronimia\", a druga dzieckiem relacji \"holonimia\"." + +#: plwn.bases.RelationInfoBase:16 +msgid "" +"Some relation types have no parent; they behave like child relations, but" +" their names need to be unique on par with parent relations." +msgstr "" +"Niektóre relacje nie majÄ… rodzica; zachowujÄ… siÄ™ jak relacje-dzieci, " +"aleich nazwy muszÄ… być unikalne na takim samym poziomie jak relacje-" +"rodzice." + +#: plwn.bases.RelationInfoBase:19 +msgid "" +"plWordNet also stores shorter aliases for most of the relation types, for" +" example \"hipo\" for \"hiponimia\" (\"hyponymy\")." +msgstr "" +"SÅ‚owosieć również przechowuje krótsze aliasy dla wiÄ™kszoÅ›ci typów " +"relacji, na przykÅ‚ad \"hipo\" dla hiponimii." + +#: plwn.bases.RelationInfoBase:22 +msgid "" +"There are four ways to refer to relations wherever a relation identifier " +"is accepted (usually the argument is named ``relation_id``):" +msgstr "" +"IstniejÄ… cztery sposoby na identyfikacjÄ™ typu relacji, tam gdzie " +"przyjmowany jest identyfikator relacji (zazwyczaj argument nazywa siÄ™ " +"``relation_id``):" + +#: plwn.bases.RelationInfoBase:25 +msgid "" +"Full name, in format ``<parent name>/<child name>`` (or just ``<child " +"name>`` if the relation has no parent)." +msgstr "" +"PeÅ‚na nazwa, w formacie ``<parent name>/<child name>`` (albo tylko " +"``<child name>`` jeÅ›li relacja nie ma rodzica)." + +#: plwn.bases.RelationInfoBase:27 +msgid "" +"One of the shorter aliases mentioned above. This is checked before " +"attempting to resolve relation names. Aliases must be globally unique." +msgstr "" +"Jeden z krótszych aliasów, wspomnianych powyżej. Aliasy sÄ… sprawdzane " +"przed wÅ‚aÅ›ciwymi nazwami relacji; muszÄ… być globalnie unikalne." 
+
+#: plwn.bases.RelationInfoBase:29
+msgid ""
+"A parent name on its own. This resolves to all children of the parent "
+"relation. Note that it's not always valid to pass a name that resolves "
+"to multiple relations; "
+":exc:`~plwn.exceptions.AmbiguousRelationTypeException` is raised in such "
+"cases."
+msgstr ""
+"Sama nazwa rodzica. Jest ona interpretowana w taki sposób, jakby "
+"odpowiadała identyfikatorom wszystkich swoich dzieci naraz. Jednak nie "
+"wszędzie dozwolone jest podanie więcej niż jednej relacji; w takich "
+"przypadkach jest rzucany "
+":exc:`~plwn.exceptions.AmbiguousRelationTypeException`."
+
+#: plwn.bases.RelationInfoBase:34
+msgid ""
+"Finally, a :class:`RelationInfoBase` instance may be used instead of a "
+"string, standing for the child relation it represents."
+msgstr ""
+"Wreszcie, instancja :class:`RelationInfoBase` może być użyta zamiast "
+"reprezentacji tekstowej, oznaczając relację-dziecko, którą reprezentuje."
+
+#: plwn.bases.RelationInfoBase:37
+msgid ""
+"Note that parent relations don't have a corresponding "
+":class:`RelationInfoBase` instance."
+msgstr ""
+"Relacje-rodzice nie mają odpowiadających im instancji "
+":class:`RelationInfoBase`."
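A sketch of the identifier forms listed above, as accepted wherever ``relation_id`` is taken (the synset ID is a placeholder; note that passing a parent name such as ``meronimia`` may raise :exc:`~plwn.exceptions.AmbiguousRelationTypeException` in contexts that allow only a single relation)::

    import plwn

    wn = plwn.load_default()
    syn = wn.synset_by_id(10424)              # hypothetical ID
    hypo = tuple(syn.related(u'hiponimia'))   # full name
    also = tuple(syn.related(u'hipo'))        # short alias
    parts = tuple(syn.related(u'meronimia'))  # parent name: all children
    # Membership checks on the special ``relations`` tuple accept any of
    # these representations as well (cf. ``eqv``).
    if u'hipo' in syn.relations:
        print('outbound hyponymy edges present')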
+
+#: plwn.bases.RelationInfoBase.SEP:1
+msgid ""
+"Character that separates parent from child name in full name "
+"representation. It must not appear in any relation names or aliases."
+msgstr ""
+"Znak rozdzielający część rodzica od części dziecka w pełnej nazwie. Ten "
+"znak nie może się pojawić w żadnej nazwie relacji ani aliasie."
+
+#: plwn.bases.RelationInfoBase.aliases:1
+msgid "Tuple of all aliases the relation can be referred to by."
+msgstr "Krotka wszystkich aliasów, które odnoszą się do tej relacji."
+
+#: plwn.bases.RelationInfoBase.eqv:1
+msgid ""
+"Check if ``other`` is an equivalent representation; either an equal "
+":class:`RelationInfoBase` object or a relation identifier that refers to "
+"this object."
+msgstr ""
+"Sprawdź, czy ``other`` jest ekwiwalentną reprezentacją; albo równym "
+"obiektem :class:`RelationInfoBase`, albo identyfikatorem relacji, który "
+"odnosi się do tego obiektu."
+
+#: plwn.bases.RelationInfoBase.eqv:5
+msgid ""
+"This is less strict than the equality operator, which only checks for "
+"equal :class:`RelationInfoBase` instances."
+msgstr ""
+"To mniej ścisła wersja operatora równości, który sprawdza jedynie równość"
+" instancji :class:`RelationInfoBase`."
+
+#: plwn.bases.RelationInfoBase.format_name:1
+msgid "Format and return a full name out of parent and child name strings."
+msgstr "Sformatuj i zwróć pełną nazwę na podstawie nazw rodzica i dziecka."
+
+#: plwn.bases.RelationInfoBase.format_name:3
+msgid ""
+"``parent_name`` may be ``None``, which will just return ``child_name``, "
+"as relations without parents are fully represented just by their name."
+msgstr ""
+"``parent_name`` może być ``None``, co po prostu zwróci ``child_name``, "
+"jako że relacje bez rodzica są po prostu reprezentowane przez swoją "
+"nazwę."
+
+#: plwn.bases.RelationInfoBase.kind:1
+msgid ""
+"One of enumerated constants of :class:`~plwn.enums.RelationKind`; denotes"
+" whether it's a synset or lexical relation."
+msgstr ""
+"Jedna z wyliczeniowych wartości z :class:`~plwn.enums.RelationKind`; "
+"rozróżnia, czy relacja jest między synsetami, czy jednostkami "
+"leksykalnymi."
+
+#: plwn.bases.RelationInfoBase.name:1
+msgid "String name of the relation."
+msgstr "Tekstowa nazwa relacji."
+
+#: plwn.bases.RelationInfoBase.parent:1
+msgid ""
+"String name of the parent relation to this one. May be ``None`` if the "
+"relation has no parent."
+msgstr ""
+"Tekstowa nazwa relacji-rodzica tej relacji. Może być ``None``, jeśli "
+"relacja nie ma rodzica."
+
+#: plwn.bases.RelationInfoBase.split_name:1
+msgid "Split a full name into a ``(<parent name>, <child name>)`` pair."
+msgstr "Rozdziel pełną nazwę na parę ``(<parent name>, <child name>)``."
+
+#: plwn.bases.RelationInfoBase.split_name:3
+msgid ""
+"``parent_name`` may be ``None`` if :attr:`.SEP` doesn't appear in the "
+"full name."
+msgstr ""
+"``parent_name`` może być ``None``, jeśli :attr:`.SEP` nie występuje "
+"w pełnej nazwie."
+
+#: plwn.bases.RelationInfoBase.split_name:6
+msgid ""
+"However, if :attr:`.SEP` appears more than once in ``full_name``, a "
+"``ValueError`` will be raised."
+msgstr ""
+"Jednak jeśli :attr:`.SEP` pojawia się w ``full_name`` więcej niż raz, "
+"zostanie rzucony ``ValueError``."
+
+#: plwn.bases.RelationEdge:1
+msgid "Bases: :class:`tuple`"
+msgstr "Klasy bazowe: :class:`tuple`"
+
+#: plwn.bases.RelationEdge:1
+msgid ""
+"Tuple type representing a relation instance between two synsets or "
+"lexical units."
+msgstr ""
+"Typ krotki reprezentującej instancję relacji pomiędzy dwoma synsetami "
+"albo jednostkami leksykalnymi."
+
+#: plwn.bases.RelationEdge.relation:1
+msgid "Alias for field number 1"
+msgstr "Alias pola numer 1"
+
+#: plwn.bases.RelationEdge.source:1
+msgid "Alias for field number 0"
+msgstr "Alias pola numer 0"
+
+#: plwn.bases.RelationEdge.target:1
+msgid "Alias for field number 2"
+msgstr "Alias pola numer 2"
diff --git a/doc/source/locale/pl/LC_MESSAGES/introduction.po b/doc/source/locale/pl/LC_MESSAGES/introduction.po
new file mode 100644
index 0000000000000000000000000000000000000000..ae52cf4a509c2524e515fde9f199bcbbcc77c43f
--- /dev/null
+++ b/doc/source/locale/pl/LC_MESSAGES/introduction.po
@@ -0,0 +1,407 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) 2017, Michał Kaliński
+# This file is distributed under the same license as the PLWN_API package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, 2017.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PLWN_API 0.21\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2017-06-10 15:46+0200\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: Babel 2.1.1\n"
+
+#: ../../source/introduction.rst:2
+msgid "Introduction"
+msgstr "Wstęp"
+
+#: ../../source/introduction.rst:5
+msgid "Loading"
+msgstr "Ładowanie"
+
+#: ../../source/introduction.rst:7
+msgid ""
+"Access to plWordNet is provided via a single "
+":class:`~plwn.bases.PLWordNetBase` object, which requires a source from "
+"which to load the lexicon data. In normal distributions, the storage file"
+" is bundled with the python package, so the only thing required to get an"
+" instance is::"
+msgstr ""
+"Punktem dostępu do Słowosieci jest pojedynczy obiekt "
+":class:`~plwn.bases.PLWordNetBase`, który wymaga źródła, z którego muszą "
+"zostać wczytane dane leksykonu. W normalnej dystrybucji plik z danymi "
+"jest zawarty w paczce Pythona; jedyna rzecz potrzebna, by skonstruować "
+"instancję, to::"
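The literal block referenced by the ``::`` above is not reproduced in this .po extract; it reduces to a sketch like the following (``load_default`` is the bundled-data loader added in this changeset)::

    import plwn

    wn = plwn.load_default()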
+
+#: ../../source/introduction.rst:17
+msgid "Getting synsets and lexical units"
+msgstr "Wydobycie synsetów i jednostek leksykalnych"
+
+#: ../../source/introduction.rst:19
+msgid ""
+"The basic building blocks of plWordNet are synsets and lexical units, "
+"represented by :class:`~plwn.bases.SynsetBase` and "
+":class:`~plwn.bases.LexicalUnitBase` objects. Every single synset and "
+"lexical unit can be identified either by an unique ID number, or by a "
+"combination of three properties: lemma, :abbr:`pos (part of speech)` and "
+"variant."
+msgstr ""
+"Podstawowymi elementami składowymi Słowosieci są synsety i jednostki "
+"leksykalne, reprezentowane przez obiekty :class:`~plwn.bases.SynsetBase` "
+"i :class:`~plwn.bases.LexicalUnitBase`. Każdy synset i jednostka mogą być "
+"zidentyfikowane na podstawie albo unikalnego numeru ID, albo kombinacji "
+"trzech atrybutów: lematu, :abbr:`pos (part of speech - część mowy)` oraz "
+"wariantu."
+
+#: ../../source/introduction.rst:25
+msgid ""
+"There are three primary methods on :class:`~plwn.bases.PLWordNetBase` for"
+" each of these two types of entities that allow selecting them from the "
+"lexicon:"
+msgstr ""
+"Są trzy podstawowe metody :class:`~plwn.bases.PLWordNetBase` dla każdego "
+"z dwóch typów obiektów, które pozwalają wybierać je z leksykonu:"
+
+#: ../../source/introduction.rst:29
+msgid "Many entities by matching one or more of the three identifying properties:"
+msgstr "Wiele obiektów na podstawie jednego lub więcej z trzech "
+"identyfikacyjnych atrybutów:"
+
+#: ../../source/introduction.rst:31
+msgid ":meth:`~plwn.bases.PLWordNetBase.synsets`"
+msgstr ""
+
+#: ../../source/introduction.rst:32
+msgid ":meth:`~plwn.bases.PLWordNetBase.lexical_units`"
+msgstr ""
+
+#: ../../source/introduction.rst:34
+msgid "A single entity by matching all three identifying properties:"
+msgstr "Pojedynczy obiekt na podstawie wszystkich trzech identyfikacyjnych "
+"atrybutów:"
+
+#: ../../source/introduction.rst:36
+msgid ":meth:`~plwn.bases.PLWordNetBase.synset`"
+msgstr ""
+
+#: ../../source/introduction.rst:37
+msgid ":meth:`~plwn.bases.PLWordNetBase.lexical_unit`"
+msgstr ""
+
+#: ../../source/introduction.rst:39
+msgid "A single entity by matching the unique numeric ID:"
+msgstr "Pojedynczy obiekt na podstawie unikalnego numerycznego ID:"
+
+#: ../../source/introduction.rst:41
+msgid ":meth:`~plwn.bases.PLWordNetBase.synset_by_id`"
+msgstr ""
+
+#: ../../source/introduction.rst:42
+msgid ":meth:`~plwn.bases.PLWordNetBase.lexical_unit_by_id`"
+msgstr ""
+
+#: ../../source/introduction.rst:46
+msgid "Selecting by ID"
+msgstr "Wybieranie przez ID"
+
+#: ../../source/introduction.rst:48
+msgid ""
+"Using the ``*_by_id`` methods is the fastest and most straightforward way"
+" of getting :class:`~plwn.bases.SynsetBase` and "
+":class:`~plwn.bases.LexicalUnitBase` objects, provided that ID values of "
+"synsets and / or units for the correct version of plWordNet have been obtained"
+" from an outside source or by storing the ``id`` property::"
+msgstr ""
+"Metody ``*_by_id`` to najszybszy i najprostszy sposób na otrzymanie obiektów "
+":class:`~plwn.bases.SynsetBase` i :class:`~plwn.bases.LexicalUnitBase`, w "
+"przypadku gdy wartości ID synsetów i / lub jednostek dla właściwej wersji "
+"Słowosieci są znane z zewnętrznego źródła, albo przez zapamiętanie atrybutu "
+"``id``::"
+
+#: ../../source/introduction.rst:62
+msgid "Selecting by all three identifying properties"
+msgstr "Wybieranie przez wszystkie trzy identyfikacyjne atrybuty"
+
+#: ../../source/introduction.rst:64
+msgid ""
+"The \"singular\" methods require all three properties. Lemma is the basic"
+" form of a word, variant is an ordinal number differentiating between "
+"different meanings of the same word, and :abbr:`pos (part of speech)` is "
+"an enumerated value."
+msgstr ""
+"Metody \"pojedyncze\" wymagają wartości wszystkich trzech atrybutów. Lemat "
+"jest podstawową formą słowa, wariant jest porządkowym numerem rozróżniającym "
+"między różnymi znaczeniami tego samego słowa, a :abbr:`pos (part of speech "
+"- część mowy)` jest wartością wyliczeniową."
+
+#: ../../source/introduction.rst:68
+msgid ""
+"There are eight :abbr:`pos (part of speech)` constants, four for Polish "
+"synsets and units, and four for English. The enum class is provided as a "
+"member of the base module of the library:"
+msgstr ""
+"Jest osiem stałych wartości :abbr:`pos (part of speech - część mowy)`, "
+"cztery dla języka polskiego i cztery dla angielskiego. Klasa wyliczeniowa "
+"jest częścią bazowego modułu biblioteki:"
+
+#: ../../source/introduction.rst:72
+msgid "``plwn.PoS.verb``, ``plwn.PoS.noun``, ``plwn.PoS.adv``, ``plwn.PoS.adj``"
+msgstr ""
+
+#: ../../source/introduction.rst:73
+msgid ""
+"``plwn.PoS.verb_en``, ``plwn.PoS.noun_en``, ``plwn.PoS.adv_en``, "
+"``plwn.PoS.adj_en``"
+msgstr ""
+
+#: ../../source/introduction.rst:76
+msgid ""
+"There are few cases where all three properties would be known, but not "
+"the ID. Still, selecting like this is possible::"
+msgstr ""
+"Rzadko zdarza się, by znane były wszystkie trzy atrybuty, ale nie ID. "
+"Wciąż, wybieranie w ten sposób jest możliwe::"
+
+#: ../../source/introduction.rst:85
+msgid "It's not legal to omit any of the three properties::"
+msgstr "Nie można pominąć żadnego z trzech atrybutów::"
+
+#: ../../source/introduction.rst:90
+msgid ""
+"If there's no synset / unit that fits the query, an "
+":exc:`~plwn.exceptions.NotFound` subclass is raised::"
+msgstr ""
+"Jeśli nie ma synsetu / jednostki, która pasowałaby do zapytania, rzucany "
+"jest wyjątek pochodny od :exc:`~plwn.exceptions.NotFound`::"
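A sketch of the "singular" selection with the error handling just described (the lemma and variant are hypothetical; :exc:`~plwn.exceptions.SynsetNotFound` is the documented :exc:`~plwn.exceptions.NotFound` subclass for synsets)::

    import plwn
    from plwn import exceptions

    wn = plwn.load_default()
    try:
        syn = wn.synset(u'pies', plwn.PoS.noun, 2)  # hypothetical entry
    except exceptions.SynsetNotFound:
        syn = None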
+msgstr "" +"Jest także możliwe, że zapytanie zwróci zerowÄ… liczbÄ™ obiektów. Inaczej niż " +"przy \"pojedynczych\" metodach, wyjÄ…tek nie jest rzucany, ale zwracana jest " +"pusta sekwencja." + +#: ../../source/introduction.rst:133 +msgid "Synset and lexical unit properties" +msgstr "Atrybuty synsetów i jednostek leksykalnych" + +#: ../../source/introduction.rst:135 +msgid "" +"Data associated with plWordNet synsets and lexical units is provided as " +"public properties of synset and lexical unit objects. There are described" +" in documentation of the respective classes: " +":class:`~plwn.bases.SynsetBase` and :class:`~plwn.bases.LexicalUnitBase`." +msgstr "" +"Dane skojarzone z jednostkami i synsetami SÅ‚owosieci sÄ… udostÄ™pniane jako " +"publiczne atrybuty obiektów. SÄ… opisane w dokumentacji osobnych klas: " +":class:`~plwn.bases.SynsetBase` i :class:`~plwn.bases.LexicalUnitBase`." + +#: ../../source/introduction.rst:142 +msgid "Getting relations" +msgstr "Wydobycie relacji" + +#: ../../source/introduction.rst:144 +msgid "" +"The other elementary kind of entities in plWordNet, aside from synsets " +"and lexical units, are relations." +msgstr "" +"Kolejny podstawowy rodzaj obiektów SÅ‚owosieci, poza synsetami i jednostkami " +"leksykalnymi, to relacje." + +#: ../../source/introduction.rst:147 +msgid "" +"Relation instances can connect two synsets or two lexical units. These " +"instances are selected using identifiers of their types." +msgstr "" +"Instancje relacji mogÄ… łączyć dwa synsety, albo dwie jednostki leksykalne. " +"Te instancje sÄ… wybierane za pomocÄ… identyfikatorów ich typów." + +#: ../../source/introduction.rst:150 +msgid "" +"A detailed explanation on how relation types can be referred to is in " +":class:`~plwn.bases.RelationInfoBase`; the short version is:" +msgstr "" +"DokÅ‚adne wytÅ‚umaczenia jak można identyfikować typy relacji znajduje siÄ™ " +"w :class:`~plwn.bases.RelationInfoBase`; w skrócie:" + +#: ../../source/introduction.rst:153 +msgid "" +"Full name, for example: ``hiperonimia`` for relations that have no " +"parent type; ``meronimia/część`` for relations that do." +msgstr "" +"PeÅ‚na nazwa, na przykÅ‚ad: ``hiperonimia`` dla relacji które nie majÄ… " +"rodzica; ``meronimia/część`` dla relacji które majÄ…." + +#: ../../source/introduction.rst:155 +msgid "Short alias, for example: ``hiper``." +msgstr "Krótki alias, na przykÅ‚ad: ``hiper``." + +#: ../../source/introduction.rst:156 +msgid "" +"Parent name, for example: ``meronimia``; this refers to all the " +"children of the relation." +msgstr "" +"Nazwa rodzica, na przykÅ‚ad: ``meronimia``; odnosi siÄ™ ono do wszystkich " +"dzieci relacji." + +#: ../../source/introduction.rst:159 +msgid "" +"To see names and aliases for all relations, in alphabetical order, do " +"``sorted(wn.relations_info())``." +msgstr "" +"Aby zobaczyć wszystkie nazwy i aliasy dla wszystkich relacji, w kolejnoÅ›ci " +"alfabetycznej, można użyć ``sorted(wn.relations_info())``." + +#: ../../source/introduction.rst:164 +msgid "Related synset / units" +msgstr "Synsety / jednostki w relacji" + +#: ../../source/introduction.rst:166 +msgid "" +"Having a :class:`~plwn.bases.SynsetBase` or a " +":class:`~plwn.bases.LexicalUnitBase` objects, it's possible to select all" +" objects related to it using the ``related`` method, which accepts one of" +" the relation type identifiers described above. 
+msgstr ""
+"Mając obiekt :class:`~plwn.bases.SynsetBase` albo "
+":class:`~plwn.bases.LexicalUnitBase`, możliwe jest wydobycie wszystkich "
+"obiektów będących w relacji z nim, używając metody ``related``, która "
+"akceptuje jeden z identyfikatorów typów relacji opisanych powyżej. "
+"Atrybut ``relations`` może być użyty, by zobaczyć, jakie typy relacji "
+"mają instancje wychodzące z synsetu / jednostki::"
+
+#: ../../source/introduction.rst:182
+msgid ""
+"If a relation of the right kind (synset or lexical) is passed to the "
+"method, but it has no instances for the particular entity, an empty "
+"iterable is returned::"
+msgstr ""
+"Jeśli relacja właściwego rodzaju (synsetowa albo leksykalna) jest "
+"przekazana do tej metody, ale obiekt nie ma jej wychodzących instancji, "
+"zwracana jest pusta sekwencja::"
+
+#: ../../source/introduction.rst:189
+msgid ""
+"In contrast, if a relation is of the wrong kind or does not exist, this "
+"raises an error::"
+msgstr ""
+"Natomiast jeśli relacja jest złego rodzaju albo nie istnieje, rzucany jest "
+"wyjątek::"
+
+#: ../../source/introduction.rst:195
+msgid ""
+"When passing a parent relation type to ``related``, distinction between "
+"actual, children relation types is lost. A second method "
+"``related_pairs`` can be used to annotate related entities with the "
+"relation instance connecting to it::"
+msgstr ""
+"Kiedy do ``related`` jest przekazany typ relacji-rodzica, rozróżnienie "
+"pomiędzy typami relacji-dzieci jest tracone. Druga metoda ``related_pairs`` "
+"może być użyta, by adnotować obiekty będące w relacji instancjami relacji, "
+"które do nich prowadzą::"
+
+#: ../../source/introduction.rst:206
+msgid ""
+"Synset's :meth:`~plwn.bases.SynsetBase.related` and "
+":meth:`~plwn.bases.SynsetBase.related_pairs` also have an additional "
+"boolean ``skip_artificial`` argument. See the methods' documentation for "
+"more details; the default value should be correct for most uses."
+msgstr ""
+"Metody synsetów :meth:`~plwn.bases.SynsetBase.related` oraz "
+":meth:`~plwn.bases.SynsetBase.related_pairs` mają dodatkowy argument "
+"boolowski ``skip_artificial``. Zob. dokumentację metod dla dalszych "
+"szczegółów; domyślna wartość powinna być poprawna dla zdecydowanej "
+"większości zastosowań."
+
+#: ../../source/introduction.rst:213
+msgid "Relation edges"
+msgstr "Krawędzie relacji"
+
+#: ../../source/introduction.rst:215
+msgid ""
+"Relation instances can also be selected using "
+":meth:`~plwn.bases.PLWordNetBase.synset_relation_edges` and "
+":meth:`~plwn.bases.PLWordNetBase.lexical_relation_edges` methods. Unlike "
+"the ``related`` methods, these two are not anchored to a starting point "
+"and select all relation instances of given types in plWordNet; they "
+"return iterables of :class:`~plwn.bases.RelationEdge` instances, each "
+"having ``source``, ``relation`` and ``target`` properties."
+msgstr ""
+"Instancje relacji mogą być również wybrane przy użyciu metod "
+":meth:`~plwn.bases.PLWordNetBase.synset_relation_edges` i "
+":meth:`~plwn.bases.PLWordNetBase.lexical_relation_edges`. W przeciwieństwie "
+"do metod ``related``, te dwie nie są zakotwiczone w konkretnym punkcie "
+"początkowym i wybierają wszystkie instancje relacji danych typów ze "
+"Słowosieci; zwracają sekwencje instancji :class:`~plwn.bases.RelationEdge`, "
+"z których każda ma atrybuty ``source``, ``relation``, ``target``."
+
+#: ../../source/introduction.rst:223
+msgid ""
+"Without arguments, all synset or lexical relation instances are yielded. "
+"Filtering can be done using an ``include`` or ``exclude`` argument. Both "
+"expect the values to be sets of relation type identifiers (the same as "
+"those accepted by the ``related`` methods). When ``include`` is not "
+"``None``, only instances of relations mentioned in the set are yielded. "
+"For example, to select all hyponymy instances::"
+msgstr ""
+"Bez argumentów wszystkie instancje synsetowych albo leksykalnych relacji "
+"są zwracane. Filtrowanie jest możliwe przez argument ``include`` bądź "
+"``exclude``. Oba przyjmują wartości będące zbiorami identyfikatorów typów "
+"relacji (takich samych jak te, które przyjmują metody ``related``). Jeśli "
+"``include`` nie jest ``None``, tylko instancje relacji zawartych w tym "
+"zbiorze są zwracane::"
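The literal block elided above would look roughly like this sketch (the relation name is chosen for illustration)::

    import plwn

    wn = plwn.load_default()
    for edge in wn.synset_relation_edges(include={u'hiponimia'}):
        print(edge.source, edge.relation, edge.target)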
+
+#: ../../source/introduction.rst:251
+msgid ""
+"When ``exclude`` is not ``None``, instances of mentioned relation types "
+"are removed from the iterable; either from the set of all relations or "
+"those in ``include``."
+msgstr ""
+"Jeśli ``exclude`` nie jest ``None``, instancje zawartych w nim typów relacji "
+"są usuwane ze zwracanej sekwencji; albo ze zbioru wszystkich relacji, albo "
+"tych zawartych w ``include``."
+
+#: ../../source/introduction.rst:255
+msgid ""
+"Method :meth:`~plwn.bases.PLWordNetBase.synset_relation_edges` also takes"
+" a boolean ``skip_artificial`` argument that's ``True`` as default. Like "
+"with :meth:`~plwn.bases.SynsetBase.related`, see the method's "
+"documentation for details."
+msgstr ""
+"Metoda :meth:`~plwn.bases.PLWordNetBase.synset_relation_edges` przyjmuje "
+"również argument boolowski ``skip_artificial``, mający domyślnie wartość "
+"prawdziwą. Podobnie jak w przypadku :meth:`~plwn.bases.SynsetBase.related`, "
+"zob. dokumentację tej metody dla szerszego opisu."
diff --git a/plwn/.bases.py.swp b/plwn/.bases.py.swp
deleted file mode 100644
index db9c729a546145f142a12fbd6693aaea5bc691d3..0000000000000000000000000000000000000000
Binary files a/plwn/.bases.py.swp and /dev/null differ
diff --git a/plwn/__init__.py b/plwn/__init__.py
index fefc9a10e254fa4ea7c73918d4cfc176650ada63..fe9b1f0b971492731c7c6ed5c26934eef70743eb 100644
--- a/plwn/__init__.py
+++ b/plwn/__init__.py
@@ -1,8 +1,27 @@
+# coding: utf8
+
+# Copyright (C) 2017 Michał Kaliński
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from ._loading import load_default
 from ._loading import read
 from ._loading import load
 from ._loading import show_source_formats
 from ._loading import show_storage_formats
-from .enums import PoS
+# Import the enums that are needed for selecting and filtering
+from .enums import PoS, RelationKind
 
 # Setup logging for the package (not)
 import logging as _logging
@@ -14,4 +33,6 @@ __all__ = [
     "load",
     "show_storage_formats",
     "show_source_formats",
+    "load_default",
+    "RelationKind",
 ]
diff --git a/plwn/_loading.py b/plwn/_loading.py
index 6571f47fb35453487a441d8304b9d537d491c0ac..bdc51a88a6f5dea64d1fe5cb6f3b19cb35f79036 100644
--- a/plwn/_loading.py
+++ b/plwn/_loading.py
@@ -1,7 +1,24 @@
+# coding: utf8
+
+# Copyright (C) 2017 Michał Kaliński
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
 """Defines user-facing functions.
 
-That allow simple construction of :class:`PLWordnetBase` instances,
-with selected storages and readers.
+They allow simple construction of
+:class:`PLWordnetBase` instances, with selected storages and readers.
 """
 
 from __future__ import absolute_import, division, print_function
@@ -13,8 +30,20 @@ import textwrap as tw
 
 import six
 
+from . import exceptions as exc
+try:
+    from .default import get_default_load_args
+except ImportError:
+    get_default_load_args = None
+
 
-__all__ = 'read', 'load', 'show_source_formats', 'show_storage_formats'
+__all__ = (
+    'read',
+    'load',
+    'load_default',
+    'show_source_formats',
+    'show_storage_formats',
+)
 
 _Info = namedtuple('_Info', ('desc', 'modname'))
@@ -36,16 +65,10 @@ _STORAGES = {
         'PLWN database).',
         'sqlite',
     ),
-    'objects': _Info(
-        'Stores data in plain python objects, dumping them in pickle format. '
-        'Quick to construct, but querying and memory efficiency is not '
-        'guaranteed.',
-        'objects',
-    ),
 }
 
 # Defaults for this version
-_READERS[None] = _READERS['xml']
+_READERS[None] = _READERS['database']
 _STORAGES[None] = _STORAGES['sqlite3']
@@ -70,6 +93,7 @@ def read(source_file,
     Return the right :class:`PLWordnetBase` subclass instance for the
     selected parameters.
+
     Where defaults are mentioned, those values may change with each minor
     version of PLWN API. If you depend on some particular format for a long
     running program, state it explicitly.
@@ -117,6 +141,27 @@ def load(storage_file, storage_format=None):
     return stor_cls.from_dump(storage_file)
 
 
+def load_default():
+    """Load and return the default, bundled version of plWordNet data.
+
+    This function will fail if the bundled version is not present in the
+    package; this may occur in some builds where specialized plWordNet
+    versions are needed and disk space is a concern.
+
+    :rtype: PLWordnetBase
+
+    :raises PLWNAPIException: If no default data is bundled.
+    """
+    # ``get_default_load_args`` is ``None`` when the optional ``default``
+    # module is not bundled; calling ``None`` then raises ``TypeError``.
+    try:
+        args = get_default_load_args()
+    except TypeError:
+        raise exc.PLWNAPIException(
+            'No default storage bundled with this PLWN API package',
+        )
+
+    return load(*args)
+
+
 def show_source_formats():
     """Print names and short descriptions.
diff --git a/plwn/bases.py b/plwn/bases.py
index b104c5d77f15e691b97ccef6d622c777234792db..43f9712ef12cdbf7d08832f084686d047e078924 100644
--- a/plwn/bases.py
+++ b/plwn/bases.py
@@ -1,274 +1,260 @@
+# coding: utf8
+
+# Copyright (C) 2017 Michał Kaliński
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
 """Base, abstract classes for plWordNet objects.
 
-Implementing common functionality independent of structures holding
-wordnet data.
+Implementing common functionality independent of structures
+holding the data itself.
 """
 
 from __future__ import absolute_import, division
 
-import abc
-import collections as coll
-import functools
+from abc import ABCMeta, abstractmethod, abstractproperty
+from collections import namedtuple
 import locale
+import operator as op
 
 import six
 
 from .utils import graphmlout as go
 from .enums import make_values_tuple
-from .relresolver import get_default_relation_resolver
-
-__all__ = 'SynsetBase', 'LexicalUnitBase', 'PLWordNetBase', 'RelationEdge'
-
-#: Named tuple type yielded by
-#: :meth:`PLWordNetBase.synset_relation_edges` and
-#: :meth:`PLWordNetBase.lexical_relation_edges`.
-RelationEdge = coll.namedtuple(
+__all__ = (
+    'PLWordNetBase',
+    'SynsetBase',
+    'LexicalUnitBase',
+    'RelationInfoBase',
     'RelationEdge',
-    ('source', 'relation', 'target'),
 )
+#: Tuple type representing a relation instance between two synsets or lexical
+#: units.
+RelationEdge = namedtuple('RelationEdge', ('source', 'relation', 'target'))
+
+
+@six.add_metaclass(ABCMeta)
 class PLWordNetBase(object):
-    """The primary entry point for retrieving data from plWordNet.
+    """The primary object providing data from plWordNet.
 
-    Allows querying the plWordNet for synsets and lexical units.
+    Allows retrieving synsets, lexical units, and other informative objects.
     """
 
-    __metaclass__ = abc.ABCMeta
-
     _STORAGE_NAME = '?'
 
     @classmethod
     def from_reader(cls, reader, dump_to=None):
-        """Create a new instance from a source reader.
-
-        Optionally saving it in an internal representation format
-        in another file.
+        """Create a new instance from a source reader.
 
-        :param reader: Generator that yields :class:`SynsetNone` and
-            :class:`LexicalUnitNode` from a source representation.
+        Optionally saving it in an internal representation format in
+        another file.
 
-        :param str dump_to: Path to a file where the data read from the source
-            will be dumped in an internal representation. It will be possible
-            to later load it quicker by :meth:`.from_dump`. If ``None``, then
-            no cached file will be created.
+        ``reader`` is any iterable that yields node instances:
+        :class:`~plwn.readers.nodes.SynsetNode`,
+        :class:`~plwn.readers.nodes.LexicalUnitNode` and
+        :class:`~plwn.readers.nodes.RelationTypeNode`.
 
-        :returns: New instance of PLWN API entry point.
-        :rtype: PLWordNetBase
+        ``dump_to`` is a path to a (non-existing) file where data
+        from ``reader`` will be stored, to be loaded later.
+        If not passed, then the data won't be cached in any file, requiring
+        it to be read again using :meth:`.from_reader`.
         """
         raise NotImplementedError()
 
     @classmethod
     def from_dump(cls, dump):
-        """Create a new instance from a dump of cached internal representation.
+        """Create new instance from a dump of cached internal representation.
 
-        The dump file must have been created by the same ``PLWordNetBase``
-        subclass, and preferably by the same version of PLWN API (backwards
-        compatibility of dump formats is not guaranteed).
-
-        :param str dump: Path to a file with cached internal representation.
-
-        :returns: New instance of PLWN API entry point.
-        :rtype: PLWordNetBase
+        The dump file must have been created by :meth:`.from_reader` of the
+        same :class:`PLWordNetBase` subclass and schema version.
         """
         raise NotImplementedError()
 
-    def __init__(self):
-        """Initialize PLWordNetBase."""
-        self._rel_resolver = get_default_relation_resolver()
-
-    @abc.abstractmethod
+    @abstractmethod
     def synsets(self, lemma=None, pos=None, variant=None):
-        """Iterate over synsets form plWordNet.
-
-        Filtered by lemma, part ofspeech and variant.
-
-        If a parameter is omitted, then any value is accepted (so ``synsets()``
-        iterates over all synsets).
-
-        The lemma, pos and variant are properties of lexical units, this method
-        yields synsets that contain those lexical units.
-
-        :param str lemma: Only synsets containing a lexical unit with this
-            lemma will be yielded.
-        :param pos: Only synsets containing a lexical unit with this part
-            of speech will be yielded.
-        :type pos: Union[PoS, str]
-        :param int variant: Only synsets containing a lexical unit with this
-            variant will be yielded.
+        """Select synsets from plWordNet based on a combination of criteria.
 
-        :returns: Iterable of synsets fitting the parameters' criteria.
-        :rtype: Iterable[SynsetBase]
-
-        :raises InvalidPoSException: If a query is made for a PoS that is not
-            one of the valid constants.
+        This method works just like :meth:`.lexical_units`, but returns an
+        iterable of distinct synsets that own the lexical units selected by
+        the query.
         """
         pass
 
-    @abc.abstractmethod
+    @abstractmethod
     def synset(self, lemma, pos, variant):
-        """Get the synset.
-
-        Containing the unit with the lemma, part of speech and variant.
-
-        Unlike :meth:`.synsets`, all parameters of this method are mandatory.
-        It either returns a single synset, or raises and exception if no
-        such synset can be found.
-
-        :param str lemma: The lemma of a lexical unit contained by the
-            requested synset.
-        :param pos: The part of speech of a lexical unit contained by the
-            requested synset.
-        :type pos: Union[PoS, str]
-        :param int variant: The variant of a lexical unit contained by the
-            requested synset.
+        """Like :meth:`.synsets`.
 
-        :returns: Synset satisfying the criteria specified by the parameters.
-        :rtype: SynsetBase
+        But either return a single synset or raise
+        :exc:`~plwn.exceptions.SynsetNotFound`.
 
-        :raises SynsetNotFound: If no synset with the given properties
-            could be found.
-        :raises InvalidPoSException: If a query is made for a PoS that is not
-            one of the valid constants.
+        All parameters are required, to ensure that the query could only match
+        a single synset.
         """
         pass
 
-    @abc.abstractmethod
+    @abstractmethod
     def synset_by_id(self, id_):
-        """Get the synset, knowing its internal, numerical ID.
+        """Select a synset using its internal, numeric ID.
 
-        This method is not intended to be used by itself, but with tools which
-        identify PLWN synsets by their IDs.
+        If there is no synset with the given ID, raise
+        :exc:`~plwn.exceptions.SynsetNotFound`.
 
-        :param int id_: The internal plWordnet identifier of the synset.
-
-        :returns: The synset having the ID.
-        :rtype: SynsetBase
-
-        :raises InvalidSynsetIdentifierException: If there's no synset with
-            the ID in plWordnet.
+        This is the fastest method to get a particular :class:`SynsetBase`
+        object.
         """
         pass
 
-    @abc.abstractmethod
+    @abstractmethod
     def lexical_units(self, lemma=None, pos=None, variant=None):
-        """Iterate over lexical units form plWordNet.
-
-        Filtered by lemma, part of speech and variant.
+        """Select lexical units from plWordNet based on a combination of criteria.
 
-        If a parameter is omitted, then any value is accepted (so
-        ``lexical_units()`` iterates over all units).
+        It's possible to specify the lemma, part of speech and variant of the
+        units this method should yield. If a parameter value is omitted, any
+        value matches. Consequently, a call of ``lexical_units()`` will return
+        an iterable of all lexical units in plWordNet. If no lexical unit
+        matches the query, an empty iterable is returned.
 
-        :param str lemma: Only lexical units with this lemma will be yielded.
-        :param pos: Only lexical units with this part of speech will be
-            yielded.
-        :type pos: Union[PoS, str]
-        :param int variant: Only lexical units with this variant will be
-            yielded.
-
-        :returns: Iterable of lexical units fitting the parameters' criteria.
-        :rtype: Iterable[LexicalUnitBase]
-
-        :raises InvalidPoSException: If a query is made for a PoS that is not
-            one of the valid constants.
+        The parameter ``lemma`` is a unicode string, ``variant`` is an
+        integer, and ``pos`` is an enumerated value of
+        :class:`~plwn.enums.PoS`.
         """
         pass
 
-    @abc.abstractmethod
+    @abstractmethod
     def lexical_unit(self, lemma, pos, variant):
-        """Get the lexical unit with the lemma, part of speech and variant.
-
-        Unlike :meth:`.lexical_units`, all parameters of this method are
-        mandatory. It either returns a single unit, or raises and exception
-        if no such unit can be found.
+        """Like :meth:`.lexical_units`.
 
-        :param str lemma: The lemma of the requested lexical unit.
-        :param pos: The part of speech of the requested lexical unit.
-        :type pos: Union[PoS, str]
-        :param int variant: The variant of the requested lexical unit.
+        But either return a single lexical unit or
+        raise :exc:`~plwn.exceptions.LexicalUnitNotFound`.
 
-        :returns: Lexical unit satisfying the criteria specified by the
-            parameters.
-        :rtype: LexicalUnitBase
-
-        :raises LexicalUnitNotFound: If no unit with the given properties
-            could be found.
-        :raises InvalidPoSException: If a query is made for a PoS that is not
-            one of the valid constants.
+        All parameters are required, to ensure that the query could only match
+        a single lexical unit.
         """
         pass
 
-    @abc.abstractmethod
+    @abstractmethod
     def lexical_unit_by_id(self, id_):
-        """Get the lexical unit, knowing its internal, numerical ID.
+        """Select a lexical unit using its internal, numeric ID.
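Continuing with the ``wn`` instance from the loading sketch, the query
methods might be exercised as follows (the lemma is illustrative)::

    from plwn.enums import PoS
    from plwn.exceptions import LexicalUnitNotFound

    # Multi-result form: omitted parameters match any value.
    for lu in wn.lexical_units(lemma=u'zamek', pos=PoS.noun):
        print(lu.variant, lu.definition)

    # Single-result form: all parameters are mandatory.
    try:
        lu = wn.lexical_unit(u'zamek', PoS.noun, 1)
    except LexicalUnitNotFound:
        lu = None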
- :returns: The lexical unit having the ID. - :rtype: LexicalUnitBase + If there is no lexical unit with the given ID, raise + :exc:`~plwn.exceptions.LexicalUnitNotFound`. - :raises InvalidLexicalUnitIdentifierException: If there's no lexical - unit with the ID in plWordnet. + This is the fastest method to get a particular :class:`LexicalUnitBase` + object. """ pass - @abc.abstractmethod - def synset_relation_edges(self, include=None, exclude=None): - """Iterate over all synset relation instances in plWordnet. - - Yielding them as tuples. - - Named tuples in format ``(source, relation, target)`` - (:data:`RelationEdge`) are yielded by this method. - - One of the intended uses of this method is to create a graph "live", by - feeding the results directly to a graph-building library. - - **Note:** if both ``include`` and ``exclude`` are passed, the result - will be a logical intersection. In both collections, invalid relation - names are silently ignored. + @abstractmethod + def synset_relation_edges(self, + include=None, + exclude=None, + skip_artificial=True): + """Get an iterable of synset relation instances from plWordNet. + + As represented by :class:`RelationEdge`. + + ``include`` and ``exclude`` are containers of relation type + identifiers (see :class:`RelationInfoBase`). If ``include`` is not + ``None``, then only instances of relations in it are included in the + result. If ``exclude`` is not ``None``, then all relations in it are + omitted from the result. If both are ``None``, all relations are + selected. + + If ``skip_artificial`` is ``True`` (the default), then artificial + synsets (see :attr:`SynsetBase.is_artificial`) are "skipped over": new + relation edges are created to replace ones ending or staring in an + artificial synset, and connecting neighbouring synsets if they have + relations directed like this:: + + .-------. Rel 1 + | Syn D |-----------------. + '-------' | + v + .--------------. + .-------. Rel 1 | Syn B | Rel 1 .-------. + | Syn A |-------->| [artificial] |-------->| Syn E | + '-------' '--------------' '-------' + ^ + | + .-------. Rel 2 | + | Syn C |-----------------' + '-------' + + + .-------. Rel 1 + | Syn D |-----------------. + '-------' v + .-------. + | Syn E | + '-------' + .-------. Rel 1 ^ + | Syn A |-----------------' + '-------' + + ``Syn C`` is dropped, since there's no instance of ``Rel 1`` directed + outwards from the skipped artificial ``Syn B``. + """ + pass - :param Iterable[str] include: Names of relations which should be - included in the output. Instances of all other relations will be - ignored. By default all relations are included. + @abstractmethod + def lexical_relation_edges(self, include=None, exclude=None): + """Get an iterable of lexical unit relation instances from plWordNet. - :param Iterable[str] exclude: Names of relations which should not be - included in the output. By default, no relations are excluded. + As represented by :class:`RelationEdge`. - :returns: Generator of tuples representing synset relation edges. - :rtype: Iterable[Tuple[SynsetBase,str,SynsetBase]] + This method works like :meth:`.synset_relation_edges`, but for lexical + units and relation types. There is no ``skip_artificial``, since there + are no artificial lexical units. """ pass - @abc.abstractmethod - def lexical_relation_edges(self, include=None, exclude=None): - """Iterate over all lexical relation instances in plWordnet. 
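A sketch of consuming the edge iterables described above, for example to
build an adjacency mapping; the relation name is an example and depends on
the loaded data::

    from collections import defaultdict

    hypernyms = defaultdict(set)
    # skip_artificial defaults to True, so artificial synsets are already
    # "skipped over" in the yielded RelationEdge tuples.
    for edge in wn.synset_relation_edges(include=(u'hiperonimia',)):
        hypernyms[edge.source.id].add(edge.target.id)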
+ @abstractmethod + def relations_info(self, name=None, kind=None): + """Get an iterable of :class:`RelationInfoBase` instances. + + Matching the query defined by parameters. - Yielding them as tuples. + ``name`` is a string naming a relation (see + :class:`RelationInfoBase`). If it names a "parent", all its children + are selected. - This method behaves very closely to :meth:`.synset_relation_edges`, but - for lexical relations. + ``kind`` is an enumerated value of + :class:`~plwn.enums.RelationKind`. - :rtype: Iterable[Tuple[LexicalUnitBase,str,LexicalUnitBase]] + Any parameter that's not passed matches any relation type. + As such, a call of ``relations_info()`` will select all relation types + in plWordNet. """ pass def close(self): - """Perform necessary cleanup operations, close this PLWordNet instance. + """Perform cleanup operations. - Often, temporary files are created when reading and parsing plWordNet, - and non-temporary files may be opened. Call this method to properly - close / remove those files. + After using the :class:`PLWordNetBase` object. - It's best to use :func:`contextlib.closing` to ensure that this method - gets eventually called. + By default, this method does nothing and should be overridden by a + subclass if necessary. It should still always be called, since any + :class:`PLWordNetBase` subclass may create any kind of temporary + resources. - It's legal to call this method several times. It's not legal to call - any other methods after :meth:`.close` has been called. + After calling this method, this instance and any ones linked with it + (:class:`SynsetBase`, :class:`LexicalUnitBase`, etc.) may become + invalid and should not be used. """ pass @@ -288,132 +274,94 @@ class PLWordNetBase(object): included_synset_nodes=None, excluded_synset_nodes=None, included_lexical_unit_nodes=None, - excluded_lexical_unit_nodes=None): - """Export the wordnet as graph. + excluded_lexical_unit_nodes=None, + skip_artificial_synsets=True): + """Export plWordNet as graph. In `GraphML <http://graphml.graphdrawing.org/>`_ format. - Normally, nodes of the graph are synsets, and edges are relations - between synsets. It's possible to make the graph made of lexical units - and relations, or both synsets and units. - - IDs of nodes are internal plWordNet IDs (the same as returned by ``id`` - property of synset / lexical_unit). They may be prefixed with - ``synset-`` or ``lexical_unit-`` depending on type of the node and - ``prefix_ids`` parameter value. - - Edges have no IDs. - - Nodes and edges can have certain attributes assigned to them in - GraphML. For edges, there are two attributes: - - * **type:** Either ``relation`` or ``unit_and_synset``, depending on - whether the edge represents a relation or a link between a synset and - a unit that belongs to it. The latter are only present in mixed graph - type. - * **name:** If **type** is ``relation``, then it's the name of the - relation. If **type** is ``unit_and_synset``, then it's either - ``has_unit``, for an edge directed from a synset node to a - lexical_unit node, or ``in_synset`` for an edge in the - opposite direction. - - Nodes only have attributes if ``include_attributes`` parameter is - ``True``. The attributes have names and values corresponding to - properties of :class:`SynsetBase` or :class:`LexicalUnitBase` objects. - Composite values (like tuples) are stored as JSON strings (since - GraphML only allows simple types for attributes). Attributes can be - excluded or included using the method's parameters. 
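Enumerating relation types through ``relations_info`` might look like this
(the parent name is an example)::

    from plwn.enums import RelationKind

    # All synset relation types known to the loaded data.
    for rel in wn.relations_info(kind=RelationKind.synset):
        print(rel.name, rel.parent, rel.aliases)

    # A parent name selects all of its child relations.
    meronymy_children = list(wn.relations_info(name=u'meronimia'))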
- - Possible names of synset attributes: - * definition - * relations - - Possible names of lexical unit attributes: - * lemma - * pos - * variant - * definition - * sense_examples - * sense_examples_sources - * external_links - * usage_notes - * domain - * relations - - **NOTE:** If both corresponding ``include_*`` and ``exclude_*`` - parameters are passed, an item will be included only if it appears in - the ``include_*`` set and does not appear in ``exclude_*`` set. - - :param Union[str,BytesIO] out_file: Stream or name of the file to which - the GraphML XML will be written. **NOTE:** Because of a peculiarity in - the XML module used (ElementTree), if a stream is passed here, it - should be opened in binary mode. - - :param str graph_type: Type of the graph. There are three possible - values: - * ``synset``: Nodes are synsets and edges are synset relations. - * ``lexical_unit``: Nodes are lexical units and edges are lexical unit - relations. - * ``mixed``: There are both synset and lexical unit nodes, - distinguished by prefixes in their IDs. Synsets are connected with - synset relations and lexical units are connected with lexical - relations. Synsets and units are connected with ``unit_and_synset`` - type of edge (see description above). - - :param bool include_attributes: If ``True``, then node attributes will - be included in the output XML file. Note, that if - ``included_*_attributes`` or ``excluded_*_attributes`` is passed, then - this parameter is ignored and the designated attributes are included. - - :param bool prefix_ids: If ``True``, then IDs of nodes will be prefixed - with ``synset-`` or ``lexical_unit-``. Note, that if ``graph_type`` is - ``mixed``, nodes are always prefixed and this parameter is ignored. - - :param FrozenSet[str] included_synset_attributes: Set of names of - synset attributes which should be included in GraphML nodes. All other - attributes are excluded. - - :param FrozenSet[str] excluded_synset_attributes: Set of names of - synset attributes which should not be included in GraphML nodes. All - other attributes are included. - - :param FrozenSet[str] included_lexical_unit_attributes: Like - ``included_synset_attributes``, but for lexical unit nodes. - - :param FrozenSet[str] excluded_lexical_unit_attributes: Like - ``excluded_synset_attributes``, but for lexical unit nodes. - - :param FrozenSet[str] included_synset_relations: Set of names of synset - relations which should be included as edges in the graph. All other - relation edges are excluded. - - :param FrozenSet[str] excluded_synset_relations: Set of names of synset - relations which should not be included as edges in the graph. All other - relation edges are included. - - :param FrozenSet[str] included_lexical_unit_relations: Like - ``included_synset_relations``, but for lexical unit relations. - - :param FrozenSet[str] excluded_lexical_unit_relations: Like - ``excluded_synset_relations``, but for lexical unit relations. - - :param FrozenSet[int] included_synset_nodes: Set of IDs of synsets that - should be included as nodes in the graph. All other synsets are - excluded. Any edge that has one of its endpoints not included will also - not be included. Also, if the graph type is mixed, lexical units - belonging to a synset which is not included will also be excluded. - - :param FrozenSet[int] excluded_synset_nodes: Set of IDs of synsets - which should not be included as nodes in the graph. All other synsets - are included. Also see remarks for ``included_synset_nodes``. 
-
-    :param FrozenSet[int] included_lexical_unit_nodes: Like
-        ``included_synset_nodes``, but for lexical units.
-
-    :param FrozenSet[int] excluded_lexical_unit_nodes: Like
-        ``excluded_synset_nodes``, but for lexical units.
-
-    :raises ValueError: If ``graph_type`` is not one of the allowed values.
+        Nodes of the graph are synsets and / or lexical units, and edges are
+        relation instances.
+
+        For nodes, their numeric plWordNet IDs are set as their XML element
+        IDs.
+
+        **NOTE:** Nodes that have no inbound or outbound edges are dropped from
+        the graph.
+
+        Nodes and edges have attributes, as GraphML defines them. For nodes,
+        attributes are public properties of :class:`SynsetBase` or
+        :class:`LexicalUnitBase` (aside from ``relations``, which would be
+        useless in a graph, and ``id``, which becomes the XML ID of a node).
+        Edges have two attributes:
+
+        * **type**: Either ``relation``, for edges that represent plWordNet
+          relation instances, or ``unit_and_synset`` for edges between synset
+          nodes and nodes of lexical units that belong to the synset. The
+          latter appear only in *mixed* graph.
+        * **name**: If **type** is ``relation``, then this is the full name
+          of the relation (see :class:`RelationInfoBase`). If **type** is
+          ``unit_and_synset``, it is one of constant values: ``has_unit`` if
+          the edge is directed from synset to unit, or ``in_synset``, for edges
+          directed from unit to synset.
+
+        ``out_file`` is a writable file-like object to which the GraphML output
+        will be written.
+
+        ``graph_type`` is one of three constant string values: ``synset``,
+        ``lexical_unit`` or ``mixed``. Synset graph contains only synset
+        nodes and relations, lexical unit graph contains only lexical unit
+        nodes and relations, and mixed graph contains all of the former, as
+        well as additional edges that map lexical units to synsets they belong
+        to.
+
+        If ``include_attributes`` is ``True``, then all synset and / or lexical
+        unit attributes will be included. By default, attributes are not
+        included to shrink the written file. Note, that if any of
+        ``(included/excluded)_(synset/lexical_unit)_attributes`` parameters is
+        passed, inclusion of attributes will be controlled by them and the
+        value of ``include_attributes`` is ignored.
+
+        If ``prefix_ids`` is ``True``, then ID of each node will be prefixed
+        with the type: ``synset-`` or ``lexical_unit-``. By default, it's
+        not done, unless ``graph_type`` is ``mixed``, in which case this
+        parameter is ignored and ID prefixes are enforced.
+
+        ``included_synset_attributes`` and ``excluded_synset_attributes`` are
+        containers of synset attribute names, selecting the values which should
+        or should not be included with synset nodes.
+
+        ``included_lexical_unit_attributes`` and
+        ``excluded_lexical_unit_attributes`` work the same way as the above,
+        but for attributes of lexical units.
+
+        ``included_synset_relations`` and ``excluded_synset_relations`` are
+        containers of synset relation type identifiers (see
+        :class:`RelationInfoBase`), selecting synset relation types whose
+        instances should or should not be included in the graph. By default,
+        all relation types are included.
+
+        ``included_lexical_unit_relations`` and
+        ``excluded_lexical_unit_relations`` work the same way as the above,
+        but for lexical relation types.
+
+        ``included_synset_nodes`` and ``excluded_synset_nodes`` are containers
+        for IDs of synsets that should or should not be included as nodes in
+        the
+        graph.
If a node is not included, all edges that start or end in it are + also excluded. By default, all non-artificial synsets are included. + + ``included_lexical_unit_nodes`` and ``excluded_lexical_unit_nodes`` are + the same way as the above, but for lexical units. + + If ``skip_artificial_synsets`` is ``True`` (the default), then + artificial synsets are excluded from the graph, and edges connecting to + them are reconnected to "skip over" them, as described for + :meth:`.synset_relation_edges`. + + **Note:** while this method accepts all of the above parameters at + all times, parameters relating to synsets are ignored if ``graph_type`` + is ``lexical_unit``, and parameters relating to lexical units are + ignored if ``graph_type`` is ``synset``. """ gwn = go.GraphMLWordNet() gb = go.GraphMLBuilder(self, gwn) @@ -428,6 +376,7 @@ class PLWordNetBase(object): excluded_nodes=excluded_synset_nodes, included_relations=included_synset_relations, excluded_relations=excluded_synset_relations, + skip_artificial_synsets=skip_artificial_synsets, ) elif graph_type == go.GRAPH_TYPE_UNIT: gb.lexical_unit_graph( @@ -459,9 +408,10 @@ class PLWordNetBase(object): excluded_synset_nodes=excluded_synset_nodes, included_lexical_unit_nodes=included_lexical_unit_nodes, excluded_lexical_unit_nodes=excluded_lexical_unit_nodes, + skip_artificial_synsets=skip_artificial_synsets, ) else: - raise ValueError('graph_type={!r}'.format(graph_type)) + raise ValueError('Invalid graph type: {!r}'.format(graph_type)) gwn.write(out_file) @@ -472,8 +422,8 @@ class PLWordNetBase(object): ) -@functools.total_ordering @six.python_2_unicode_compatible +@six.add_metaclass(ABCMeta) class SynsetBase(object): """Encapsulates data associated with a plWordNet synset. @@ -481,95 +431,166 @@ class SynsetBase(object): Most of plWordNet relations are between meanings, hence the need to group lexical units into synsets. - For purposes of ordering, a :class:`SynsetBase` is uniquely identified by - its head: the first of the lexical units it contains. + For purposes of ordering, a :class:`SynsetBase` object is uniquely + identified by its "head": the first of the lexical units it contains. """ - __metaclass__ = abc.ABCMeta - - @abc.abstractproperty + @abstractproperty def id(self): - """``int``. + """The internal, numeric identifier of the synset in plWordNet. - The internal identifier of the synset in plWordnet. It is unique among - all synsets. + It is unique among all synsets. + + If this identifier is passed to :meth:`PLWordNetBase.synset_by_id`, it + would return this :class:`SynsetBase` object. """ pass - @abc.abstractproperty + @abstractproperty def lexical_units(self): - """``Tuple[LexicalUnitBase]``. + """Tuple of :class:`LexicalUnitBase` objects. - Lexical units contained in the synsets. Ordering of units within the - tuple is arbitrary, but constant. The first unit is the synset's head, - used to represent it. + Representing lexical units contained in the synset. + Ordering of units within the tuple is arbitrary, but constant. - At least one lexical unit is always present in every synset. + At least one lexical unit is always present in every synset, so + ``lexical_units[0]`` is always valid and selects the synset's "head". """ pass - @abc.abstractproperty + @abstractproperty def definition(self): - """``str``. + """Textual description of the synset's meaning. + + May be ``None``. + + In plWordNet, most definitions are stored as + :attr:`LexicalUnitBase.definition`. Synset definitions are present + mostly for English synsets. 
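An export sketch for the ``to_graphml`` method described above; the output
file name and excluded relation are arbitrary, and binary mode is an
assumption carried over from the underlying XML writer::

    with open('plwn-synsets.graphml', 'wb') as out:
        wn.to_graphml(
            out,
            graph_type='synset',
            include_attributes=True,
            excluded_synset_relations=frozenset([u'hiponimia']),
        )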
+        """
+        pass
+
+    @abstractproperty
+    def is_artificial(self):
+        """Boolean value informing if the synset is an artificial one.
+
+        Artificial synsets carry no linguistic
+        meaning; they are introduced as a means of grouping synsets within the
+        structure of plWordNet.
+
+        For most uses, artificial synsets should be ignored.
         """
         pass
 
-    @abc.abstractproperty
+    @abstractproperty
     def relations(self):
-        """``Tuple[str]``.
+        """Tuple of :class:`RelationInfoBase` instances.
+
+        Containing types of distinct relations that have outbound
+        edges from this synset.
 
-        Tuple of all outward relations that lead from this synset.
+        Relations are returned in an arbitrary order.
+
+        The tuple is special: methods for checking membership accept all
+        possible representations of a relation type (see
+        :meth:`RelationInfoBase.eqv`).
         """
         pass
 
-    @abc.abstractmethod
-    def related(self, relation_name):
-        """Iterate over synsets to whom this synset has a certain relation.
+    @abstractproperty
+    def is_polish(self):
+        """Check whether all units are Polish."""
+        pass
 
-        :param str relation_name: The name of the relation to follow.
+    @abstractproperty
+    def is_english(self):
+        """Check whether all units are English."""
+        pass
 
-        :returns: Iterable of related synsets.
-        :rtype: Iterable[SynsetBase]
+    @abstractproperty
+    def pos(self):
+        """Returns PoS of the synset units.
 
-        :raises InvalidRelationNameException: If ``relation_name`` is not a
-            valid name of a synset relation in plWordNet.
+        Raises :exc:`ValueError` if the units have differing PoS values.
+        """
+        pass
+
+    @abstractmethod
+    def related(self, relation_id=None, skip_artificial=True):
+        """Get an iterable of :class:`SynsetBase` instances.
+
+        That are connected to this synset by outbound edges of
+        synset relation type identified by ``relation_id``.
+
+        ``relation_id`` can be any synset relation type identifier (see
+        :class:`RelationInfoBase`), a collection of relation type identifiers,
+        or ``None``, in which case synsets related to this one by any relation
+        are selected.
+
+        Note, that the distinction between relations that fit the
+        ``relation_id`` query is lost. Use :meth:`.related_pairs` if it's
+        needed.
+
+        Raises :exc:`~plwn.exceptions.InvalidRelationTypeException` if
+        (any of) ``relation_id`` does not refer to an existing synset relation
+        type.
+
+        If ``skip_artificial`` is ``True`` (the default) artificial synsets
+        related to this one are "skipped over", as described for
+        :meth:`PLWordNetBase.synset_relation_edges`.
+        """
+        pass
+
+    @abstractmethod
+    def related_pairs(self, relation_id=None, skip_artificial=True):
+        """Like :meth:`.related`.
+
+        But return an iterable of pairs
+        ``(<relation info>, <relation target synset>)``.
         """
         pass
 
     def to_dict(self, include_related=True, include_units_data=True):
         """Create a JSON-compatible dictionary.
 
-        With all the public properties of the synset.
+        With all public properties of the synset.
 
         Enums are converted to their values and all collections are converted
         to tuples.
 
-        :param bool include_related: If ``True``, the dictionary will contain a
-            "related" member, whose value is a dictionary in format::
+        Property :attr:`.relations` is omitted, as it would be redundant when
+        all related synsets can be enumerated when ``include_related`` is
+        ``True``.
Some additional members are also present in the dictionary: - { - "<synset relation name>": ( - (<relation target id>, <relation target string form>), - ... - ), - ... - } + * ``str``: The string representation of the synset (defined by + ``__str__`` override on :class:`SynsetBase`). + * ``units``: Listing (as a tuple) of units belonging to the synset (in + the same ordering as :attr:`.lexical_units`), as pairs of + ``(<unit id>, <unit string form>)``. + + If ``include_related`` is ``True`` (the default), the dictionary will + contain an additional ``related`` member, representing synsets related + to this one, in the following format:: - :param bool include_units_data: If ``True``, then the "units" member of - the dictionary will be a tuple of results of - :meth:`LexicalUnitBase.to_dict`. Otherwise, it will contain only - tuples of ``(<unit id>, <unit string form>)``. + { + <synset relation full name>: ( + (<relation target id>, <relation target string form>), + ... + ), + ... + } - :returns: Dictionary contain data of the synset. - :rtype: Mapping[str, Any] + If ``include_units_data`` is ``True`` (the default), the ``units`` + member will contain results of invocation of + :meth:`LexicalUnitBase.to_dict` for the synset's units, + instead of pairs described above. In this case, the value of + ``include_related`` parameter is passed on to + :meth:`LexicalUnitBase.to_dict`. """ syn_dict = { u'id': self.id, u'definition': self.definition, + u'is_artificial': self.is_artificial, u'units': tuple( (lu.to_dict(include_related) for lu in self.lexical_units) if include_units_data @@ -580,17 +601,20 @@ class SynsetBase(object): if include_related: syn_dict[u'related'] = { - relname: tuple( + six.text_type(rel): tuple( (target.id, target.short_str()) - for target in self.related(relname) + for target in self.related(rel) ) - for relname in self.relations + for rel in self.relations } return syn_dict def short_str(self): - """Shorter version of ``str`` cast that displays only the first unit.""" + """Shorter version of synset's string form (``__str__``). + + That displays only the first lexical unit. 
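Navigating and serializing a single synset, per the methods above (the
lemma, variant and relation name are illustrative)::

    import json

    from plwn.enums import PoS

    syn = wn.synset(u'zamek', PoS.noun, 1)

    # related() loses the per-edge relation; related_pairs() keeps it.
    for rel, target in syn.related_pairs(u'hiperonimia'):
        print(rel.name, target.short_str())

    # JSON-compatible dict, including related synsets and full unit data.
    as_json = json.dumps(syn.to_dict(), ensure_ascii=False)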
+        """
         sstr = [u'{', six.text_type(self.lexical_units[0])]
         if len(self.lexical_units) > 1:
             sstr.append(
@@ -599,6 +623,11 @@
         sstr.append(u'}')
         return ''.join(sstr)
 
+    def __inner_cmp(self, cmp_op, other):
+        if not isinstance(other, SynsetBase):
+            return NotImplemented
+        return cmp_op(self.lexical_units[0], other.lexical_units[0])
+
     def __repr__(self):
         head = self.lexical_units[0]
         rstr = '<Synset id={!r} lemma={!r} pos={!r} variant={!r}'.format(
@@ -623,221 +652,261 @@
     def __hash__(self):
         # Even if comparing is done by the synset's head, it's probably better
         # to hash by all lexical units, to boost the hash's uniqueness
-        return hash(self.lexical_units)
+        return hash((SynsetBase, self.lexical_units))
 
     def __eq__(self, other):
-        if not isinstance(other, SynsetBase):
-            return NotImplemented
-        return self.lexical_units[0] == other.lexical_units[0]
+        return self.__inner_cmp(op.eq, other)
 
     def __ne__(self, other):
-        return not self == other
+        return self.__inner_cmp(op.ne, other)
 
     def __lt__(self, other):
-        if not isinstance(other, SynsetBase):
-            return NotImplemented
-        return self.lexical_units[0] < other.lexical_units[0]
+        return self.__inner_cmp(op.lt, other)
+
+    def __le__(self, other):
+        return self.__inner_cmp(op.le, other)
+
+    def __gt__(self, other):
+        return self.__inner_cmp(op.gt, other)
+
+    def __ge__(self, other):
+        return self.__inner_cmp(op.ge, other)
 
 
 @six.python_2_unicode_compatible
+@six.add_metaclass(ABCMeta)
 class LexicalUnitBase(object):
     """Encapsulates data associated with a plWordNet lexical unit.
 
     Lexical units represent terms in the language. Each lexical unit is
     uniquely identified by its lemma (base written form), part of speech
-    (verb, noun, adjective or adverb) and variant (a number: sometimes the same
-    form can have multiple meanings).
+    (verb, noun, adjective or adverb) and variant (a number differentiating
+    between homonyms).
     """
 
-    __metaclass__ = abc.ABCMeta
-
-    @abc.abstractproperty
+    @abstractproperty
     def id(self):
-        """``int``.
+        """The internal, numeric identifier of the lexical unit in plWordNet.
+
+        It is unique among all lexical units.
 
-        The internal identifier of the lexical unit in plWordnet. It is unique
-        among all units.
+        If this identifier is passed to
+        :meth:`PLWordNetBase.lexical_unit_by_id`, it would return this
+        :class:`LexicalUnitBase` object.
         """
         pass
 
-    @abc.abstractproperty
+    @abstractproperty
     def lemma(self):
-        """``str``.
-
-        Lemma of the unit, basic form of the word(s) the unit represents.
-        """
+        """Lemma of the unit; its basic text form."""
        pass
 
-    @abc.abstractproperty
+    @abstractproperty
     def pos(self):
-        """``PoS``.
+        """Part of speech of the unit.
 
-        Part of speech of the unit. This will be one of enumeration constants
-        from :class:`PoS`. To get the textual value, use ``pos.value``.
+        One of enumerated constants of :class:`~plwn.enums.PoS`.
         """
         pass
 
-    @abc.abstractproperty
+    @abstractproperty
     def variant(self):
-        """``int``.
+        """Ordinal number to differentiate between meanings of homonyms.
 
-        If the same lemma has different meanings as the same part of speech,
-        this number will be used to tell them apart. The first meaning has the
-        number 1.
+        Numbering starts at 1.
         """
         pass
 
-    @abc.abstractproperty
+    @abstractproperty
     def definition(self):
-        """``str``.
+        """Textual description of the lexical unit's meaning.
 
-        Textual description of the lexical unit's meaning.
-
-        Will be an empty string if the definition is not present in plWordNet.
+        May be ``None``.
""" pass - @abc.abstractproperty + @abstractproperty def sense_examples(self): - """``Tuple[str]``. + """Text fragments. - Fragments of text that show how the lexical unit is used in the - language. + That show how the lexical unit is used in the language. - May be an empty collection, if no examples are present. + May be an empty tuple. """ pass - @abc.abstractproperty + @abstractproperty def sense_examples_sources(self): - """``Tuple[str]``. + """Symbolic representations of sources. + + From which the sense examples were taken. - Symbolic representations of sources from which the sense examples were - taken. + The symbols are short strings, defined by plWordNet. - This tuples has the same length as ``sense_examples``, and is aligned - by index (for example, the source of ``sense_examples[3]`` is at - ``sense_examples_sources[3]``). + This tuples has the same length as :attr:`.sense_examples`, and is + aligned by index (for example, the source of ``sense_examples[3]`` is + at ``sense_examples_sources[3]``). - To get pairs of of examples with their sources, use + To get pairs of examples with their sources, use ``zip(sense_examples, sense_examples_sources)`` """ # TODO List of source symbols, link to? pass - @abc.abstractproperty + @abstractproperty def external_links(self): - """``Tuple[str]``. - - URLs to webpages describing the meaning of the lexical unit. + """URLs linking to web pages describing the meaning of the lexical unit. - May be an empty collection, if no examples are present. + May be an empty collection. """ pass - @abc.abstractproperty + @abstractproperty def usage_notes(self): - """``Tuple[str]``. + """Symbols. - Symbols denoting certain properties of how the lexical unit is used. + Denoting certain properties of how the lexical unit is used in + the language. - For example, "daw." means that the word is considered dated. + The symbols are short strings, defined by plWordNet. For example, + ``daw.`` means that the word is considered dated. May be an empty collection. """ pass - @abc.abstractproperty + @abstractproperty def domain(self): - """``Domain``. + """plWordNet domain the lexical unit belongs to. - Wordnet domain the lexical unit belongs to. + One of enumerated constants of :class:`~plwn.enums.Domain`. """ pass - @abc.abstractproperty + @abstractproperty def verb_aspect(self): - """``Optional[VerbAspect]``. + """Aspect of a verb. + + Of the enumerated values of :class:`~plwn.enums.VerbAspect`. - Aspect of a verb. This will be one of the constants from - :class:`VerbAspect`, or ``None``, if the lexical unit is not a verb. + May be ``None`` if the unit is not a verb, or had no aspect assigned. """ pass - @abc.abstractproperty - def emotion_markedness(self): - """``Optional[EmotionMarkedness]``. + @abstractproperty + def is_emotional(self): + """Boolean value informing if the lexical unit has emotional affinity. - Markedness of emotional connotations of the lexical unit. May be - ``None``, if the unit has no emotional markedness. + If it is ``True``, then the lexical unit describes a term that has an + emotional load, and ``emotion_*`` properties will have meaningful + values, describing the affinity. - If this property is ``None``, then all other ``emotion_*`` properties - will be ``None`` or empty. + If it is ``False``, then the unit is emotionally neutral. All + ``emotion_*`` properties will be ``None`` or empty collections. + + This property can also be ``None``, which means that the unit has not + (yet) been evaluated with regards to emotional affinity. 
All + ``emotion_*`` properties are the same as when it's ``False``. """ pass - @abc.abstractproperty - def emotion_names(self): - """``Tuple[str, ...]``. + @abstractproperty + def emotion_markedness(self): + """Markedness of emotions associated with the lexical unit. - Names of emotions associated with this lexical unit. + May be ``None`` if the unit has no emotional markedness. + + If this property is ``None`` then all other ``emotion_*`` properties + will be ``None`` or empty collections. """ pass - @abc.abstractproperty - def emotion_valuations(self): - """``Tuple[str, ...]``. + @abstractproperty + def emotion_names(self): + """Tuple of names of emotions associated with this lexical unit.""" + pass - Valuations of emotions associated with this lexical unit. - """ + @abstractproperty + def emotion_valuations(self): + """Tuple of valuations of emotions associated with this lexical unit.""" pass - @abc.abstractproperty + @abstractproperty def emotion_example(self): - """``Optional[str]``. - - An example of an emotionally loaded sentence using the lexical unit. - """ + """Example of an emotionally charged sentence using the lexical unit.""" pass - @abc.abstractproperty + @abstractproperty def emotion_example_secondary(self): """``Optional[str]``. - This property is not ``None`` only if ``emotion_markedness`` is - ``amb``. In such case, :attr:`.emotion_example` will be an - example of a positive sentence, and this one will be a negative - sentence. + This property is not ``None`` only if :attr:`.emotion_markedness` is + :attr:`~plwn.enums.EmotionMarkedness.amb`. In such case, + :attr:`.emotion_example` will be an example of a positively charged + sentence, and this one will be a negatively charged sentence. """ pass - @abc.abstractproperty + @abstractproperty def synset(self): - """``SynsetBase``. + """An instance of :class:`SynsetBase`. - The synset the unit belongs to. + Representing the synset this unit belongs to. """ pass - @abc.abstractmethod - def related(self, relation_name): - """Iterate over lexical units to whom this unit has a certain relation. + @abstractproperty + def relations(self): + """Tuple of :class:`RelationInfoBase` instances. - :param str relation_name: The name of the relation to follow. + Containing types of distinct relations that have + outbound edges from this lexical unit. - :returns: Iterable of related units. - :rtype: Iterable[LexicalUnitBase] + Relations are returned in an arbitrary order. - :raises InvalidRelationNameException: If ``relation_name`` is not a - valid name of a lexical relation in plWordNet. + The tuple is special: methods for checking membership accept all + possible representations of a relation type (see + :meth:`RelationInfoBase.eqv`). """ pass - @abc.abstractproperty - def relations(self): - """``Tuple[str]``. + @abstractproperty + def is_polish(self): + """Check whether unit is Polish by its PoS.""" + pass - Tuple of all outward relations that lead from this lexical unit. + @abstractproperty + def is_english(self): + """Check whether unit is English by its PoS.""" + pass + + @abstractmethod + def related(self, relation_id=None): + """Get an iterable of :class:`LexicalUnitBase` instances. + + That are connected to this lexical unit by outbound edges + of lexical relation type identified by ``relation_id``. + + ``relation_id`` can be any lexical relation type identifier (see + :class:`RelationInfoBase`), a collection of relation types identifiers, + or ``None``, in which case lexical units related to this one by any + relation are selected. 
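A sketch of inspecting the emotion annotations described above; the unit is
illustrative and the three-state check mirrors the ``is_emotional``
semantics::

    lu = wn.lexical_unit(u'dom', PoS.noun, 1)

    if lu.is_emotional:
        # Annotated and emotionally loaded.
        print(lu.emotion_markedness, lu.emotion_names)
    elif lu.is_emotional is None:
        print(u'not yet annotated for emotional affinity')
    else:
        print(u'emotionally neutral')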
+ + Note, that distinction between any relations that fit the + ``relation_id`` query is lost. Use :meth:`.related_pairs` if it's + needed. + + Raises :exc:`~plwn.exceptions.InvalidRelationTypeException` if + ``relation_id`` does not refer to an existing lexical relation type. + """ + pass + + @abstractmethod + def related_pairs(self, relation_id): + """Like :meth:`.related`. + + But return an iterable of pairs + ``(<relation info>, <relation target unit>)``. """ pass @@ -849,19 +918,24 @@ class LexicalUnitBase(object): Enums are converted to their values and all collections are converted to tuples. - :param bool include_related: If ``True``, the dictionary will contain a - "related" member, whose value is a dictionary in format:: + Property :attr:`.relations` is omitted, as it would be redundant when + all related lexical units can be enumerated when ``include_related`` + is ``True``. - { - "<lexical relation name>": ( - (<relation target id>, <relation target string form>), - ... - ), - ... - } + An additional ``str`` member is present in the dictionary; its value is + the string representation of the lexical unit. - :returns: Dictionary contain data of the lexical unit. - :rtype: Mapping[str, Any] + If ``include_related`` is ``True`` (the default), the dictionary will + contain an additional ``related`` member, representing lexical units + related to this one, in the following format:: + + { + <lexical relation full name>: ( + (<relation target id>, <relation target string form>), + ... + ), + ... + } """ lu_dict = { u'id': self.id, @@ -875,6 +949,9 @@ class LexicalUnitBase(object): u'usage_notes': tuple(self.usage_notes), u'domain': self.domain.value, u'synset': self.synset.id, + u'verb_aspect': None + if self.verb_aspect is None + else self.verb_aspect.value, u'emotion_markedness': None if self.emotion_markedness is None else self.emotion_markedness.value, @@ -887,15 +964,41 @@ class LexicalUnitBase(object): if include_related: lu_dict[u'related'] = { - relname: tuple( + six.text_type(rel): tuple( (target.id, six.text_type(target)) - for target in self.related(relname) + for target in self.related(rel) ) - for relname in self.relations + for rel in self.relations } return lu_dict + def __lt_lempos(self, other): + # Common code for __lt__ and __le__ methods. + # Compares first two elements. 
+ colled = locale.strcoll(self.lemma, other.lemma) + if colled < 0: + return True + if colled > 0: + return False + if self.pos is other.pos: + # Defer comparison + return None + return self.pos.value < other.pos.value + + def __inner_eq(self, other): + return (locale.strcoll(self.lemma, other.lemma) == 0 and + self.pos == other.pos and + self.variant == other.variant) + + def __inner_cmp(self, cmp_op, other): + if not isinstance(other, LexicalUnitBase): + return NotImplemented + cmp_val = self.__lt_lempos(other) + return (cmp_val + if cmp_val is not None + else cmp_op(self.variant, other.variant)) + def __repr__(self): return '<LexicalUnit id={!r} lemma={!r} pos={!r} variant={!r}>'.format( self.id, @@ -913,50 +1016,187 @@ class LexicalUnitBase(object): ) def __hash__(self): - return hash((self.lemma, self.pos, self.variant)) + return hash((LexicalUnitBase, self.lemma, self.pos, self.variant)) def __eq__(self, other): if not isinstance(other, LexicalUnitBase): return NotImplemented - - return (locale.strcoll(self.lemma, other.lemma) == 0 and - self.pos == other.pos and - self.variant == other.variant) + return self.__inner_eq(other) def __ne__(self, other): - return not self == other - - # Total ordering done by hand, to minimize strcoll calls + if not isinstance(other, LexicalUnitBase): + return NotImplemented + return not self.__inner_eq(other) def __lt__(self, other): - cmp_ = self.__lt_lempos(other) - return cmp_ if cmp_ is not None else self.variant < other.variant + return self.__inner_cmp(op.lt, other) def __le__(self, other): - cmp_ = self.__lt_lempos(other) - return cmp_ if cmp_ is not None else self.variant <= other.variant + return self.__inner_cmp(op.le, other) def __gt__(self, other): - return not self <= other + return self.__inner_cmp(op.gt, other) def __ge__(self, other): - return not self < other + return self.__inner_cmp(op.ge, other) - def __lt_lempos(self, other): - # Common code for __lt__ and __le__ methods. - # Compares first two elements. - if not isinstance(other, LexicalUnitBase): + +@six.python_2_unicode_compatible +@six.add_metaclass(ABCMeta) +class RelationInfoBase(object): + """Encapsulates information associated with a relation type. + + The primary purpose of this class is to serve as a single object + consolidating all possible ways a relation type can be referred to. + + In general, plWordNet uses *parent* and *child* relation names. Child + relations are those that have actual instances between synsets and lexical + units. Parent relations only exist to group child relations together; child + relation names need to be only unique within the group of their parent + relation, while parent relations must be globally unique. + + For example, there are two relations named "część" ("part"); one being a + child of "meronimia" ("meronymy"), and another a child of "holonimia" + ("holonymy"). + + Some relation types have no parent; they behave like child relations, but + their names need to be unique on par with parent relations. + + plWordNet also stores shorter aliases for most of the relation types, + for example "hipo" for "hiponimia" ("hyponymy"). + + There are four ways to refer to relations wherever a relation identifier + is accepted (usually the argument is named ``relation_id``): + + * Full name, in format ``<parent name>/<child name>`` (or just + ``<child name>`` if the relation has no parent). + * One of the shorter aliases mentioned above. This is checked before + attempting to resolve relation names. Aliases must be globally unique. 
+ * A parent name on its own. This resolves to all children of the parent + relation. Note, that it's not always valid to pass a name that resolves + to multiple relations; + :exc:`~plwn.exceptions.AmbiguousRelationTypeException` is raised in such + cases. + * Finally, a :class:`RelationInfoBase` instance may be used instead of + a string, standing for the child relation it represents. + + Note, that parent relations don't have corresponding + :class:`RelationInfoBase` instance. + """ + + #: Character that separates parent from child name in full name + #: representation. It must not appear in any relation names or aliases. + SEP = u'/' + + @classmethod + def format_name(cls, parent_name, child_name): + """Format and return a full name out of parent and child name strings. + + ``parent_name`` may be ``None``, which will just return ``child_name``, + as relations without parents are fully represented just by their name. + """ + parform = u'' if parent_name is None else parent_name + cls.SEP + return parform + child_name + + @classmethod + def split_name(cls, full_name): + """Split a full name into a ``(<parent name>, <child name>)`` pair. + + ``parent_name`` may be ``None`` if :attr:`.SEP` doesn't appear in the + full name. + + However, if :attr:`.SEP` appears more than once in ``full_name``, a + ``ValueError`` will be raised. + """ + items = full_name.split(cls.SEP) + itlen = len(items) + + if itlen > 2: + raise ValueError(full_name) + + return (None, items[0]) if itlen < 2 else tuple(items) + + @abstractproperty + def kind(self): + """One of enumerated constants of :class:`~plwn.enums.RelationKind`. + + Denotes it's a synset or lexical relation. + """ + pass + + @abstractproperty + def parent(self): + """String name of the parent relation to this one. + + May be ``None`` if the relation has no parent. + """ + pass + + @abstractproperty + def name(self): + """String name of the relation.""" + + @abstractproperty + def aliases(self): + """Tuple of all aliases the relation can be referred to by.""" + pass + + def eqv(self, other): + """Check if ``other`` is an equivalent representation. + + Either an equal :class:`RelationInfoBase` object or + a relation identifier that refers to this object. + + This is less strict than the equality operator, which only checks for + equal :class:`RelationInfoBase` instances. 
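The naming scheme can be exercised with the class-level helpers; the
relation names here are examples::

    from plwn.bases import RelationInfoBase

    full = RelationInfoBase.format_name(u'meronimia', u'część')
    # full == u'meronimia/część'
    parent, child = RelationInfoBase.split_name(full)
    # parent == u'meronimia', child == u'część'

    rel = next(iter(wn.relations_info(name=full)))
    rel.eqv(full)  # True; the relation's aliases would also match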
+ """ + sother = six.text_type(other) + return sother == six.text_type(self) or sother in self.aliases + + def __inner_eq(self, other): + return (self.parent == other.parent and + self.name == other.name) + + def __inner_cmp(self, cmp_op, other): + if not isinstance(other, RelationInfoBase): return NotImplemented + return cmp_op(six.text_type(self), six.text_type(other)) - colled = locale.strcoll(self.lemma, other.lemma) + def __repr__(self): + return ( + '<RelationInfo name={!r} parent={!r} kind={!r} aliases={!r}>' + .format( + self.name, + self.parent, + self.kind, + self.aliases, + ) + ) - if colled < 0: - return True - if colled > 0: - return False + def __str__(self): + return self.format_name(self.parent, self.name) - if self.pos is other.pos: - # Defer comparison - return None + def __hash__(self): + return hash((RelationInfoBase, self.parent, self.name)) - return self.pos.value < other.pos.value + def __eq__(self, other): + if not isinstance(other, RelationInfoBase): + return NotImplemented + return self.__inner_eq(other) + + def __ne__(self, other): + if not isinstance(other, RelationInfoBase): + return NotImplemented + return not self.__inner_eq(other) + + def __lt__(self, other): + return self.__inner_cmp(op.lt, other) + + def __le__(self, other): + return self.__inner_cmp(op.le, other) + + def __gt__(self, other): + return self.__inner_cmp(op.gt, other) + + def __ge__(self, other): + return self.__inner_cmp(op.ge, other) diff --git a/plwn/default/__init__.py b/plwn/default/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c2f5501f2bd5baadbf18775b81293a7d4ce3220c --- /dev/null +++ b/plwn/default/__init__.py @@ -0,0 +1,20 @@ +# coding: utf8 + +# Copyright (C) 2017 MichaÅ‚ KaliÅ„ski +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +from ._default import get_default_load_args + +__all__ = 'get_default_load_args', diff --git a/plwn/default/_default.py b/plwn/default/_default.py new file mode 100644 index 0000000000000000000000000000000000000000..6c1bdf52c110824948275b3003208e17c9e48b31 --- /dev/null +++ b/plwn/default/_default.py @@ -0,0 +1,46 @@ +# coding: utf8 + +# Copyright (C) 2017 MichaÅ‚ KaliÅ„ski +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +from __future__ import absolute_import, division + + +import pkg_resources as pkgr + + +__all__ = 'get_default_load_args', + + +_DEFAULT_LOC = 'plwn.default', 'plwn-3.0-v5.db' +_DEFAULT_FORMAT = 'sqlite3' + + +def get_default_load_args(): + """Get a tuple with arguments to :func:`~plwn._loading.load`. + + Required to load the default storage. + + This is a helper function meant to be internally used by + :func:`~plwn._loading.load_default`. + + **Advanced note:** This function refers to the bundled file using + ``pkg_resources.resource_filename()``. Normally, the ``plwn`` package will + not be installed as a zipfile, but if you override this, then if you use + this function you will have to call ``pkg_resources.cleanup_resources()`` + before the process exits. + """ + storage_filename = pkgr.resource_filename(*_DEFAULT_LOC) + return storage_filename, _DEFAULT_FORMAT diff --git a/plwn/enums.py b/plwn/enums.py index e14f67ac2a5d7083b744361cddef07152c1b7137..ebca1d5602448fe6dcbe42440d2288579a02e9f1 100644 --- a/plwn/enums.py +++ b/plwn/enums.py @@ -1,5 +1,21 @@ # coding: utf8 -"""Enumerated values used in plWordNet.""" + +# Copyright (C) 2017 MichaÅ‚ KaliÅ„ski +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +"""All enumerated values used in plWordNet.""" from __future__ import absolute_import, division @@ -11,6 +27,7 @@ import six __all__ = ( + 'RelationKind', 'PoS', 'VerbAspect', 'EmotionMarkedness', @@ -21,67 +38,139 @@ __all__ = ( ) -# Helper function for making dictionaries translating enum instances into -# numbers used to denote them in plWN database. -def _fill_numtrans(enumclass, num2enum, enum2num): - for num, enuminst in enumerate(enumclass, 1): - num2enum[num] = enuminst - enum2num[enuminst] = num +def _numtrans(n2e_dict, e2n_dict, start_value=1): + """Helper decorator for making dictionaries. + + Translating enum instances into numbers denoting them in plWN database. + """ + + def decorator(cls): + def by_db_number(number, optional=False): + """Return the enum value. + + Associated with ``number`` value stored in the plWordNet database. + + Raises ``KeyError`` if ``number`` is not in the range valid for + the database field, unless ``optional`` is ``True``; then, + ``None`` is returned instead of an enum value. 
+ """ + try: + return n2e_dict[number] + except KeyError: + if optional: + return None + raise + + def db_number(self): + """Number associated with the enum value in plWordNet database.""" + return e2n_dict[self] + + cls.by_db_number = staticmethod(by_db_number) + cls.db_number = property(db_number) + + for num, enval in enumerate(cls, start_value): + n2e_dict[num] = enval + e2n_dict[enval] = num + return cls + return decorator -def _get_from_numtrans(numtrans, num, optional): - try: - return numtrans[num] - except KeyError: - if optional: - return None - raise + +_RK_NUM2ENUM = {} +_RK_ENUM2NUM = {} + + +@_numtrans(_RK_NUM2ENUM, _RK_ENUM2NUM, 0) +class RelationKind(Enum): + """Whether a relation connects synsets or lexical units.""" + + # Explicit ordering is needed only in python 2. + if six.PY2: + __order__ = 'lexical synset' + + lexical = u'lexical' + synset = u'synset' -# Explicit ordering is needed only in python 2. -_POS_ORDER = 'verb noun adverb adjective' _POS_NUM2ENUM = {} _POS_ENUM2NUM = {} +@_numtrans(_POS_NUM2ENUM, _POS_ENUM2NUM) class PoS(Enum): - """Defines **Part of Speech** values used by plWN.""" + """Defines part of speech values used in plWordNet.""" if six.PY2: - __order__ = _POS_ORDER + __order__ = 'verb noun adverb adjective ' \ + 'verb_en noun_en adverb_en adjective_en' verb = u'verb' noun = u'noun' adverb = u'adverb' adjective = u'adjective' + # English (PWN) PoSes + verb_en = u'verb_en' + noun_en = u'noun_en' + adverb_en = u'adverb_en' + adjective_en = u'adjective_en' + v = verb n = noun adv = adverb adj = adjective - @staticmethod - def by_db_number(number, optional=False): - return _get_from_numtrans(_POS_NUM2ENUM, number, optional) + v_en = verb_en + n_en = noun_en + adv_en = adverb_en + adj_en = adjective_en + + # Polish aliases for parity + verb_pl = verb + noun_pl = noun + adverb_pl = adverb + adjective_pl = adjective + + v_pl = verb + n_pl = noun + adv_pl = adverb + adj_pl = adjective @property - def db_number(self): - return _POS_ENUM2NUM[self] + def short_value(self): + """Returns shortened value. + With "adjective" shortened to "adj" and "adverb" to "adv" + for compatibility with other conventions. 
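Given the enumeration orders above, the helpers generated by ``_numtrans``
and the extra ``PoS`` properties might behave like this (the numbers follow
the declared member order)::

    from plwn.enums import PoS

    PoS.noun.db_number                    # 2: second canonical member
    PoS.by_db_number(2)                   # PoS.noun
    PoS.by_db_number(999, optional=True)  # None instead of KeyError

    PoS.adjective.short_value             # u'adj'
    PoS.noun_en.is_english                # True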
+        """
+        if not hasattr(self, "_short_value"):
+            value = self.value
+            value = value.replace("adjective", "adj")
+            value = value.replace("adverb", "adv")
+            self._short_value = value
+        return self._short_value
 
-_fill_numtrans(PoS, _POS_NUM2ENUM, _POS_ENUM2NUM)
+    @property
+    def is_polish(self):
+        """``True`` if this is a Polish (plWordNet) part of speech."""
+        return not self.is_english
+
+    @property
+    def is_english(self):
+        """``True`` if this is an English (PWN) part of speech."""
+        if not hasattr(self, "_is_english"):
+            self._is_english = self.value.endswith("_en")
+        return self._is_english
 
 
-_VA_ORDER = 'perfective imperfective predicative two_aspect'
 _VA_NUM2ENUM = {}
 _VA_ENUM2NUM = {}
 
 
+@_numtrans(_VA_NUM2ENUM, _VA_ENUM2NUM)
 class VerbAspect(Enum):
-    """Defines aspect values used by verbs in plWN."""
+    """Defines verb aspect values used in plWordNet."""
 
     if six.PY2:
-        __order__ = _VA_ORDER
+        __order__ = 'perfective imperfective predicative two_aspect'
 
     perfective = u'perf'
     imperfective = u'imperf'
@@ -97,17 +186,6 @@ class VerbAspect(Enum):
     dk = perfective
     ndk = imperfective
 
-    @staticmethod
-    def by_db_number(number, optional=False):
-        return _get_from_numtrans(_VA_NUM2ENUM, number, optional)
-
-    @property
-    def db_number(self):
-        return _VA_ENUM2NUM[self]
-
-
-_fill_numtrans(VerbAspect, _VA_NUM2ENUM, _VA_ENUM2NUM)
-
 
 class EmotionMarkedness(Enum):
     """Defines markedness of emotions associated with some lexical units."""
@@ -128,7 +206,8 @@ class EmotionMarkedness(Enum):
     def normalized(cls, strvalue):
         """Return an instance of this enum.
 
-        With string value normalized with regards to whitespace.
+        The instance corresponds to ``strvalue``, after normalizing its
+        whitespace.
         """
 
         strvalue = strvalue.strip()
@@ -147,7 +226,7 @@ class EmotionMarkedness(Enum):
 
 
 class EmotionName(Enum):
-    """Possible names of emotions associated with some lexical units."""
+    """Defines names of emotions that may be associated with lexical units."""
 
     joy = u'radość'
     trust = u'zaufanie'
@@ -169,7 +248,10 @@ class EmotionName(Enum):
 
 
 class EmotionValuation(Enum):
-    """Possible valuations of emotions associated with some lexical units."""
+    """Defines valuations of emotions.
+
+    These may be associated with lexical units.
+    """
 
     usefulness = u'użyteczność'
     good = u'dobro'
@@ -198,19 +280,19 @@ class EmotionValuation(Enum):
     nieszczescie = unhappiness
 
 
-_DOM_ORDER = 'bhp czy wytw cech czc umy por zdarz czuj jedz grp msc cel rz ' \
-    'os zj rsl pos prc il zw ksz st sbst czas zwz hig zmn cumy cpor wal ' \
-    'cjedz dtk cwytw cczuj ruch pst cpos sp cst pog jak rel odcz grad sys ' \
-    'adj adv cdystr caku cper cdel'
 _DOM_NUM2ENUM = {}
 _DOM_ENUM2NUM = {}
 
 
+@_numtrans(_DOM_NUM2ENUM, _DOM_ENUM2NUM)
 class Domain(Enum):
-    """Wordnet domains of lexical units."""
+    """Defines domains of lexical units occurring in plWordNet."""
 
     if six.PY2:
-        __order__ = _DOM_ORDER
+        __order__ = 'bhp czy wytw cech czc umy por zdarz czuj jedz grp msc ' \
+            'cel rz os zj rsl pos prc il zw ksz st sbst czas zwz hig zmn ' \
+            'cumy cpor wal cjedz dtk cwytw cczuj ruch pst cpos sp cst pog ' \
+            'jak rel odcz grad sys adj adv cdystr caku cper cdel'
 
     bhp = u'najwyższe w hierarchii'
     czy = u'czynności (nazwy)'
@@ -273,21 +355,10 @@ class Domain(Enum):
     cper = u'czasowniki perduratywne'
     cdel = u'czasowniki delimitatywne'
 
-    @staticmethod
-    def by_db_number(number, optional=False):
-        return _get_from_numtrans(_DOM_NUM2ENUM, number, optional)
-
-    @property
-    def db_number(self):
-        return _DOM_ENUM2NUM[self]
-
-
-_fill_numtrans(Domain, _DOM_NUM2ENUM, _DOM_ENUM2NUM)
-
 
 def make_values_tuple(enum_seq):
     """Auxiliary function.
 
-    That converts a sequence of enums to a tuple of enumvalues.
+    It converts a sequence of enums to a tuple of their string values.
     """
     return tuple(en.value for en in enum_seq)
diff --git a/plwn/exceptions.py b/plwn/exceptions.py
index ccb23844fee9154559f3e919ad2b5a81cab1f427..3c7fbef1a93fd6e56d659d6ee8e6e2efa9bd208d 100644
--- a/plwn/exceptions.py
+++ b/plwn/exceptions.py
@@ -1,3 +1,20 @@
+# coding: utf8
+
+# Copyright (C) 2017 Michał Kaliński
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
 """Custom exceptions raised by PLWN API."""
 
 from __future__ import absolute_import, division
@@ -12,10 +29,8 @@ __all__ = (
     'MalformedIdentifierException',
     'LoadException',
     'DumpVersionException',
-    'InvalidSynsetIdentifierException',
-    'InvalidLexicalUnitIdentifierException',
-    'InvalidRelationNameException',
-    'InvalidPoSException',
+    'InvalidRelationTypeException',
+    'AmbiguousRelationTypeException',
 )
 
 
@@ -26,17 +41,9 @@ class PLWNAPIException(Exception):
 
 
 class NotFound(PLWNAPIException):
-    """Base for exceptions raised when an object is not found."""
-
-    def __init__(self, lemma, pos, variant, *args):
-        """Initialize NotFound."""
-        super(NotFound, self).__init__(*args)
+    """Base for exceptions raised when an entity is not found."""
 
-        self.args = ('lemma={!r} pos={!r} variant={!r}'.format(
-            lemma,
-            pos,
-            variant,
-        ),) + self.args
+    pass
 
 
 class LexicalUnitNotFound(NotFound):
@@ -64,7 +71,6 @@ class MalformedIdentifierException(ReaderException):
     """
 
     def __init__(self, id_):
-        """Initialize MalformedIdentifierException."""
         super(MalformedIdentifierException, self).__init__(
             "Malformed identifier, expected digits at the end of the original"
             " id instead got {!r}"
@@ -85,7 +91,6 @@ class DumpVersionException(LoadException):
     """
 
     def __init__(self, version_is, version_required):
-        """Initialize DumpVersionException."""
         super(DumpVersionException, self).__init__(version_is,
                                                    version_required)
         self.version_is = version_is
@@ -98,31 +103,20 @@ class DumpVersionException(LoadException):
     )
 
 
-class InvalidSynsetIdentifierException(PLWNAPIException):
-    """Raised when a query for a nonexistent synset ID is made."""
-
-    pass
-
-
-class InvalidLexicalUnitIdentifierException(PLWNAPIException):
-    """Raised when a query for a nonexistent lexical unit ID is made."""
-
-    pass
-
-
-class InvalidRelationNameException(PLWNAPIException):
-    """Raised when attempting to select synsets or units.
+class InvalidRelationTypeException(PLWNAPIException):
+    """Raised when an identifier does not refer to any existing relation.
 
-    Related by a relation that does not exist.
+    This includes identifiers of relations that do exist, but for the
+    other relation kind.
     """
 
     pass
 
 
-class InvalidPoSException(PLWNAPIException):
-    """Raised when a query for PoS is made.
+class AmbiguousRelationTypeException(InvalidRelationTypeException):
+    """Raised when a relation type identifier is ambiguous.
 
-    Which is not one of the valid constants.
+    The identifier could refer to more than one relation type, but only
+    one is permitted in the context.
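+
+    For instance (a hypothetical case), a bare child relation name that
+    occurs under two different parent relation types is ambiguous wherever
+    a single relation type is required.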
""" pass diff --git a/plwn/readers/comments.py b/plwn/readers/comments.py index 4a0f6efeb0c9702fb7af9e2742b95569fd06b0c0..473d37d61808fd34ecd3a033e92c726594042b17 100644 --- a/plwn/readers/comments.py +++ b/plwn/readers/comments.py @@ -1,3 +1,20 @@ +# coding: utf8 + +# Copyright (C) 2017 MichaÅ‚ KaliÅ„ski +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + """Parsing strings in wordnet comment format. For readers that need to deal with them. @@ -21,6 +38,7 @@ __all__ = ( 'NON_EXAMPLE_TAG_NAMES', 'CommentData', 'parse_comment_string', + 'make_empty_comment_data', ) @@ -79,7 +97,22 @@ def parse_comment_string(cmt_str): return CommentData( tuple(examples), tuple(examples_src), - cmt.get_first(u'D'), + cmt.get_first(u'D', None), tuple(cmt[u'K']), tuple(cmt[u'L']), ) + + +def make_empty_comment_data(): + """Create an empty ``CommentData`` instance. + + For cases where there's no comment to parse but an instance + with null-data is needed. + """ + return CommentData( + examples=(), + examples_sources=(), + definition=None, + usage=(), + links=(), + ) diff --git a/plwn/readers/nodes.py b/plwn/readers/nodes.py index 31790ec46a572a5134aa715a67f9ab0ffb8ec96a..b1bf8d3cafafd96ae67ee3c0012489bd71d6fb7e 100644 --- a/plwn/readers/nodes.py +++ b/plwn/readers/nodes.py @@ -1,16 +1,133 @@ +# coding: utf8 + +# Copyright (C) 2017 MichaÅ‚ KaliÅ„ski +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
"""Those tuples are returned by readers and absorbed by storages.""" from collections import namedtuple -__all__ = 'SynsetNode', 'LexicalUnitNode' +__all__ = ( + 'SynsetNode', + 'LexicalUnitNode', + 'RelationTypeNode', + 'make_synset_node', + 'make_lexical_unit_node', + 'make_relation_type_node', +) -SynsetNode = namedtuple("SynsetNode", ["id", "definition", "related"]) +SynsetNode = namedtuple( + "SynsetNode", + ["id", "definition", "related", "is_artificial"], +) LexicalUnitNode = namedtuple( "LexicalUnitNode", ["id", "lemma", "pos", "variant", "synset", "unit_index", "definition", "usage_notes", "external_links", "examples", "examples_sources", - "domain", "related", "verb_aspect", "emotion_markedness", "emotion_names", - "emotion_valuations", "emotion_example_1", "emotion_example_2"] + "domain", "related", "verb_aspect", "is_emotional", "emotion_markedness", + "emotion_names", "emotion_valuations", "emotion_example_1", + "emotion_example_2"] ) +RelationTypeNode = namedtuple( + "RelationTypeNode", + ["kind", "name", "parent", "aliases"], +) + + +def make_synset_node(**props): + """Create a :class:`SynsetNode` instance. + + Inserting appropriate "empty" values into optional properties + where a value was not passed. + + Takes only keyword arguments, and passes them to :class:`SynsetNode` + constructor. + """ + syn = SynsetNode( + id=props.pop('id'), + definition=props.pop('definition', None), + related=props.pop('related', ()), + is_artificial=props.pop('is_artificial', False), + ) + + if props: + raise KeyError('Not known synset properties: ' + repr(tuple(props))) + + return syn + + +def make_lexical_unit_node(**props): + """Create a :class:`LexicalUnitNode` instance. + + Inserting appropriate "empty" values into optional properties + where a value was not passed. + + Takes only keyword arguments, and passes them to :class:`LexicalUnitNode` + constructor. + """ + lex = LexicalUnitNode( + id=props.pop('id'), + lemma=props.pop('lemma'), + pos=props.pop('pos'), + variant=props.pop('variant'), + synset=props.pop('synset'), + unit_index=props.pop('unit_index'), + definition=props.pop('definition', None), + usage_notes=props.pop('usage_notes', ()), + external_links=props.pop('external_links', ()), + examples=props.pop('examples', ()), + examples_sources=props.pop('examples_sources', ()), + domain=props.pop('domain'), + related=props.pop('related', ()), + verb_aspect=props.pop('verb_aspect', None), + is_emotional=props.pop('is_emotional', None), + emotion_markedness=props.pop('emotion_markedness', None), + emotion_names=props.pop('emotion_names', ()), + emotion_valuations=props.pop('emotion_valuations', ()), + emotion_example_1=props.pop('emotion_example_1', None), + emotion_example_2=props.pop('emotion_example_2', None), + ) + + if props: + raise KeyError( + 'Not known lexical unit properties: ' + repr(tuple(props)), + ) + + return lex + + +def make_relation_type_node(**props): + """Create a :class:`RelationNode` instance. + + Inserting appropriate "empty" values into optional properties + where a value was not passed. + + Takes only keyword arguments, and passes them to :class:`RelationNode` + constructor. 
+    """
+    rel = RelationTypeNode(
+        name=props.pop('name'),
+        kind=props.pop('kind'),
+        parent=props.pop('parent', None),
+        aliases=props.pop('aliases', frozenset()),
+    )
+
+    if props:
+        raise KeyError(
+            'Unknown relation properties: ' + repr(tuple(props)),
+        )
+
+    return rel
diff --git a/plwn/readers/ubylmf.py b/plwn/readers/ubylmf.py
index d28b361fa2ff46d34e3e1633c65040f931978677..a3859ef5bc91737206c05d4c442103d09ad10a00 100644
--- a/plwn/readers/ubylmf.py
+++ b/plwn/readers/ubylmf.py
@@ -1,13 +1,28 @@
+# coding: utf8
+
+# Copyright (C) 2017 Michał Kaliński
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
 # FIXME Some assert statements should be converted to regular raises (asserts
 # should not be used for anything other than checking for errors in the code
 # itself).
-"""Implementation of ubylmf reader."""
-
 from xml.etree import ElementTree
 import re
 import logging
 
-from .nodes import SynsetNode, LexicalUnitNode
+from .nodes import make_synset_node, make_lexical_unit_node
 from .. import exceptions as exc
 from ..enums import PoS, Domain
 
@@ -119,7 +134,7 @@ def _make_lexicalunit(xml_lexicalentry, xml_sense):
     lu_unit_index = int(_extract_id(
         xml_sense.find("MonolingualExternalRef").get("externalReference"))
     )
-    return LexicalUnitNode(
+    return make_lexical_unit_node(
         id=lu_id,
         lemma=lu_lemma,
         pos=PoS(lu_pos),
@@ -135,14 +150,7 @@ def _make_lexicalunit(xml_lexicalentry, xml_sense):
         # the only one we care about.
         domain=Domain[lu_domain.rsplit('_', 1)[-1]],
         related=tuple(lu_related),
-        # The below properties are never stored in uby files (at present at
-        # least).
-        verb_aspect=None,
-        emotion_markedness=None,
-        emotion_names=(),
-        emotion_valuations=(),
-        emotion_example_1=None,
-        emotion_example_2=None,
+        # Other properties are not stored in UBY files.
     )
 
 
@@ -159,7 +167,7 @@ def _extract_definitions(xml_sense):
     """
     # Get definition - can be empty! At most 2
     xml_definitions = xml_sense.findall("Definition")
-    lu_definition = ""
+    lu_definition = None
    lu_usage_notes = []
     lu_external_links = []
     assert len(xml_definitions) <= 2, \
@@ -219,7 +227,7 @@ def _make_synset(xml_synset):
     s_id = _extract_id(xml_synset.get("id"))
     xml_def = xml_synset.find("Definition")
     s_def = xml_def.find("TextRepresentation").get("writtenText") \
-        if xml_def is not None else ""
+        if xml_def is not None else None
     s_related = []
     for xsr in xml_synset.findall("SynsetRelation"):
         try:
@@ -232,20 +240,21 @@ def _make_synset(xml_synset):
             ElementTree.tostring(xsr, ENCODING),
             ElementTree.tostring(xml_synset, ENCODING)
         )
-    return SynsetNode(
+    return make_synset_node(
         id=s_id,
         definition=s_def,
-        related=tuple(s_related)
+        related=tuple(s_related),
+        # There are no artificial synsets in UBY dumps.
     )
 
 
 def _extract_id(full_id):
-    """Extract only numerical identifier from the end of a full id.
+    """Extract only the numeric identifier from the end of a full id.
 
     :param full_id: a full identifier that has a prefix before the real id.
     :type full_id: str|unicode
 
-    :return: a real, numerical id.
+    :return: a real, numeric id.
     :rtype: int
 
     :raises MalformedIdentifierException: if the original id doesn't end with
diff --git a/plwn/readers/wndb.py b/plwn/readers/wndb.py
index 47a1f547e86da7bae089d602b9e3769569a12a3b..e1b3093cd92d2d29810ff91be6f4048c1a2e4e91 100644
--- a/plwn/readers/wndb.py
+++ b/plwn/readers/wndb.py
@@ -1,286 +1,262 @@
 # coding: utf8
+
+# Copyright (C) 2017 Michał Kaliński
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
 """Implementation of wndb reader."""
 
 from __future__ import absolute_import, division
 
-import collections as coll
-import contextlib as ctxl
+
+from contextlib import closing
+import io
 import logging
 
 import sqlalchemy as sa
 
-from .nodes import SynsetNode, LexicalUnitNode
-from .comments import parse_comment_string
-from ..enums import (
-    PoS,
-    VerbAspect,
-    EmotionMarkedness,
-    EmotionName,
-    EmotionValuation,
-    Domain,
-)
-from ..utils.sorting import text_key
+from .. import enums as en
+from .wnschema import WNSchemaProcessor
 
 
-__all__ = 'wndb_reader',
+__all__ = 'WNDBReader',
 
 
-_log = logging.getLogger(__name__)
+_LOG = logging.getLogger(__name__)
 
 
-_EmotionData = coll.namedtuple(
-    '_EmotionData',
-    ('mark', 'names', 'valuations', 'example_1', 'example_2'),
-)
+# I'm not sure what role the relationtype of type 2 (synonymy) fulfills, but
+# it seems completely unused by the relation tables. As such, it will be
+# easiest to just omit it.
+_SYNO_REL_OBJTYPE = 2
 
 
-def wndb_reader(wordnet_db_url):
-    """Generate UBY-LMF format compatible records.
+class WNDBReader(object):
+    """Generate UBY-LMF format compatible records.
 
-    Directly from plWordNet database.
+    The records are read directly from a plWordNet database.
 
-    sqlalchemy is required for this method to work.
+    SQLAlchemy is required for this reader to work.
+    """
 
-    :param str wordnet_db_url: URL in sqlalchemy format, pointing to a
-        plWordNet database.
+    def __init__(self, wordnet_db_url_file):
+        # The file is expected to contain an SQLAlchemy database URL on its
+        # first line.
+        with io.open(wordnet_db_url_file) as db_url_f:
+            self._db_url = db_url_f.readline().strip()
+
+        self._db_eng = sa.create_engine(self._db_url)
+        self._db_meta = sa.MetaData(self._db_eng)
+        # Define required tables
+        self._dbt_synset = self.__mktable(u'synset')
+        self._dbt_synrel = self.__mktable(u'synsetrelation')
+        self._dbt_reltype = self.__mktable(u'relationtype')
+        self._dbt_lexunit = self.__mktable(u'lexicalunit')
+        self._dbt_lexrel = self.__mktable(u'lexicalrelation')
+        self._dbt_uns = self.__mktable(u'unitandsynset')
+        self._dbt_emo = self.__mktable(u'emotion')
+
+        self._schema = WNSchemaProcessor()
+
+    def __iter__(self):
+        # First, get relation type data, since it depends on nothing. And the
+        # other two kinds of nodes will need the full relation names.
+        # Then, get lexical units, since they will show which synsets are
+        # needed.
+        # Finally, get the synsets.
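+        # In between, relation instances, unit-to-synset assignments and
+        # emotion annotations are fed to the schema processor, which
+        # cross-checks all of the data in finalize().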
+ self._extract_relation_types() + self._extract_emotions() + self._extract_units() + self._extract_unit_rels() + self._extract_uns() + self._extract_syns() + self._extract_syn_rels() + + for node in self._schema.finalize(): + yield node + + def _extract_relation_types(self): + reltype_q = sa.select(( + self._dbt_reltype.c.ID, + self._dbt_reltype.c.PARENT_ID, + self._dbt_reltype.c.objecttype, + self._dbt_reltype.c.name, + self._dbt_reltype.c.shortcut, + )) + with closing(self._db_eng.execute(reltype_q)) as result: + for row in result: + parent_id = row[self._dbt_reltype.c.PARENT_ID] + object_type = row[self._dbt_reltype.c.objecttype] + if object_type != _SYNO_REL_OBJTYPE: + self._schema.take_relation_type( + row[self._dbt_reltype.c.ID], + # Ignore the kind information of relation types that + # have parents. It will be inherited. + en.RelationKind.by_db_number(object_type) + if parent_id is None + else None, + row[self._dbt_reltype.c.name], + row[self._dbt_reltype.c.shortcut], + row[self._dbt_reltype.c.PARENT_ID], + ) - :return: a generator over PLwordnet entities. - :rtype: generator - """ - db_eng = sa.create_engine(wordnet_db_url) - db_meta = sa.MetaData(db_eng) - visited_synsets = set() - nonexistent_synsets = set() - - # Define required tables - dbt_synset = sa.Table(u'synset', db_meta, autoload=True) - dbt_synrel = sa.Table(u'synsetrelation', db_meta, autoload=True) - dbt_reltype = sa.Table(u'relationtype', db_meta, autoload=True) - dbt_lexunit = sa.Table(u'lexicalunit', db_meta, autoload=True) - dbt_lexrel = sa.Table(u'lexicalrelation', db_meta, autoload=True) - dbt_uns = sa.Table(u'unitandsynset', db_meta, autoload=True) - dbt_emo = sa.Table(u'emotion', db_meta, autoload=True) - - q = sa.select(( - dbt_lexunit.c.ID, - dbt_lexunit.c.lemma, - dbt_lexunit.c.pos, - dbt_lexunit.c.variant, - dbt_uns.c.SYN_ID, - dbt_uns.c.unitindex, - dbt_lexunit.c.domain, - dbt_lexunit.c.comment, - dbt_lexunit.c.verb_aspect, - )).select_from( - dbt_lexunit.join( - dbt_uns, - dbt_uns.c.LEX_ID == dbt_lexunit.c.ID, - ) - ).where(dbt_lexunit.c.pos.between(1, 4)) - - with ctxl.closing(db_eng.execute(q)) as result: - for lexid, lemma, pos, variant, synid, uidx, domain, comment,\ - verb_aspect in result: - - if synid in nonexistent_synsets: - continue - - # Select all relations children of the unit - q = sa.select( - (dbt_lexrel.c.CHILD_ID, dbt_reltype.c.name) - ).select_from( - dbt_lexrel.join( - dbt_reltype, - dbt_reltype.c.ID == dbt_lexrel.c.REL_ID, + def _extract_emotions(self): + emo_q = sa.select(( + self._dbt_emo.c.lexicalunit_id, + self._dbt_emo.c.emotions, + self._dbt_emo.c.valuations, + self._dbt_emo.c.markedness, + self._dbt_emo.c.unitStatus, + self._dbt_emo.c.example1, + self._dbt_emo.c.example2, + self._dbt_emo.c.super_anotation, + )) + with closing(self._db_eng.execute(emo_q)) as result: + for row in result: + ustatus = bool(row[self._dbt_emo.c.unitStatus]) + superann = bool(row[self._dbt_emo.c.super_anotation]) + markstr = row[self._dbt_emo.c.markedness] + + if markstr is not None: + try: + mark = en.EmotionMarkedness.normalized(markstr) + except (ValueError, TypeError): + _LOG.error( + 'Value %r is not valid as emotion markedness; ' + 'skipping record %r', + markstr, + row, + ) + continue + else: + mark = None + + self._schema.take_emotion( + row[self._dbt_emo.c.lexicalunit_id], + mark, + _make_emo_tuple( + en.EmotionName, + row[self._dbt_emo.c.emotions], + ), + _make_emo_tuple( + en.EmotionValuation, + row[self._dbt_emo.c.valuations], + ), + row[self._dbt_emo.c.example1], + 
row[self._dbt_emo.c.example2], + ustatus, + superann, ) - ).where(dbt_lexrel.c.PARENT_ID == lexid) - - with ctxl.closing(db_eng.execute(q)) as lex_rel_result: - # Ensure relations targets exist - lex_related = [] - for lex_child_id, lex_rel_name in lex_rel_result: - q = sa.select(( - sa.exists().select_from( - # This join to ensure the unit belongs to - # some synset. - dbt_lexunit.join( - dbt_uns, - dbt_uns.c.LEX_ID == dbt_lexunit.c.ID, - ) - ).where(sa.and_( - dbt_lexunit.c.ID == lex_child_id, - dbt_lexunit.c.pos.between(1, 4), - )), - )) - - if db_eng.execute(q).scalar(): - lex_related.append((lex_rel_name, lex_child_id)) - - # Now, select the unit's synset, but only once - if synid not in visited_synsets: - visited_synsets.add(synid) - - q = sa.select( - (dbt_synset.c.ID, dbt_synset.c.definition) - ).where(dbt_synset.c.ID == synid) - - synrow = db_eng.execute(q).first() - - if synrow is None: - nonexistent_synsets.add(synid) - continue - - # Select all relation children of the synset - q = sa.select( - (dbt_synrel.c.CHILD_ID, dbt_reltype.c.name) - ).select_from( - dbt_synrel.join( - dbt_reltype, - dbt_reltype.c.ID == dbt_synrel.c.REL_ID, - ) - ).where(dbt_synrel.c.PARENT_ID == synid) - - with ctxl.closing(db_eng.execute(q)) as syn_rel_result: - syn_related = [] - for syn_child_id, syn_rel_name in syn_rel_result: - # Ensure the child exists - q = sa.select(( - sa.exists().select_from( - dbt_synset.join( - dbt_uns, - dbt_uns.c.SYN_ID == dbt_synset.c.ID, - ).join( - dbt_lexunit, - dbt_lexunit.c.ID == dbt_uns.c.LEX_ID, - ) - ).where(sa.and_( - dbt_synset.c.ID == syn_child_id, - dbt_lexunit.c.pos.between(1, 4), - )), - )) - - if db_eng.execute(q).scalar(): - syn_related.append((syn_rel_name, syn_child_id)) - - yield SynsetNode( - synid, - synrow[1] if synrow[1] is not None else u'', - tuple(syn_related), + + def _extract_units(self): + lexunit_q = sa.select(( + self._dbt_lexunit.c.ID, + self._dbt_lexunit.c.lemma, + self._dbt_lexunit.c.pos, + self._dbt_lexunit.c.variant, + self._dbt_lexunit.c.domain, + self._dbt_lexunit.c.comment, + self._dbt_lexunit.c.verb_aspect, + )) + with closing(self._db_eng.execute(lexunit_q)) as result: + for row in result: + self._schema.take_lexical_unit( + row[self._dbt_lexunit.c.ID], + row[self._dbt_lexunit.c.lemma], + en.PoS.by_db_number(row[self._dbt_lexunit.c.pos]), + row[self._dbt_lexunit.c.variant], + en.Domain.by_db_number(row[self._dbt_lexunit.c.domain]), + row[self._dbt_lexunit.c.comment], + en.VerbAspect.by_db_number( + row[self._dbt_lexunit.c.verb_aspect], + True, + ), ) - # Try getting emotion annotations for the unit - emo_data = _extract_emotion_data(db_eng, dbt_emo, lexid) - - # Now, parse the comment string to get some last pieces of data - cmt_data = parse_comment_string(comment - if comment is not None - else u'') - - yield LexicalUnitNode( - id=lexid, - lemma=lemma, - pos=PoS.by_db_number(pos), - variant=variant, - synset=synid, - unit_index=uidx, - definition=cmt_data.definition, - usage_notes=cmt_data.usage, - external_links=cmt_data.links, - examples=cmt_data.examples, - examples_sources=cmt_data.examples_sources, - # XXX Since domains are defined as strings, the int is cast - # to unicode. It's possible, in the future to add a - # translation dict to textual representations. 
- domain=Domain.by_db_number(domain), - related=tuple(lex_related), - verb_aspect=VerbAspect.by_db_number(verb_aspect, True), - emotion_markedness=EmotionMarkedness.normalized(emo_data.mark) - if emo_data.mark is not None else None, - emotion_names=_make_enum_tuple( - EmotionName, - sorted(emo_data.names, key=text_key), - ), - emotion_valuations=_make_enum_tuple( - EmotionValuation, - sorted(emo_data.valuations, key=text_key), - ), - emotion_example_1=emo_data.example_1, - emotion_example_2=emo_data.example_2, - ) - - -def _extract_emotion_data(db_eng, db_t_emo, unit_id): - q_emo = sa.select(( - db_t_emo.c.markedness, # XXX Typo in schema - db_t_emo.c.emotions, - db_t_emo.c.valuations, - db_t_emo.c.example1, - db_t_emo.c.example2, - db_t_emo.c.unitStatus, - )).where(db_t_emo.c.lexicalunit_id == unit_id).order_by( - # "super_anotation" is a boolean 0 or 1, so descending sort will put - # the super annotation first. - db_t_emo.c.super_anotation.desc() # XXX Typo in schema - ) - - mark = None - names = set() - valuations = set() - example_1 = None - example_2 = None - - with ctxl.closing(db_eng.execute(q_emo)) as result: - for row in result: - if not row[db_t_emo.c.unitStatus]: - return _EmotionData( - mark=None, - names=(), - valuations=(), - example_1=None, - example_2=None, + def _extract_uns(self): + uns_q = sa.select(( + self._dbt_uns.c.SYN_ID, + self._dbt_uns.c.LEX_ID, + self._dbt_uns.c.unitindex, + )) + with closing(self._db_eng.execute(uns_q)) as result: + for row in result: + self._schema.take_unit_to_synset( + row[self._dbt_uns.c.LEX_ID], + row[self._dbt_uns.c.SYN_ID], + row[self._dbt_uns.c.unitindex], ) - if mark is None: - mark = row[db_t_emo.c.markedness] - if example_1 is None: - example_1 = row[db_t_emo.c.example1] - if example_2 is None: - example_2 = row[db_t_emo.c.example2] - - row_names = row[db_t_emo.c.emotions] - if row_names is not None: - names.update( - word.strip() - for word in row_names.split(u';') + def _extract_unit_rels(self): + lexrel_q = sa.select(( + self._dbt_lexrel.c.PARENT_ID, + self._dbt_lexrel.c.CHILD_ID, + self._dbt_lexrel.c.REL_ID, + )) + with closing(self._db_eng.execute(lexrel_q)) as result: + for row in result: + self._schema.take_lexical_relation( + row[self._dbt_lexrel.c.PARENT_ID], + row[self._dbt_lexrel.c.CHILD_ID], + row[self._dbt_lexrel.c.REL_ID], ) - row_valuations = row[db_t_emo.c.valuations] - if row_valuations is not None: - valuations.update( - word.strip() - for word in row_valuations.split(u';') + def _extract_syns(self): + synset_q = sa.select(( + self._dbt_synset.c.ID, + self._dbt_synset.c.isabstract, + self._dbt_synset.c.definition, + )) + with closing(self._db_eng.execute(synset_q)) as result: + for row in result: + self._schema.take_synset( + row[self._dbt_synset.c.ID], + row[self._dbt_synset.c.definition], + bool(row[self._dbt_synset.c.isabstract]), ) - return _EmotionData( - mark=mark, - names=names, - valuations=valuations, - example_1=example_1, - example_2=example_2, - ) + def _extract_syn_rels(self): + synrel_q = sa.select(( + self._dbt_synrel.c.PARENT_ID, + self._dbt_synrel.c.CHILD_ID, + self._dbt_synrel.c.REL_ID, + )) + with closing(self._db_eng.execute(synrel_q)) as result: + for row in result: + self._schema.take_synset_relation( + row[self._dbt_synrel.c.PARENT_ID], + row[self._dbt_synrel.c.CHILD_ID], + row[self._dbt_synrel.c.REL_ID], + ) + def __mktable(self, table_name): + return sa.Table(table_name, self._db_meta, autoload=True) -def _make_enum_tuple(enumtype, source): - result = [] - for item in source: +def 
_make_enums_from_values(enclass, valiter):
+    for val in valiter:
         try:
-            val = enumtype(item)
+            enum_val = enclass(val)
         except ValueError:
-            _log.warning('Omitting bad value %r of enum %r', item, enumtype)
+            _LOG.error('Value %r is not valid for %r', val, enclass)
         else:
-            result.append(val)
+            yield enum_val
+
 
-    return tuple(result)
+def _make_emo_tuple(enclass, emoval):
+    return () if emoval is None else tuple(frozenset(_make_enums_from_values(
+        enclass,
+        # Skip empty elements in the values sequence (some people just
+        # append a ";").
+        (item for item in emoval.split(u';') if item),
+    )))
 
 
-_this_reader_ = wndb_reader
+_this_reader_ = WNDBReader
diff --git a/plwn/readers/wnschema.py b/plwn/readers/wnschema.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb93ae8ab6f822ab80642c3da649e527d3b4cc49
--- /dev/null
+++ b/plwn/readers/wnschema.py
@@ -0,0 +1,541 @@
+# coding: utf8
+
+# Copyright (C) 2017 Michał Kaliński
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import absolute_import, division
+
+
+import collections as coll
+import itertools as itt
+import logging
+import operator as op
+
+import six
+import plwn_comments as plwnc
+import plwn_comments.exceptions as plwnce
+import plwn_comments.utils.usage_tags as plwncu
+
+from ..bases import RelationInfoBase
+from ..utils.sorting import text_key
+from . import nodes as nd
+
+
+__all__ = 'WNSchemaProcessor',
+
+
+_LOG = logging.getLogger(__name__)
+
+_BASIC_RELINST_ERROR_TMPL = \
+    'Relation %s between units / synsets %s -> %s dropped: '
+
+_SynData = coll.namedtuple('_SynData', ('definition', 'isart'))
+_LexData = coll.namedtuple(
+    '_LexData',
+    ('lemma', 'pos', 'variant', 'domain', 'comment', 'verb_aspect'),
+)
+_UnSData = coll.namedtuple('_UnSData', ('synset_id', 'unit_index'))
+_RelInstData = coll.namedtuple('_RelInstData', ('child', 'relation'))
+_RelTypeData = coll.namedtuple(
+    '_RelTypeData',
+    ('kind', 'name', 'short', 'parent'),
+)
+_EmoData = coll.namedtuple(
+    '_EmoData',
+    ('mark', 'names', 'valuations', 'example1', 'example2', 'status', 'super'),
+)
+
+_CmtDataT = coll.namedtuple(
+    '_CmtData',
+    ('examples', 'examples_sources', 'definition', 'usage', 'links'),
+)
+
+
+class WNSchemaProcessor(object):
+    """Helper class for readers of the "standard" plWordNet schema.
+
+    It externalizes operations common to reading from any source that
+    follows the schema.
+
+    In practice, objects of this class are intended for composition: they
+    are fed data from a schema-abiding source, perform some consistency
+    cleanups, and then provide well-formatted nodes that can be passed to
+    a storage.
+
+    The processor checks for, and drops, the following:
+
+    * Synsets that don't have units.
+    * Units not assigned to a synset.
+    * Units assigned to non-existent synsets.
+    * Relations to or from non-existent units / synsets.
+    * Relation types that don't have instances or are parents.
+    * Relation instances that don't have types (illegal in the schema).
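+
+    A minimal feeding sequence (a sketch; the identifiers and values are
+    hypothetical, and ``RelationKind``, ``PoS`` and ``Domain`` come from
+    :mod:`plwn.enums`):
+
+        >>> proc = WNSchemaProcessor()
+        >>> proc.take_relation_type(10, RelationKind.synset, u'hiponimia',
+        ...                         u'hipo', None)
+        >>> for syn_id, lex_id in ((1, 100), (2, 200)):
+        ...     proc.take_synset(syn_id, None, False)
+        ...     proc.take_lexical_unit(lex_id, u'pies', PoS.noun, 1,
+        ...                            Domain.zw, None, None)
+        ...     proc.take_unit_to_synset(lex_id, syn_id, 0)
+        >>> proc.take_synset_relation(1, 2, 10)
+        >>> nodes = list(proc.finalize())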
+ """ + + def __init__(self): + # These dicts should be indexed by IDs of the respective data records + self._syn_acc = {} + self._lex_acc = {} + self._lex_to_syn_acc = {} + self._reltype_acc = {} + # Relation instances are indexed like: + # parent id => list of _RelInstData + self._synrel_acc = coll.defaultdict(list) + self._lexrel_acc = coll.defaultdict(list) + # Emotion records are indexed like: lexical id => list of _EmoData + self._emo_acc = coll.defaultdict(list) + # This is aux sets for IDs that will be used for filtering + self._reltypes_being_parents = set() + self._relinstance_count = coll.Counter() + self._syn_to_units_check = coll.defaultdict(list) + + # The following are filled during finalization: + # Nodes need full relation names, this will provide translation from + # IDs. + self._relid2relname = None + # Some aliases may repeat in the plWN database, but it's not allowed + # here. + self._bad_rel_aliases = None + # All the units that were rejected for any reason - used by filtering + # relation. + self._bad_units = None + self._bad_synsets = None + + def take_relation_type(self, id_, kind, name, short_name, parent_id): + data = _RelTypeData(kind, name, short_name, parent_id) + if _insert_if_uniq(self._reltype_acc, id_, data): + if parent_id is not None: + self._reltypes_being_parents.add(parent_id) + + def take_synset(self, id_, definition, is_artificial): + _insert_if_uniq( + self._syn_acc, + id_, + _SynData(definition, is_artificial), + ) + + def take_lexical_unit(self, + id_, + lemma, + pos, + variant, + domain, + comment, + verb_aspect): + _insert_if_uniq( + self._lex_acc, + id_, + _LexData( + lemma, + pos, + variant, + domain, + comment, + verb_aspect, + ), + ) + + def take_unit_to_synset(self, unit_id, synset_id, unit_index): + data = _UnSData(synset_id, unit_index) + if _insert_if_uniq(self._lex_to_syn_acc, unit_id, data): + self._syn_to_units_check[synset_id].append(unit_id) + + def take_synset_relation(self, parent_id, child_id, relation_id): + self.__take_relation( + self._synrel_acc, + parent_id, + child_id, + relation_id, + ) + + def take_lexical_relation(self, parent_id, child_id, relation_id): + self.__take_relation( + self._lexrel_acc, + parent_id, + child_id, + relation_id, + ) + + def take_emotion(self, + lexical_id, + markedness, + names, + valuations, + example1, + example2, + unit_status, + super_annotation): + self._emo_acc[lexical_id].append(_EmoData( + markedness, + names, + valuations, + example1, + example2, + unit_status, + super_annotation, + )) + + def finalize(self): + """After putting in data using the ``take_*`` methods. + + Perform all checks and yield all created nodes. + """ + # Reset filtered sets, then fill them + self._bad_units = set() + self._filter_bad_units() + self._bad_synsets = set() + self._filter_bad_synsets() + self._bad_rel_aliases = set() + self._filter_bad_rel_aliases() + self._filter_bad_rel_instances() + + for node in itt.chain(self._fin_reltypes(), + self._fin_units(), + self._fin_syns()): + yield node + + def _fin_reltypes(self): + self._relid2relname = {} + + for rel_id, rel_data in six.iteritems(self._reltype_acc): + if rel_id in self._reltypes_being_parents: + continue + + if self._relinstance_count[rel_id] <= 0: + _LOG.warning( + 'Relation %s = %r omitted: no instances', + rel_id, + rel_data, + ) + continue + + # Inherit the kind data from the parent reltype, if the parent is + # not None. 
+ if rel_data.parent is not None: + try: + par_data = self._reltype_acc[rel_data.parent] + except KeyError: + _LOG.error( + 'Relation %s has non-existent parent %s', + rel_id, + rel_data.parent, + ) + continue + rel_parname = par_data.name + rel_kind = par_data.kind + else: + rel_parname = None + rel_kind = rel_data.kind + + self._relid2relname[rel_id] = RelationInfoBase.format_name( + rel_parname, + rel_data.name, + ) + + yield nd.RelationTypeNode( + kind=rel_kind, + name=rel_data.name, + parent=rel_parname, + aliases=(rel_data.short,) + if rel_data.short is not None and + rel_data.short not in self._bad_rel_aliases + else (), + ) + + def _fin_units(self): + for lu_id, lu_data in six.iteritems(self._lex_acc): + if lu_id in self._bad_units: + continue + + final_emo = self._coalesce_emo(lu_id) + cmt_data = ( + _CmtData.make_empty() + if lu_data.comment is None + else _CmtData.extract_from_string(lu_data.comment) + ) + final_related = self._make_related_for_unit(lu_id) + try: + uns = self._lex_to_syn_acc[lu_id] + except KeyError: + # This shouldn't happen, but possibly can, so just skip the + # unit. + continue + + yield nd.LexicalUnitNode( + id=lu_id, + lemma=lu_data.lemma, + pos=lu_data.pos, + variant=lu_data.variant, + synset=uns.synset_id, + unit_index=uns.unit_index, + definition=cmt_data.definition, + usage_notes=cmt_data.usage, + external_links=cmt_data.links, + examples=cmt_data.examples, + examples_sources=cmt_data.examples_sources, + domain=lu_data.domain, + related=final_related, + verb_aspect=lu_data.verb_aspect, + is_emotional=final_emo.status, + emotion_markedness=final_emo.mark, + emotion_names=final_emo.names, + emotion_valuations=final_emo.valuations, + emotion_example_1=final_emo.example1, + emotion_example_2=final_emo.example2, + ) + + def _fin_syns(self): + for syn_id, syn_data in six.iteritems(self._syn_acc): + if syn_id in self._bad_synsets: + continue + + final_related = self._make_related_for_synset(syn_id) + + yield nd.SynsetNode( + id=syn_id, + definition=syn_data.definition, + related=final_related, + is_artificial=syn_data.isart, + ) + + def _filter_bad_units(self): + for lex_id in self._lex_acc: + if lex_id not in self._lex_to_syn_acc: + _LOG.error('Unit %s belongs to no synset', lex_id) + self._bad_units.add(lex_id) + continue + + syn_of_lex = self._lex_to_syn_acc[lex_id].synset_id + if syn_of_lex not in self._syn_acc: + _LOG.error( + 'Unit %s belongs to non-existent synset %s', + lex_id, + syn_of_lex, + ) + self._bad_units.add(lex_id) + + def _filter_bad_synsets(self): + for syn_id in self._syn_acc: + # Do those synsets have units and those units are real? + syn_units = self._syn_to_units_check.get(syn_id, ()) + any_unit_valid = False + + # This check doesn't necessarily remove the synset, but + # notification will be given. At least one valid unit for synset + # must remain. + for unit_id in syn_units: + if unit_id in self._lex_acc: + any_unit_valid = True + else: + _LOG.error( + 'Unit %s of synset %s is non-existent', + unit_id, + syn_id, + ) + + if not any_unit_valid: + _LOG.error('Synset %s has no (valid) units', syn_id) + self._bad_synsets.add(syn_id) + + def _filter_bad_rel_aliases(self): + # If an alias repeats multiple times, remember it to remove both + # instances later (so don't decide which is the "right" one). 
+ all_aliases = set() + for rel_data in six.itervalues(self._reltype_acc): + alias = rel_data.short + if alias in all_aliases: + _LOG.error( + 'Relation shortcut %r is not unique; dropping both', + alias, + ) + self._bad_rel_aliases.add(alias) + else: + all_aliases.add(alias) + + def _filter_bad_rel_instances(self): + # Assuming that all bad synsets and units have been filtered, drop all + # instances of relations that refer to them. + # It removes instances in-place from related dicts, and decreases + # counts of instances for relation types. + self.__recount_rels(self._synrel_acc, self._syn_acc, self._bad_synsets) + self.__recount_rels(self._lexrel_acc, self._lex_acc, self._bad_units) + + def _make_related_for_unit(self, lex_id): + return self.__make_related(self._lexrel_acc, lex_id) + + def _make_related_for_synset(self, syn_id): + return self.__make_related(self._synrel_acc, syn_id) + + def _coalesce_emo(self, lex_id): + # The algorithm is like this: + # Start with super-annotation, iterate all annotations, fill what is + # possible. + # Do not overwrite status, markedness or examples, but sum names and + # values. If the super-annotation is marked as not-an-emotion, just + # return empty data. + # When returning the final emo value, don't remember its + # super annotation - it doesn't matter at this point; set to None. + # TODO Ensure that this algorithm makes sense, there seem to be more + # sensible ways of handling things. Move on for now. + final_status = None + final_mark = None + final_ex1 = None + final_ex2 = None + names_acc = [] + values_acc = [] + + for emo_data in sorted(self._emo_acc.get(lex_id, ()), + key=op.attrgetter('super'), + reverse=True): + if final_status is None: + final_status = emo_data.status + if final_mark is None: + final_mark = emo_data.mark + if final_ex1 is None: + final_ex1 = emo_data.example1 + if final_ex2 is None: + final_ex2 = emo_data.example2 + + names_acc.extend(emo_data.names) + values_acc.extend(emo_data.valuations) + + return _EmoData( + mark=final_mark, + names=_emo_uniq_sorted_tuple(names_acc), + valuations=_emo_uniq_sorted_tuple(values_acc), + example1=final_ex1, + example2=final_ex2, + status=final_status, + super=None, + ) + + def __take_relation(self, relinst_acc, parent_id, child_id, relation_id): + relinst_acc[parent_id].append(_RelInstData(child_id, relation_id)) + self._relinstance_count[relation_id] += 1 + + def __recount_rels(self, relinst_acc, item_acc, bad_acc): + for parent_id, children in six.iteritems(relinst_acc): + # Do not filter parents; this will be done at yielding, outside + fil_children = [] + for relinst in children: + if relinst.relation not in self._reltype_acc: + _LOG.error( + _BASIC_RELINST_ERROR_TMPL + + 'non-existent relation', + relinst.relation, + parent_id, + relinst.child, + ) + elif relinst.child not in item_acc or relinst.child in bad_acc: + _LOG.error( + _BASIC_RELINST_ERROR_TMPL + + 'the child is non-existent or invalid', + relinst.relation, + parent_id, + relinst.child, + ) + self._relinstance_count[relinst.relation] -= 1 + else: + fil_children.append(relinst) + + relinst_acc[parent_id] = fil_children + + def __make_related(self, relinst_acc, parent_id): + return tuple( + (self._relid2relname[relinst.relation], relinst.child) + for relinst in relinst_acc.get(parent_id, ()) + ) + + +class _CmtData(_CmtDataT): + + __slots__ = () + + # :class:`plwn_comments.TagBank` structure that defines all kinds of + # comment tags which are needed by PLWN API. 
+ _WN_TAGS = plwnc.TagBank() + # Usage notes + _WN_TAGS.define(u'K') + # External links + _WN_TAGS.define(u'L', u'{') + # Definition + _WN_TAGS.define(u'D') + # The distinction for these tags is useful, since all examples go to one + # place. + _NON_EXAMPLE_TAG_NAMES = frozenset((u'K', u'L', u'D')) + # And define those example tags + _WN_TAGS.define_from( + plwncu.iter_usage_tags(), + plwncu.DEFAULT_USAGE_TAG_SURROUND, + ) + + @classmethod + def extract_from_string(cls, cmt_str): + try: + cmt = plwnc.Comment.parse(cmt_str, cls._WN_TAGS) + except plwnce.PLWNCommentsException: + # For now just make an empty comment which will make all fields + # unset. + cmt = plwnc.Comment(cls._WN_TAGS) + + # Get all examples + examples = [] + examples_src = [] + + for tagname, tagcontents in cmt.items(): + if tagname not in cls._NON_EXAMPLE_TAG_NAMES: + examples.extend(tagcontents) + examples_src.extend(itt.repeat(tagname, len(tagcontents))) + + return cls( + examples=tuple(examples), + examples_sources=tuple(examples_src), + definition=cmt.get_first(u'D', None), + usage=tuple(cmt[u'K']), + links=tuple(cmt[u'L']), + ) + + @classmethod + def make_empty(cls): + return cls( + examples=(), + examples_sources=(), + definition=None, + usage=(), + links=(), + ) + + +def _insert_if_uniq(data_acc, id_val, data_obj): + obj_in = data_acc.setdefault(id_val, data_obj) + + if obj_in is not data_obj: + _LOG.error( + 'Cannot add record %r with ID %s: already associated with ' + 'record %r', + data_obj, + id_val, + obj_in, + ) + return False + return True + + +def _emo_enums_sortkey(item): + return text_key(item.value) + + +def _emo_uniq_sorted_tuple(emo_acc): + # Sort the names and valuations for predictable behaviour + return tuple(sorted(frozenset(emo_acc), key=_emo_enums_sortkey)) diff --git a/plwn/readers/wnxml.py b/plwn/readers/wnxml.py index 18b40bf98b196e7061027cd149496eca86f3303f..51f637dcdbb4a8330dfee1060a6d15c1efead1d0 100644 --- a/plwn/readers/wnxml.py +++ b/plwn/readers/wnxml.py @@ -1,211 +1,163 @@ # coding: utf8 -"""Implementation of wnxml readwer.""" + +# Copyright (C) 2017 MichaÅ‚ KaliÅ„ski +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. from __future__ import absolute_import, division -from collections import defaultdict -import itertools as itt -import logging import xml.etree.ElementTree as et -import six - -from .comments import parse_comment_string -from .nodes import SynsetNode, LexicalUnitNode -from ..enums import PoS, Domain - +from .wnschema import WNSchemaProcessor +from .. 
import enums as en -__all__ = 'wnxml_reader', +__all__ = 'WNXMLReader', -_log = logging.getLogger(__name__) _POSES = { - u'rzeczownik': PoS.n, - u'czasownik': PoS.v, - u'przymiotnik': PoS.adj, - u'przysłówek': PoS.adv, + u'rzeczownik': en.PoS.n, + u'czasownik': en.PoS.v, + u'przymiotnik': en.PoS.adj, + u'przysłówek': en.PoS.adv, + u'rzeczownik pwn': en.PoS.n, + u'czasownik pwn': en.PoS.v, + u'przymiotnik pwn': en.PoS.adj, + u'przysłówek pwn': en.PoS.adv, } - - -# Since etree may return either unicode or byte strings, all strings returned -# by its interfaces are wrapped with six.text_type - - -def wnxml_reader(wnxml_file): - """Generate plWordNet records from the official XML file. - - :param str wnxml_file: Path to the plWordNet XML file to read from. - - :return: a generator over PLwordnet entities. - :rtype: generator - """ - # The regrettably huge global storage for yielding - synsets = {} - lexunits = {} - synid_n_lexids = [] - reltypes_syn = {} - reltypes_lex = {} - # These need defaults to add instances to parent syn / lex - synrels = defaultdict(list) - lexrels = defaultdict(list) - - # Now, parse everything - for _, elem in et.iterparse(wnxml_file): - if elem.tag == u'lexical-unit': - _make_lexunit(elem, lexunits) - elif elem.tag == u'synset': - _make_synset(elem, synsets, synid_n_lexids) - elif elem.tag == u'relationtypes': - _make_reltype(elem, reltypes_syn, reltypes_lex) - elif elem.tag == u'synsetrelations': - _make_rel(elem, synrels) - elif elem.tag == u'lexicalrelations': - _make_rel(elem, lexrels) - - # Finalize units to synsets mapping - _make_units2synsets(lexunits, synid_n_lexids) - - # Now complete synsets and lexunits with relations and yield - for node in itt.chain( - _make_gen(synsets, synrels, reltypes_syn), - _filter_nosynset(_make_gen(lexunits, lexrels, reltypes_lex)), - ): - yield node - - -_this_reader_ = wnxml_reader - - -def _make_lexunit(lu_node, lu_dict): - # Only words will pl poses will be remembered - xmlpos = six.text_type(lu_node.get(u'pos')) - - if xmlpos not in _POSES: - return - - lu_id = int(lu_node.get(u'id')) - cmt_data = parse_comment_string(six.text_type(lu_node.get(u'desc'))) - # Create a temporal object which will be filled later - lu_dict[lu_id] = LexicalUnitNode( - id=lu_id, - lemma=six.text_type(lu_node.get(u'name')), - pos=_POSES[xmlpos], - variant=int(lu_node.get(u'variant')), - synset=None, - unit_index=None, - definition=cmt_data.definition, - usage_notes=cmt_data.usage, - external_links=cmt_data.links, - examples=cmt_data.examples, - examples_sources=cmt_data.examples_sources, - domain=Domain[lu_node.get(u'domain')], - related=None, - # The below properties are not stored in wnxml (at least in present) - verb_aspect=None, - emotion_markedness=None, - emotion_names=(), - emotion_valuations=(), - emotion_example_1=None, - emotion_example_2=None, - ) - - -def _make_synset(syn_node, syn_dict, snu_list): - # Only take non-abstract synsets - if six.text_type(syn_node.get(u'abstract')) != u'false': - return - - synid = int(syn_node.get(u'id')) - # Assign lexical units to synsets they belong to. 
- snu_list.append((synid, [int(uid_node.text) - for uid_node in syn_node.iter(u'unit-id')])) - # As with lexunits, related field is not yet filled - syn_dict[synid] = SynsetNode( - synid, - six.text_type(syn_node.get(u'definition')), - None, - ) - - -def _make_units2synsets(lu_dict, snu_list): - for synid, lexids in snu_list: - for uidx, uid in enumerate(lexids): - try: - lu = lu_dict[uid] - except KeyError: - _log.warning( - 'Unit %d from synset %d does not exist', - uid, - synid, - ) - else: - lu_dict[uid] = lu._replace(synset=synid, unit_index=uidx) - - -# Relation types are spelled in descriptive names -_RELTYPE_SYN = u'relacja pomiÄ™dzy synsetami' -_RELTYPE_LEX = u'relacja leksykalna' - - -def _make_reltype(reltype_node, synreltype_dict, lureltype_dict): - relid = int(reltype_node.get(u'id')) - typestr = reltype_node.get(u'type') - - if typestr == _RELTYPE_SYN: - the_dict = synreltype_dict - elif typestr == _RELTYPE_LEX: - the_dict = lureltype_dict - else: - # There is one more relation type, synonymy, but it's artificial - return - - # Remember the name so that will be inserted into the reltype storages - the_dict[relid] = six.text_type(reltype_node.get(u'name')) - - -# Relations are put into dicts indexed by parent IDs, to be later put into -# nodes. One function can handle both types. -def _make_rel(node, reldict): - # Get reltype - drop if unknown - reldict[int(node.get(u'parent'))].append(( - int(node.get(u'child')), - # Reltypes should be returned by names, not IDs - int(node.get(u'relation')), - )) - - -# As with relation, yielding is general for syn / lexes. -# Related IDs need to be added, and those not known purged. -def _make_gen(node_dict, rels_dict, reltype_dict): - for node in six.itervalues(node_dict): - related = [] - for child_id, rel_id in rels_dict.get(node.id, ()): - try: - relname = reltype_dict[rel_id] - except KeyError: - _log.warning( - 'Unknown relation %d (of %s), from %d to %d', - rel_id, - node.__class__.__name__, - node.id, - child_id, - ) - continue - - # Only remember from the related dict the items whose IDs are in - # the node dict. - if child_id in node_dict: - related.append((child_id, relname)) - related.append((relname, child_id)) - yield node._replace(related=related) - - -# Addendum to _make_gen for lexical units to filter synsetless ones -def _filter_nosynset(lu_node_gen): - for lu_node in lu_node_gen: - if lu_node.synset is None: - _log.warning('Unit %d belongs to no synset', lu_node.id) - else: - yield lu_node +_RELKINDS = { + u'relacja pomiÄ™dzy synsetami': en.RelationKind.synset, + u'relacja leksykalna': en.RelationKind.lexical, +} +_BOOLVALUES = {u'true': True, u'false': False} + + +# Examples of nodes that this reader is supposed to parse: +# <lexical-unit id="478387" name=".22" pos="rzeczownik pwn" +# tagcount="0" domain="wytw" desc="" workstate="Nie przetworzone" +# source="użytkownika" variant="1"/> +# <lexicalrelations parent="107360" child="61999" relation="104" +# valid="true" owner=""/> +# <relationtypes id="242" type="relacja leksykalna" +# name="rola: materiaÅ‚" +# description="Relacja roli: materiaÅ‚u jest wyjÄ…tkowÄ… relacjÄ… roli, +# łączÄ…cÄ… przymiotniki materiaÅ‚owe z ich podstawami rzeczownikowymi nazwami +# substancji i materiałów." +# posstr="rzeczownik,przymiotnik" +# display="<x#> jest zrobione z <y#>" shortcut="mat" +# autoreverse="false" pwn=""> +# Child relation types have the additional "parent" attribute. 
+# <relationtypes id="35" type="relacja leksykalna" parent="32" +# name="pacjens|obiekt" description="(dziedziczone)" +# posstr="(dziedziczone)" +# display="<x#> jest pacjensem dla czynnoÅ›ci wyrażanej przez <y#>" +# shortcut="rol:pacj" autoreverse="false" pwn="p_rp"> +# <synset id="12" workstate="Nie przetworzone" split="1" owner="" +# definition="" desc="" abstract="false"> +# <unit-id>12</unit-id> +# <unit-id>10191</unit-id> +# </synset> +# <synsetrelations parent="1366" child="551" relation="10" +# valid="true" owner=""/> + +class WNXMLReader(object): + + def __init__(self, wordnet_xml_file): + self._wnxml_file = wordnet_xml_file + self._schema = WNSchemaProcessor() + + self._dispatch = { + u'lexical-unit': self._proc_lexunit, + u'synset': self._proc_synset, + u'relationtypes': self._proc_reltype, + u'synsetrelations': _make_proc_relinst( + self._schema.take_synset_relation, + ), + u'lexicalrelations': _make_proc_relinst( + self._schema.take_lexical_relation, + ), + } + + def __iter__(self): + for _, elem in et.iterparse(self._wnxml_file): + elem_proc = self._dispatch.get(elem.tag) + if elem_proc is not None: + elem_proc(elem) + + for node in self._schema.finalize(): + yield node + + def _proc_reltype(self, elem): + id_ = int(elem.get('id')) + kind = _RELKINDS[elem.get('type')] + parent = elem.get('parent') + if parent is not None: + parent = int(parent) + + self._schema.take_relation_type( + id_, + kind, + elem.get('name'), + elem.get('shortcut'), + parent, + ) + + def _proc_lexunit(self, elem): + id_ = int(elem.get('id')) + var = int(elem.get('variant')) + pos = _POSES[elem.get('pos')] + dom = en.Domain(elem.get('domain')) + + self._schema.take_lexical_unit( + id_, + elem.get('name'), + pos, + var, + dom, + elem.get('desc'), + None, # No verb aspect at present + ) + + def _proc_synset(self, elem): + id_ = int(elem.get('id')) + isart = _BOOLVALUES[elem.get('abstract')] + + self._proc_synset_units( + id_, + (uelem for uelem in elem if uelem.tag == u'unit-id'), + ) + self._schema.take_synset(id_, elem.get('definition'), isart) + + def _proc_synset_units(self, synid, unit_elems): + for uidx, uelem in enumerate(unit_elems, 1): + self._schema.take_unit_to_synset( + int(uelem.text), + synid, + uidx, + ) + + +def _make_proc_relinst(taker): + def elem_proc(elem): + parent = int(elem.get('parent')) + child = int(elem.get('child')) + relid = int(elem.get('relation')) + + taker(parent, child, relid) + + return elem_proc + + +_this_reader_ = WNXMLReader diff --git a/plwn/relation_aliases.tsv b/plwn/relation_aliases.tsv deleted file mode 100644 index b7f87a60bacacb416c9577923ca6bd8558ef3baa..0000000000000000000000000000000000000000 --- a/plwn/relation_aliases.tsv +++ /dev/null @@ -1,5 +0,0 @@ -hiperonimia hiper -hiponimia hipo -deminutywność dem -holonimia holo -meronimia mero diff --git a/plwn/relresolver.py b/plwn/relresolver.py deleted file mode 100644 index 940a529e6a3281acfba93a12cc4110edceb1f865..0000000000000000000000000000000000000000 --- a/plwn/relresolver.py +++ /dev/null @@ -1,122 +0,0 @@ -"""Implementation of Relation Resolver.""" -from __future__ import absolute_import, division - - -from contextlib import closing -import logging - -import pkg_resources as pkgr -import six - - -__all__ = 'RelationResolver', 'get_default_relation_resolver' - - -_DEFAULT_RESOLVER_LOC = 'plwn', 'relation_aliases.tsv' -_default_resolver_obj = None - -_log = logging.getLogger(__name__) - - -class RelationResolver(object): - """Stores dictionary of relation name aliases to full names.""" - - @classmethod - 
def from_tsv(cls, tsv_stream): - """Creates an instance from a TSV file. - - The first item of each line should be the full name, and every other - should be an alias (similar to ``from_reverse_dict``). - - :param tsv_stream: The stream from which TSV lines are read. - :type tsv_stream: TextIO - - :rtype: RelationResolver - """ - adict = {} - - for line in tsv_stream: - items = line.strip().split(u'\t') - fullname = items[0].strip() - for alias in items[1:]: - adict[alias.strip()] = fullname - - return cls(adict) - - @classmethod - def from_reverse_dict(cls, rdict): - """Creates an instance from a dictionary. - - Mapping full names to lists of aliases that should resolve to them. - - :type rdict: Mapping[str, List[str]] - - :rtype: RelationResolver - """ - adict = {} - - for full, aliases in six.iteritems(rdict): - for alias in aliases: - adict[alias] = full - - return cls(adict) - - def __init__(self, aliases): - """. - - :param aliases: Dictionary (or pairs sequence) mapping relation aliases - to full names. - :type aliases: Mapping[str, str] - """ - self._aliases = dict(aliases) - - def add_alias(self, alias, fullname): - """Add a new alias to the dictionary. - - :param str alias: The alias. - - :param str fullname: The name the alias will resolve to. - """ - self._aliases[alias] = fullname - - def resolve_name(self, relname): - """Resolve a possible alias to a full name. - - If ``relname`` is not a known alias, it's returned unchanged. - - :param str relname: The relation name that may be an alias that needs - to be resolved. - - :return: ``relname`` or, if it's an alias, the full name it resolves - to. - :rtype: str - """ - return self._aliases.get(relname, relname) - - -def get_default_relation_resolver(): - """Create an instance of ``RelationResolver``. - - That loads a file with all default relation name aliases. - - The default aliases TSV file is located in ``plwn`` package root, as - ``relation_aliases.tsv``. - - :return: The default ``RelationResolver`` instance, initialized once on the - first call. - :rtype: RelationResolver - """ - global _default_resolver_obj - - if _default_resolver_obj is None: - try: - with closing(pkgr.resource_stream(*_DEFAULT_RESOLVER_LOC)) \ - as tsv_in: - _default_resolver_obj = RelationResolver.from_tsv( - line.decode('utf8') for line in tsv_in - ) - except IOError: - _log.exception('Failed to load default aliases file') - _default_resolver_obj = RelationResolver({}) - - return _default_resolver_obj diff --git a/plwn/storages/objects.py b/plwn/storages/objects.py deleted file mode 100644 index 9618a75d9bbea17f3b450c9fa6b0f3f754bfe897..0000000000000000000000000000000000000000 --- a/plwn/storages/objects.py +++ /dev/null @@ -1,520 +0,0 @@ -"""Implementation which stores data in plain python objects. - -Should be fairly fast to construct, but querying and memory -efficiencies may not be too great. -""" - -from __future__ import absolute_import, absolute_import - - -import collections as coll -import logging -import operator as op - -import six -from six.moves import cPickle - -from ..readers import nodes as nd -from ..enums import PoS -from ..relresolver import get_default_relation_resolver -from ..utils.tupwrap import tup_wrapped, TupWrapper -from ..utils.sorting import text_key -from .. 
import bases, exceptions as exc - - -__all__ = 'PLWordNet', 'Synset', 'LexicalUnit' - - -_log = logging.getLogger(__name__) - - -class PLWordNet(bases.PLWordNetBase): - - _STORAGE_NAME = 'objects' - _SCHEMA_VERSION = 2 - - @classmethod - def from_reader(cls, reader, dump_to=None): - obj = cls() - obj.__read_data(reader) - - if dump_to is not None: - with open(dump_to, 'wb') as dump_ofs: - cPickle.dump(obj, dump_ofs, cPickle.HIGHEST_PROTOCOL) - - return obj - - @classmethod - def from_dump(cls, dump): - with open(dump, 'rb') as dump_ifs: - obj = cPickle.load(dump_ifs) - - if not isinstance(obj, cls): - raise exc.LoadException( - 'Unpickled object is not an instance of ' + repr(cls) - ) - - if not hasattr(obj, '_version') or obj._version != cls._SCHEMA_VERSION: - raise exc.DumpVersionException( - getattr(obj, '_version', None), - cls._SCHEMA_VERSION, - ) - - return obj - - @staticmethod - def __fill_id_reldict(src_node, id_rel_dict, id_set): - rels = coll.defaultdict(list) - for relname, reltarget in src_node.related: - if reltarget not in id_set: - _log.warning( - 'Target %d of relation %s from %d does not exist', - reltarget, - relname, - src_node.id, - ) - else: - rels[relname].append(reltarget) - - id_rel_dict[src_node.id] = coll.OrderedDict( - (relname, tuple(rels[relname])) - for relname in sorted(rels, key=text_key) - ) - - @staticmethod - def __gen_item_reldict(id_rel_dict, item_rel_dict, item_dict): - for src_id, rel_dict in six.iteritems(id_rel_dict): - irel_dict = coll.OrderedDict() - for relname, trg_ids in six.iteritems(rel_dict): - trg_items = [] - for trg_id in rel_dict[relname]: - try: - trg_item = item_dict[trg_id] - except KeyError: - _log.warning( - 'Target %d of relation %s from %d does not exist', - trg_id, - relname, - src_id, - ) - else: - trg_items.append(trg_item) - - if trg_items: - irel_dict[relname] = tuple(trg_items) - - if irel_dict: - item_rel_dict[src_id] = irel_dict - - def __init__(self): - """**NOTE:** This constructor should not be invoked directly. - - Use one of the standard methods: ``from_dump`` or ``from_reader``. - """ - super(PLWordNet, self).__init__() - - # Remember the version for unpickling check - self._version = self._SCHEMA_VERSION - - # Master indexes - self._synsets = coll.OrderedDict() - self._units = coll.OrderedDict() - - # Secondary indexes for lookup of units by lemma, pos and var - self._i_lem_pos_var = {} - self._i_lem_pos = coll.defaultdict(list) - self._i_lem_var = coll.defaultdict(list) - self._i_lem = coll.defaultdict(list) - self._i_pos = coll.defaultdict(list) - # No index for lookup by var! That's the slow way. - - # Relations: indexed by id and then relation names; the second one - # should be ordered. 
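        # A sketch of the shape both dicts take once __read_data runs
        # (values illustrative):
        #
        #     self._synrels = {
        #         source_synset_id: OrderedDict([
        #             (relation_name, (target_synset, ...)),
        #         ]),
        #     }
        #
        # with the relation names pre-sorted by text_key.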
- self._synrels = {} - self._lexrels = {} - - def lexical_unit_by_id(self, id_): - try: - return self._units[id_] - except KeyError: - raise exc.InvalidLexicalUnitIdentifierException(id_) - - @tup_wrapped - def lexical_units(self, lemma=None, pos=None, variant=None): - if lemma is not None and pos is not None and variant is not None: - # Yield only one unit since it must be it if it exists - try: - yield self._i_lem_pos_var[lemma, PoS(pos), variant] - except KeyError: - pass - finally: - return - - if lemma is not None and pos is not None: - retlist = self._i_lem_pos.get((lemma, PoS(pos)), ()) - elif lemma is not None and variant is not None: - retlist = self._i_lem_var.get((lemma, variant), ()) - elif lemma is not None: - retlist = self._i_lem.get(lemma, ()) - elif pos is not None: - retlist = self._i_pos.get(PoS(pos), ()) - else: - # Hoo boy, it's bad - retlist = self._select_lexunits(lemma, PoS(pos), variant) - - for lu in retlist: - yield lu - - def lexical_unit(self, lemma, pos, variant): - try: - return self._i_lem_pos_var[lemma, PoS(pos), variant] - except KeyError: - raise exc.LexicalUnitNotFound(lemma, pos, variant) - - def synset_by_id(self, id_): - try: - return self._synsets[id_] - except KeyError: - raise exc.InvalidSynsetIdentifierException(id_) - - @tup_wrapped - def synsets(self, lemma=None, pos=None, variant=None): - for lu in self.lexical_units(lemma, pos, variant): - yield lu.synset - - def synset(self, lemma, pos, variant): - try: - return self._i_lem_pos_var[lemma, PoS(pos), variant].synset - except KeyError: - raise exc.SynsetNotFound(lemma, pos, variant) - - def synset_relation_edges(self, include=None, exclude=None): - return TupWrapper(self._iter_reledges(self._synrels, include, exclude)) - - def lexical_relation_edges(self, include=None, exclude=None): - return TupWrapper(self._iter_reledges(self._lexrels, include, exclude)) - - def _select_lexunits(self, lemma, pos, variant): - # The "slow way" (indexless) of selecting lexical units - for lu in six.itervalues(self._units): - if ((lemma is None or lemma == lu._lemma) and - (pos is None or pos is lu._pos) and - (variant is None or variant == lu._var)): - yield lu - - def _iter_reledges(self, reledges, include, exclude): - # Ensure those are sets - include = frozenset( - self._rel_resolver.resolve_name(rel) for rel in include - ) if include is not None else None - exclude = frozenset( - self._rel_resolver.resolve_name(rel) for rel in exclude - ) if exclude is not None else None - - for src, reldict in six.iteritems(reledges): - for relname, targets in six.iteritems(reldict): - if ((include is None or relname in include) and - (exclude is None or relname not in exclude)): - for trg in targets: - yield bases.RelationEdge( - source=src, - relation=relname, - target=trg, - ) - - def __read_data(self, reader): - # Nodes need to be separated and sorted before being pushed to indexes. - syn_nodes = {} - ordered_synids = [] - lex_nodes = {} - # Ordered AND filtered - ordered_lex_nodes = [] - # The association will remember unit indices - s2u = coll.defaultdict(list) - # Temporary id relation dicts - id_lex_rels = {} - id_syn_rels = {} - - for node in reader: - if isinstance(node, nd.SynsetNode): - syn_nodes[node.id] = node - else: - lex_nodes[node.id] = node - - # First iterate over lex nodes to establish the unit-synset - # relationships and sort out synsets and lexunits that don't exist. 
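        # (Sketch of the flow: this first pass validates each unit against
        # syn_nodes and queues (unit_index, unit_id) pairs in s2u; the
        # second pass below then creates each synset with its units sorted
        # by unit_index.)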
- for lex_node in six.itervalues(lex_nodes): - if lex_node.synset not in syn_nodes: - _log.warning( - 'Synset %d from unit %d does not exist', - lex_node.id, - lex_node.synset, - ) - else: - s2u[lex_node.synset].append((lex_node.unit_index, lex_node.id)) - ordered_synids.append(lex_node.synset) - ordered_lex_nodes.append(lex_node) - - # Sort by lemma! - ordered_lex_nodes.sort(key=lambda node: text_key(node.lemma)) - - # Insert lexical unit objects into ordered dict - for lex_node in ordered_lex_nodes: - self._units[lex_node.id] = LexicalUnit( - self, - lex_node.id, - lex_node.lemma, - lex_node.pos, - lex_node.variant, - lex_node.synset, - lex_node.definition, - tuple(lex_node.usage_notes), - tuple(lex_node.external_links), - tuple(lex_node.examples), - tuple(lex_node.examples_sources), - lex_node.domain, - lex_node.verb_aspect, - lex_node.emotion_markedness, - tuple(lex_node.emotion_names), - tuple(lex_node.emotion_valuations), - lex_node.emotion_example_1, - lex_node.emotion_example_2, - ) - - self.__fill_id_reldict(lex_node, id_lex_rels, lex_nodes) - - # Now, insert synsets in the right order - for synid in ordered_synids: - if synid in self._synsets: - continue - - syn_node = syn_nodes[synid] - # Sort units by index first - synunits = s2u[synid] - synunits.sort(key=op.itemgetter(0)) - - self._synsets[synid] = Synset( - self, - synid, - (it[1] for it in synunits), - syn_node.definition, - ) - - # Relations are done similarly to lex ones - self.__fill_id_reldict(syn_node, id_syn_rels, syn_nodes) - - # But what if there are synsets that have no units? - for synid in syn_nodes: - if synid not in self._synsets: - _log.warning('Synset %d has no units', synid) - - # We can convert id rel dicts now - self.__gen_item_reldict(id_lex_rels, self._lexrels, self._units) - self.__gen_item_reldict(id_syn_rels, self._synrels, self._synsets) - - # We can build indexes now - for lu in six.itervalues(self._units): - self._i_lem_pos_var[lu._lemma, lu._pos, lu._var] = lu - self._i_lem_pos[lu._lemma, lu._pos].append(lu) - self._i_lem_var[lu._lemma, lu._var].append(lu) - self._i_lem[lu._lemma].append(lu) - self._i_pos[lu._pos].append(lu) - - -class LexicalUnit(bases.LexicalUnitBase): - - __slots__ = ( - '_relr', - '_wn', - '_id', - '_lemma', - '_pos', - '_var', - '_synid', - '_def', - '_usn', - '_extl', - '_exms', - '_exms_srcs', - '_dom', - '_va', - '_emo_mark', - '_emo_names', - '_emo_valuations' - '_emo_ex1', - '_emo_ex2', - ) - - def __init__(self, - wn, - lexid, - lemma, - pos, - variant, - synid, - def_, - usn, - extl, - exms, - exms_srcs, - dom, - va, - emo_mark, - emo_names, - emo_valuations, - emo_ex1, - emo_ex2): - """**NOTE:** This constructor should not be called directly. - - Use :class:`PLWordNet` methods to obtain lexical units. 
- """ - self._relr = get_default_relation_resolver() - - self._wn = wn - self._id = lexid - self._lemma = lemma - self._pos = pos - self._var = variant - self._synid = synid - self._def = def_ - self._usn = usn - self._extl = extl - self._exms = exms - self._exms_srcs = exms_srcs - self._dom = dom - self._va = va - self._emo_mark = emo_mark - self._emo_names = emo_names - self._emo_valuations = emo_valuations - self._emo_ex1 = emo_ex1 - self._emo_ex2 = emo_ex2 - - @property - def id(self): - return self._id - - @property - def lemma(self): - return self._lemma - - @property - def pos(self): - return self._pos - - @property - def variant(self): - return self._var - - @property - def synset(self): - return self._wn._synsets[self._synid] - - @property - def definition(self): - return self._def - - @property - def sense_examples(self): - return self._exms - - @property - def sense_examples_sources(self): - return self._exms_srcs - - @property - def external_links(self): - return self._extl - - @property - def usage_notes(self): - return self._usn - - @property - def domain(self): - return self._dom - - @property - def verb_aspect(self): - return self._va - - @property - def emotion_markedness(self): - return self._emo_mark - - @property - def emotion_names(self): - return self._emo_names - - @property - def emotion_valuations(self): - return self._emo_valuations - - @property - def emotion_example(self): - return self._emo_ex1 - - @property - def emotion_example_secondary(self): - return self._emo_ex2 - - @property - def relations(self): - # Not caching, since this is an informational method that will probably - # not be called very often. - # The rel dicts should be an ordered dict with relation names as keys. - return tuple(self._wn._lexrels[self._id]) - - def related(self, relation_name): - relname = self._rel_resolver.resolve_name(relation_name) - reldict = self._wn._lexrels[self._id] - try: - return TupWrapper(iter(reldict[relname])) - except KeyError: - raise exc.InvalidRelationNameException(relation_name) - - -class Synset(bases.SynsetBase): - - __slots__ = '_relr', '_wn', '_id', '_units', '_def' - - def __init__(self, wn, synid, unit_ids, def_): - """**NOTE:** This constructor should not be called directly. - - Use :class:`PLWordNet` methods to obtain synsets. - """ - self._relr = get_default_relation_resolver() - - self._wn = wn - self._id = synid - self._units = tuple(wn._units[uid] for uid in unit_ids) - self._def = def_ - - @property - def id(self): - return self._id - - @property - def lexical_units(self): - return self._units - - @property - def definition(self): - return self._def - - @property - def relations(self): - # Not caching, since this is an informational method that will probably - # not be called very often. - # The rel dicts should be an ordered dict with relation names as keys. 
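        # (Iterating the OrderedDict yields just its keys, so the result is
        # e.g. (u'hiperonimia', u'hiponimia') -- the relation names, already
        # sorted by text_key.)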
-        return tuple(self._wn._synrels[self._id])
-
-    def related(self, relation_name):
-        relname = self._rel_resolver.resolve_name(relation_name)
-        reldict = self._wn._synrels[self._id]
-        try:
-            return TupWrapper(iter(reldict[relname]))
-        except KeyError:
-            raise exc.InvalidRelationNameException(relation_name)
-
-
-_this_storage_ = PLWordNet
diff --git a/plwn/storages/sqlite.py b/plwn/storages/sqlite.py
index 4c01856e8dba214d5ead277419950d39aea361bd..49235d37569668d7e87db652bef383ada5943f73 100644
--- a/plwn/storages/sqlite.py
+++ b/plwn/storages/sqlite.py
@@ -1,3 +1,20 @@
+# coding: utf8

+# Copyright (C) 2017 Michał Kaliński
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 """Implementation that stores data from plWordNet in a sqlite database file.

 With an impromptu schema.
@@ -10,7 +27,7 @@
 except ImportError:
     pass

 import sqlite3
-from collections import defaultdict
+import collections as coll
 from contextlib import closing
 import errno
 import itertools as itt
@@ -24,75 +41,74 @@
 import weakref

 import six

 from ..readers import nodes as nd
-from ..enums import (
-    PoS,
-    VerbAspect,
-    EmotionMarkedness,
-    EmotionName,
-    EmotionValuation,
-    Domain,
+from ..utils.artifilter import (
+    filter_artificial_related_synsets,
+    filter_artificial_synset_edges,
 )
-from ..relresolver import get_default_relation_resolver
-from ..utils.tupwrap import tup_wrapped, TupWrapper
-from .. import bases, exceptions as exc
+from ..utils.relinfotuple import RelationInfoTuple
+from .. import bases as bs, exceptions as exc, enums as en
+
+__all__ = 'PLWordNet', 'Synset', 'LexicalUnit', 'RelationInfo'

-__all__ = 'PLWordNet', 'Synset', 'LexicalUnit'
+_LOG = logging.getLogger(__name__)

-_log = logging.getLogger(__name__)
+# Marker object for data that has not been fetched from the database
+_UNFETCHED = object()

-# SQL script used to initialize the database.
+# SQL script used to initialize the database. {{{
 # "locale" collation must be defined on the connection before this is executed.
 _DB_SCHEMA_SCRIPT = u"""
 PRAGMA foreign_keys = ON;

--- Metadata table. Used for version number, currently.
-CREATE TABLE IF NOT EXISTS plwn_meta (
+-- Metadata table. 
Used for version number, currently +CREATE TABLE plwn_meta ( name TEXT UNIQUE NOT NULL, value BLOB ); -- Tables for constant values -CREATE TABLE IF NOT EXISTS pos ( +CREATE TABLE pos ( id INTEGER PRIMARY KEY, value TEXT UNIQUE NOT NULL ); -CREATE TABLE IF NOT EXISTS verbaspect ( +CREATE TABLE verbaspect ( id INTEGER PRIMARY KEY, value TEXT UNIQUE NOT NULL ); -CREATE TABLE IF NOT EXISTS emotionmark ( +CREATE TABLE emotionmark ( id INTEGER PRIMARY KEY, value TEXT UNIQUE NOT NULL ); -CREATE TABLE IF NOT EXISTS emotionname ( +CREATE TABLE emotionname ( id INTEGER PRIMARY KEY, value TEXT UNIQUE NOT NULL COLLATE locale ); -CREATE TABLE IF NOT EXISTS emotionvaluation ( +CREATE TABLE emotionvaluation ( id INTEGER PRIMARY KEY, value TEXT UNIQUE NOT NULL COLLATE locale ); -CREATE TABLE IF NOT EXISTS domain ( +CREATE TABLE domain ( id INTEGER PRIMARY KEY, value TEXT UNIQUE NOT NULL COLLATE locale ); -- Synset only gets one simple table -CREATE TABLE IF NOT EXISTS synset ( +CREATE TABLE synset ( id INTEGER PRIMARY KEY, - definition TEXT NOT NULL COLLATE locale + definition TEXT COLLATE locale, + isartificial INTEGER NOT NULL DEFAULT 0 ); -- Lexical units have several tables, since they have several list-like -- properties. They also need indexes for lookup. -CREATE TABLE IF NOT EXISTS lexicalunit ( +CREATE TABLE lexicalunit ( id INTEGER PRIMARY KEY, lemma TEXT NOT NULL COLLATE locale, pos INTEGER NOT NULL @@ -101,11 +117,12 @@ CREATE TABLE IF NOT EXISTS lexicalunit ( synset INTEGER NOT NULL REFERENCES synset (id), unitindex INTEGER NOT NULL, - definition TEXT NOT NULL COLLATE locale, + definition TEXT COLLATE locale, domain INTEGER NOT NULL REFERENCES domain (id), verbaspect INTEGER REFERENCES verbaspect (id), + isemotional INTEGER, emotionmark INTEGER REFERENCES emotionmark (id), emotionexample1 TEXT COLLATE locale, @@ -119,34 +136,34 @@ CREATE TABLE IF NOT EXISTS lexicalunit ( -- lem-pos-var and synset-unitindex indexes (and partial ones) are -- automatically made because of UNIQUE constraint, but additional indexes -- need to be created. 
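-- (Illustrative lookups served by the two extra indexes below:
--      SELECT id FROM lexicalunit WHERE lemma = ? AND variant = ?;
--      SELECT id FROM lexicalunit WHERE pos = ?;
--  full lemma-pos-variant lookups go through the UNIQUE index instead.)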
-CREATE INDEX IF NOT EXISTS lex_i_lem_var ON lexicalunit (lemma, variant);
-CREATE INDEX IF NOT EXISTS lex_i_pos ON lexicalunit (pos);
+CREATE INDEX lex_i_lem_var ON lexicalunit (lemma, variant);
+CREATE INDEX lex_i_pos ON lexicalunit (pos);
 -- No index for variant itself - it's not a useful use case

 -- Tables dependent on lexicalunit
-CREATE TABLE IF NOT EXISTS senseexample (
+CREATE TABLE senseexample (
     unitid INTEGER NOT NULL
         REFERENCES lexicalunit (id),
     example TEXT NOT NULL COLLATE locale,
     source TEXT NOT NULL COLLATE locale
 );

-CREATE INDEX IF NOT EXISTS sen_i ON senseexample (unitid);
+CREATE INDEX sen_i ON senseexample (unitid);

-CREATE TABLE IF NOT EXISTS externallink (
+CREATE TABLE externallink (
     unitid INTEGER NOT NULL
         REFERENCES lexicalunit (id),
     link TEXT NOT NULL COLLATE locale
 );

-CREATE INDEX IF NOT EXISTS link_i ON externallink (unitid);
+CREATE INDEX link_i ON externallink (unitid);

-CREATE TABLE IF NOT EXISTS usagenote (
+CREATE TABLE usagenote (
     unitid INTEGER NOT NULL
         REFERENCES lexicalunit (id),
     note TEXT NOT NULL COLLATE locale
 );

-CREATE INDEX IF NOT EXISTS note_i ON usagenote (unitid);
+CREATE INDEX note_i ON usagenote (unitid);

-CREATE TABLE IF NOT EXISTS unitemotionname (
+CREATE TABLE unitemotionname (
     unitid INTEGER NOT NULL
         REFERENCES lexicalunit (id),
     nameid INTEGER NOT NULL
@@ -155,7 +172,7 @@
     PRIMARY KEY (unitid, nameid)
 );

-CREATE TABLE IF NOT EXISTS unitemotionvaluation (
+CREATE TABLE unitemotionvaluation (
     unitid INTEGER NOT NULL
         REFERENCES lexicalunit (id),
     valuationid INTEGER NOT NULL
@@ -164,41 +181,114 @@
     PRIMARY KEY (unitid, valuationid)
 );

--- Relation tables
-CREATE TABLE IF NOT EXISTS synsetrelationtype (
+-- Relation tables --
+
+-- The four tables below are used to gather combinations of parent / child
+-- relation names.
+CREATE TABLE synsetrelationparentpart (
+    id INTEGER PRIMARY KEY,
+    name TEXT UNIQUE NOT NULL COLLATE locale
+);
+CREATE TABLE synsetrelationchildpart (
     id INTEGER PRIMARY KEY,
     name TEXT UNIQUE NOT NULL COLLATE locale
 );
-CREATE TABLE IF NOT EXISTS lexicalrelationtype (
+CREATE TABLE lexicalrelationparentpart (
     id INTEGER PRIMARY KEY,
     name TEXT UNIQUE NOT NULL COLLATE locale
 );
+CREATE TABLE lexicalrelationchildpart (
+    id INTEGER PRIMARY KEY,
+    name TEXT UNIQUE NOT NULL COLLATE locale
+);
+
+-- Next, gather these parts into relation types themselves.
+-- Parent can't be NULL - the no-parent case will be handled by a special empty
+-- string parent. This is so that UNIQUE works correctly.
+CREATE TABLE synsetrelationtype (
+    id INTEGER PRIMARY KEY,
+    parentpart INTEGER NOT NULL
+        REFERENCES synsetrelationparentpart (id),
+    childpart INTEGER NOT NULL
+        REFERENCES synsetrelationchildpart (id),
+
+    UNIQUE (parentpart, childpart)
+);
+CREATE TABLE lexicalrelationtype (
+    id INTEGER PRIMARY KEY,
+    parentpart INTEGER NOT NULL
+        REFERENCES lexicalrelationparentpart (id),
+    childpart INTEGER NOT NULL
+        REFERENCES lexicalrelationchildpart (id),
+
+    UNIQUE (parentpart, childpart)
+);

-CREATE TABLE IF NOT EXISTS synsetrelation (
-    parentid INTEGER NOT NULL
+-- The tables below are simply maps of relation aliases to their main IDs.
+-- Reverse indexes are needed, too. 
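-- (Illustrative use of the alias maps defined next:
--      SELECT relationid FROM synsetrelationalias WHERE name = 'hiper';
--  and, via the reverse index, all aliases of one relation type:
--      SELECT name FROM synsetrelationalias WHERE relationid = ?;)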
+CREATE TABLE synsetrelationalias ( + name TEXT PRIMARY KEY NOT NULL COLLATE locale, + relationid INTEGER NOT NULL + REFERENCES synsetrelationtype (id) +); +CREATE INDEX synsetrelationalias_irev ON synsetrelationalias (relationid); +CREATE TABLE lexicalrelationalias ( + name TEXT PRIMARY KEY NOT NULL COLLATE locale, + relationid INTEGER NOT NULL + REFERENCES lexicalrelationtype (id) +); +CREATE INDEX lexicalrelationalias_irev ON lexicalrelationalias (relationid); + +-- Next are finally the relation instances +CREATE TABLE synsetrelation ( + source INTEGER NOT NULL REFERENCES synset (id), - relid INTEGER NOT NULL + relationtype INTEGER NOT NULL REFERENCES synsetrelationtype (id), - childid INTEGER NOT NULL + target INTEGER NOT NULL REFERENCES synset (id), - PRIMARY KEY (parentid, relid, childid) + PRIMARY KEY (source, relationtype, target) ); - -CREATE TABLE IF NOT EXISTS lexicalrelation ( - parentid INTEGER NOT NULL +CREATE TABLE lexicalrelation ( + source INTEGER NOT NULL REFERENCES lexicalunit (id), - relid INTEGER NOT NULL + relationtype INTEGER NOT NULL REFERENCES lexicalrelationtype (id), - childid INTEGER NOT NULL + target INTEGER NOT NULL REFERENCES lexicalunit (id), - PRIMARY KEY (parentid, relid, childid) + PRIMARY KEY (source, relationtype, target) ); -""" - -class PLWordNet(bases.PLWordNetBase): +-- Insert the special empty values for the parent part tables +INSERT INTO synsetrelationparentpart (name) VALUES (''); +INSERT INTO lexicalrelationparentpart (name) VALUES (''); +""" # }}} + +_RELTYPE_TABLES = { + en.RelationKind.synset: u'synsetrelationtype', + en.RelationKind.lexical: u'lexicalrelationtype', +} +_RELALIAS_TABLES = { + en.RelationKind.synset: u'synsetrelationalias', + en.RelationKind.lexical: u'lexicalrelationalias', +} +_RELPARENTPART_TABLES = { + en.RelationKind.synset: u'synsetrelationparentpart', + en.RelationKind.lexical: u'lexicalrelationparentpart', +} +_RELCHILDPART_TABLES = { + en.RelationKind.synset: u'synsetrelationchildpart', + en.RelationKind.lexical: u'lexicalrelationchildpart', +} +_RELINST_TABLES = { + en.RelationKind.synset: u'synsetrelation', + en.RelationKind.lexical: u'lexicalrelation', +} + + +class PLWordNet(bs.PLWordNetBase): _STORAGE_NAME = 'sqlite3' _SCHEMA_VERSION = 4 @@ -228,44 +318,11 @@ class PLWordNet(bases.PLWordNetBase): return plwn - @staticmethod - def _make_include_exclude(include, exclude): - """. - - Creates ``WHERE`` clause and the parameter tuple for simple ``IN`` - and ``NOT IN`` case. - """ - if include is not None: - whereclause = u"WHERE name IN ({})".format( - u','.join(itt.repeat(u'?', len(include))) - ) - includetuple = tuple(include) - else: - whereclause = u'' - includetuple = () - - if exclude is not None: - if not whereclause: - whereclause = u"WHERE name NOT IN ({})" - else: - whereclause += u" AND name NOT IN ({})" - - whereclause = whereclause.format( - u','.join(itt.repeat(u'?', len(exclude))) - ) - excludetuple = tuple(exclude) - else: - excludetuple = () - - return whereclause, includetuple + excludetuple - def __init__(self, db_file=None): """**NOTE:** This constructor should not be invoked directly. Use one of the standard methods: ``from_dump`` or ``from_reader``. 
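        A construction sketch (the dump path is illustrative)::

            plwn = PLWordNet.from_reader(some_reader, dump_to='plwn.db')
            # ...later, cheaply reopen the same data...
            plwn = PLWordNet.from_dump('plwn.db')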
""" - super(PLWordNet, self).__init__() - if db_file is None: self._tmp_dir = tempfile.mkdtemp(prefix='plwn_api-') # Close the file immediately, we just need the @@ -288,19 +345,22 @@ class PLWordNet(bases.PLWordNetBase): self.__drop_tmpdir() raise + self._relcache = _RelCache(self._db) + def close(self): self._db.close() self.__drop_tmpdir() def lexical_units(self, lemma=None, pos=None, variant=None): - return TupWrapper( - LexicalUnit(self._db, *row) + return tuple( + LexicalUnit(self._db, self._relcache, *row) for row in self._select_lexical_units(lemma, pos, variant, True) ) def lexical_unit(self, lemma, pos, variant): return LexicalUnit( self._db, + self._relcache, *self._get_one_lexical_unit( lemma, pos, @@ -321,32 +381,31 @@ class PLWordNet(bases.PLWordNetBase): ) row = cur.fetchone() if row is None: - raise exc.InvalidLexicalUnitIdentifierException(id_) - return LexicalUnit(self._db, id_, *row) + raise exc.LexicalUnitNotFound('id=' + repr(id_)) + return LexicalUnit(self._db, self._relcache, id_, *row) - @tup_wrapped def lexical_relation_edges(self, include=None, exclude=None): - parsed_include = frozenset( - self._rel_resolver.resolve_name(rel) for rel in include - ) if include is not None else None - - parsed_exclude = frozenset( - self._rel_resolver.resolve_name(rel) for rel in exclude - ) if exclude is not None else None - - whereclause, paramtuple = self._make_include_exclude( + parsed_include = frozenset(itt.chain.from_iterable( + self._relcache.get_ids(rel, en.RelationKind.lexical) + for rel in include + )) if include is not None else None + parsed_exclude = frozenset(itt.chain.from_iterable( + self._relcache.get_ids(rel, en.RelationKind.lexical) + for rel in exclude + )) if exclude is not None else None + where_clause, param_tuple = _make_include_exclude( parsed_include, parsed_exclude, + u'relationtype', ) with closing(self._db.cursor()) as cur: cur.execute( u""" - SELECT parentid, childid, name + SELECT source, target, relationtype FROM lexicalrelation - JOIN lexicalrelationtype ON relid = id - """ + whereclause, - paramtuple, + """ + where_clause, + param_tuple, ) lu_q = u""" @@ -356,32 +415,49 @@ class PLWordNet(bases.PLWordNetBase): WHERE lexicalunit.id = ? 
""" - for parent_id, child_id, rel_name in cur: + edges = [] + + for parent_id, child_id, rel_id in cur: with closing(self._db.cursor()) as cur2: cur2.execute(lu_q, (parent_id,)) par_lu = LexicalUnit( self._db, + self._relcache, parent_id, *cur2.fetchone() ) cur2.execute(lu_q, (child_id,)) chl_lu = LexicalUnit( self._db, + self._relcache, child_id, *cur2.fetchone() ) - yield bases.RelationEdge(par_lu, rel_name, chl_lu) + edges.append(bs.RelationEdge( + par_lu, + self._relcache.get_info_by_id( + rel_id, + en.RelationKind.lexical, + ), + chl_lu, + )) + + return tuple(edges) def synsets(self, lemma=None, pos=None, variant=None): synids = frozenset( row[-1] for row in self._select_lexical_units(lemma, pos, variant, True) ) - return TupWrapper(Synset(self._db, synid) for synid in synids) + return tuple( + Synset(self._db, self._relcache, synid) + for synid in synids + ) def synset(self, lemma, pos, variant): return Synset( self._db, + self._relcache, self._get_one_lexical_unit( lemma, pos, @@ -397,38 +473,80 @@ class PLWordNet(bases.PLWordNetBase): (id_,), ) if not cur.fetchone()[0]: - raise exc.InvalidSynsetIdentifierException(id_) - return Synset(self._db, id_) - - @tup_wrapped - def synset_relation_edges(self, include=None, exclude=None): - parsed_include = frozenset( - self._rel_resolver.resolve_name(rel) for rel in include - ) if include is not None else None - - parsed_exclude = frozenset( - self._rel_resolver.resolve_name(rel) for rel in exclude - ) if exclude is not None else None - - whereclause, paramtuple = self._make_include_exclude( + raise exc.SynsetNotFound('id=' + repr(id_)) + return Synset(self._db, self._relcache, id_) + + def synset_relation_edges(self, + include=None, + exclude=None, + skip_artificial=True): + parsed_include = frozenset(itt.chain.from_iterable( + self._relcache.get_ids(rel, en.RelationKind.synset) + for rel in include + )) if include is not None else None + parsed_exclude = frozenset(itt.chain.from_iterable( + self._relcache.get_ids(rel, en.RelationKind.synset) + for rel in exclude + )) if exclude is not None else None + where_clause, param_tuple = _make_include_exclude( parsed_include, parsed_exclude, + u'relationtype', ) + select_clause = u"SELECT source, target, relationtype" + from_clause = u"FROM synsetrelation" + + # Pre-fetch artificial status if skipping is necessary + if skip_artificial: + select_clause += u", parentsyn.isartificial, childsyn.isartificial" + from_clause += ( + u" JOIN synset AS parentsyn ON parentsyn.id = source" + u" JOIN synset AS childsyn ON childsyn.id = target" + ) + yield_edges = self.__syn_edges_withskip + else: + yield_edges = self.__syn_edges_noskip + with closing(self._db.cursor()) as cur: cur.execute( - u""" - SELECT parentid, childid, name - FROM synsetrelation JOIN synsetrelationtype ON relid = id - """ + whereclause, - paramtuple, + u'\n'.join((select_clause, from_clause, where_clause)), + param_tuple, + ) + return tuple(yield_edges(cur)) + + def relations_info(self, name=None, kind=None): + if name is None: + return ( + itt.chain( + self._relcache.get_all_of_kind(en.RelationKind.synset), + self._relcache.get_all_of_kind(en.RelationKind.lexical), + ) + if kind is None + else self._relcache.get_all_of_kind( + en.RelationKind(kind), + ) ) - for parent_id, child_id, rel_name in cur: - yield bases.RelationEdge( - Synset(self._db, parent_id), - rel_name, - Synset(self._db, child_id), + else: + return ( + itt.chain( + self._relcache.get_infos_by_name( + name, + en.RelationKind.synset, + allow_nonexistent=True, + ), + 
self._relcache.get_infos_by_name( + name, + en.RelationKind.lexical, + allow_nonexistent=True, + ), ) + if kind is None + else self._relcache.get_infos_by_name( + name, + en.RelationKind(kind), + ) + ) def _select_lexical_units(self, lemma, pos, variant, defval): with closing(self._db.cursor()) as cur: @@ -443,22 +561,27 @@ class PLWordNet(bases.PLWordNetBase): """, { u'lem': lemma, - u'pos': PoS(pos).value if pos else None, + u'pos': en.PoS(pos).value if pos else None, u'var': variant, u'defval': defval, }, ) - for row in cur: - yield row + return cur.fetchall() def _get_one_lexical_unit(self, lemma, pos, variant, exc_class): # False by default will force-return nothing if any is None - lu_rows = iter(self._select_lexical_units(lemma, pos, variant, False)) + lu_rows = self._select_lexical_units(lemma, pos, variant, False) try: - lu_row = next(lu_rows) - except StopIteration: - raise exc_class(lemma, pos, variant) - assert next(lu_rows, None) is None + lu_row = lu_rows[0] + except IndexError: + raise exc_class( + 'lemma={!r}, pos={!r}, variant={!r}'.format( + lemma, + pos, + variant, + ), + ) + assert len(lu_rows) == 1 return lu_row def __init_db(self): @@ -467,27 +590,27 @@ class PLWordNet(bases.PLWordNetBase): with self._db: self._db.executemany( u"INSERT OR IGNORE INTO pos (value) VALUES (?)", - ((p.value,) for p in PoS), + ((p.value,) for p in en.PoS), ).close() self._db.executemany( u"INSERT OR IGNORE INTO verbaspect (value) VALUES (?)", - ((va.value,) for va in VerbAspect), + ((va.value,) for va in en.VerbAspect), ).close() self._db.executemany( u"INSERT OR IGNORE INTO emotionmark (value) VALUES (?)", - ((em.value,) for em in EmotionMarkedness), + ((em.value,) for em in en.EmotionMarkedness), ).close() self._db.executemany( u"INSERT OR IGNORE INTO emotionname (value) VALUES (?)", - ((en.value,) for en in EmotionName), + ((en.value,) for en in en.EmotionName), ).close() self._db.executemany( u"INSERT OR IGNORE INTO emotionvaluation (value) VALUES (?)", - ((ev.value,) for ev in EmotionValuation), + ((ev.value,) for ev in en.EmotionValuation), ).close() self._db.executemany( u"INSERT OR IGNORE INTO domain (value) VALUES (?)", - ((dm.value,) for dm in Domain), + ((dm.value,) for dm in en.Domain), ).close() # Insert version if the database is new @@ -527,62 +650,54 @@ class PLWordNet(bases.PLWordNetBase): if e.errno != errno.ENOENT: raise + def __syn_edges_noskip(self, rowiter): + for parent_id, child_id, rel_id in rowiter: + yield bs.RelationEdge( + Synset(self._db, self._relcache, parent_id), + self._relcache.get_info_by_id(rel_id, en.RelationKind.synset), + Synset(self._db, self._relcache, child_id), + ) + + def __syn_edges_withskip(self, rowiter): + return filter_artificial_synset_edges( + bs.RelationEdge( + Synset(self._db, self._relcache, parent_id, bool(parent_art)), + self._relcache.get_info_by_id(rel_id, en.RelationKind.synset), + Synset(self._db, self._relcache, child_id, bool(child_art)), + ) + for parent_id, child_id, rel_id, parent_art, child_art in rowiter + ) -class LexicalUnit(bases.LexicalUnitBase): - - __slots__ = ( - '_relr', - '_db', - '_id', - '_lemma', - '_pos', - '_var', - '_synid', - '_syn', - '_def', - '_usn', - '_extl', - '_exms', - '_exms_srcs', - '_dom', - '_va', - '_emo_mark', - '_emo_names', - '_emo_valuations' - '_emo_ex1', - '_emo_ex2', - ) - # Since ``None`` is a valid value for verb_aspect, this is a sentinel value - _NO_VAL = object() +class LexicalUnit(bs.LexicalUnitBase): - def __init__(self, conn, id_, lemma, pos, variant, synid): + def __init__(self, 
conn, relcache, id_, lemma, pos, variant, synid): """**NOTE:** This constructor should not be called directly. Use :class:`PLWordNet` methods to obtain lexical units. """ - self._relr = get_default_relation_resolver() - self._db = conn + self._relcache = relcache self._id = id_ self._lemma = lemma - self._pos = PoS(pos) + self._pos = en.PoS(pos) self._var = variant self._synid = synid # Rest is unitialized - self._syn = self._NO_VAL - self._def = self._NO_VAL - self._usn = self._NO_VAL - self._extl = self._NO_VAL - self._exms = self._NO_VAL - self._exms_srcs = self._NO_VAL - self._dom = self._NO_VAL - self._va = self._NO_VAL - self._emo_mark = self._NO_VAL - self._emo_names = self._NO_VAL - self._emo_valuations = self._NO_VAL - self._emo_ex1 = self._NO_VAL - self._emo_ex2 = self._NO_VAL + self._syn = _UNFETCHED + self._def = _UNFETCHED + self._usn = _UNFETCHED + self._extl = _UNFETCHED + self._exms = _UNFETCHED + self._exms_srcs = _UNFETCHED + self._dom = _UNFETCHED + self._va = _UNFETCHED + self._is_emo = _UNFETCHED + self._emo_mark = _UNFETCHED + self._emo_names = _UNFETCHED + self._emo_valuations = _UNFETCHED + self._emo_ex1 = _UNFETCHED + self._emo_ex2 = _UNFETCHED @property def id(self): @@ -600,10 +715,18 @@ class LexicalUnit(bases.LexicalUnitBase): def variant(self): return self._var + @property + def is_polish(self): + return self._pos.is_polish + + @property + def is_english(self): + return self._pos.is_english + @property def synset(self): - if self._syn is self._NO_VAL or self._syn() is None: - syn = Synset(self._db, self._synid) + if self._syn is _UNFETCHED or self._syn() is None: + syn = Synset(self._db, self._relcache, self._synid) # Use weakref to avoid circular refrence to synset self._syn = weakref.ref(syn) return syn @@ -611,20 +734,18 @@ class LexicalUnit(bases.LexicalUnitBase): @property def definition(self): - if self._def is self._NO_VAL: + if self._def is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( u"SELECT definition FROM lexicalunit WHERE id = ?", (self._id,), ) - row = cur.fetchone() - assert row is not None - self._def = row[0] if row[0] is not None else '' + self._def = cur.fetchone()[0] return self._def @property def sense_examples(self): - if self._exms is self._NO_VAL: + if self._exms is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( u"SELECT example FROM senseexample WHERE unitid = ?", @@ -635,7 +756,7 @@ class LexicalUnit(bases.LexicalUnitBase): @property def sense_examples_sources(self): - if self._exms_srcs is self._NO_VAL: + if self._exms_srcs is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( u"SELECT source FROM senseexample WHERE unitid = ?", @@ -646,7 +767,7 @@ class LexicalUnit(bases.LexicalUnitBase): @property def external_links(self): - if self._extl is self._NO_VAL: + if self._extl is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( u"SELECT link FROM externallink WHERE unitid = ?", @@ -657,7 +778,7 @@ class LexicalUnit(bases.LexicalUnitBase): @property def usage_notes(self): - if self._usn is self._NO_VAL: + if self._usn is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( u"SELECT note FROM usagenote WHERE unitid = ?", @@ -668,7 +789,7 @@ class LexicalUnit(bases.LexicalUnitBase): @property def domain(self): - if self._dom is self._NO_VAL: + if self._dom is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( u""" @@ -679,14 +800,12 @@ class LexicalUnit(bases.LexicalUnitBase): """, (self._id,), ) - row = cur.fetchone() - assert row is not None - 
self._dom = Domain(row[0]) + self._dom = en.Domain(cur.fetchone()[0]) return self._dom @property def verb_aspect(self): - if self._va is self._NO_VAL: + if self._va is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( u""" @@ -698,12 +817,24 @@ class LexicalUnit(bases.LexicalUnitBase): (self._id,), ) row = cur.fetchone() - self._va = None if row is None else VerbAspect(row[0]) + self._va = None if row is None else en.VerbAspect(row[0]) return self._va + @property + def is_emotional(self): + if self._is_emo is _UNFETCHED: + with closing(self._db.cursor()) as cur: + cur.execute( + u"SELECT isemotional FROM lexicalunit WHERE id = ?", + (self._id,), + ) + rowval = cur.fetchone()[0] + self._is_emo = None if rowval is None else bool(rowval) + return self._is_emo + @property def emotion_markedness(self): - if self._emo_mark is self._NO_VAL: + if self._emo_mark is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( u""" @@ -715,12 +846,16 @@ class LexicalUnit(bases.LexicalUnitBase): (self._id,), ) row = cur.fetchone() - self._emo_mark = None if row is None else EmotionMarkedness(row[0]) + self._emo_mark = ( + None + if row is None + else en.EmotionMarkedness(row[0]) + ) return self._emo_mark @property def emotion_names(self): - if self._emo_names is self._NO_VAL: + if self._emo_names is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( u""" @@ -732,12 +867,12 @@ class LexicalUnit(bases.LexicalUnitBase): """, (self._id,), ) - self._emo_names = tuple(EmotionName(row[0]) for row in cur) + self._emo_names = tuple(en.EmotionName(row[0]) for row in cur) return self._emo_names @property def emotion_valuations(self): - if self._emo_valuations is self._NO_VAL: + if self._emo_valuations is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( u""" @@ -751,14 +886,14 @@ class LexicalUnit(bases.LexicalUnitBase): (self._id,), ) self._emo_valuations = tuple( - EmotionValuation(row[0]) + en.EmotionValuation(row[0]) for row in cur ) return self._emo_valuations @property def emotion_example(self): - if self._emo_ex1 is self._NO_VAL: + if self._emo_ex1 is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( u"SELECT emotionexample1 FROM lexicalunit WHERE id = ?", @@ -769,7 +904,7 @@ class LexicalUnit(bases.LexicalUnitBase): @property def emotion_example_secondary(self): - if self._emo_ex2 is self._NO_VAL: + if self._emo_ex2 is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( u"SELECT emotionexample2 FROM lexicalunit WHERE id = ?", @@ -780,72 +915,126 @@ class LexicalUnit(bases.LexicalUnitBase): @property def relations(self): - # Not caching, since this is an informational method that will probably - # not be called very often + # Not caching, since this is an informative method that will probably + # not be called very often. with closing(self._db.cursor()) as cur: cur.execute( u""" - SELECT DISTINCT name - FROM lexicalrelation JOIN lexicalrelationtype ON id = relid - WHERE parentid = ? - ORDER BY name + SELECT DISTINCT relationtype + FROM lexicalrelation + WHERE source = ? 
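                    -- (DISTINCT: a source unit may hold the same
                    --  relation type towards many targets)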
""", (self._id,), ) - return tuple(row[0] for row in cur) - - def related(self, relation_name): - relname = self._relr.resolve_name(relation_name) + return RelationInfoTuple(sorted( + self._relcache.get_info_by_id(row[0], en.RelationKind.lexical) + for row in cur + )) + + def related(self, relation_id=None): + relinfos = _parse_related_relid( + relation_id, + self._relcache, + en.RelationKind.lexical, + ) with closing(self._db.cursor()) as cur: cur.execute( - u"SELECT id FROM lexicalrelationtype WHERE name = ?", - (relname,), + u""" + SELECT lexicalunit.id, lemma, pos.value, variant, synset + FROM lexicalrelation + JOIN lexicalunit ON lexicalunit.id = target + JOIN pos ON lexicalunit.pos = pos.id + WHERE source = ? {} + """.format(_make_relationtype_where(relinfos)), + tuple(itt.chain( + (self._id,), + (ri._id for ri in (relinfos or ())), + )), + ) + return tuple( + LexicalUnit(self._db, self._relcache, *row) + for row in cur ) - row = cur.fetchone() - if row is None: - raise exc.InvalidRelationNameException(relation_name) - return TupWrapper(self.__related_gen(row[0])) - def __related_gen(self, relid): + def related_pairs(self, relation_id=None): + relinfos = _parse_related_relid( + relation_id, + self._relcache, + en.RelationKind.lexical, + ) with closing(self._db.cursor()) as cur: cur.execute( u""" - SELECT lexicalunit.id, lemma, pos.value, variant, synset + SELECT relationtype, + lexicalunit.id, lemma, pos.value, variant, synset FROM lexicalrelation - JOIN lexicalunit ON lexicalunit.id = childid + JOIN lexicalunit ON lexicalunit.id = target JOIN pos ON lexicalunit.pos = pos.id - WHERE parentid = ? AND relid = ? - """, - (self._id, relid), + WHERE source = ? {} + """.format(_make_relationtype_where(relinfos)), + tuple(itt.chain( + (self._id,), + (ri._id for ri in (relinfos or ())), + )), + ) + return tuple( + ( + self._relcache.get_info_by_id( + row[0], + en.RelationKind.lexical, + ), + LexicalUnit(self._db, self._relcache, *row[1:]), + ) + for row in cur ) - for row in cur: - yield LexicalUnit(self._db, *row) - -class Synset(bases.SynsetBase): - __slots__ = '_relr', '_db', '_id', '_units', '_def' +class Synset(bs.SynsetBase): - def __init__(self, conn, syn_id): + def __init__(self, conn, relcache, syn_id, syn_art=_UNFETCHED): """**NOTE:** This constructor should not be called directly. Use :class:`PLWordNet` methods to obtain synsets. 
""" - self._relr = get_default_relation_resolver() - self._db = conn + self._relcache = relcache self._id = syn_id + self._isart = syn_art - self._units = None - self._def = None + self._units = _UNFETCHED + self._def = _UNFETCHED + + self._pos = _UNFETCHED + self._is_polish = _UNFETCHED + self._is_english = _UNFETCHED @property def id(self): return self._id + @property + def pos(self): + if self._pos == _UNFETCHED: + (self._pos,) = {unit.pos for unit in self.lexical_units} + return self._pos + + @property + def is_polish(self): + if self._is_polish is _UNFETCHED: + self._is_polish = any(unit.is_polish + for unit in self.lexical_units) + return self._is_polish + + @property + def is_english(self): + if self._is_english is _UNFETCHED: + self._is_english = any(unit.is_english + for unit in self.lexical_units) + return self._is_english + @property def lexical_units(self): - if self._units is None: + if self._units is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( u""" @@ -859,6 +1048,7 @@ class Synset(bases.SynsetBase): self._units = tuple( LexicalUnit( self._db, + self._relcache, row[0], row[1], row[2], @@ -872,7 +1062,7 @@ class Synset(bases.SynsetBase): @property def definition(self): - if self._def is None: + if self._def is _UNFETCHED: with closing(self._db.cursor()) as cur: cur.execute( u"SELECT definition FROM synset WHERE id = ?", @@ -880,61 +1070,254 @@ class Synset(bases.SynsetBase): ) row = cur.fetchone() assert row is not None - self._def = row[0] if row[0] is not None else '' + self._def = row[0] if row[0] is not None else None return self._def + @property + def is_artificial(self): + if self._isart is _UNFETCHED: + with closing(self._db.cursor()) as cur: + cur.execute( + u"SELECT isartificial FROM synset WHERE id = ?", + (self._id,), + ) + row = cur.fetchone() + assert row is not None + self._isart = bool(row[0]) + return self._isart + @property def relations(self): # Not caching, since this is an informational method that will probably - # not be called very often + # not be called very often. with closing(self._db.cursor()) as cur: cur.execute( u""" - SELECT DISTINCT name - FROM synsetrelation JOIN synsetrelationtype ON id = relid - WHERE parentid = ? - ORDER BY name + SELECT DISTINCT relationtype + FROM synsetrelation + WHERE source = ? """, (self._id,), ) - return tuple(row[0] for row in cur) + return RelationInfoTuple(sorted( + self._relcache.get_info_by_id(row[0], en.RelationKind.synset) + for row in cur + )) + + def related(self, + relation_id=None, + skip_artificial=True, + _forbidden=None): + + _forbidden = _forbidden or set() + relinfos = _parse_related_relid( + relation_id, + self._relcache, + en.RelationKind.synset, + ) + select_clause = u"SELECT target" + from_clause = u"FROM synsetrelation" + + if skip_artificial: + select_clause += u", synset.isartificial, relationtype" + from_clause += u" JOIN synset ON target = synset.id" + yield_related = self.__related_withskip + else: + yield_related = self.__related_noskip - def related(self, relation_name): - relname = self._relr.resolve_name(relation_name) with closing(self._db.cursor()) as cur: cur.execute( - u"SELECT id FROM synsetrelationtype WHERE name = ?", - (relname,), + u'\n'.join(( + select_clause, + from_clause, + u"WHERE source = ? 
{}".format( + _make_relationtype_where(relinfos), + ), + )), + tuple(itt.chain( + (self._id,), + (ri._id for ri in (relinfos or ())), + )), ) - row = cur.fetchone() - if row is None: - raise exc.InvalidRelationNameException(relation_name) - return TupWrapper(self.__related_gen(row[0])) + return frozenset(yield_related(cur, _forbidden)) + + def related_pairs(self, + relation_id=None, + skip_artificial=True, + _forbidden=None): + + _forbidden = _forbidden or set() + relinfos = _parse_related_relid( + relation_id, + self._relcache, + en.RelationKind.synset, + ) + select_clause = u"SELECT relationtype, target" + from_clause = u"FROM synsetrelation" + + if skip_artificial: + select_clause += u", synset.isartificial" + from_clause += u" JOIN synset ON target = synset.id" + yield_related = self.__related_withskip_pairs + else: + yield_related = self.__related_noskip_pairs - def __related_gen(self, relid): with closing(self._db.cursor()) as cur: cur.execute( - u""" - SELECT childid - FROM synsetrelation - WHERE parentid = ? AND relid = ? - """, - (self._id, relid), + u'\n'.join(( + select_clause, + from_clause, + u"WHERE source = ? {}".format( + _make_relationtype_where(relinfos), + ), + )), + tuple(itt.chain( + (self._id,), + (ri._id for ri in (relinfos or ())), + )), + ) + return frozenset(yield_related(cur, _forbidden)) + + def __related_noskip(self, rowiter, forbidden): + return (Synset(self._db, self._relcache, synid) for synid, in rowiter) + + def __related_noskip_pairs(self, rowiter, forbidden): + return ( + ( + self._relcache.get_info_by_id(relid, en.RelationKind.synset), + Synset(self._db, self._relcache, synid), + ) + for relid, synid in rowiter + ) + + def __related_withskip(self, rowiter, forbidden): + return ( + fil_pair[0] + for fil_pair in self.__inner_related_withskip(rowiter, forbidden) + ) + + def __related_withskip_pairs(self, rowiter, forbidden): + re_rowiter = ( + (synid, isart, relid) + for relid, synid, isart in rowiter + ) + return ( + (relinfo, fil_syn) + for fil_syn, relinfo in self.__inner_related_withskip( + re_rowiter, + forbidden, ) - for row in cur: - yield Synset(self._db, row[0]) + ) + + def __inner_related_withskip(self, rowiter, forbidden): + return filter_artificial_related_synsets( + ( + ( + Synset(self._db, self._relcache, synid, isart), + self._relcache.get_info_by_id( + relid, + en.RelationKind.synset, + ), + ) + for synid, isart, relid in rowiter + ), + forbidden, + ) + + +class RelationInfo(bs.RelationInfoBase): + + def __init__(self, db, id_, kind): + """**NOTE:** This constructor should not be called directly. + + Use :class:`PLWordNet` methods to obtain relation info. + """ + self._db = db + # The ID is internal only, and can be used only with ``kind`` + self._id = id_ + self._kind = kind + + self._par = _UNFETCHED + self._name = _UNFETCHED + self._aliases = _UNFETCHED + + @property + def kind(self): + return self._kind + + @property + def parent(self): + if self._par is _UNFETCHED: + with closing(self._db.cursor()) as cur: + cur.execute( + u""" + SELECT name + FROM {parpart} JOIN {reltype} ON {parpart}.id = parentpart + WHERE {reltype}.id = ? 
+ """.format( + parpart=_RELPARENTPART_TABLES[self._kind], + reltype=_RELTYPE_TABLES[self._kind], + ), + (self._id,), + ) + row = cur.fetchone() + assert row is not None + # Convert the bogus '' value back to proper None + self._par = row[0] or None + return self._par + + @property + def name(self): + if self._name is _UNFETCHED: + with closing(self._db.cursor()) as cur: + cur.execute( + u""" + SELECT name + FROM {chlpart} JOIN {reltype} ON {chlpart}.id = childpart + WHERE {reltype}.id = ? + """.format( + chlpart=_RELCHILDPART_TABLES[self._kind], + reltype=_RELTYPE_TABLES[self._kind], + ), + (self._id,), + ) + row = cur.fetchone() + assert row is not None + self._name = row[0] + return self._name + + @property + def aliases(self): + if self._aliases is _UNFETCHED: + with closing(self._db.cursor()) as cur: + cur.execute( + u""" + SELECT name FROM {} + WHERE relationid = ? + ORDER BY name + """.format(_RELALIAS_TABLES[self._kind]), + (self._id,), + ) + self._aliases = tuple(row[0] for row in cur) + return self._aliases class _DBBuilder(object): def __init__(self, db): self._db = db - # Relations need to be added later to weed out nonexistent ones targets - # and avoid foreign key failures (which are a bit obtuse in sqlite3. - self._synrels = {} - self._lexrels = {} + self._node_handlers = { + nd.SynsetNode: self._insert_synset, + nd.LexicalUnitNode: self._insert_unit, + nd.RelationTypeNode: self._insert_relation_type, + } + # Ad-hoc relations (for cases where we don't have relation type nodes) + # need to be added later to weed out nonexistent ones targets and + # avoid foreign key failures (which are a bit obtuse in sqlite3). + self._adhoc_synrels = {} + self._adhoc_lexrels = {} # Synset to lexical units relations also need to be deferred. - self._synid2lexids = defaultdict(list) + self._synid2lexids = coll.defaultdict(list) # Cache IDs of constant values with closing(db.execute(u"SELECT value, id FROM pos")) as cur: self._posids = dict(cur) @@ -953,10 +1336,7 @@ class _DBBuilder(object): def __call__(self, reader): with self._db: for node in reader: - if isinstance(node, nd.SynsetNode): - self._insert_synset(node) - else: - self._insert_unit(node) + self._node_handlers[type(node)](node) with self._db: self._finalize_units() @@ -965,45 +1345,60 @@ class _DBBuilder(object): self._prune_empty_synsets() with self._db: - self._finalize_rels(u'synsetrelation', self._synrels) - self._finalize_rels(u'lexicalrelation', self._lexrels) + self._finalize_related( + self._adhoc_synrels, + en.RelationKind.synset, + ) + self._finalize_related( + self._adhoc_lexrels, + en.RelationKind.lexical, + ) def _insert_synset(self, syn_node): self._db.execute( - u"INSERT INTO synset (id, definition) VALUES (?, ?)", - (syn_node.id, syn_node.definition), + u""" + INSERT INTO synset (id, definition, isartificial) + VALUES (?, ?, ?) + """, + (syn_node.id, syn_node.definition, syn_node.is_artificial), ).close() # Related go into temp storage - self._synrels[syn_node.id] = [ - ( - self._ensure_enum_row_id( - u'synsetrelationtype', - u'id', - u'name', - relname, - ), - targetid, - ) - for relname, targetid in syn_node.related - ] + self._adhoc_synrels[syn_node.id] = syn_node.related def _insert_unit(self, lu_node): # Unfortunately, we can't insert into DB until we have all synsets. So # save nodes in temp dict. 
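        # (Buffer shape sketch:
        #      self._synid2lexids == {synset_id: [LexicalUnitNode, ...]}
        #  flushed by _finalize_units once every synset row exists, so the
        #  lexicalunit -> synset foreign key can be satisfied.)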
self._synid2lexids[lu_node.synset].append(lu_node) # But deal with relations - self._lexrels[lu_node.id] = [ - ( - self._ensure_enum_row_id( - u'lexicalrelationtype', - u'id', - u'name', - relname, - ), - targetid, + self._adhoc_lexrels[lu_node.id] = lu_node.related + + def _insert_relation_type(self, rel_node): + type_tbl = _RELTYPE_TABLES[rel_node.kind] + parent_tbl = _RELPARENTPART_TABLES[rel_node.kind] + child_tbl = _RELCHILDPART_TABLES[rel_node.kind] + + with closing(self._db.cursor()) as cur: + # Ensure the name is there + parname_id = self._ensure_rel_part_name( + parent_tbl, + rel_node.parent or u'', + ) + childname_id = self._ensure_rel_part_name(child_tbl, rel_node.name) + # And now the relation itself + cur.execute( + u"INSERT INTO {} (parentpart, childpart) VALUES (?, ?)" + .format(type_tbl), + (parname_id, childname_id), ) - for relname, targetid in lu_node.related - ] + # Do aliases if present + if rel_node.aliases: + rel_id = cur.lastrowid + alias_tbl = _RELALIAS_TABLES[rel_node.kind] + cur.executemany( + u"INSERT INTO {} (name, relationid) VALUES (?, ?)" + .format(alias_tbl), + ((nam, rel_id) for nam in rel_node.aliases), + ) def _finalize_units(self): # All synsets are in, can add units now. @@ -1017,13 +1412,15 @@ class _DBBuilder(object): id, lemma, pos, variant, synset, unitindex, definition, domain, verbaspect, - emotionmark, emotionexample1, emotionexample2 + isemotional, emotionmark, + emotionexample1, emotionexample2 ) VALUES ( :id, :lemma, :pos, :var, :syn, :uidx, :def, :dom, :va, - :emo_m, :emo_ex1, :emo_ex2 + :emo_is, :emo_m, + :emo_ex1, :emo_ex2 ) """, { @@ -1038,6 +1435,7 @@ class _DBBuilder(object): u'va': None if lu_node.verb_aspect is None else self._vaids[lu_node.verb_aspect.value], + u'emo_is': lu_node.is_emotional, u'emo_m': None if lu_node.emotion_markedness is None else self._emids[ @@ -1047,14 +1445,13 @@ class _DBBuilder(object): u'emo_ex2': lu_node.emotion_example_2, }, ) - except sqlite3.IntegrityError as e: - _log.warning( + except sqlite3.IntegrityError: + _LOG.exception( 'Pair (synset=%d, unitindex=%d) of unit %d ' - 'violates: %r', + 'causes integrity error', lu_node.synset, lu_node.unit_index, lu_node.id, - e.args, ) # Drop relations for this unit, if any self._lexrels.pop(lu_node.id, None) @@ -1071,7 +1468,6 @@ class _DBBuilder(object): lu_node.examples_sources) ), ) - cur.executemany( u""" INSERT INTO usagenote (unitid, note) @@ -1079,7 +1475,6 @@ class _DBBuilder(object): """, ((lu_node.id, note) for note in lu_node.usage_notes), ) - cur.executemany( u""" INSERT INTO externallink (unitid, link) @@ -1088,7 +1483,6 @@ class _DBBuilder(object): ((lu_node.id, link) for link in lu_node.external_links), ) - cur.executemany( u""" INSERT INTO unitemotionname (unitid, nameid) @@ -1099,7 +1493,6 @@ class _DBBuilder(object): for emo_name in lu_node.emotion_names ), ) - cur.executemany( u""" INSERT INTO unitemotionvaluation (unitid, valuationid) @@ -1111,46 +1504,129 @@ class _DBBuilder(object): ), ) - def _ensure_enum_row_id(self, table, id_field, value_field, value): - select_query = u"SELECT {id} FROM {table} WHERE {value} = ?".format( - id=id_field, - table=table, - value=value_field, - ) + def _finalize_related(self, related, kind): + # Insert all relation names from the related dict as global-level + # relations, if they have no SEP in them. If such relations are not + # defined, define them. If relation names do have SEP in them, don't + # try defining them, just assume the types are known and try getting ad + # their IDs. 
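        # (Illustrative: a bare name like u'hiponimia' is looked up, or
        #  defined on demand, by _get_child_relation; a combined
        #  u'<parent><SEP><child>' name must already exist and is resolved
        #  by _get_full_relation.)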
with closing(self._db.cursor()) as cur: - cur.execute(select_query, (value,)) - id_row = cur.fetchone() + for source_id, related_pairs in six.iteritems(related): + for relation_name, target_id in related_pairs: + relname_parent, relname_child = RelationInfo.split_name( + relation_name, + ) + try: + rel_id = ( + self._get_child_relation(relname_child, kind) + if relname_parent is None + else self._get_full_relation( + relname_parent, + relname_child, + kind, + ) + ) + except exc.InvalidRelationTypeException: + _LOG.exception( + 'Relation "%s" (between %d --> %d) unknown, ' + 'dropped', + relation_name, + source_id, + target_id, + ) + continue + + try: + cur.execute( + u""" + INSERT INTO {} (source, relationtype, target) + VALUES (?, ?, ?) + """.format(_RELINST_TABLES[kind]), + (source_id, rel_id, target_id), + ) + except sqlite3.IntegrityError: + _LOG.exception( + 'Relation "%s" between %d --> %d causes error, ' + 'dropped', + relation_name, + source_id, + target_id, + ) - if id_row is not None: - return id_row[0] + def _get_child_relation(self, relation_name, kind): + type_tbl = _RELTYPE_TABLES[kind] + parent_tbl = _RELPARENTPART_TABLES[kind] + child_tbl = _RELCHILDPART_TABLES[kind] - insert_query = u"INSERT INTO {table} ({value}) VALUES (?)".format( - table=table, - value=value_field, - ) with closing(self._db.cursor()) as cur: - cur.execute(insert_query, (value,)) + # Get the special empty string parent, since it will be used + # several times. + empty_parent_id = self._ensure_rel_part_name(parent_tbl, u'') + child_id = self._ensure_rel_part_name(child_tbl, relation_name) + # Now, try selecting the relation with empty parent. Otherwise, + # just add it. + cur.execute( + u"SELECT id FROM {} WHERE parentpart = ? AND childpart = ?" + .format(type_tbl), + (empty_parent_id, child_id), + ) + row = cur.fetchone() + + if row is not None: + return row[0] + + cur.execute( + u"INSERT INTO {} (parentpart, childpart) VALUES (?, ?)" + .format(type_tbl), + (empty_parent_id, child_id), + ) return cur.lastrowid - def _finalize_rels(self, tablename, rels_dict): - ins_query = ( - u"INSERT INTO {} (parentid, relid, childid) VALUES (?, ?, ?)" - .format(tablename) - ) + def _get_full_relation(self, parent_name, child_name, kind): + # For full relation names, only try selecting them, not adding the + # types, to reduce complexity. + with closing(self._db.cursor()) as cur: + cur.execute( + u""" + SELECT {reltype}.id + FROM {reltype} + JOIN {parpart} ON parentpart = {parpart}.id + JOIN {chlpart} ON childpart = {chlpart}.id + WHERE {parpart}.name = ? AND {chlpart}.name = ? + """.format( + reltype=_RELTYPE_TABLES[kind], + parpart=_RELPARENTPART_TABLES[kind], + chlpart=_RELCHILDPART_TABLES[kind], + ), + (parent_name, child_name), + ) + row = cur.fetchone() + if row is None: + raise exc.InvalidRelationTypeException( + kind, + (parent_name, child_name), + ) + return row[0] + def _ensure_rel_part_name(self, tbl_name, rel_name): with closing(self._db.cursor()) as cur: - for par_id, chls in six.iteritems(rels_dict): - for rel_id, chl_id in chls: - try: - cur.execute(ins_query, (par_id, rel_id, chl_id)) - except sqlite3.IntegrityError: - _log.warning( - 'Relation typed %s between %d --> %d causes ' - 'IntegrityError, dropped', - tablename, - par_id, - chl_id, - ) + # Is the name in already? 
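`_finalize_related` above leans on `RelationInfo.split_name` to decide whether a name is a bare child name or a full parent/child name. That method is not part of this hunk; judging from the test data later in this patch (names like u'B/b' produced by `RelationInfoBase.format_name`), it behaves roughly like this sketch, where the '/' separator is an assumption:

SEP = u'/'  # assumed separator, inferred from names like u'B/b' in the tests


def split_name(full_name):
    # A bare child name has no separator; a full name is u'parent/child'.
    parent, _, child = full_name.rpartition(SEP)
    return (parent or None), child


assert split_name(u'B/b') == (u'B', u'b')
assert split_name(u'hiperonimia') == (None, u'hiperonimia')

The real method presumably also validates its input (the caller in `_RelCache.get_ids` catches ValueError), which this sketch skips.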
+ cur.execute( + u"SELECT id FROM {} WHERE name = ?".format(tbl_name), + (rel_name,), + ) + row = cur.fetchone() + + if row is not None: + return row[0] + + # Insert it then + cur.execute( + u"INSERT INTO {} (name) VALUES (?)".format(tbl_name), + (rel_name,), + ) + + return cur.lastrowid def _prune_empty_synsets(self): with closing(self._db.cursor()) as cur: @@ -1169,7 +1645,7 @@ class _DBBuilder(object): return for synid in empties: - _log.warning('Synset %d is empty', synid) + _LOG.warning('Synset %d is empty', synid) self._db.execute( u"DELETE FROM synset WHERE id IN ({})".format( @@ -1179,4 +1655,226 @@ class _DBBuilder(object): ).close() +class _RelCache(object): + + def __init__(self, db): + self._db = db + self._ids = { + en.RelationKind.synset: {}, + en.RelationKind.lexical: {}, + } + self._infos = { + en.RelationKind.synset: _RelCacheInfoDict( + db, + en.RelationKind.synset, + ), + en.RelationKind.lexical: _RelCacheInfoDict( + db, + en.RelationKind.lexical, + ), + } + + def get_ids(self, relname, kind, allow_nonexistent=False): + idcache = self._ids[kind] + + try: + found = idcache[relname] + except KeyError: + found = None + else: + return found + + # If this is a full name (with parent and child), get that. + # Otherwise, check alias, childname and parentname - in that order. + # For bare parentname, return not one ID, but a set of all children + # IDs. + # Finally, if that fails, just raise an exception. + + try: + parent, name = RelationInfo.split_name(relname) + except ValueError: + raise exc.InvalidRelationTypeException(kind, relname) + + if parent is not None: + found = self._find_by_fullname(parent, name, kind) + else: + found = self._find_by_alias(name, kind) + if found is None: + found = self._find_by_childname(name, kind) + if found is None: + found = self._find_by_parentname(name, kind) + + if found is None: + if allow_nonexistent: + return () + else: + raise exc.InvalidRelationTypeException(kind, relname) + + idcache[relname] = found + return found + + def get_infos_by_name(self, relname, kind, allow_nonexistent=False): + infocache = self._infos[kind] + ids = self.get_ids(relname, kind, allow_nonexistent) + return tuple(infocache[id_] for id_ in ids) + + def get_info_by_id(self, id_, kind): + return self._infos[kind][id_] + + def get_all_of_kind(self, kind): + with closing(self._db.cursor()) as cur: + cur.execute(u"SELECT id FROM {}".format(_RELTYPE_TABLES[kind])) + return tuple(self._infos[kind][row[0]] for row in cur) + + def ensure_infos(self, item, kind): + if isinstance(item, RelationInfo): + return item, + if isinstance(item, six.integer_types): + return self.get_info_by_id(item, kind), + if isinstance(item, six.string_types): + return self.get_infos_by_name(item, kind) + raise TypeError( + repr(item) + ' is not an integer, string or RelationInfo', + ) + + def _find_by_fullname(self, parent, child, kind): + with closing(self._db.cursor()) as cur: + cur.execute( + u""" + SELECT {reltype}.id + FROM {reltype} + JOIN {parpart} ON parentpart = {parpart}.id + JOIN {chlpart} ON childpart = {chlpart}.id + WHERE {parpart}.name = ? AND {chlpart}.name = ? 
+ """.format( + reltype=_RELTYPE_TABLES[kind], + parpart=_RELPARENTPART_TABLES[kind], + chlpart=_RELCHILDPART_TABLES[kind], + ), + (parent or u'', child), + ) + row = cur.fetchone() + return None if row is None else tuple(row) + + def _find_by_alias(self, name, kind): + with closing(self._db.cursor()) as cur: + cur.execute( + u"SELECT relationid FROM {} WHERE name = ?".format( + _RELALIAS_TABLES[kind], + ), + (name,), + ) + row = cur.fetchone() + return None if row is None else tuple(row) + + def _find_by_childname(self, name, kind): + with closing(self._db.cursor()) as cur: + cur.execute( + u""" + SELECT {reltype}.id + FROM {reltype} JOIN {chlpart} ON childpart = {chlpart}.id + WHERE name = ? + """.format( + reltype=_RELTYPE_TABLES[kind], + chlpart=_RELCHILDPART_TABLES[kind], + ), + (name,), + ) + rows = cur.fetchall() + if len(rows) > 1: + raise exc.AmbiguousRelationTypeException(name) + return None if not rows else tuple(rows[0]) + + def _find_by_parentname(self, name, kind): + # This one can by design return a set of values: all children of a + # relation. + with closing(self._db.cursor()) as cur: + cur.execute( + u""" + SELECT {reltype}.id + FROM {reltype} JOIN {parpart} ON parentpart = {parpart}.id + WHERE name = ? + """.format( + reltype=_RELTYPE_TABLES[kind], + parpart=_RELPARENTPART_TABLES[kind], + ), + (name,), + ) + return tuple(row[0] for row in cur) or None + + +class _RelCacheInfoDict(dict): + + def __init__(self, db, kind): + super(_RelCacheInfoDict, self).__init__() + self.__db = db + self.__kind = kind + + def __missing__(self, id_): + ri = RelationInfo(self.__db, id_, self.__kind) + self[id_] = ri + return ri + + +def _make_include_exclude(include, exclude, fieldname): + """Creates ``WHERE`` clause and the parameter tuple. + + For simple ``IN`` and ``NOT IN`` case. + """ + if include is not None: + whereclause = u"WHERE {} IN ({})".format( + fieldname, + _qmarks(len(include)), + ) + includetuple = tuple(include) + else: + whereclause = u'' + includetuple = () + + if exclude is not None: + if not whereclause: + whereclause = u"WHERE {} NOT IN ({})" + else: + whereclause += u" AND {} NOT IN ({})" + + whereclause = whereclause.format( + fieldname, + _qmarks(len(exclude)), + ) + excludetuple = tuple(exclude) + else: + excludetuple = () + + return whereclause, includetuple + excludetuple + + +def _parse_related_relid(relid, relcache, relkind): + if relid is None: + return None + + if (isinstance(relid, coll.Iterable) and + not isinstance(relid, six.string_types)): + return frozenset(itt.chain.from_iterable( + relcache.ensure_infos(r, relkind) + for r in relid + )) + + return relcache.ensure_infos(relid, relkind) + + +def _make_relationtype_where(relinfos): + """Create a ``WHERE`` clause appendix. 
+
+    For limiting ``related`` queries to sets of relations.
+    """
+    return u'' if not relinfos else (
+        u'AND relationtype IN ({})'.format(_qmarks(len(relinfos)))
+    )
+
+
+def _qmarks(length):
+    """Create a sequence of question marks for a prepared sqlite query."""
+    return u','.join(itt.repeat(u'?', length))
+
+
 _this_storage_ = PLWordNet
diff --git a/plwn/utils/artifilter.py b/plwn/utils/artifilter.py
new file mode 100644
index 0000000000000000000000000000000000000000..47d9afdc50bce9bfcf346889526ee68f4acf1256
--- /dev/null
+++ b/plwn/utils/artifilter.py
@@ -0,0 +1,117 @@
+# coding: utf8
+
+# Copyright (C) 2017 Michał Kaliński
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Wrappers around synset-yielding generators.
+
+They handle exclusion of artificial synsets.
+
+The algorithm used here is pretty simple. When a relation edge reaches an
+artificial synset, a matching relation is searched in edges originating from
+the artificial synset. If found, the node it leads to is treated as target of
+the relation from the source node.
+
+If there is no matching relation, the edge to the artificial synset is treated
+as nonexistent.
+"""
+
+# XXX Both functions maintain a set of items that they have already
+# yielded, to ensure that the wrappers will not yield the same item more than
+# once.
+# This makes them slower and uses up more memory, but in practice plWordNet
+# structure is very unreliable with regards to not having loops and multiple
+# paths.
+# Look into removing these set objects only if speed / memory somehow becomes
+# a concern.
+# XXX At the same time, an assumption about plWordNet structure is made: that
+# no lexical unit belonging to an artificial synset is connected to any other
+# lexical unit by lexical relations. Surely, that should be easy to maintain?
+
+from __future__ import absolute_import, division
+
+
+__all__ = (
+    'filter_artificial_related_synsets',
+    'filter_artificial_synset_edges',
+)
+
+
+def filter_artificial_related_synsets(syn_and_relation_iter, forbidden=None):
+    """Filter a related synsets iterable.
+
+    Artificial synsets are skipped over, using each pair's relation.
+
+    :param syn_and_relation_iter: Iterable of pairs of
+        ``(target_synset, relation)``. The relation is needed to generate
+        edges skipping over the artificial synset.
+    :type syn_and_relation_iter: Iterable[Tuple[SynsetBase, RelationInfoBase]]
+
+    :param forbidden: Set of synset IDs that must not be yielded; primarily
+        meant for internal use, when the filter recurses.
+    :type forbidden: Optional[MutableSet[int]]
+
+    :return: The related synsets iterable with artificial synsets dealt with
+        according to the algorithm.
+    :rtype: Generator[Tuple[SynsetBase, RelationInfoBase]]
+    """
+    forbidden = forbidden or set()
+    for target_syn, relation in syn_and_relation_iter:
+        for filtered_syn in _inner_filter(target_syn, relation, forbidden):
+            yield filtered_syn, relation
+
+
+def filter_artificial_synset_edges(syn_rel_edges_iter):
+    """Filter an iterable of synset relation edges.
+
+    Edges to artificial synsets are replaced with edges to "next" nodes,
+    using the relation of the edge.
+
+    Edges starting in artificial synsets, and unreplaceable ones, are dropped.
+
+    :param syn_rel_edges_iter: Iterable of relationship edges between
+        synsets.
+    :type syn_rel_edges_iter: Iterable[RelationEdge]
+
+    :return: The synset edges iterable with artificial synsets dealt with
+        according to the algorithm.
+    :rtype: Generator[RelationEdge]
+    """
+    for edge in syn_rel_edges_iter:
+        # Drop all edges starting in artificial synsets
+        if edge.source.is_artificial:
+            continue
+
+        forbidden = set()
+
+        for filtered_syn in _inner_filter(edge.target,
+                                          edge.relation,
+                                          forbidden):
+            yield edge._replace(target=filtered_syn)
+
+
+def _inner_filter(target_syn, relation, forbidden):
+    if target_syn.id in forbidden:
+        return
+    forbidden.add(target_syn.id)
+
+    if target_syn.is_artificial:
+        # Apply the filter recursively for any artificial synset found in the
+        # target set. This should not cause recursion more than two-three
+        # levels deep.
+        for rec_target_syn in target_syn.related(relation, True, forbidden):
+            yield rec_target_syn
+    else:
+        yield target_syn
diff --git a/plwn/utils/graphmlout.py b/plwn/utils/graphmlout.py
index 910b545ddf85ac99b6a6e9af23f2f0eb292bf546..fecf2ff92edd8e0a7f93b4e2632f3c735f61b737 100644
--- a/plwn/utils/graphmlout.py
+++ b/plwn/utils/graphmlout.py
@@ -1,19 +1,32 @@
-"""Implementation that stores data from plWordNet as a GraphML tree."""
+# coding: utf8
+
+# Copyright (C) 2017 Michał Kaliński
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 from __future__ import absolute_import, division
 
-try:
-    str = unicode
-except NameError:
-    pass
+
 import collections as coll
 import functools as funct
 import json
 import xml.etree.cElementTree as et
 
-from six import iteritems
+import six
+
+from .. import enums as en
 
-from ..enums import make_values_tuple
 
 __all__ = (
     'GraphMLWordNet',
@@ -25,6 +38,7 @@ __all__ = (
     'UNS_IN_SYN',
 )
 
+
 # Constants for graphml exporting (library user should just use the string
 # values).
 # They also double as prefixes for IDs.
@@ -48,26 +62,32 @@ class GraphMLWordNet(object):
     #: to string which will be the content of a ``data`` tag.
     _DataType = coll.namedtuple('_DataType', ('typename', 'convert'))
 
-    DATA_TYPE_INT = _DataType(u'long', lambda val: str(int(val)))
-    DATA_TYPE_STR = _DataType(u'string', str)
+    DATA_TYPE_INT = _DataType(u'long', lambda val: six.text_type(int(val)))
+    DATA_TYPE_STR = _DataType(u'string', six.text_type)
+    DATA_TYPE_OPTSTR = _DataType(
+        u'string',
+        lambda val: u'' if val is None else six.text_type(val),
+    )
     DATA_TYPE_BOOL = _DataType(
         u'boolean',
-        (lambda val: u'true' if val else u'false'),
+        lambda val: u'true' if val else u'false',
     )
     DATA_TYPE_JSON = _DataType(u'string', json.dumps)
-    DATA_TYPE_ENUMVAL = _DataType(u'string', lambda val: str(val.value))
+    DATA_TYPE_ENUMVAL = _DataType(
+        u'string',
+        lambda val: six.text_type(val.value),
+    )
     # Data type for enum that can also be None.
DATA_TYPE_OPTENUMVAL = _DataType( u'string', - lambda val: '' if val is None else str(val.value), + lambda val: u'' if val is None else six.text_type(val.value), ) DATA_TYPE_ENUMSEQ = _DataType( u'string', - lambda val: json.dumps(make_values_tuple(val)), + lambda val: json.dumps(en.make_values_tuple(val)), ) def __init__(self): - """Initialize GraphMLWordNet.""" self._root = et.Element( u'graphml', # The commented out xmlns declaration is correct, but inserting @@ -85,9 +105,7 @@ class GraphMLWordNet(object): self._attr_types = {} def add_attribute_type(self, id_, name, type_, for_=u'node'): - """Adds an attribute. - - Which can be then assigned to node or edge instances. + """Adds attribute which can be then assigned to node or edge instances. :param str id_: Unique (in the whole XML) identifier of the attribute type. @@ -175,7 +193,7 @@ class GraphMLWordNet(object): self._tree.write(file_, 'utf-8') def _add_attributes_to(self, element, attributes): - for attr_id, attr_val in iteritems(attributes): + for attr_id, attr_val in six.iteritems(attributes): attr_type = self._attr_types[attr_id] attr = et.SubElement( element, @@ -189,8 +207,8 @@ class GraphMLBuilder(object): """Class that bridges. :class:`plwn.bases.PLWordNetBase` and :class:`GraphMLWordNet`, - extracting data from the former and putting it into the latter - in the appropriate format. + extracting data from the former and putting it + into the latter in the appropriate format. This is an auxiliary class which usually shouldn't be constructed directly. Use an appropriate method from :class:`plwn.bases.PLWordNet`. @@ -236,7 +254,8 @@ class GraphMLBuilder(object): included_nodes, excluded_nodes, included_relations, - excluded_relations): + excluded_relations, + skip_artificial_synsets=True): """See :meth:`plwn.bases.PLWordNetBase.to_graphml` for description.""" added_attributes = ( self._add_synset_attrs(included_attributes, excluded_attributes) @@ -247,10 +266,9 @@ class GraphMLBuilder(object): ) visited_nodes = set() - for edge in self._plwn.synset_relation_edges( - included_relations, - excluded_relations, - ): + for edge in self._plwn.synset_relation_edges(included_relations, + excluded_relations, + skip_artificial_synsets): prefixed_source = self._prefix_synset_id( edge.source.id, prefix_ids, @@ -264,12 +282,10 @@ class GraphMLBuilder(object): # added along edges, but it's not a problem if a valid node is not # included, because it will eventually be included by another edge, # if it's not completely secluded (and if it is, we don't want it). 
- if self._check_include_exclude_2( - edge.source.id, - edge.target.id, - included_nodes, - excluded_nodes, - ): + if self._check_include_exclude_2(edge.source.id, + edge.target.id, + included_nodes, + excluded_nodes): if edge.source.id not in visited_nodes: visited_nodes.add(edge.source.id) self._graphout.add_node( @@ -310,7 +326,6 @@ class GraphMLBuilder(object): excluded_nodes, included_relations, excluded_relations): - added_attributes = ( self._add_lexunit_attrs(included_attributes, excluded_attributes) if (include_attributes or @@ -331,12 +346,10 @@ class GraphMLBuilder(object): prefix_ids, ) - if self._check_include_exclude_2( - edge.source.id, - edge.target.id, - included_nodes, - excluded_nodes, - ): + if self._check_include_exclude_2(edge.source.id, + edge.target.id, + included_nodes, + excluded_nodes): if edge.source.id not in visited_nodes: visited_nodes.add(edge.source.id) self._graphout.add_node( @@ -380,7 +393,8 @@ class GraphMLBuilder(object): included_synset_nodes, excluded_synset_nodes, included_lexical_unit_nodes, - excluded_lexical_unit_nodes): + excluded_lexical_unit_nodes, + skip_artificial_synsets=True): synset_attributes = ( self._add_synset_attrs( @@ -414,14 +428,13 @@ class GraphMLBuilder(object): for syn_edge in self._plwn.synset_relation_edges( included_synset_relations, excluded_synset_relations, + skip_artificial_synsets, ): - if self._check_include_exclude_2( - syn_edge.source.id, - syn_edge.target.id, - included_synset_nodes, - excluded_synset_nodes, - ): + if self._check_include_exclude_2(syn_edge.source.id, + syn_edge.target.id, + included_synset_nodes, + excluded_synset_nodes): self._add_mixed_synset_edge( syn_edge, synset_attributes, @@ -437,12 +450,10 @@ class GraphMLBuilder(object): excluded_lexical_unit_relations, ): - if self._check_include_exclude_2( - lex_edge.source.id, - lex_edge.target.id, - included_lexical_unit_nodes, - excluded_lexical_unit_nodes, - ): + if self._check_include_exclude_2(lex_edge.source.id, + lex_edge.target.id, + included_lexical_unit_nodes, + excluded_lexical_unit_nodes): self._add_mixed_lexunit_edge( lex_edge, synset_attributes, @@ -684,8 +695,8 @@ class GraphMLBuilder(object): ), ) - includer(u'relations', GraphMLWordNet.DATA_TYPE_JSON) - includer(u'definition', GraphMLWordNet.DATA_TYPE_STR) + includer(u'definition', GraphMLWordNet.DATA_TYPE_OPTSTR) + includer(u'is_artificial', GraphMLWordNet.DATA_TYPE_BOOL) return includer.included_attrs @@ -703,14 +714,14 @@ class GraphMLBuilder(object): includer(u'lemma', GraphMLWordNet.DATA_TYPE_STR) includer(u'pos', GraphMLWordNet.DATA_TYPE_ENUMVAL) includer(u'variant', GraphMLWordNet.DATA_TYPE_INT) - includer(u'definition', GraphMLWordNet.DATA_TYPE_STR) + includer(u'definition', GraphMLWordNet.DATA_TYPE_OPTSTR) includer(u'sense_examples', GraphMLWordNet.DATA_TYPE_JSON) includer(u'sense_examples_sources', GraphMLWordNet.DATA_TYPE_JSON) includer(u'external_links', GraphMLWordNet.DATA_TYPE_JSON) includer(u'usage_notes', GraphMLWordNet.DATA_TYPE_JSON) includer(u'domain', GraphMLWordNet.DATA_TYPE_ENUMVAL) - includer(u'relations', GraphMLWordNet.DATA_TYPE_JSON) includer(u'verb_aspect', GraphMLWordNet.DATA_TYPE_OPTENUMVAL) + includer(u'is_emotional', GraphMLWordNet.DATA_TYPE_BOOL) includer(u'emotion_markedness', GraphMLWordNet.DATA_TYPE_OPTENUMVAL) includer(u'emotion_names', GraphMLWordNet.DATA_TYPE_ENUMSEQ) includer(u'emotion_valuations', GraphMLWordNet.DATA_TYPE_ENUMSEQ) @@ -731,13 +742,13 @@ class GraphMLBuilder(object): def _prefix_synset_id(cls, id_, do_prefix): return 
(u'{}-{}'.format(GRAPH_TYPE_SYNSET, id_)
                 if do_prefix
-                else str(id_))
+                else six.text_type(id_))
 
     @classmethod
     def _prefix_lexunit_id(cls, id_, do_prefix):
         return (u'{}-{}'.format(GRAPH_TYPE_UNIT, id_)
                 if do_prefix
-                else str(id_))
+                else six.text_type(id_))
 
     @staticmethod
     def _check_include_exclude(item, include_set, exclude_set):
@@ -775,9 +786,9 @@ class _AttrIncluder(object):
     """
 
     def __init__(self, graphout, type_prefix, checkfunc):
-        """.
-
-        :param GraphMLWordNet graphout: The output graph instance.
+        """Initialize the attribute includer.
+
+        :param GraphMLWordNet graphout: The output graph instance.
 
         :param str type_prefix: Unique names of attributes will be prefixed
             with this.
diff --git a/plwn/utils/relinfotuple.py b/plwn/utils/relinfotuple.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1290cc485945d9a27814650c74a2746730d4012
--- /dev/null
+++ b/plwn/utils/relinfotuple.py
@@ -0,0 +1,64 @@
+# coding: utf8
+
+# Copyright (C) 2017 Michał Kaliński
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import absolute_import, division
+
+
+from six.moves import range
+
+
+__all__ = 'RelationInfoTuple',
+
+
+class RelationInfoTuple(tuple):
+    """Tuple subclass for :class:`~plwn.bases.RelationInfoBase` instances.
+
+    Meant as return value for ``relations`` properties.
+
+    Overrides search methods to use :meth:`~plwn.bases.RelationInfoBase.eqv`
+    for membership testing, to make checking if a synset / unit has some
+    relation easier.
+    """
+
+    __slots__ = ()
+
+    def __repr__(self):
+        return (
+            self.__class__.__name__ +
+            super(RelationInfoTuple, self).__repr__()
+        )
+
+    def __contains__(self, item):
+        return any(rel.eqv(item) for rel in self)
+
+    def index(self, x, i=None, j=None):
+        rend = min(j, len(self)) if j is not None else len(self)
+
+        for ind in range(i or 0, rend):
+            if self[ind].eqv(x):
+                return ind
+
+        raise ValueError(repr(x) + ' not in tuple')
+
+    def count(self, x):
+        cnt = 0
+
+        for rel in self:
+            if rel.eqv(x):
+                cnt += 1
+
+        return cnt
diff --git a/plwn/utils/sorting.py b/plwn/utils/sorting.py
index bd37a82ec17a9f4fd5c995774399fc22daa70dbb..69cbc58fbac9f933b02cfc1d933ee5dc2a0829df 100644
--- a/plwn/utils/sorting.py
+++ b/plwn/utils/sorting.py
@@ -1,3 +1,20 @@
+# coding: utf8
+
+# Copyright (C) 2017 Michał Kaliński
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
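To illustrate the `RelationInfoTuple` semantics added above: membership and search go through `eqv` rather than `==`, so a plain relation name can be tested directly against a tuple of relation objects. A rough, self-contained sketch follows; the stand-in `eqv` just compares names, while the real one is defined on `RelationInfoBase`.

class FakeRel(object):

    def __init__(self, name):
        self.name = name

    def eqv(self, other):
        # Stand-in: accept another FakeRel or a bare name string
        other_name = other.name if isinstance(other, FakeRel) else other
        return self.name == other_name


class EqvTuple(tuple):

    def __contains__(self, item):
        return any(rel.eqv(item) for rel in self)


rels = EqvTuple((FakeRel(u'hiperonimia'), FakeRel(u'deminutywność')))
assert u'hiperonimia' in rels
assert u'meronimia' not in rels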
+ """Sorting keys that provide locale-dependant alphabetical sorting.""" from __future__ import absolute_import, division diff --git a/plwn/utils/tupwrap.py b/plwn/utils/tupwrap.py deleted file mode 100644 index 7e94abc3e65c9dc5cb1205e70fcbe81fd904e4db..0000000000000000000000000000000000000000 --- a/plwn/utils/tupwrap.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Wrapper for all functions that return generators. - -Calling the wrapped generator will wrap the contents in a tuple -(as a faster, chaining way or ``tuple(generator)``). -""" - -from __future__ import absolute_import, unicode_literals, division - - -from functools import wraps - - -__all__ = 'TupWrapper', 'tup_wrapped' - - -class TupWrapper(object): - """Wrapper class for generator objects. - - Adds a ``__call__`` method which will convert the wrapped generator to - a tuple. - """ - - __slots__ = '_gen', - - def __init__(self, generator): - """Initialize TupWrapper.""" - self._gen = generator - - def __iter__(self): - return self._gen - - def __call__(self): - return tuple(self._gen) - - def __repr__(self): - return '{}({!r})'.format(self.__class__.__name__, self._gen) - - -def tup_wrapped(fn): - """Decorator for functions that return generators. - - The return value of the wrapped function will be wrapped by - :class:`TupWrapper`. - - This decorator is the only way to wrap around the output of generator - functions. - """ - - @wraps(fn) - def decorated(*args, **kwargs): - return TupWrapper(fn(*args, **kwargs)) - - return decorated diff --git a/scripts/clean_wndb.sql b/scripts/clean_wndb.sql new file mode 100644 index 0000000000000000000000000000000000000000..631b05186e1c8361a8e588571a9fb1383e336ee1 --- /dev/null +++ b/scripts/clean_wndb.sql @@ -0,0 +1,32 @@ +-- Clean a plWN database of things that cause the API reading to break. +-- This is ad hoc, for cases when we need one-off dumps for internal usage. +-- Let's not involve SWORD, please. + +-- Remove relations with types that don't exist or one of the endpoints that +-- don't exist. +DELETE synsetrelation +FROM synsetrelation + LEFT JOIN relationtype ON REL_ID = relationtype.ID + LEFT JOIN synset AS par_syn ON synsetrelation.PARENT_ID = par_syn.ID + LEFT JOIN synset AS chl_syn ON CHILD_ID = chl_syn.ID +WHERE relationtype.ID IS NULL OR par_syn.ID IS NULL OR chl_syn.ID IS NULL; + +DELETE lexicalrelation +FROM lexicalrelation + LEFT JOIN relationtype ON REL_ID = relationtype.ID + LEFT JOIN lexicalunit AS par_lu ON lexicalrelation.PARENT_ID = par_lu.ID + LEFT JOIN lexicalunit AS chl_lu ON CHILD_ID = chl_lu.ID +WHERE relationtype.ID IS NULL OR par_lu.ID IS NULL OR chl_lu.ID IS NULL; + +-- Remove relations that are instances of parent relations (if a relation +-- has children, only they are legal values.) +DELETE FROM synsetrelation WHERE REL_ID IN ( + SELECT rel_outer.ID + FROM relationtype AS rel_outer + WHERE rel_outer.PARENT_ID IS NULL + AND EXISTS ( + SELECT 1 + FROM relationtype AS rel_inner + WHERE rel_inner.PARENT_ID = rel_outer.ID + ) +); diff --git a/scripts/patch_old_wndb.sql b/scripts/patch_old_wndb.sql new file mode 100644 index 0000000000000000000000000000000000000000..ea32233a0ee5be050c1432a483c3b5dfef5542b2 --- /dev/null +++ b/scripts/patch_old_wndb.sql @@ -0,0 +1,21 @@ +-- Used to add the emotion table required for schema 2, though it will remain +-- empty. 
+CREATE TABLE IF NOT EXISTS `emotion` (
+  `id` bigint(20) NOT NULL AUTO_INCREMENT,
+  `lexicalunit_id` bigint(20) NOT NULL,
+  `emotions` varchar(255) COLLATE utf8_polish_ci DEFAULT NULL,
+  `valuations` varchar(255) COLLATE utf8_polish_ci DEFAULT NULL,
+  `markedness` varchar(5) CHARACTER SET utf8 DEFAULT NULL,
+  `unitStatus` int(1) DEFAULT '0',
+  `example1` varchar(255) COLLATE utf8_polish_ci DEFAULT NULL,
+  `example2` varchar(255) COLLATE utf8_polish_ci DEFAULT NULL,
+  `owner` varchar(255) COLLATE utf8_polish_ci NOT NULL,
+  `creation_date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP
+    ON UPDATE CURRENT_TIMESTAMP,
+  `super_anotation` int(1) DEFAULT '0',
+
+  PRIMARY KEY (`id`),
+  KEY `idx` (`lexicalunit_id`)
+) ENGINE=MyISAM AUTO_INCREMENT=4300 CHARSET=utf8 COLLATE=utf8_polish_ci;
+
+ALTER TABLE lexicalunit ADD verb_aspect int(11) DEFAULT '0';
diff --git a/scripts/verify_uby_lmf_file.py b/scripts/verify_uby_lmf_file.py
new file mode 100755
index 0000000000000000000000000000000000000000..9026e1a0b6acb9b970a914a2c6b780cd7236d502
--- /dev/null
+++ b/scripts/verify_uby_lmf_file.py
@@ -0,0 +1,87 @@
+#!/usr/bin/python
+"""Script that checks an UBY-LMF file containing plWordNet data for consistency.
+
+Specifically, it checks for the following errors:
+ * Empty synsets.
+ * Lexical units belonging to nonexistent synsets.
+ * Synset and lexical relations to nonexistent synsets / units.
+
+The UBY-LMF module also prints out encountered errors, so be sure to capture
+the standard output.
+"""
+
+import argparse as argp
+import collections as coll
+# import itertools as itt
+import logging as log
+import sys
+
+from six import iteritems
+
+import plwn.ubylmf_reader as ubyr
+
+
+def main():
+    ap = argp.ArgumentParser(description=__doc__)
+    ap.add_argument('uby_lmf_file', help='The file to check.')
+
+    av = ap.parse_args()
+    # Log every error and warning from the plwn module along with this
+    # script's output
+    plwn_log = log.getLogger('plwn')
+    plwn_log_h = log.StreamHandler(sys.stdout)
+    plwn_log_h.setFormatter(log.Formatter('!! Log from %(name)s: %(message)s'))
+    plwn_log.addHandler(plwn_log_h)
+    plwn_log.setLevel(log.WARNING)
+
+    _verify(ubyr.iterread(av.uby_lmf_file))
+
+
+def _verify(uby_lmf_reader):
+    synset_defs = set()
+    unit_defs = set()
+    synsets2units = coll.defaultdict(list)
+    synrels = {}
+    lexrels = {}
+
+    for record in uby_lmf_reader:
+        if isinstance(record, ubyr.SynsetNode):
+            synset_defs.add(record.id)
+            synrels[record.id] = frozenset(item[0] for item in record.related)
+        else:
+            unit_defs.add(record.id)
+            lexrels[record.id] = frozenset(item[0] for item in record.related)
+            synsets2units[record.synset].append(record.id)
+
+    # Empty synsets
+    print('!! Empty synsets:')
+    empties = synset_defs.difference(synsets2units)
+    if empties:
+        for synid in empties:
+            print('S' + str(synid))
+
+    # Lexical units belonging to nonexistent synsets
+    print('!! Units belonging to nonexistent synsets:')
+    syn_nonexistent = set(synsets2units).difference(synset_defs)
+    if syn_nonexistent:
+        for synid in syn_nonexistent:
+            for lexid in synsets2units[synid]:
+                print('L' + str(lexid))
+
+    # Synrelations to nonexistent targets
+    print('!! Synset relations to nonexistent targets:')
+    for parent, targets in iteritems(synrels):
+        bad_targets = targets - synset_defs
+        for bad in bad_targets:
+            print('S{} -> S{}'.format(parent, bad))
+
+    # Lexrelations to nonexistent targets
+    print('!! Lexical relations to nonexistent targets:')
+    for parent, targets in iteritems(lexrels):
+        bad_targets = targets - unit_defs
+        for bad in bad_targets:
+            print('L{} -> L{}'.format(parent, bad))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/setup.py b/setup.py
index 8bd5719020890f2ce2a65c6b017eae15904eaa92..2277dc8fcca7ed664ade8713a67af130ba30a710 100644
--- a/setup.py
+++ b/setup.py
@@ -1,28 +1,33 @@
 # coding: utf8
-from setuptools import setup, find_packages
-import sys
+from setuptools import setup
+import os
 
 
+# Name of the environment variable that excludes the default storage file from
+# the distribution (to save space).
+# The *.egg-info directory must not exist for this hack to work.
+ENVNAME_DIST_NODEFAULT = 'PLWN_API_DIST_NO_DEFAULT_STORAGE'
 
-def install_requires():
-    req = ['six>=1.10']
-    # Only require enum backport in python2 (python3 has better stdlib)
-    if sys.version_info.major < 3:
-        req.append('enum34>=1.1.2')
-    return req
+setup_args = dict(
+    name='PLWN_API',
+    version='0.23',
+    license='LGPL-3.0+',
+    description='Python API to access plWordNet lexicon',
+    author='Michał Kaliński',
+    author_email='michal.kalinski@pwr.edu.pl',
 
-if __name__ == '__main__':
-    setup(
-        name='plwn_api',
-        version='0.9',
-        description='Python API to access plWordNet lexicon',
+    packages=['plwn', 'plwn.readers', 'plwn.storages', 'plwn.utils'],
+    package_data={'plwn.default': ['*.db']},
+
+    test_suite='tests.setuptools_loader.setuptools_load_tests',
+    install_requires=['six>=1.10', 'enum34>=1.1.2;python_version<"3.4"'],
+    zip_safe=False,
+)
 
-        author='Michał Kaliński',
-        author_email='michal.kalinski@pwr.edu.pl',
-        packages=find_packages(exclude=['tests', 'tests.*']),
-        package_data={'plwn': ['relation_aliases.tsv']},
-        test_suite='tests.setuptools_loader.setuptools_load_tests',
+
+if __name__ == '__main__':
+    # Include the "default storage" subpackage by default
+    if not int(os.environ.get(ENVNAME_DIST_NODEFAULT, 0)):
+        setup_args['packages'].append('plwn.default')
 
-        install_requires=install_requires(),
-    )
+    setup(**setup_args)
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/tests/abstract_cases/__init__.py b/tests/abstract_cases/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..88772d58a91535c5e6b2bf2f8cfc7964b13a8b3c
--- /dev/null
+++ b/tests/abstract_cases/__init__.py
@@ -0,0 +1,3 @@
+from ._make_abstract import load_tests_from_abstract
+
+__all__ = 'load_tests_from_abstract',
diff --git a/tests/abstract_cases/_make_abstract.py b/tests/abstract_cases/_make_abstract.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a6f9df4595e0c9df72a1ba7a0c3fd4dc994e5dc
--- /dev/null
+++ b/tests/abstract_cases/_make_abstract.py
@@ -0,0 +1,29 @@
+from __future__ import absolute_import, division
+
+
+from . import test_graphml, test_plwordnet, test_unit_and_synset
+
+import unittest as ut
+
+
+__all__ = 'load_tests_from_abstract',
+
+_ALL_MODS = test_graphml, test_plwordnet, test_unit_and_synset
+
+
+def load_tests_from_abstract(loader, name_suffix, plwn_class):
+    retsuite = ut.TestSuite()
+    for mod in _ALL_MODS:
+        for tcase in _iter_mod_cases(mod, name_suffix, plwn_class):
+            retsuite.addTests(loader.loadTestsFromTestCase(tcase))
+    return retsuite
+
+
+def _iter_mod_cases(mod, name_suffix, plwn_class):
+    moddict = vars(mod)
+    for name in mod.__all__:
+        obj = moddict[name]
+        if issubclass(obj, ut.TestCase):
+            # Yield a subclass with suffixed name and the _PLWNClass variable
+            # set to a real value.
+            yield type(name + name_suffix, (obj,), {'_PLWNClass': plwn_class})
diff --git a/tests/abstract_cases/asciio/graphml-edges.asciio b/tests/abstract_cases/asciio/graphml-edges.asciio
new file mode 100644
index 0000000000000000000000000000000000000000..012d9493a0eaf3e8f7a3ec8142817d84c65bc617
Binary files /dev/null and b/tests/abstract_cases/asciio/graphml-edges.asciio differ
diff --git a/tests/abstract_cases/asciio/graphml-mixed.asciio b/tests/abstract_cases/asciio/graphml-mixed.asciio
new file mode 100644
index 0000000000000000000000000000000000000000..7d39908abc799dabab6345211fa2f13518dcdbbf
Binary files /dev/null and b/tests/abstract_cases/asciio/graphml-mixed.asciio differ
diff --git a/tests/abstract_cases/asciio/relation-edges-with-artificial-loop.asciio b/tests/abstract_cases/asciio/relation-edges-with-artificial-loop.asciio
new file mode 100644
index 0000000000000000000000000000000000000000..bac81e9063d157181070286503f61836bb4f291a
Binary files /dev/null and b/tests/abstract_cases/asciio/relation-edges-with-artificial-loop.asciio differ
diff --git a/tests/abstract_cases/asciio/relation-edges-with-artificial.asciio b/tests/abstract_cases/asciio/relation-edges-with-artificial.asciio
new file mode 100644
index 0000000000000000000000000000000000000000..ef961540eb391f191c5852675002cedb1559b5ba
Binary files /dev/null and b/tests/abstract_cases/asciio/relation-edges-with-artificial.asciio differ
diff --git a/tests/abstract_cases/asciio/relation-edges.asciio b/tests/abstract_cases/asciio/relation-edges.asciio
new file mode 100644
index 0000000000000000000000000000000000000000..d147506010ecee274b3cd0b543f24755e4190d35
Binary files /dev/null and b/tests/abstract_cases/asciio/relation-edges.asciio differ
diff --git a/tests/abstract_cases/asciio/synset-related-with-artificial-loop.asciio b/tests/abstract_cases/asciio/synset-related-with-artificial-loop.asciio
new file mode 100644
index 0000000000000000000000000000000000000000..d7b9da85dce34d60749913554959e5b7b34b797f
Binary files /dev/null and b/tests/abstract_cases/asciio/synset-related-with-artificial-loop.asciio differ
diff --git a/tests/abstract_cases/asciio/synset-related-with-artificial.asciio b/tests/abstract_cases/asciio/synset-related-with-artificial.asciio
new file mode 100644
index 0000000000000000000000000000000000000000..415c27ae2afd3ef67577aff69184b01f36c169e5
Binary files /dev/null and b/tests/abstract_cases/asciio/synset-related-with-artificial.asciio differ
diff --git a/tests/abstract_cases/test_graphml.py b/tests/abstract_cases/test_graphml.py
new file mode 100644
index 0000000000000000000000000000000000000000..260d487a53b3e4fb990c9759065fdc0c481e9314
--- /dev/null
+++ b/tests/abstract_cases/test_graphml.py
@@ -0,0 +1,1712 @@
+# coding: utf8
+from __future__ import absolute_import, division
+
+
+import unittest as ut
+import io
+import
json + +from plwn.readers import nodes as nd +from plwn.bases import RelationInfoBase +from plwn import enums as en + +try: + import xml.etree.cElementTree as et +except ImportError: + import xml.etree.ElementTree as et + + +__all__ = ( + 'AttributesTest', + 'SynsetGraphTest', + 'LexicalGraphTest', + 'MixedGraphTest', +) + + +class AttributesTest(ut.TestCase): + """Test storing attributes in graphml output.""" + + _PLWNClass = None # Override in subclass + + def __assert_attr_syn_definition(self, attrkeys): + self.assertEqual( + attrkeys[u'syn_data-definition'], + { + u'for': u'node', + u'attr.name': u'definition', + u'attr.type': u'string', + }, + ) + + def __assert_attr_syn_is_artificial(self, attrkeys): + self.assertEqual( + attrkeys[u'syn_data-is_artificial'], + { + u'for': u'node', + u'attr.name': u'is_artificial', + u'attr.type': u'boolean', + }, + ) + + def __assert_attr_lex_lemma(self, attrkeys): + self.assertEqual( + attrkeys[u'lu_data-lemma'], + { + u'for': u'node', + u'attr.name': u'lemma', + u'attr.type': u'string', + }, + ) + + def __assert_attr_lex_pos(self, attrkeys): + self.assertEqual( + attrkeys[u'lu_data-pos'], + { + u'for': u'node', + u'attr.name': u'pos', + u'attr.type': u'string', + }, + ) + + def __assert_attr_lex_variant(self, attrkeys): + self.assertEqual( + attrkeys[u'lu_data-variant'], + { + u'for': u'node', + u'attr.name': u'variant', + u'attr.type': u'long', + }, + ) + + def __assert_attr_lex_definition(self, attrkeys): + self.assertEqual( + attrkeys[u'lu_data-definition'], + { + u'for': u'node', + u'attr.name': u'definition', + u'attr.type': u'string', + }, + ) + + def __assert_attr_lex_sense_examples(self, attrkeys): + self.assertEqual( + attrkeys[u'lu_data-sense_examples'], + { + u'for': u'node', + u'attr.name': u'sense_examples', + u'attr.type': u'string', + }, + ) + + def __assert_attr_lex_sense_examples_sources(self, attrkeys): + self.assertEqual( + attrkeys[u'lu_data-sense_examples_sources'], + { + u'for': u'node', + u'attr.name': u'sense_examples_sources', + u'attr.type': u'string', + }, + ) + + def __assert_attr_lex_external_links(self, attrkeys): + self.assertEqual( + attrkeys[u'lu_data-external_links'], + { + u'for': u'node', + u'attr.name': u'external_links', + u'attr.type': u'string', + }, + ) + + def __assert_attr_lex_usage_notes(self, attrkeys): + self.assertEqual( + attrkeys[u'lu_data-usage_notes'], + { + u'for': u'node', + u'attr.name': u'usage_notes', + u'attr.type': u'string', + }, + ) + + def __assert_attr_lex_domain(self, attrkeys): + self.assertEqual( + attrkeys[u'lu_data-domain'], + { + u'for': u'node', + u'attr.name': u'domain', + u'attr.type': u'string', + }, + ) + + def __assert_attr_lex_verb_aspect(self, attrkeys): + self.assertEqual( + attrkeys[u'lu_data-verb_aspect'], + { + u'for': u'node', + u'attr.name': u'verb_aspect', + u'attr.type': u'string', + }, + ) + + def __assert_attr_lex_is_emotional(self, attrkeys): + self.assertEqual( + attrkeys[u'lu_data-is_emotional'], + { + u'for': u'node', + u'attr.name': u'is_emotional', + u'attr.type': u'boolean', + }, + ) + + def __assert_attr_lex_emotion_markedness(self, attrkeys): + self.assertEqual( + attrkeys[u'lu_data-emotion_markedness'], + { + u'for': u'node', + u'attr.name': u'emotion_markedness', + u'attr.type': u'string', + }, + ) + + def __assert_attr_lex_emotion_names(self, attrkeys): + self.assertEqual( + attrkeys[u'lu_data-emotion_names'], + { + u'for': u'node', + u'attr.name': u'emotion_names', + u'attr.type': u'string', + }, + ) + + def __assert_attr_lex_emotion_valuations(self, 
attrkeys): + self.assertEqual( + attrkeys[u'lu_data-emotion_valuations'], + { + u'for': u'node', + u'attr.name': u'emotion_valuations', + u'attr.type': u'string', + }, + ) + + def __assert_attr_lex_emotion_example(self, attrkeys): + self.assertEqual( + attrkeys[u'lu_data-emotion_example'], + { + u'for': u'node', + u'attr.name': u'emotion_example', + u'attr.type': u'string', + }, + ) + + def __assert_attr_lex_emotion_example_secondary(self, attrkeys): + self.assertEqual( + attrkeys[u'lu_data-emotion_example_secondary'], + { + u'for': u'node', + u'attr.name': u'emotion_example_secondary', + u'attr.type': u'string', + }, + ) + + def __assert_attr_edge_type(self, attrkeys): + self.assertEqual( + attrkeys[u'edge-type'], + { + u'for': u'edge', + u'attr.name': u'type', + u'attr.type': u'string', + }, + ) + + def __assert_attr_edge_name(self, attrkeys): + self.assertEqual( + attrkeys[u'edge-name'], + { + u'for': u'edge', + u'attr.name': u'name', + u'attr.type': u'string', + }, + ) + + def setUp(self): + self.__plwn = self._PLWNClass.from_reader(( + nd.make_relation_type_node( + kind=en.RelationKind.synset, + name=u'hiperonimia', + ), + nd.make_relation_type_node( + kind=en.RelationKind.lexical, + name=u'deminutywność', + ), + nd.make_synset_node( + id=1, + definition=u'best friend', + related=((u'hiperonimia', 2),), + ), + nd.make_synset_node( + id=2, + definition=u'melk', + is_artificial=True, + ), + nd.make_lexical_unit_node( + id=11, + lemma=u'pies', + pos=en.PoS.n, + variant=1, + synset=1, + unit_index=1, + definition=u'Barks.', + usage_notes=(u'P', u'S'), + external_links=(u'http://dogs.com',), + examples=(u'Ala ma psa',), + examples_sources=(u'Lies!',), + domain=en.Domain.zw, + related=((u'deminutywność', 21),), + is_emotional=True, + emotion_markedness=en.EmotionMarkedness.ambiguous, + # Names and valuations need to be tuples, so their ordering is + # predictable. 
+ emotion_names=(en.EmotionName.strach, en.EmotionName.zlosc), + emotion_valuations=( + en.EmotionValuation.piekno, + en.EmotionValuation.krzywda, + ), + emotion_example_1=u'bim', + emotion_example_2=u'bom', + ), + nd.make_lexical_unit_node( + id=21, + lemma=u'ssak', + pos=en.PoS.n, + variant=1, + synset=2, + unit_index=1, + domain=en.Domain.bhp, + verb_aspect=en.VerbAspect.two, + ), + )) + + def tearDown(self): + self.__plwn.close() + + def test_synset_attrs_all(self): + """Include all synset attributes.""" + tree = _write_and_read( + self.__plwn, + include_attributes=True, + skip_artificial_synsets=False, + ) + + # Check data keys + keys = _make_attr_key_dicts(tree) + self.assertEqual(len(keys), 4) + self.__assert_attr_syn_definition(keys) + self.__assert_attr_syn_is_artificial(keys) + # The edges are always included + self.__assert_attr_edge_type(keys) + self.__assert_attr_edge_name(keys) + + # Check data for the synset + data = _get_data_values(tree.find(u"./graph/node[@id='1']")) + self.assertEqual(len(data), 2) + self.assertEqual(data[u'syn_data-definition'], u'best friend') + self.assertEqual(data[u'syn_data-is_artificial'], u'false') + + def test_synset_attrs_include(self): + """Include a select attribute of a synset.""" + tree = _write_and_read( + self.__plwn, + included_synset_attributes=(u'definition',), + skip_artificial_synsets=False, + ) + + keys = _make_attr_key_dicts(tree) + self.assertEqual(len(keys), 3) + self.__assert_attr_syn_definition(keys) + self.__assert_attr_edge_type(keys) + self.__assert_attr_edge_name(keys) + + data = _get_data_values(tree.find(u"./graph/node[@id='1']")) + self.assertEqual(len(data), 1) + self.assertEqual(data[u'syn_data-definition'], u'best friend') + + def test_synset_attrs_exclude(self): + """Include a select attribute of a synset by excluding other ones.""" + tree = _write_and_read( + self.__plwn, + excluded_synset_attributes=(u'definition',), + skip_artificial_synsets=False, + ) + + keys = _make_attr_key_dicts(tree) + self.assertEqual(len(keys), 3) + self.__assert_attr_syn_is_artificial(keys) + self.__assert_attr_edge_type(keys) + self.__assert_attr_edge_name(keys) + + data = _get_data_values(tree.find(u"./graph/node[@id='2']")) + self.assertEqual(len(data), 1) + self.assertEqual(data[u'syn_data-is_artificial'], u'true') + + def test_lexunit_attrs_all(self): + """Include all lexical unit attributes.""" + tree = _write_and_read( + self.__plwn, + graph_type='lexical_unit', + include_attributes=True, + skip_artificial_synsets=False, + ) + + keys = _make_attr_key_dicts(tree) + self.assertEqual(len(keys), 18) + self.__assert_attr_lex_lemma(keys) + self.__assert_attr_lex_pos(keys) + self.__assert_attr_lex_variant(keys) + self.__assert_attr_lex_definition(keys) + self.__assert_attr_lex_sense_examples(keys) + self.__assert_attr_lex_sense_examples_sources(keys) + self.__assert_attr_lex_external_links(keys) + self.__assert_attr_lex_usage_notes(keys) + self.__assert_attr_lex_domain(keys) + self.__assert_attr_lex_verb_aspect(keys) + self.__assert_attr_lex_is_emotional(keys) + self.__assert_attr_lex_emotion_markedness(keys) + self.__assert_attr_lex_emotion_names(keys) + self.__assert_attr_lex_emotion_valuations(keys) + self.__assert_attr_lex_emotion_example(keys) + self.__assert_attr_lex_emotion_example_secondary(keys) + self.__assert_attr_edge_type(keys) + self.__assert_attr_edge_name(keys) + + data = _get_data_values(tree.find(u"./graph/node[@id='11']")) + self.assertEqual(len(data), 16) + self.assertEqual(data[u'lu_data-lemma'], u'pies') + 
self.assertEqual(data[u'lu_data-pos'], en.PoS.n.value) + self.assertEqual(data[u'lu_data-variant'], u'1') + self.assertEqual(data[u'lu_data-definition'], u'Barks.') + self.assertEqual( + json.loads(data[u'lu_data-usage_notes']), + [u'P', u'S'], + ) + self.assertEqual( + json.loads(data[u'lu_data-external_links']), + [u'http://dogs.com'], + ) + self.assertEqual( + json.loads(data[u'lu_data-sense_examples']), + [u'Ala ma psa'], + ) + self.assertEqual( + json.loads(data[u'lu_data-sense_examples_sources']), + [u'Lies!'], + ) + self.assertEqual(data[u'lu_data-domain'], en.Domain.zw.value) + self.assertIsNone(data[u'lu_data-verb_aspect']) + self.assertEqual(data[u'lu_data-is_emotional'], u'true') + self.assertEqual( + data[u'lu_data-emotion_markedness'], + en.EmotionMarkedness.ambiguous.value, + ) + self.assertEqual( + json.loads(data[u'lu_data-emotion_names']), + [en.EmotionName.strach.value, en.EmotionName.zlosc.value], + ) + self.assertEqual( + json.loads(data[u'lu_data-emotion_valuations']), + [ + en.EmotionValuation.krzywda.value, + en.EmotionValuation.piekno.value, + ], + ) + self.assertEqual(data[u'lu_data-emotion_example'], u'bim') + self.assertEqual(data[u'lu_data-emotion_example_secondary'], u'bom') + + def test_lexunit_attrs_include(self): + """Include select attributes of a lexical unit.""" + tree = _write_and_read( + self.__plwn, + graph_type='lexical_unit', + included_lexical_unit_attributes=(u'lemma', u'variant'), + skip_artificial_synsets=False, + ) + + keys = _make_attr_key_dicts(tree) + self.assertEqual(len(keys), 4) + self.__assert_attr_lex_lemma(keys) + self.__assert_attr_lex_variant(keys) + self.__assert_attr_edge_type(keys) + self.__assert_attr_edge_name(keys) + + data = _get_data_values(tree.find(u"./graph/node[@id='11']")) + self.assertEqual(len(data), 2) + self.assertEqual(data[u'lu_data-lemma'], u'pies') + self.assertEqual(data[u'lu_data-variant'], u'1') + + def test_lexunit_attrs_exclude(self): + """Include select attributes of a lexical unit by excluding others.""" + tree = _write_and_read( + self.__plwn, + graph_type='lexical_unit', + excluded_lexical_unit_attributes=(u'lemma', u'variant'), + skip_artificial_synsets=False, + ) + + keys = _make_attr_key_dicts(tree) + self.assertEqual(len(keys), 16) + self.__assert_attr_lex_pos(keys) + self.__assert_attr_lex_definition(keys) + self.__assert_attr_lex_sense_examples(keys) + self.__assert_attr_lex_sense_examples_sources(keys) + self.__assert_attr_lex_external_links(keys) + self.__assert_attr_lex_usage_notes(keys) + self.__assert_attr_lex_domain(keys) + self.__assert_attr_lex_verb_aspect(keys) + self.__assert_attr_lex_is_emotional(keys) + self.__assert_attr_lex_emotion_markedness(keys) + self.__assert_attr_lex_emotion_names(keys) + self.__assert_attr_lex_emotion_valuations(keys) + self.__assert_attr_lex_emotion_example(keys) + self.__assert_attr_lex_emotion_example_secondary(keys) + self.__assert_attr_edge_type(keys) + self.__assert_attr_edge_name(keys) + + data = _get_data_values(tree.find(u"./graph/node[@id='11']")) + self.assertEqual(len(data), 14) + self.assertEqual(data[u'lu_data-pos'], en.PoS.n.value) + self.assertEqual(data[u'lu_data-definition'], u'Barks.') + self.assertEqual( + json.loads(data[u'lu_data-usage_notes']), + [u'P', u'S'], + ) + self.assertEqual( + json.loads(data[u'lu_data-external_links']), + [u'http://dogs.com'], + ) + self.assertEqual( + json.loads(data[u'lu_data-sense_examples']), + [u'Ala ma psa'], + ) + self.assertEqual( + json.loads(data[u'lu_data-sense_examples_sources']), + [u'Lies!'], + ) + 
self.assertEqual(data[u'lu_data-is_emotional'], u'true') + self.assertEqual(data[u'lu_data-domain'], en.Domain.zw.value) + # ElementTree reads empty-value nodes as None + self.assertIsNone(data[u'lu_data-verb_aspect']) + self.assertEqual( + data[u'lu_data-emotion_markedness'], + en.EmotionMarkedness.ambiguous.value, + ) + self.assertEqual( + json.loads(data[u'lu_data-emotion_names']), + [en.EmotionName.strach.value, en.EmotionName.zlosc.value], + ) + self.assertEqual( + json.loads(data[u'lu_data-emotion_valuations']), + [ + en.EmotionValuation.krzywda.value, + en.EmotionValuation.piekno.value, + ], + ) + self.assertEqual(data[u'lu_data-emotion_example'], u'bim') + self.assertEqual(data[u'lu_data-emotion_example_secondary'], u'bom') + + +class SynsetGraphTest(ut.TestCase): + """Create a graph of synsets and test its edges. + + The last element is artificial so it can be cut off with a parameter. + + :: + .---. a .---. B/b .----. + | 1 |----->| 2 |------>| 3a | + '---' '---' '----' + """ + + _PLWNClass = None # Override in subclass + + def setUp(self): + self.__plwn = self._PLWNClass.from_reader(( + nd.make_relation_type_node(kind=en.RelationKind.synset, name=u'a'), + nd.make_relation_type_node( + kind=en.RelationKind.synset, + name=u'b', + parent=u'B', + ), + nd.make_synset_node(id=1, related=((u'a', 2),)), + nd.make_synset_node(id=2, related=((u'B/b', 3),)), + nd.make_synset_node(id=3, is_artificial=True), + nd.make_lexical_unit_node( + id=11, + lemma=u'A', + pos=en.PoS.n, + variant=1, + synset=1, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=21, + lemma=u'B', + pos=en.PoS.n, + variant=1, + synset=2, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=31, + lemma=u'C', + pos=en.PoS.n, + variant=1, + synset=3, + unit_index=1, + domain=en.Domain.bhp, + verb_aspect=en.VerbAspect.dk, + ), + )) + + def tearDown(self): + self.__plwn.close() + + def test_full_graph(self): + """Include all nodes and edges in the graph.""" + tree = _write_and_read(self.__plwn, skip_artificial_synsets=False) + + # Check nodes + self.assertEqual( + _get_node_id_set(tree), + frozenset((u'1', u'2', u'3')), + ) + + # Check edges + edges = _make_edge_dict(tree) + + self.assertEqual(len(edges), 2) + self.assertEqual( + edges[u'syn--1--2--a'], + { + u'source': u'1', + u'target': u'2', + u'type': u'relation', + u'name': u'a', + }, + ) + self.assertEqual( + edges[u'syn--2--3--B/b'], + { + u'source': u'2', + u'target': u'3', + u'type': u'relation', + u'name': u'B/b', + }, + ) + + def test_include_relations(self): + """Include only some of relation edges in the graph.""" + tree = _write_and_read( + self.__plwn, + included_synset_relations=(u'a',), + skip_artificial_synsets=False, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset((u'1', u'2')), + ) + + edges = _make_edge_dict(tree) + + self.assertEqual(len(edges), 1) + self.assertEqual( + edges[u'syn--1--2--a'], + { + u'source': u'1', + u'target': u'2', + u'type': u'relation', + u'name': u'a', + }, + ) + + def test_exclude_relations(self): + """Exclude only some of relation edges in the graph.""" + tree = _write_and_read( + self.__plwn, + excluded_synset_relations=(u'a',), + skip_artificial_synsets=False, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset((u'2', u'3')), + ) + + edges = _make_edge_dict(tree) + self.assertEqual(len(edges), 1) + self.assertEqual( + edges[u'syn--2--3--' + RelationInfoBase.format_name(u'B', u'b')], + { + u'source': u'2', + u'target': u'3', + u'type': u'relation', 
+ u'name': RelationInfoBase.format_name(u'B', u'b'), + }, + ) + + def test_include_nodes(self): + """Include only some nodes in the graph.""" + tree = _write_and_read( + self.__plwn, + included_synset_nodes=(1, 2), + skip_artificial_synsets=False, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset((u'1', u'2')), + ) + + edges = _make_edge_dict(tree) + + self.assertEqual(len(edges), 1) + self.assertEqual( + edges[u'syn--1--2--a'], + { + u'source': u'1', + u'target': u'2', + u'type': u'relation', + u'name': u'a', + }, + ) + + def test_exclude_nodes(self): + """Exclude some nodes from the graph.""" + tree = _write_and_read( + self.__plwn, + excluded_synset_nodes=(3,), + skip_artificial_synsets=False, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset((u'1', u'2')), + ) + + edges = _make_edge_dict(tree) + + self.assertEqual(len(edges), 1) + self.assertEqual( + edges[u'syn--1--2--a'], + { + u'source': u'1', + u'target': u'2', + u'type': u'relation', + u'name': u'a', + }, + ) + + def test_break_node_chain(self): + """Exclude one node in the middle which should make the graph empty.""" + tree = _write_and_read( + self.__plwn, + excluded_synset_nodes=(2,), + skip_artificial_synsets=False, + ) + + self.assertFalse(_get_node_id_set(tree)) + self.assertFalse(_make_edge_dict(tree)) + + def test_prefix_nodes(self): + """Check if nodes are correctly prefixed when told to.""" + tree = _write_and_read( + self.__plwn, + prefix_ids=True, + skip_artificial_synsets=False, + ) + self.assertEqual( + _get_node_id_set(tree), + frozenset((u'synset-1', u'synset-2', u'synset-3')), + ) + self.assertEqual( + frozenset(_make_edge_dict(tree)), + frozenset(( + u'syn--synset-1--synset-2--a', + u'syn--synset-2--synset-3--' + + RelationInfoBase.format_name(u'B', u'b'), + )), + ) + + def test_no_artificial(self): + """Cut off the artificial synset from the rest of the synset graph.""" + tree = _write_and_read(self.__plwn, skip_artificial_synsets=True) + + self.assertEqual( + _get_node_id_set(tree), + frozenset((u'1', u'2')), + ) + + edges = _make_edge_dict(tree) + + self.assertEqual(len(edges), 1) + self.assertEqual( + edges[u'syn--1--2--a'], + { + u'source': u'1', + u'target': u'2', + u'type': u'relation', + u'name': u'a', + }, + ) + + +class LexicalGraphTest(ut.TestCase): + """Create a graph of lexical units and test its edges. + + The graph is the same as in synset graph test, but with lexical units. 
+ """ + + _PLWNClass = None # Override in subclass + + def setUp(self): + self.__plwn = self._PLWNClass.from_reader(( + nd.make_relation_type_node( + kind=en.RelationKind.lexical, + name=u'a', + ), + nd.make_relation_type_node( + kind=en.RelationKind.lexical, + name=u'b', + parent=u'B', + ), + nd.make_synset_node(id=1), + nd.make_synset_node(id=2), + # On lexical unit level artificial synsets have no meaning + nd.make_synset_node(id=3, is_artificial=True), + nd.make_lexical_unit_node( + id=11, + lemma=u'A', + pos=en.PoS.n, + variant=1, + synset=1, + unit_index=1, + domain=en.Domain.bhp, + related=((u'a', 21),), + ), + nd.make_lexical_unit_node( + id=21, + lemma=u'B', + pos=en.PoS.n, + variant=1, + synset=2, + unit_index=1, + domain=en.Domain.bhp, + related=((RelationInfoBase.format_name(u'B', u'b'), 31),), + ), + nd.make_lexical_unit_node( + id=31, + lemma=u'C', + pos=en.PoS.n, + variant=1, + synset=3, + unit_index=1, + domain=en.Domain.bhp, + verb_aspect=en.VerbAspect.dk, + ), + )) + + def tearDown(self): + self.__plwn.close() + + def test_full_graph(self): + """Include all lexical units and relations in the graph.""" + tree = _write_and_read( + self.__plwn, + graph_type='lexical_unit', + skip_artificial_synsets=False, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset((u'11', u'21', u'31')), + ) + + edges = _make_edge_dict(tree) + self.assertEqual(len(edges), 2) + self.assertEqual( + edges[u'lu--11--21--a'], + { + u'source': u'11', + u'target': u'21', + u'type': u'relation', + u'name': u'a', + }, + ) + self.assertEqual( + edges[u'lu--21--31--' + RelationInfoBase.format_name(u'B', u'b')], + { + u'source': u'21', + u'target': u'31', + u'type': u'relation', + u'name': RelationInfoBase.format_name(u'B', u'b'), + }, + ) + + def test_include_relations(self): + """Include only some of the lexical unit relations in the graph.""" + tree = _write_and_read( + self.__plwn, + graph_type=u'lexical_unit', + included_lexical_unit_relations=(u'a',), + skip_artificial_synsets=False, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset((u'11', u'21')), + ) + + edges = _make_edge_dict(tree) + self.assertEqual(len(edges), 1) + self.assertEqual( + edges[u'lu--11--21--a'], + { + u'source': u'11', + u'target': u'21', + u'type': u'relation', + u'name': u'a', + }, + ) + + def test_exclude_relations(self): + """Exclude some of the relations from the graph.""" + tree = _write_and_read( + self.__plwn, + graph_type=u'lexical_unit', + excluded_lexical_unit_relations=(u'a',), + skip_artificial_synsets=False, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset((u'21', u'31')), + ) + + edges = _make_edge_dict(tree) + self.assertEqual(len(edges), 1) + self.assertEqual( + edges[u'lu--21--31--' + RelationInfoBase.format_name(u'B', u'b')], + { + u'source': u'21', + u'target': u'31', + u'type': u'relation', + u'name': RelationInfoBase.format_name(u'B', u'b'), + }, + ) + + def test_include_nodes(self): + """Include only some of the lexical unit nodes in the graph.""" + tree = _write_and_read( + self.__plwn, + graph_type=u'lexical_unit', + included_lexical_unit_nodes=(11, 21,), + skip_artificial_synsets=False, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset((u'11', u'21')), + ) + + edges = _make_edge_dict(tree) + self.assertEqual(len(edges), 1) + self.assertEqual( + edges[u'lu--11--21--a'], + { + u'source': u'11', + u'target': u'21', + u'type': u'relation', + u'name': u'a', + }, + ) + + def test_exclude_nodes(self): + """Exclude some of the lexical unit nodes in the graph.""" 
+ tree = _write_and_read( + self.__plwn, + graph_type=u'lexical_unit', + excluded_lexical_unit_nodes=(31,), + skip_artificial_synsets=False, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset((u'11', u'21')), + ) + + edges = _make_edge_dict(tree) + self.assertEqual(len(edges), 1) + self.assertEqual( + edges[u'lu--11--21--a'], + { + u'source': u'11', + u'target': u'21', + u'type': u'relation', + u'name': u'a', + }, + ) + + def test_break_node_chain(self): + """Exclude one node in the middle which should make the graph empty.""" + tree = _write_and_read( + self.__plwn, + excluded_synset_nodes=(2,), + skip_artificial_synsets=False, + ) + + self.assertFalse(_get_node_id_set(tree)) + self.assertFalse(_make_edge_dict(tree)) + + def test_prefix_nodes(self): + """Make sure node ids are prefixed when told to.""" + tree = _write_and_read( + self.__plwn, + graph_type=u'lexical_unit', + prefix_ids=True, + skip_artificial_synsets=False, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset(( + u'lexical_unit-11', + u'lexical_unit-21', + u'lexical_unit-31', + )) + ) + self.assertEqual( + frozenset(_make_edge_dict(tree)), + frozenset(( + u'lu--lexical_unit-11--lexical_unit-21--a', + u'lu--lexical_unit-21--lexical_unit-31--' + + RelationInfoBase.format_name(u'B', u'b'), + )) + ) + + +class MixedGraphTest(ut.TestCase): + """Test how a mixed synset-unit graph behaves. + + :: + .--------.-->.-------. + | syn-4a | | lu-41 | + '--------'<--'-------' + ^ | + C/u| |C/d + | v + .-------. A .-------. B .-------. + | syn-1 |--->| syn-2 |--->| syn-3 | + '-------' '-------' '-------' + ^ | ^ | ^ | + | | | | | | + | v | v | v + .-------. a .-------. b .-------. + | lu-11 |--->| lu-21 |--->| lu-31 | + '-------' '-------' '-------' + """ + + _PLWNClass = None # Override in subclass + + def __assert_edge_syn_1_2(self, edges): + self.assertEqual( + edges[u'syn--synset-1--synset-2--A'], + { + u'source': u'synset-1', + u'target': u'synset-2', + u'type': u'relation', + u'name': u'A', + }, + ) + + def __assert_edge_syn_2_3(self, edges): + self.assertEqual( + edges[u'syn--synset-2--synset-3--B'], + { + u'source': u'synset-2', + u'target': u'synset-3', + u'type': u'relation', + u'name': u'B', + }, + ) + + def __assert_edge_syn_2_4(self, edges): + self.assertEqual( + edges[u'syn--synset-2--synset-4--C/u'], + { + u'source': u'synset-2', + u'target': u'synset-4', + u'type': u'relation', + u'name': u'C/u', + }, + ) + + def __assert_edge_syn_4_2(self, edges): + self.assertEqual( + edges[u'syn--synset-4--synset-2--C/d'], + { + u'source': u'synset-4', + u'target': u'synset-2', + u'type': u'relation', + u'name': u'C/d', + }, + ) + + def __assert_edge_lex_11_21(self, edges): + self.assertEqual( + edges[u'lu--lexical_unit-11--lexical_unit-21--a'], + { + u'source': u'lexical_unit-11', + u'target': u'lexical_unit-21', + u'type': u'relation', + u'name': u'a', + }, + ) + + def __assert_edge_lex_21_31(self, edges): + self.assertEqual( + edges[u'lu--lexical_unit-21--lexical_unit-31--b'], + { + u'source': u'lexical_unit-21', + u'target': u'lexical_unit-31', + u'type': u'relation', + u'name': u'b', + }, + ) + + def __assert_edge_s2l_1_11(self, edges): + self.assertEqual( + edges[u'uns--synset-1--lexical_unit-11--has_unit'], + { + u'source': u'synset-1', + u'target': u'lexical_unit-11', + u'type': u'unit_and_synset', + u'name': u'has_unit', + }, + ) + + def __assert_edge_s2l_2_21(self, edges): + self.assertEqual( + edges[u'uns--synset-2--lexical_unit-21--has_unit'], + { + u'source': u'synset-2', + u'target': 
u'lexical_unit-21', + u'type': u'unit_and_synset', + u'name': u'has_unit', + }, + ) + + def __assert_edge_s2l_3_31(self, edges): + self.assertEqual( + edges[u'uns--synset-3--lexical_unit-31--has_unit'], + { + u'source': u'synset-3', + u'target': u'lexical_unit-31', + u'type': u'unit_and_synset', + u'name': u'has_unit', + }, + ) + + def __assert_edge_s2l_4_41(self, edges): + self.assertEqual( + edges[u'uns--synset-4--lexical_unit-41--has_unit'], + { + u'source': u'synset-4', + u'target': u'lexical_unit-41', + u'type': u'unit_and_synset', + u'name': u'has_unit', + }, + ) + + def __assert_edge_l2s_11_1(self, edges): + self.assertEqual( + edges[u'uns--lexical_unit-11--synset-1--in_synset'], + { + u'source': u'lexical_unit-11', + u'target': u'synset-1', + u'type': u'unit_and_synset', + u'name': u'in_synset', + }, + ) + + def __assert_edge_l2s_21_2(self, edges): + self.assertEqual( + edges[u'uns--lexical_unit-21--synset-2--in_synset'], + { + u'source': u'lexical_unit-21', + u'target': u'synset-2', + u'type': u'unit_and_synset', + u'name': u'in_synset', + }, + ) + + def __assert_edge_l2s_31_3(self, edges): + self.assertEqual( + edges[u'uns--lexical_unit-31--synset-3--in_synset'], + { + u'source': u'lexical_unit-31', + u'target': u'synset-3', + u'type': u'unit_and_synset', + u'name': u'in_synset', + }, + ) + + def __assert_edge_l2s_41_4(self, edges): + self.assertEqual( + edges[u'uns--lexical_unit-41--synset-4--in_synset'], + { + u'source': u'lexical_unit-41', + u'target': u'synset-4', + u'type': u'unit_and_synset', + u'name': u'in_synset', + }, + ) + + def setUp(self): + self.__plwn = self._PLWNClass.from_reader(( + nd.make_relation_type_node(kind=en.RelationKind.synset, name=u'A'), + nd.make_relation_type_node(kind=en.RelationKind.synset, name=u'B'), + nd.make_relation_type_node( + kind=en.RelationKind.synset, + name=u'u', + parent=u'C', + ), + nd.make_relation_type_node( + kind=en.RelationKind.synset, + name=u'd', + parent=u'C', + ), + nd.make_relation_type_node( + kind=en.RelationKind.lexical, + name=u'a', + ), + nd.make_relation_type_node( + kind=en.RelationKind.lexical, + name=u'b', + ), + nd.make_synset_node(id=1, related=((u'A', 2),)), + nd.make_synset_node(id=2, related=((u'B', 3), (u'C/u', 4))), + nd.make_synset_node(id=3), + nd.make_synset_node( + id=4, + related=((u'C/d', 2),), + is_artificial=True, + ), + nd.make_lexical_unit_node( + id=11, + lemma=u'first', + pos=en.PoS.n, + variant=1, + synset=1, + unit_index=1, + domain=en.Domain.bhp, + related=((u'a', 21),), + ), + nd.make_lexical_unit_node( + id=21, + lemma=u'second', + pos=en.PoS.n, + variant=1, + synset=2, + unit_index=1, + domain=en.Domain.bhp, + related=((u'b', 31),), + ), + nd.make_lexical_unit_node( + id=31, + lemma=u'third', + pos=en.PoS.n, + variant=1, + synset=3, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=41, + lemma=u'unreal', + pos=en.PoS.n, + variant=1, + synset=4, + unit_index=1, + domain=en.Domain.bhp, + ), + )) + + def tearDown(self): + self.__plwn.close() + + def test_full_graph(self): + tree = _write_and_read( + self.__plwn, + graph_type='mixed', + skip_artificial_synsets=False, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset(( + u'synset-1', + u'synset-2', + u'synset-3', + u'synset-4', + u'lexical_unit-11', + u'lexical_unit-21', + u'lexical_unit-31', + u'lexical_unit-41', + )), + ) + + edges = _make_edge_dict(tree) + self.assertEqual(len(edges), 14) + self.__assert_edge_syn_1_2(edges) + self.__assert_edge_syn_2_3(edges) + self.__assert_edge_syn_2_4(edges) + 
self.__assert_edge_syn_4_2(edges)
+        self.__assert_edge_lex_11_21(edges)
+        self.__assert_edge_lex_21_31(edges)
+        self.__assert_edge_s2l_1_11(edges)
+        self.__assert_edge_s2l_2_21(edges)
+        self.__assert_edge_s2l_3_31(edges)
+        self.__assert_edge_s2l_4_41(edges)
+        self.__assert_edge_l2s_11_1(edges)
+        self.__assert_edge_l2s_21_2(edges)
+        self.__assert_edge_l2s_31_3(edges)
+        self.__assert_edge_l2s_41_4(edges)
+
+    def test_include_synset_edges(self):
+        """Include only certain synset edges in the graph.
+
+        All nodes should remain intact, except synset 4, which is connected
+        only by excluded synset relations.
+        """
+        tree = _write_and_read(
+            self.__plwn,
+            graph_type='mixed',
+            included_synset_relations=(u'B',),
+            skip_artificial_synsets=False,
+        )
+
+        self.assertEqual(
+            _get_node_id_set(tree),
+            frozenset((
+                u'synset-1',
+                u'synset-2',
+                u'synset-3',
+                u'lexical_unit-11',
+                u'lexical_unit-21',
+                u'lexical_unit-31',
+            )),
+        )
+
+        edges = _make_edge_dict(tree)
+        self.assertEqual(len(edges), 9)
+        self.__assert_edge_syn_2_3(edges)
+        self.__assert_edge_lex_11_21(edges)
+        self.__assert_edge_lex_21_31(edges)
+        self.__assert_edge_s2l_1_11(edges)
+        self.__assert_edge_s2l_2_21(edges)
+        self.__assert_edge_s2l_3_31(edges)
+        self.__assert_edge_l2s_11_1(edges)
+        self.__assert_edge_l2s_21_2(edges)
+        self.__assert_edge_l2s_31_3(edges)
+
+    def test_exclude_synset_edges(self):
+        """Exclude some synset edges in the graph.
+
+        All nodes should remain intact.
+        """
+        tree = _write_and_read(
+            self.__plwn,
+            graph_type='mixed',
+            excluded_synset_relations=(u'B',),
+            skip_artificial_synsets=False,
+        )
+
+        self.assertEqual(
+            _get_node_id_set(tree),
+            frozenset((
+                u'synset-1',
+                u'synset-2',
+                u'synset-3',
+                u'synset-4',
+                u'lexical_unit-11',
+                u'lexical_unit-21',
+                u'lexical_unit-31',
+                u'lexical_unit-41',
+            )),
+        )
+
+        edges = _make_edge_dict(tree)
+        self.assertEqual(len(edges), 13)
+        self.__assert_edge_syn_1_2(edges)
+        self.__assert_edge_syn_2_4(edges)
+        self.__assert_edge_syn_4_2(edges)
+        self.__assert_edge_lex_11_21(edges)
+        self.__assert_edge_lex_21_31(edges)
+        self.__assert_edge_s2l_1_11(edges)
+        self.__assert_edge_s2l_2_21(edges)
+        self.__assert_edge_s2l_3_31(edges)
+        self.__assert_edge_s2l_4_41(edges)
+        self.__assert_edge_l2s_11_1(edges)
+        self.__assert_edge_l2s_21_2(edges)
+        self.__assert_edge_l2s_31_3(edges)
+        self.__assert_edge_l2s_41_4(edges)
+
+    def test_include_lexical_relations(self):
+        """Include only some of the lexical relation edges in the graph.
+
+        All nodes should remain.
+        """
+        tree = _write_and_read(
+            self.__plwn,
+            graph_type='mixed',
+            included_lexical_unit_relations=(u'b',),
+            skip_artificial_synsets=False,
+        )
+
+        self.assertEqual(
+            _get_node_id_set(tree),
+            frozenset((
+                u'synset-1',
+                u'synset-2',
+                u'synset-3',
+                u'synset-4',
+                u'lexical_unit-11',
+                u'lexical_unit-21',
+                u'lexical_unit-31',
+                u'lexical_unit-41',
+            )),
+        )
+
+        edges = _make_edge_dict(tree)
+        self.assertEqual(len(edges), 13)
+        self.__assert_edge_syn_1_2(edges)
+        self.__assert_edge_syn_2_3(edges)
+        self.__assert_edge_syn_2_4(edges)
+        self.__assert_edge_syn_4_2(edges)
+        self.__assert_edge_lex_21_31(edges)
+        self.__assert_edge_s2l_1_11(edges)
+        self.__assert_edge_s2l_2_21(edges)
+        self.__assert_edge_s2l_3_31(edges)
+        self.__assert_edge_s2l_4_41(edges)
+        self.__assert_edge_l2s_11_1(edges)
+        self.__assert_edge_l2s_21_2(edges)
+        self.__assert_edge_l2s_31_3(edges)
+        self.__assert_edge_l2s_41_4(edges)
+
+    def test_exclude_lexical_relations(self):
+        """Exclude some of the lexical relation edges from the graph.
+
+        All nodes should remain.
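+        Only the lu-21 -> lu-31 edge should disappear.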
+ """ + tree = _write_and_read( + self.__plwn, + graph_type='mixed', + excluded_lexical_unit_relations=(u'b',), + skip_artificial_synsets=False, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset(( + u'synset-1', + u'synset-2', + u'synset-3', + u'synset-4', + u'lexical_unit-11', + u'lexical_unit-21', + u'lexical_unit-31', + u'lexical_unit-41', + )), + ) + + edges = _make_edge_dict(tree) + self.assertEqual(len(edges), 13) + self.__assert_edge_syn_1_2(edges) + self.__assert_edge_syn_2_3(edges) + self.__assert_edge_syn_2_4(edges) + self.__assert_edge_syn_4_2(edges) + self.__assert_edge_lex_11_21(edges) + self.__assert_edge_s2l_1_11(edges) + self.__assert_edge_s2l_2_21(edges) + self.__assert_edge_s2l_3_31(edges) + self.__assert_edge_s2l_4_41(edges) + self.__assert_edge_l2s_11_1(edges) + self.__assert_edge_l2s_21_2(edges) + self.__assert_edge_l2s_31_3(edges) + self.__assert_edge_l2s_41_4(edges) + + def test_cut_off_nodes(self): + """Remove both corresponding lexical unit and synset edges. + + This should make a synset and a unit disappear from the graph. They are + not connected to any other synset or unit, and only have their + has_unit/in_synset edges. + """ + tree = _write_and_read( + self.__plwn, + graph_type='mixed', + excluded_synset_relations=(u'B',), + excluded_lexical_unit_relations=(u'b',), + skip_artificial_synsets=False, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset(( + u'synset-1', + u'synset-2', + u'synset-4', + u'lexical_unit-11', + u'lexical_unit-21', + u'lexical_unit-41', + )), + ) + + edges = _make_edge_dict(tree) + self.assertEqual(len(edges), 10) + self.__assert_edge_syn_1_2(edges) + self.__assert_edge_syn_2_4(edges) + self.__assert_edge_syn_4_2(edges) + self.__assert_edge_lex_11_21(edges) + self.__assert_edge_s2l_1_11(edges) + self.__assert_edge_s2l_2_21(edges) + self.__assert_edge_s2l_4_41(edges) + self.__assert_edge_l2s_11_1(edges) + self.__assert_edge_l2s_21_2(edges) + self.__assert_edge_l2s_41_4(edges) + + def test_include_synset_nodes(self): + """Include only some of the synsets in the graph. + + This should cause corresponding lexical units to be dropped from the + graph, as well as relation edges. + """ + tree = _write_and_read( + self.__plwn, + graph_type='mixed', + included_synset_nodes=(1, 2), + skip_artificial_synsets=False, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset(( + u'synset-1', + u'synset-2', + u'lexical_unit-11', + u'lexical_unit-21', + )), + ) + + edges = _make_edge_dict(tree) + self.assertEqual(len(edges), 6) + self.__assert_edge_syn_1_2(edges) + self.__assert_edge_lex_11_21(edges) + self.__assert_edge_s2l_1_11(edges) + self.__assert_edge_s2l_2_21(edges) + self.__assert_edge_l2s_11_1(edges) + self.__assert_edge_l2s_21_2(edges) + + def test_exclude_synset_nodes(self): + """Exclude some synsets from the graph. + + Some nodes should drop, like in the include test. 
+ """ + tree = _write_and_read( + self.__plwn, + graph_type='mixed', + excluded_synset_nodes=(1,), + skip_artificial_synsets=False, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset(( + u'synset-2', + u'synset-3', + u'synset-4', + u'lexical_unit-21', + u'lexical_unit-31', + u'lexical_unit-41', + )), + ) + + edges = _make_edge_dict(tree) + self.assertEqual(len(edges), 10) + self.__assert_edge_syn_2_3(edges) + self.__assert_edge_syn_2_4(edges) + self.__assert_edge_syn_4_2(edges) + self.__assert_edge_lex_21_31(edges) + self.__assert_edge_s2l_2_21(edges) + self.__assert_edge_s2l_3_31(edges) + self.__assert_edge_s2l_4_41(edges) + self.__assert_edge_l2s_21_2(edges) + self.__assert_edge_l2s_31_3(edges) + self.__assert_edge_l2s_41_4(edges) + + def test_include_lexical_unit_nodes(self): + """Include only some of lexical unit nodes in the graph. + + Same thing like with including synsets. + """ + tree = _write_and_read( + self.__plwn, + graph_type='mixed', + included_lexical_unit_nodes=(11, 21), + skip_artificial_synsets=False, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset(( + u'synset-1', + u'synset-2', + u'lexical_unit-11', + u'lexical_unit-21', + )), + ) + + edges = _make_edge_dict(tree) + self.assertEqual(len(edges), 6) + self.__assert_edge_syn_1_2(edges) + self.__assert_edge_lex_11_21(edges) + self.__assert_edge_s2l_1_11(edges) + self.__assert_edge_s2l_2_21(edges) + self.__assert_edge_l2s_11_1(edges) + self.__assert_edge_l2s_21_2(edges) + + def test_exclude_lexical_unit_nodes(self): + """Exclude some of lexical unit nodes in the graph. + + Same thing like with excluding synsets. + """ + tree = _write_and_read( + self.__plwn, + graph_type='mixed', + excluded_lexical_unit_nodes=(11,), + skip_artificial_synsets=False, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset(( + u'synset-2', + u'synset-3', + u'synset-4', + u'lexical_unit-21', + u'lexical_unit-31', + u'lexical_unit-41', + )), + ) + + edges = _make_edge_dict(tree) + self.assertEqual(len(edges), 10) + self.__assert_edge_syn_2_3(edges) + self.__assert_edge_syn_2_4(edges) + self.__assert_edge_syn_4_2(edges) + self.__assert_edge_lex_21_31(edges) + self.__assert_edge_s2l_2_21(edges) + self.__assert_edge_s2l_3_31(edges) + self.__assert_edge_s2l_4_41(edges) + self.__assert_edge_l2s_21_2(edges) + self.__assert_edge_l2s_31_3(edges) + self.__assert_edge_l2s_41_4(edges) + + def test_break_graph(self): + """Remove one of the middle nodes, to empty the whole graph.""" + tree = _write_and_read( + self.__plwn, + graph_type='mixed', + excluded_synset_nodes=(2,), + skip_artificial_synsets=False, + ) + + self.assertFalse(_get_node_id_set(tree)) + self.assertFalse(_make_edge_dict(tree)) + + def test_no_artificial(self): + """Omit the artificial synset from the graph.""" + tree = _write_and_read( + self.__plwn, + graph_type='mixed', + skip_artificial_synsets=True, + ) + + self.assertEqual( + _get_node_id_set(tree), + frozenset(( + u'synset-1', + u'synset-2', + u'synset-3', + u'lexical_unit-11', + u'lexical_unit-21', + u'lexical_unit-31', + )), + ) + + edges = _make_edge_dict(tree) + self.assertEqual(len(edges), 10) + self.__assert_edge_l2s_11_1(edges) + self.__assert_edge_l2s_21_2(edges) + self.__assert_edge_l2s_31_3(edges) + self.__assert_edge_s2l_1_11(edges) + self.__assert_edge_s2l_2_21(edges) + self.__assert_edge_s2l_3_31(edges) + self.__assert_edge_syn_1_2(edges) + self.__assert_edge_syn_2_3(edges) + self.__assert_edge_lex_11_21(edges) + self.__assert_edge_lex_21_31(edges) + + +def _write_and_read(plwn, 
**kwargs): + outfile = io.BytesIO() + plwn.to_graphml(outfile, **kwargs) + return et.XML(outfile.getvalue()) + + +def _get_data_values(synset_elem): + return { + data_elem.attrib[u'key']: data_elem.text + for data_elem in synset_elem.findall(u'./data') + } + + +def _make_attr_key_dicts(xml_root): + dicts = {} + + for key_elem in xml_root.findall(u'./key'): + attrdict = key_elem.attrib.copy() + attrid = attrdict.pop(u'id') + dicts[attrid] = attrdict + + return dicts + + +def _get_node_id_set(xml_root): + return frozenset( + elem.attrib[u'id'] + for elem in xml_root.findall(u'./graph/node') + ) + + +def _make_edge_dict(xml_root): + return { + edge_elem.attrib[u'id']: { + u'source': edge_elem.attrib[u'source'], + u'target': edge_elem.attrib[u'target'], + u'type': edge_elem.find(u"./data[@key='edge-type']").text, + u'name': edge_elem.find(u"./data[@key='edge-name']").text, + } + for edge_elem in xml_root.findall(u'./graph/edge') + } diff --git a/tests/abstract_cases/test_plwordnet.py b/tests/abstract_cases/test_plwordnet.py new file mode 100644 index 0000000000000000000000000000000000000000..748cfa4bcfe883ab4753d03b8bdaa2cbbc34e04b --- /dev/null +++ b/tests/abstract_cases/test_plwordnet.py @@ -0,0 +1,1163 @@ +# coding: utf8 +from __future__ import absolute_import, division + + +import unittest as ut + +from plwn.readers import nodes as nd +from plwn.bases import RelationEdge, RelationInfoBase +from plwn import exceptions as exc, enums as en + + +__all__ = ( + 'ItemSelectingTest', + 'SynsetRelationEdgesTest', + 'SynsetRelationEdgesWithArtificialTest', + 'SynsetRelationEdgesWithArtificialLoopTest', + 'LexicalUnitRelationEdgesTest', +) + + +class ItemSelectingTest(ut.TestCase): + + _PLWNClass = None # Override in subclass + + def setUp(self): + self.__plwn = self._PLWNClass.from_reader(( + nd.make_synset_node(id=1), + nd.make_synset_node(id=2, definition=u'tok'), + nd.make_synset_node(id=3), + nd.make_synset_node(id=4), + nd.make_lexical_unit_node( + id=11, + lemma=u'pies', + pos=en.PoS.n, + variant=1, + synset=1, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=21, + lemma=u'kot', + pos=en.PoS.n, + variant=1, + synset=2, + unit_index=1, + domain=en.Domain.bhp, + definition=u'kici', + ), + nd.make_lexical_unit_node( + id=22, + lemma=u'kot', + pos=en.PoS.n, + variant=2, + synset=2, + unit_index=2, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=31, + lemma=u'pies', + pos=en.PoS.n, + variant=2, + synset=3, + unit_index=1, + domain=en.Domain.bhp, + emotion_markedness=en.EmotionMarkedness.strong_negative, + emotion_names=(en.EmotionName.fear, en.EmotionName.disgust), + emotion_example_1=u'bim', + ), + # "Pies" is not a verb, but still, for testing... 
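+            # Fixture summary: "pies" appears in synsets 1 and 3 as a noun
+            # and in synset 4 as a verb; "kot" has two noun variants, both
+            # in synset 2.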
+            nd.make_lexical_unit_node(
+                id=41,
+                lemma=u'pies',
+                pos=en.PoS.v,
+                variant=1,
+                synset=4,
+                unit_index=1,
+                domain=en.Domain.bhp,
+                verb_aspect=en.VerbAspect.ndk,
+            ),
+        ))
+
+    def tearDown(self):
+        self.__plwn.close()
+
+    def test_synsets_nonexistent(self):
+        """Select a synset that doesn't exist (in the "synsets" method)."""
+        self.assertFalse(tuple(self.__plwn.synsets(u'not')))
+
+    def test_synsets_by_lemma(self):
+        """Select several synsets with the same lemma."""
+        self.assertEqual(
+            frozenset(self.__plwn.synsets(u'pies')),
+            frozenset((
+                self.__plwn.synset_by_id(1),
+                self.__plwn.synset_by_id(3),
+                self.__plwn.synset_by_id(4),
+            )),
+        )
+
+    def test_synsets_by_pos(self):
+        """Select several synsets with the same pos."""
+        self.assertEqual(
+            frozenset(self.__plwn.synsets(pos=en.PoS.n)),
+            frozenset((
+                self.__plwn.synset_by_id(1),
+                self.__plwn.synset_by_id(2),
+                self.__plwn.synset_by_id(3),
+            )),
+        )
+
+    def test_synsets_by_combination(self):
+        """Select several synsets by lemma and pos."""
+        self.assertEqual(
+            frozenset(self.__plwn.synsets(lemma=u'pies', pos=en.PoS.n)),
+            frozenset((
+                self.__plwn.synset_by_id(1),
+                self.__plwn.synset_by_id(3),
+            )),
+        )
+
+    def test_synsets_one(self):
+        """Select one synset via the "synsets" method."""
+        self.assertEqual(
+            frozenset(
+                self.__plwn.synsets(lemma=u'pies', pos=en.PoS.n, variant=1),
+            ),
+            frozenset((self.__plwn.synset_by_id(1),)),
+        )
+
+    def test_synset_multi_lexunits(self):
+        """Select a lemma/pos combination.
+
+        The combination points to multiple lexical units within the same
+        synset.
+        """
+        self.assertEqual(
+            frozenset(self.__plwn.synsets(lemma=u'kot', pos=en.PoS.n)),
+            frozenset((self.__plwn.synset_by_id(2),)),
+        )
+
+    def test_synsets_all(self):
+        """Select all synsets."""
+        self.assertEqual(
+            frozenset(self.__plwn.synsets()),
+            frozenset((
+                self.__plwn.synset_by_id(1),
+                self.__plwn.synset_by_id(2),
+                self.__plwn.synset_by_id(3),
+                self.__plwn.synset_by_id(4),
+            )),
+        )
+
+    def test_synset_any_none(self):
+        """Make sure the method does not accept ``None`` as arguments."""
+        self.assertRaises(
+            exc.SynsetNotFound,
+            self.__plwn.synset,
+            None,
+            en.PoS.n,
+            1,
+        )
+        self.assertRaises(
+            exc.SynsetNotFound,
+            self.__plwn.synset,
+            u'kot',
+            None,
+            1,
+        )
+        self.assertRaises(
+            exc.SynsetNotFound,
+            self.__plwn.synset,
+            u'kot',
+            en.PoS.n,
+            None,
+        )
+
+    def test_synset_one(self):
+        """Select a single synset."""
+        sel_one = self.__plwn.synset(lemma=u'kot', pos=en.PoS.n, variant=1)
+        sel_id = self.__plwn.synset_by_id(2)
+
+        self.assertEqual(sel_one, sel_id)
+        # As this tests both "synset" and "synset_by_id", ensure all
+        # properties are correct; those that were set, anyway.
+ self.assertEqual(sel_one.id, 2) + self.assertEqual(sel_one.id, sel_id.id) + self.assertEqual(sel_one.definition, u'tok') + self.assertEqual(sel_one.definition, sel_id.definition) + self.assertFalse(sel_one.is_artificial) + self.assertEqual(sel_one.is_artificial, sel_id.is_artificial) + self.assertEqual( + sel_one.lexical_units, + ( + self.__plwn.lexical_unit_by_id(21), + self.__plwn.lexical_unit_by_id(22), + ), + ) + self.assertEqual(sel_one.lexical_units, sel_id.lexical_units) + + def test_synset_nonexistent(self): + """Select a nonexistent synset.""" + self.assertRaises( + exc.SynsetNotFound, + self.__plwn.synset, + u'not', + en.PoS.n, + 1, + ) + + def test_synset_by_id_nonexistent(self): + """Select a nonexistent id.""" + self.assertRaises( + exc.SynsetNotFound, + self.__plwn.synset_by_id, + 100, + ) + + def test_lexical_units_nonexistent(self): + """Select a nonexistent combination of properties.""" + self.assertFalse(tuple(self.__plwn.lexical_units(u'not'))) + + def test_lexical_units_by_lemma(self): + """Select several lexical units sharing a lemma.""" + self.assertEqual( + frozenset(self.__plwn.lexical_units(u'pies')), + frozenset(( + self.__plwn.lexical_unit_by_id(11), + self.__plwn.lexical_unit_by_id(31), + self.__plwn.lexical_unit_by_id(41), + )), + ) + + def test_lexical_units_by_pos(self): + """Select a lexical unit by pos.""" + self.assertEqual( + frozenset(self.__plwn.lexical_units(pos=en.PoS.v)), + frozenset((self.__plwn.lexical_unit_by_id(41),)), + ) + + def test_lexical_units_by_combination(self): + """Select a lexical unit by combination of lemma and pos.""" + self.assertEqual( + frozenset( + self.__plwn.lexical_units(lemma=u'pies', pos=en.PoS.n), + ), + frozenset(( + self.__plwn.lexical_unit_by_id(11), + self.__plwn.lexical_unit_by_id(31), + )), + ) + + def test_lexical_units_one(self): + """Select one lexical unit using a full combination.""" + self.assertEqual( + frozenset( + self.__plwn.lexical_units( + lemma=u'kot', + pos=en.PoS.n, + variant=2, + ) + ), + frozenset((self.__plwn.lexical_unit_by_id(22),)), + ) + + def test_lexical_units_all(self): + """Select all lexical units.""" + self.assertEqual( + frozenset(self.__plwn.lexical_units()), + frozenset(( + self.__plwn.lexical_unit_by_id(11), + self.__plwn.lexical_unit_by_id(21), + self.__plwn.lexical_unit_by_id(22), + self.__plwn.lexical_unit_by_id(31), + self.__plwn.lexical_unit_by_id(41), + )), + ) + + def test_lexical_unit_any_none(self): + """Make sure the method does not accept ``None`` as arguments.""" + self.assertRaises( + exc.LexicalUnitNotFound, + self.__plwn.lexical_unit, + None, + en.PoS.n, + 1, + ) + self.assertRaises( + exc.LexicalUnitNotFound, + self.__plwn.lexical_unit, + u'kot', + None, + 1, + ) + self.assertRaises( + exc.LexicalUnitNotFound, + self.__plwn.lexical_unit, + u'kot', + en.PoS.n, + None, + ) + + def test_lexical_unit_one(self): + """Select a single lexical unit.""" + sel_one = self.__plwn.lexical_unit( + lemma=u'kot', + pos=en.PoS.n, + variant=1, + ) + sel_id = self.__plwn.lexical_unit_by_id(21) + + self.assertEqual(sel_one, sel_id) + # As this tests both "lexical_unit" and "lexical_unit_by_id", compare + # if some of the properties are correct. Don't check all to not + # duplicate tests; XXX but maybe move all tests to here and remove some + # external test cases. 
+ self.assertEqual(sel_one.id, 21) + self.assertEqual(sel_one.id, sel_id.id) + self.assertEqual(sel_one.lemma, u'kot') + self.assertEqual(sel_one.lemma, sel_id.lemma) + self.assertIs(sel_one.pos, en.PoS.n) + self.assertEqual(sel_one.pos, sel_id.pos) + self.assertEqual(sel_one.variant, 1) + self.assertEqual(sel_one.variant, sel_id.variant) + self.assertEqual(sel_one.definition, u'kici') + self.assertEqual(sel_one.definition, sel_id.definition) + + def test_lexical_unit_nonexistent(self): + """Select a nonexistent lexical unit.""" + self.assertRaises( + exc.LexicalUnitNotFound, + self.__plwn.lexical_unit, + lemma=u'not', + pos=en.PoS.n, + variant=1, + ) + + def test_lexical_unit_by_id_nonexistent(self): + """Select a nonexistent id.""" + self.assertRaises( + exc.LexicalUnitNotFound, + self.__plwn.lexical_unit_by_id, + 100, + ) + + +class SynsetRelationEdgesTest(ut.TestCase): + """Inspect synset relations in a simple graph. + + :: + .----. mero/cz .----. + | 4 |<------------| 3 | + | |------------>| | + '----' holo/cz '----' + ^ | ^ | + | |hipo | |hipo + hiper| | hiper| | + | v | v + .----. mero/cz .----. + | 1 |<------------| 2 | + | |------------>| | + '----' holo/cz '----' + """ + + _PLWNClass = None # Override in subclass + + def setUp(self): + self.__plwn = self._PLWNClass.from_reader(( + nd.make_relation_type_node( + kind=en.RelationKind.synset, + name=u'hiperonimia', + ), + nd.make_relation_type_node( + kind=en.RelationKind.synset, + name=u'hiponimia', + aliases=('hipo',), + ), + # nd.make_relation_type_node( + # kind=en.RelationKind.synset, + # name=u'meronimia', + # ), + # nd.make_relation_type_node( + # kind=en.RelationKind.synset, + # name=u'holonimia', + # ), + nd.make_relation_type_node( + kind=en.RelationKind.synset, + name=u'część', + parent=u'meronimia', + ), + nd.make_relation_type_node( + kind=en.RelationKind.synset, + name=u'część', + parent=u'holonimia', + ), + nd.make_synset_node( + id=1, + related=( + (u'hiperonimia', 4), + ( + RelationInfoBase.format_name(u'holonimia', u'część'), + 2, + ), + ), + ), + nd.make_synset_node( + id=2, + related=( + ( + RelationInfoBase.format_name(u'meronimia', u'część'), + 1, + ), + (u'hiperonimia', 3), + ), + ), + nd.make_synset_node( + id=3, + related=( + (u'hiponimia', 2), + ( + RelationInfoBase.format_name(u'meronimia', u'część'), + 4, + ), + ), + ), + nd.make_synset_node( + id=4, + related=( + (u'hiponimia', 1), + ( + RelationInfoBase.format_name(u'holonimia', u'część'), + 3, + ), + ), + ), + nd.make_lexical_unit_node( + id=11, + lemma=u'aaa', + pos=en.PoS.n, + variant=1, + synset=1, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=21, + lemma=u'aaa', + pos=en.PoS.n, + variant=2, + synset=2, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=31, + lemma=u'aaa', + pos=en.PoS.n, + variant=3, + synset=3, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=41, + lemma=u'aaa', + pos=en.PoS.n, + variant=4, + synset=4, + unit_index=1, + domain=en.Domain.bhp, + verb_aspect=en.VerbAspect.perf, + ), + )) + + try: + self.__rel_hiper = tuple(self.__plwn.relations_info( + u'hiperonimia', + en.RelationKind.synset, + ))[0] + self.__rel_hipo = tuple(self.__plwn.relations_info( + u'hiponimia', + en.RelationKind.synset, + ))[0] + self.__rel_mero = tuple(self.__plwn.relations_info( + u'meronimia/część', + en.RelationKind.synset, + ))[0] + self.__rel_holo = tuple(self.__plwn.relations_info( + u'holonimia/część', + en.RelationKind.synset, + ))[0] + except BaseException: + 
self.__plwn.close() + raise + + def tearDown(self): + self.__plwn.close() + + def test_include(self): + self.assertEqual( + frozenset(self.__plwn.synset_relation_edges( + include=(u'hiperonimia', u'meronimia'), + )), + frozenset(( + RelationEdge( + source=self.__plwn.synset_by_id(1), + target=self.__plwn.synset_by_id(4), + relation=self.__rel_hiper, + ), + RelationEdge( + source=self.__plwn.synset_by_id(2), + target=self.__plwn.synset_by_id(3), + relation=self.__rel_hiper, + ), + RelationEdge( + source=self.__plwn.synset_by_id(3), + target=self.__plwn.synset_by_id(4), + relation=self.__rel_mero, + ), + RelationEdge( + source=self.__plwn.synset_by_id(2), + target=self.__plwn.synset_by_id(1), + relation=self.__rel_mero, + ), + )), + ) + + def test_exclude(self): + self.assertEqual( + frozenset(self.__plwn.synset_relation_edges( + exclude=(u'holonimia/część', u'hipo'), + )), + frozenset(( + RelationEdge( + source=self.__plwn.synset_by_id(1), + target=self.__plwn.synset_by_id(4), + relation=self.__rel_hiper, + ), + RelationEdge( + source=self.__plwn.synset_by_id(2), + target=self.__plwn.synset_by_id(3), + relation=self.__rel_hiper, + ), + RelationEdge( + source=self.__plwn.synset_by_id(3), + target=self.__plwn.synset_by_id(4), + relation=self.__rel_mero, + ), + RelationEdge( + source=self.__plwn.synset_by_id(2), + target=self.__plwn.synset_by_id(1), + relation=self.__rel_mero, + ), + )), + ) + + def test_all(self): + self.assertEqual( + frozenset(self.__plwn.synset_relation_edges()), + frozenset(( + RelationEdge( + source=self.__plwn.synset_by_id(1), + target=self.__plwn.synset_by_id(4), + relation=self.__rel_hiper, + ), + RelationEdge( + source=self.__plwn.synset_by_id(2), + target=self.__plwn.synset_by_id(3), + relation=self.__rel_hiper, + ), + RelationEdge( + source=self.__plwn.synset_by_id(3), + target=self.__plwn.synset_by_id(4), + relation=self.__rel_mero, + ), + RelationEdge( + source=self.__plwn.synset_by_id(2), + target=self.__plwn.synset_by_id(1), + relation=self.__rel_mero, + ), + RelationEdge( + source=self.__plwn.synset_by_id(4), + target=self.__plwn.synset_by_id(1), + relation=self.__rel_hipo, + ), + RelationEdge( + source=self.__plwn.synset_by_id(3), + target=self.__plwn.synset_by_id(2), + relation=self.__rel_hipo, + ), + RelationEdge( + source=self.__plwn.synset_by_id(4), + target=self.__plwn.synset_by_id(3), + relation=self.__rel_holo, + ), + RelationEdge( + source=self.__plwn.synset_by_id(1), + target=self.__plwn.synset_by_id(2), + relation=self.__rel_holo, + ), + )), + ) + + def test_combined(self): + self.assertEqual( + frozenset(self.__plwn.synset_relation_edges( + include=(u'meronimia/część', u'hiperonimia'), + exclude=(u'meronimia', u'holonimia'), + )), + frozenset(( + RelationEdge( + source=self.__plwn.synset_by_id(1), + target=self.__plwn.synset_by_id(4), + relation=self.__rel_hiper, + ), + RelationEdge( + source=self.__plwn.synset_by_id(2), + target=self.__plwn.synset_by_id(3), + relation=self.__rel_hiper, + ), + )), + ) + + +class SynsetRelationEdgesWithArtificialTest(ut.TestCase): + """Get relation edges from a graph. + + Where skipping artificial synsets will produce interesting results. + + :: + .---. .---. + | 3 | | 4 | + '---' '---' + | ^ ^ | + hiper | | hipo hipo | | hiper + | | | | + | '---.----.----' | + | | 2a | | + '---->'----'<-----' + | ^ + hiper | | hipo + v | + .---. 
+        | 1 |
+        '---'
+    """
+
+    _PLWNClass = None  # Override in subclass
+
+    def setUp(self):
+        self.__plwn = self._PLWNClass.from_reader((
+            nd.make_relation_type_node(
+                kind=en.RelationKind.synset,
+                name=u'hiponimia',
+            ),
+            nd.make_relation_type_node(
+                kind=en.RelationKind.synset,
+                name=u'hiperonimia',
+            ),
+            nd.make_synset_node(
+                id=1,
+                related=((u'hiponimia', 2),),
+            ),
+            nd.make_synset_node(
+                id=2,
+                related=(
+                    (u'hiponimia', 3),
+                    (u'hiponimia', 4),
+                    (u'hiperonimia', 1),
+                ),
+                is_artificial=True,
+            ),
+            nd.make_synset_node(
+                id=3,
+                related=((u'hiperonimia', 2),),
+            ),
+            nd.make_synset_node(
+                id=4,
+                related=((u'hiperonimia', 2),),
+            ),
+            nd.make_lexical_unit_node(
+                id=11,
+                lemma=u'aaa',
+                pos=en.PoS.n,
+                variant=1,
+                synset=1,
+                unit_index=1,
+                domain=en.Domain.bhp,
+            ),
+            nd.make_lexical_unit_node(
+                id=21,
+                lemma=u'aaa',
+                pos=en.PoS.n,
+                variant=2,
+                synset=2,
+                unit_index=1,
+                domain=en.Domain.bhp,
+            ),
+            nd.make_lexical_unit_node(
+                id=31,
+                lemma=u'aaa',
+                pos=en.PoS.n,
+                variant=3,
+                synset=3,
+                unit_index=1,
+                domain=en.Domain.bhp,
+            ),
+            nd.make_lexical_unit_node(
+                id=41,
+                lemma=u'aaa',
+                pos=en.PoS.n,
+                variant=4,
+                synset=4,
+                unit_index=1,
+                domain=en.Domain.bhp,
+            ),
+        ))
+
+        try:
+            self.__rel_hiper = tuple(self.__plwn.relations_info(
+                u'hiperonimia',
+                en.RelationKind.synset,
+            ))[0]
+            self.__rel_hipo = tuple(self.__plwn.relations_info(
+                u'hiponimia',
+                en.RelationKind.synset,
+            ))[0]
+        except BaseException:
+            self.__plwn.close()
+            raise
+
+    def tearDown(self):
+        self.__plwn.close()
+
+    def test_all(self):
+        self.assertEqual(
+            frozenset(
+                self.__plwn.synset_relation_edges(skip_artificial=False)
+            ),
+            frozenset((
+                RelationEdge(
+                    source=self.__plwn.synset_by_id(1),
+                    target=self.__plwn.synset_by_id(2),
+                    relation=self.__rel_hipo,
+                ),
+                RelationEdge(
+                    source=self.__plwn.synset_by_id(2),
+                    target=self.__plwn.synset_by_id(1),
+                    relation=self.__rel_hiper,
+                ),
+                RelationEdge(
+                    source=self.__plwn.synset_by_id(2),
+                    target=self.__plwn.synset_by_id(3),
+                    relation=self.__rel_hipo,
+                ),
+                RelationEdge(
+                    source=self.__plwn.synset_by_id(2),
+                    target=self.__plwn.synset_by_id(4),
+                    relation=self.__rel_hipo,
+                ),
+                RelationEdge(
+                    source=self.__plwn.synset_by_id(3),
+                    target=self.__plwn.synset_by_id(2),
+                    relation=self.__rel_hiper,
+                ),
+                RelationEdge(
+                    source=self.__plwn.synset_by_id(4),
+                    target=self.__plwn.synset_by_id(2),
+                    relation=self.__rel_hiper,
+                ),
+            )),
+        )
+
+    def test_skipping(self):
+        self.assertEqual(
+            frozenset(
+                self.__plwn.synset_relation_edges(skip_artificial=True)
+            ),
+            frozenset((
+                RelationEdge(
+                    source=self.__plwn.synset_by_id(1),
+                    target=self.__plwn.synset_by_id(3),
+                    relation=self.__rel_hipo,
+                ),
+                RelationEdge(
+                    source=self.__plwn.synset_by_id(1),
+                    target=self.__plwn.synset_by_id(4),
+                    relation=self.__rel_hipo,
+                ),
+                RelationEdge(
+                    source=self.__plwn.synset_by_id(3),
+                    target=self.__plwn.synset_by_id(1),
+                    relation=self.__rel_hiper,
+                ),
+                RelationEdge(
+                    source=self.__plwn.synset_by_id(4),
+                    target=self.__plwn.synset_by_id(1),
+                    relation=self.__rel_hiper,
+                ),
+            )),
+        )
+
+
+class SynsetRelationEdgesWithArtificialLoopTest(ut.TestCase):
+    """Check for an infinite loop while skipping edges in a looping graph.
+
+    ::
+        .---. foo .----.
+        | 1 |---->| 2a |<--.
+        '---'     '----'   |
+          ^          |     |
+          |foo       |foo  |foo
+          |          v     |
+        .---. foo .----.
| + | 4 |<----| 3a |---' + '---' '----' + """ + + _PLWNClass = None # Override in subclass + + def setUp(self): + self.__plwn = self._PLWNClass.from_reader(( + nd.make_relation_type_node( + kind=en.RelationKind.synset, + name=u'foo', + ), + nd.make_synset_node(id=1, related=((u'foo', 2),)), + nd.make_synset_node( + id=2, + related=((u'foo', 3),), + is_artificial=True, + ), + nd.make_synset_node( + id=3, + related=((u'foo', 2), (u'foo', 4)), + is_artificial=True, + ), + nd.make_synset_node(id=4, related=((u'foo', 1),)), + nd.make_lexical_unit_node( + id=11, + lemma=u'aaa', + pos=en.PoS.n, + variant=1, + synset=1, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=21, + lemma=u'aaa', + pos=en.PoS.n, + variant=2, + synset=2, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=31, + lemma=u'aaa', + pos=en.PoS.n, + variant=3, + synset=3, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=41, + lemma=u'aaa', + pos=en.PoS.n, + variant=4, + synset=4, + unit_index=1, + domain=en.Domain.bhp, + ), + )) + + def tearDown(self): + self.__plwn.close() + + def test_all(self): + syn1 = self.__plwn.synset_by_id(1) + syn2 = self.__plwn.synset_by_id(2) + syn3 = self.__plwn.synset_by_id(3) + syn4 = self.__plwn.synset_by_id(4) + rel = next(iter(self.__plwn.relations_info(u'foo')), None) + self.assertEqual( + frozenset( + self.__plwn.synset_relation_edges(skip_artificial=False) + ), + frozenset(( + RelationEdge(source=syn1, target=syn2, relation=rel), + RelationEdge(source=syn2, target=syn3, relation=rel), + RelationEdge(source=syn3, target=syn2, relation=rel), + RelationEdge(source=syn3, target=syn4, relation=rel), + RelationEdge(source=syn4, target=syn1, relation=rel), + )), + ) + + def test_skipping(self): + syn1 = self.__plwn.synset_by_id(1) + syn4 = self.__plwn.synset_by_id(4) + rel = next(iter(self.__plwn.relations_info(u'foo')), None) + self.assertEqual( + frozenset( + self.__plwn.synset_relation_edges(skip_artificial=True) + ), + frozenset(( + RelationEdge(source=syn1, target=syn4, relation=rel), + RelationEdge(source=syn4, target=syn1, relation=rel), + )), + ) + + +class LexicalUnitRelationEdgesTest(ut.TestCase): + """Inspect lexical relations in a simple graph. + + :: + .----. mero/cz .----. + | 41 |<------------| 31 | + | |------------>| | + '----' holo/cz '----' + ^ | ^ | + | |hipo | |hipo + hiper| | hiper| | + | v | v + .----. mero/cz .----. + | 11 |<------------| 21 | + | |------------>| | + '----' holo/cz '----' + + Relation names are like for synsets, because they don't really matter. 
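+    Here ``mero/cz`` and ``holo/cz`` abbreviate ``meronimia/część`` and
+    ``holonimia/część``.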
+ """ + + _PLWNClass = None # Override in subclass + + def setUp(self): + self.__plwn = self._PLWNClass.from_reader(( + nd.make_relation_type_node( + kind=en.RelationKind.lexical, + name=u'hiperonimia', + ), + nd.make_relation_type_node( + kind=en.RelationKind.lexical, + name=u'hiponimia', + ), + nd.make_relation_type_node( + kind=en.RelationKind.lexical, + name=u'część', + parent=u'holonimia', + aliases=(u'holo',), + ), + nd.make_relation_type_node( + kind=en.RelationKind.lexical, + name=u'część', + parent=u'meronimia', + ), + nd.make_synset_node(id=1), + nd.make_synset_node(id=2), + nd.make_synset_node(id=3), + nd.make_synset_node(id=4), + nd.make_lexical_unit_node( + id=11, + lemma=u'aaa', + pos=en.PoS.n, + variant=1, + synset=1, + unit_index=1, + domain=en.Domain.bhp, + related=((u'holonimia/część', 21), (u'hiperonimia', 41)), + ), + nd.make_lexical_unit_node( + id=21, + lemma=u'bbb', + pos=en.PoS.n, + variant=1, + synset=2, + unit_index=1, + domain=en.Domain.bhp, + related=((u'meronimia/część', 11), (u'hiperonimia', 31)), + ), + nd.make_lexical_unit_node( + id=31, + lemma=u'ccc', + pos=en.PoS.n, + variant=1, + synset=3, + unit_index=1, + domain=en.Domain.bhp, + related=((u'meronimia/część', 41), (u'hiponimia', 21)), + ), + nd.make_lexical_unit_node( + id=41, + lemma=u'ddd', + pos=en.PoS.n, + variant=1, + synset=4, + unit_index=1, + domain=en.Domain.bhp, + related=((u'holonimia/część', 31), (u'hiponimia', 11)), + verb_aspect=en.VerbAspect.perf, + ), + )) + + try: + self.__rel_hiper = tuple(self.__plwn.relations_info( + u'hiperonimia', + en.RelationKind.lexical, + ))[0] + self.__rel_hipo = tuple(self.__plwn.relations_info( + u'hiponimia', + en.RelationKind.lexical, + ))[0] + self.__rel_holo = tuple(self.__plwn.relations_info( + u'holonimia/część', + en.RelationKind.lexical, + ))[0] + self.__rel_mero = tuple(self.__plwn.relations_info( + u'meronimia/część', + en.RelationKind.lexical, + ))[0] + except BaseException: + self.__plwn.close() + raise + + def tearDown(self): + self.__plwn.close() + + def test_include(self): + self.assertEqual( + frozenset(self.__plwn.lexical_relation_edges(include=( + u'hiperonimia', + u'meronimia/część', + ))), + frozenset(( + RelationEdge( + source=self.__plwn.lexical_unit_by_id(11), + target=self.__plwn.lexical_unit_by_id(41), + relation=self.__rel_hiper, + ), + RelationEdge( + source=self.__plwn.lexical_unit_by_id(21), + target=self.__plwn.lexical_unit_by_id(31), + relation=self.__rel_hiper, + ), + RelationEdge( + source=self.__plwn.lexical_unit_by_id(31), + target=self.__plwn.lexical_unit_by_id(41), + relation=self.__rel_mero, + ), + RelationEdge( + source=self.__plwn.lexical_unit_by_id(21), + target=self.__plwn.lexical_unit_by_id(11), + relation=self.__rel_mero, + ), + )), + ) + + def test_exclude(self): + self.assertEqual( + frozenset( + # Also, check aliases + self.__plwn.lexical_relation_edges(exclude=(u'holo',)) + ), + frozenset(( + RelationEdge( + source=self.__plwn.lexical_unit_by_id(11), + target=self.__plwn.lexical_unit_by_id(41), + relation=self.__rel_hiper, + ), + RelationEdge( + source=self.__plwn.lexical_unit_by_id(21), + target=self.__plwn.lexical_unit_by_id(31), + relation=self.__rel_hiper, + ), + RelationEdge( + source=self.__plwn.lexical_unit_by_id(41), + target=self.__plwn.lexical_unit_by_id(11), + relation=self.__rel_hipo, + ), + RelationEdge( + source=self.__plwn.lexical_unit_by_id(31), + target=self.__plwn.lexical_unit_by_id(21), + relation=self.__rel_hipo, + ), + RelationEdge( + source=self.__plwn.lexical_unit_by_id(31), + 
target=self.__plwn.lexical_unit_by_id(41), + relation=self.__rel_mero, + ), + RelationEdge( + source=self.__plwn.lexical_unit_by_id(21), + target=self.__plwn.lexical_unit_by_id(11), + relation=self.__rel_mero, + ), + )), + ) + + def test_all(self): + self.assertEqual( + frozenset(self.__plwn.lexical_relation_edges()), + frozenset(( + RelationEdge( + source=self.__plwn.lexical_unit_by_id(11), + target=self.__plwn.lexical_unit_by_id(41), + relation=self.__rel_hiper, + ), + RelationEdge( + source=self.__plwn.lexical_unit_by_id(21), + target=self.__plwn.lexical_unit_by_id(31), + relation=self.__rel_hiper, + ), + RelationEdge( + source=self.__plwn.lexical_unit_by_id(31), + target=self.__plwn.lexical_unit_by_id(41), + relation=self.__rel_mero, + ), + RelationEdge( + source=self.__plwn.lexical_unit_by_id(21), + target=self.__plwn.lexical_unit_by_id(11), + relation=self.__rel_mero, + ), + RelationEdge( + source=self.__plwn.lexical_unit_by_id(41), + target=self.__plwn.lexical_unit_by_id(11), + relation=self.__rel_hipo, + ), + RelationEdge( + source=self.__plwn.lexical_unit_by_id(31), + target=self.__plwn.lexical_unit_by_id(21), + relation=self.__rel_hipo, + ), + RelationEdge( + source=self.__plwn.lexical_unit_by_id(41), + target=self.__plwn.lexical_unit_by_id(31), + relation=self.__rel_holo, + ), + RelationEdge( + source=self.__plwn.lexical_unit_by_id(11), + target=self.__plwn.lexical_unit_by_id(21), + relation=self.__rel_holo, + ), + )), + ) + + def test_combined(self): + self.assertEqual( + frozenset(self.__plwn.lexical_relation_edges( + include=(u'meronimia', u'hiperonimia'), + exclude=(u'meronimia', u'holonimia'), + )), + frozenset(( + RelationEdge( + source=self.__plwn.lexical_unit_by_id(11), + target=self.__plwn.lexical_unit_by_id(41), + relation=self.__rel_hiper, + ), + RelationEdge( + source=self.__plwn.lexical_unit_by_id(21), + target=self.__plwn.lexical_unit_by_id(31), + relation=self.__rel_hiper, + ), + )), + ) diff --git a/tests/abstract_cases/test_unit_and_synset.py b/tests/abstract_cases/test_unit_and_synset.py new file mode 100644 index 0000000000000000000000000000000000000000..626e50f2161d79f5026a2152a3becc2392617fd9 --- /dev/null +++ b/tests/abstract_cases/test_unit_and_synset.py @@ -0,0 +1,981 @@ +# coding: utf8 +# XXX Relation operators for synsets and lexical units are not explicitly +# tested (only ordering), since they're not very important. 
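+#
+# These cases are abstract: each class sets ``_PLWNClass = None`` and is run
+# through concrete subclasses defined in a storage backend's test module,
+# along the lines of (backend name is hypothetical):
+#
+#     class SqliteSynsetPropertiesTest(SynsetPropertiesTest):
+#         _PLWNClass = plwn.storages.sqlite.PLWordNet
+#
+# so every case listed in ``__all__`` runs once per backend.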
+from __future__ import absolute_import, division + +import unittest as ut + +import six +from plwn.bases import RelationInfoBase +from plwn.readers import nodes as nd +from plwn import exceptions as exc, enums as en + + +__all__ = ( + 'SynsetPropertiesTest', + 'SynsetRelationsTest', + 'SynsetRelationsWithArtificialTest', + 'SynsetRelationsWithArtificialLoopTest', + 'LexicalUnitPropertiesTest', + 'LexicalUnitRelationsTest', + 'ItemOrderingTest', + 'ToDictTest', +) + + +class SynsetPropertiesTest(ut.TestCase): + + _PLWNClass = None # Override in subclass + + def setUp(self): + self.__plwn = self._PLWNClass.from_reader(( + nd.make_synset_node(id=1, definition=u'foobar'), + nd.make_lexical_unit_node( + id=11, + lemma=u'aaa', + pos=en.PoS.n, + variant=1, + synset=1, + unit_index=1, + domain=en.Domain.bhp, + ), + )) + try: + self.__syn = self.__plwn.synset_by_id(1) + except BaseException: + self.__plwn.close() + raise + + def tearDown(self): + self.__plwn.close() + + def test_id(self): + self.assertEqual(self.__syn.id, 1) + + def test_definition(self): + self.assertEqual(self.__syn.definition, u'foobar') + + def test_is_artificial(self): + self.assertIs(self.__syn.is_artificial, False) + + +class SynsetRelationsTest(ut.TestCase): + + _PLWNClass = None # Override in subclass + + def setUp(self): + self.__plwn = self._PLWNClass.from_reader(( + nd.make_relation_type_node( + kind=en.RelationKind.synset, + name=u'foo', + ), + nd.make_relation_type_node( + kind=en.RelationKind.synset, + name=u'hiperonimia', + parent=u'foo', + ), + nd.make_relation_type_node( + kind=en.RelationKind.synset, + name=u'hiponimia', + aliases=(u'hipo',), + ), + nd.make_synset_node( + id=1, + related=( + ( + RelationInfoBase.format_name(u'foo', u'hiperonimia'), + 2, + ), + (u'hiponimia', 3), + ), + ), + nd.make_synset_node(id=2, related=((u'hiponimia', 3),)), + nd.make_synset_node(id=3), + nd.make_lexical_unit_node( + id=11, + lemma=u'aaa', + pos=en.PoS.n, + variant=1, + synset=1, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=21, + lemma=u'aaa', + pos=en.PoS.n, + variant=2, + synset=2, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=31, + lemma=u'aaa', + pos=en.PoS.n, + variant=3, + synset=3, + unit_index=1, + domain=en.Domain.bhp, + ), + )) + try: + self.__syn1 = self.__plwn.synset_by_id(1) + self.__syn2 = self.__plwn.synset_by_id(2) + self.__syn3 = self.__plwn.synset_by_id(3) + self.__rel_hiper = next(iter(self.__plwn.relations_info( + RelationInfoBase.format_name(u'foo', u'hiperonimia'), + en.RelationKind.synset, + ))) + self.__rel_hipo = next(iter(self.__plwn.relations_info( + u'hiponimia', + en.RelationKind.synset, + ))) + except BaseException: + self.__plwn.close() + raise + + def tearDown(self): + self.__plwn.close() + + def test_related(self): + related = tuple(self.__syn1.related(RelationInfoBase.format_name( + u'foo', + u'hiperonimia', + ))) + self.assertEqual(len(related), 1) + self.assertEqual(related[0], self.__syn2) + + def test_related_alias(self): + # Try getting relations by alias + related = tuple(self.__syn1.related(u'hipo')) + self.assertEqual(len(related), 1) + self.assertEqual(related[0], self.__syn3) + + def test_empty_related(self): + self.assertFalse(tuple(self.__syn2.related(u'hiperonimia'))) + + def test_nonexistent_relation(self): + self.assertRaises( + exc.InvalidRelationTypeException, + self.__syn1.related, + u'not', + ) + + def test_all_related(self): + # Without parameter select all related + related = 
frozenset(self.__syn1.related()) + self.assertEqual(len(related), 2) + self.assertIn(self.__syn2, related) + self.assertIn(self.__syn3, related) + + def test_tuple_related(self): + related = frozenset(self.__syn1.related(( + RelationInfoBase.format_name(u'foo', u'hiperonimia'), + u'hipo', + ))) + self.assertEqual(len(related), 2) + self.assertIn(self.__syn2, related) + self.assertIn(self.__syn3, related) + + def test_related_pairs(self): + related = frozenset(self.__syn1.related_pairs(( + RelationInfoBase.format_name(u'foo', u'hiperonimia'), + u'hipo', + ))) + self.assertEqual(len(related), 2) + self.assertIn( + (self.__rel_hiper, self.__syn2), + related, + ) + self.assertIn( + (self.__rel_hipo, self.__syn3), + related, + ) + + def test_relations(self): + self.assertEqual( + frozenset(self.__syn1.relations), + frozenset((self.__rel_hiper, self.__rel_hipo)), + ) + + +class SynsetRelationsWithArtificialTest(ut.TestCase): + """Proper testing of artificial synset skipping requires more complex setup. + + Artificial skipping may skip several nodes in a row. + + :: + hipo .---. + .------->| 3 | + | '---' + | + | hipo .---. + | .---------->| 4 | + | | '---' + | | + .---. hipo .----. mero .---. .---. + | 1 |------>| 2a |-------->| 5 | | 7 | + '---' '----' '---' '---' + | ^ + | hipo .----. hipo | + '---------->| 6a |--------. + '----' | + v + .---. + | 8 | + '---' + """ + + _PLWNClass = None # Override in subclass + + def setUp(self): + # Don't use relation type nodes, as a test for that case. + self.__plwn = self._PLWNClass.from_reader(( + nd.make_synset_node( + id=1, + related=((u'hiponimia', 3), (u'hiponimia', 2)), + ), + nd.make_synset_node( + id=2, + related=( + (u'hiponimia', 4), + (u'hiponimia', 6), + (u'meronimia', 5), + ), + is_artificial=True, + ), + nd.make_synset_node(id=3), + nd.make_synset_node(id=4), + nd.make_synset_node(id=5), + nd.make_synset_node( + id=6, + related=((u'hiponimia', 7), (u'hiponimia', 8)), + is_artificial=True, + ), + nd.make_synset_node(id=7), + nd.make_synset_node(id=8), + nd.make_lexical_unit_node( + id=11, + lemma=u'aaa', + pos=en.PoS.n, + variant=1, + synset=1, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=21, + lemma=u'aaa', + pos=en.PoS.n, + variant=2, + synset=2, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=31, + lemma=u'aaa', + pos=en.PoS.n, + variant=3, + synset=3, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=41, + lemma=u'aaa', + pos=en.PoS.n, + variant=4, + synset=4, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=51, + lemma=u'aaa', + pos=en.PoS.n, + variant=5, + synset=5, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=61, + lemma=u'aaa', + pos=en.PoS.n, + variant=6, + synset=6, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=71, + lemma=u'aaa', + pos=en.PoS.n, + variant=7, + synset=7, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=81, + lemma=u'aaa', + pos=en.PoS.n, + variant=8, + synset=8, + unit_index=1, + domain=en.Domain.bhp, + ), + )) + + try: + self.__syn1 = self.__plwn.synset_by_id(1) + self.__rel_hipo = next(iter(self.__plwn.relations_info( + u'hiponimia', + en.RelationKind.synset, + ))) + except BaseException: + self.__plwn.close() + raise + + def tearDown(self): + self.__plwn.close() + + def test_related_noskip(self): + self.assertEqual( + frozenset( + self.__syn1.related(u'hiponimia', skip_artificial=False) + ), + 
frozenset(( + self.__plwn.synset_by_id(2), + self.__plwn.synset_by_id(3), + )), + ) + + def test_related_skip(self): + self.assertEqual( + frozenset(self.__syn1.related(u'hiponimia', skip_artificial=True)), + frozenset(( + self.__plwn.synset_by_id(3), + self.__plwn.synset_by_id(4), + self.__plwn.synset_by_id(7), + self.__plwn.synset_by_id(8), + )), + ) + + def test_related_pairs_noskip(self): + self.assertEqual( + frozenset( + self.__syn1.related_pairs(u'hiponimia', skip_artificial=False) + ), + frozenset(( + ( + self.__rel_hipo, + self.__plwn.synset_by_id(2), + ), + ( + self.__rel_hipo, + self.__plwn.synset_by_id(3), + ), + )), + ) + + def test_related_pairs_skip(self): + self.assertEqual( + frozenset(self.__syn1.related_pairs( + u'hiponimia', + skip_artificial=True, + )), + frozenset(( + ( + self.__rel_hipo, + self.__plwn.synset_by_id(3), + ), + ( + self.__rel_hipo, + self.__plwn.synset_by_id(4), + ), + ( + self.__rel_hipo, + self.__plwn.synset_by_id(7), + ), + ( + self.__rel_hipo, + self.__plwn.synset_by_id(8), + ), + )), + ) + + +class SynsetRelationsWithArtificialLoopTest(ut.TestCase): + """Test for an infinite loop. + + Where skipping artificial synsets would result in an infinite loop. + + :: + foo + .----------. + | | + v | + .---. foo .----. foo .----. foo .---. + | 1 |---->| 2a |---->| 3a |---->| 4 | + '---' '----' '----' '---' + """ + + _PLWNClass = None # Override in subclass + + def setUp(self): + self.__plwn = self._PLWNClass.from_reader(( + nd.make_relation_type_node( + kind=en.RelationKind.synset, + name=u'foo', + ), + nd.make_synset_node(id=1, related=((u'foo', 2),)), + nd.make_synset_node( + id=2, + related=((u'foo', 3),), + is_artificial=True, + ), + nd.make_synset_node( + id=3, + related=((u'foo', 4), (u'foo', 2)), + is_artificial=True, + ), + nd.make_synset_node(id=4), + nd.make_lexical_unit_node( + id=11, + lemma=u'aaa', + pos=en.PoS.n, + variant=1, + synset=1, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=21, + lemma=u'aaa', + pos=en.PoS.n, + variant=2, + synset=2, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=31, + lemma=u'aaa', + pos=en.PoS.n, + variant=3, + synset=3, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=41, + lemma=u'aaa', + pos=en.PoS.n, + variant=4, + synset=4, + unit_index=1, + domain=en.Domain.bhp, + ), + )) + + def tearDown(self): + self.__plwn.close() + + def test_related_noskip(self): + syn1 = self.__plwn.synset_by_id(1) + rel1 = frozenset(syn1.related(u'foo', skip_artificial=False)) + self.assertEqual( + rel1, + frozenset((self.__plwn.synset_by_id(2),)), + ) + + def test_related_skip(self): + syn1 = self.__plwn.synset_by_id(1) + rel1 = frozenset(syn1.related(u'foo', skip_artificial=True)) + self.assertEqual( + rel1, + frozenset((self.__plwn.synset_by_id(4),)), + ) + + def test_related_pairs_noskip(self): + syn1 = self.__plwn.synset_by_id(1) + rel1 = frozenset(syn1.related_pairs(u'foo', skip_artificial=False)) + foo = next(iter(self.__plwn.relations_info(u'foo')), None) + self.assertEqual( + rel1, + frozenset(((foo, self.__plwn.synset_by_id(2)),)), + ) + + def test_related_pairs_skip(self): + syn1 = self.__plwn.synset_by_id(1) + rel1 = frozenset(syn1.related_pairs(u'foo', skip_artificial=True)) + foo = next(iter(self.__plwn.relations_info(u'foo')), None) + self.assertEqual( + rel1, + frozenset(((foo, self.__plwn.synset_by_id(4)),)), + ) + + +class LexicalUnitPropertiesTest(ut.TestCase): + + _PLWNClass = None # Override in subclass + + def setUp(self): 
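+        # A single unit with every optional attribute populated, so each
+        # property test below has a concrete expected value.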
+ self.__plwn = self._PLWNClass.from_reader(( + nd.make_synset_node(id=1), + nd.make_lexical_unit_node( + id=11, + lemma=u'aaa', + pos=en.PoS.n, + variant=1, + synset=1, + unit_index=1, + definition=u'scary noise', + usage_notes=(u'K', u'B'), + external_links=(u'http://foo.bar',), + examples=(u'aaaand',), + examples_sources=(u'me',), + domain=en.Domain.cdel, + verb_aspect=en.VerbAspect.two_aspect, + is_emotional=True, + emotion_markedness=en.EmotionMarkedness.ambiguous, + emotion_names=(en.EmotionName.joy, en.EmotionName.trust), + emotion_valuations=( + en.EmotionValuation.error, + en.EmotionValuation.good, + ), + emotion_example_1=u'bim', + emotion_example_2=u'bam', + ), + )) + try: + self.__syn = self.__plwn.synset_by_id(1) + self.__lex = self.__plwn.lexical_unit_by_id(11) + except BaseException: + self.__plwn.close() + raise + + def tearDown(self): + self.__plwn.close() + + def test_id(self): + self.assertEqual(self.__lex.id, 11) + + def test_lemma(self): + self.assertEqual(self.__lex.lemma, u'aaa') + + def test_pos(self): + self.assertEqual(self.__lex.pos, en.PoS.n) + + def test_variant(self): + self.assertEqual(self.__lex.variant, 1) + + def test_synset(self): + self.assertEqual(self.__lex.synset, self.__syn) + + def test_definition(self): + self.assertEqual(self.__lex.definition, u'scary noise') + + def test_usage_notes(self): + self.assertEqual(self.__lex.usage_notes, (u'K', u'B')) + + def test_external_links(self): + self.assertEqual(self.__lex.external_links, (u'http://foo.bar',)) + + def test_examples(self): + self.assertEqual(self.__lex.sense_examples, (u'aaaand',)) + + def test_examples_sources(self): + self.assertEqual(self.__lex.sense_examples_sources, (u'me',)) + + def test_domain(self): + self.assertEqual(self.__lex.domain, en.Domain.cdel) + + def test_verb_aspect(self): + self.assertEqual(self.__lex.verb_aspect, en.VerbAspect.two) + + def test_is_emotional(self): + self.assertTrue(self.__lex.is_emotional) + + def test_emotion_markedness(self): + self.assertIs( + self.__lex.emotion_markedness, + en.EmotionMarkedness.ambiguous, + ) + + def test_emotion_names(self): + self.assertEqual( + self.__lex.emotion_names, + (en.EmotionName.joy, en.EmotionName.trust), + ) + + def test_emotion_valuations(self): + self.assertEqual( + self.__lex.emotion_valuations, + (en.EmotionValuation.error, en.EmotionValuation.good), + ) + + def test_emotion_example(self): + self.assertEqual(self.__lex.emotion_example, u'bim') + + def test_emotion_example_secondary(self): + self.assertEqual(self.__lex.emotion_example_secondary, u'bam') + + +class LexicalUnitRelationsTest(ut.TestCase): + + _PLWNClass = None # Override in subclass + + def setUp(self): + self.__plwn = self._PLWNClass.from_reader(( + nd.make_relation_type_node( + kind=en.RelationKind.lexical, + name=u'deminutywność', + aliases=(u'demi',), + ), + nd.make_relation_type_node( + kind=en.RelationKind.lexical, + name=u'mpar', + parent=u'syn', + ), + nd.make_synset_node(id=1), + nd.make_lexical_unit_node( + id=11, + lemma=u'aaa', + pos=en.PoS.n, + variant=1, + synset=1, + unit_index=1, + domain=en.Domain.bhp, + related=( + (u'deminutywność', 12), + (RelationInfoBase.format_name(u'syn', u'mpar'), 13), + ), + ), + nd.make_lexical_unit_node( + id=12, + lemma=u'bbb', + pos=en.PoS.n, + variant=1, + synset=1, + unit_index=2, + domain=en.Domain.bhp, + related=((u'deminutywność', 13),), + ), + nd.make_lexical_unit_node( + id=13, + lemma=u'ccc', + pos=en.PoS.n, + variant=1, + synset=1, + unit_index=3, + domain=en.Domain.bhp, + 
verb_aspect=en.VerbAspect.two,
+ ),
+ ))
+ try:
+ self.__lex11 = self.__plwn.lexical_unit_by_id(11)
+ self.__lex12 = self.__plwn.lexical_unit_by_id(12)
+ self.__lex13 = self.__plwn.lexical_unit_by_id(13)
+ self.__rel_mpar = next(iter(self.__plwn.relations_info(
+ RelationInfoBase.format_name(u'syn', u'mpar'),
+ en.RelationKind.lexical,
+ )))
+ self.__rel_demi = next(iter(self.__plwn.relations_info(
+ u'deminutywność',
+ en.RelationKind.lexical,
+ )))
+ except BaseException:
+ self.__plwn.close()
+ raise
+
+ def tearDown(self):
+ self.__plwn.close()
+
+ def test_related(self):
+ related = tuple(self.__lex11.related(RelationInfoBase.format_name(
+ u'syn',
+ u'mpar',
+ )))
+ self.assertEqual(len(related), 1)
+ self.assertEqual(related[0], self.__lex13)
+
+ def test_related_alias(self):
+ related = tuple(self.__lex12.related(u'demi'))
+ self.assertEqual(len(related), 1)
+ self.assertEqual(related[0], self.__lex13)
+
+ def test_empty_related(self):
+ self.assertFalse(tuple(self.__lex12.related(
+ RelationInfoBase.format_name(u'syn', u'mpar'),
+ )))
+
+ def test_nonexistent_relation(self):
+ self.assertRaises(
+ exc.InvalidRelationTypeException,
+ self.__lex11.related,
+ u'not',
+ )
+
+ def test_all_related(self):
+ # Without parameter select all related
+ related = frozenset(self.__lex11.related())
+ self.assertEqual(len(related), 2)
+ self.assertIn(self.__lex12, related)
+ self.assertIn(self.__lex13, related)
+
+ def test_tuple_related(self):
+ related = frozenset(self.__lex11.related((
+ self.__rel_demi,
+ self.__rel_mpar,
+ )))
+ self.assertEqual(len(related), 2)
+ self.assertIn(self.__lex12, related)
+ self.assertIn(self.__lex13, related)
+
+ def test_related_pairs(self):
+ related = frozenset(self.__lex11.related_pairs())
+ self.assertEqual(len(related), 2)
+ self.assertIn(
+ (self.__rel_demi, self.__lex12),
+ related,
+ )
+ self.assertIn(
+ (self.__rel_mpar, self.__lex13),
+ related,
+ )
+
+ def test_relations(self):
+ self.assertEqual(
+ frozenset(self.__lex11.relations),
+ frozenset((self.__rel_demi, self.__rel_mpar)),
+ )
+
+
+class ItemOrderingTest(ut.TestCase):
+
+ _PLWNClass = None # Override in subclass
+
+ def setUp(self):
+ self.__plwn = self._PLWNClass.from_reader((
+ nd.make_synset_node(id=1),
+ nd.make_synset_node(id=2),
+ nd.make_synset_node(id=3),
+ nd.make_lexical_unit_node(
+ id=11,
+ lemma=u'aaa',
+ pos=en.PoS.n,
+ variant=1,
+ synset=1,
+ unit_index=1,
+ domain=en.Domain.bhp,
+ ),
+ nd.make_lexical_unit_node(
+ id=21,
+ lemma=u'bbb',
+ pos=en.PoS.n,
+ variant=1,
+ synset=2,
+ unit_index=1,
+ domain=en.Domain.bhp,
+ ),
+ nd.make_lexical_unit_node(
+ id=22,
+ lemma=u'ąąą',
+ pos=en.PoS.n,
+ variant=2,
+ synset=2,
+ unit_index=2,
+ domain=en.Domain.bhp,
+ ),
+ nd.make_lexical_unit_node(
+ id=31,
+ lemma=u'ąąą',
+ pos=en.PoS.n,
+ variant=1,
+ synset=3,
+ unit_index=1,
+ domain=en.Domain.bhp,
+ verb_aspect=en.VerbAspect.two,
+ ),
+ ))
+
+ def tearDown(self):
+ self.__plwn.close()
+
+ def test_synsets_order(self):
+ syn1 = self.__plwn.synset_by_id(1)
+ syn2 = self.__plwn.synset_by_id(2)
+ syn3 = self.__plwn.synset_by_id(3)
+
+ list_ = [syn3, syn2, syn1]
+ list_.sort()
+ self.assertEqual(list_, [syn1, syn3, syn2])
+
+ def test_lexical_units_order(self):
+ lex11 = self.__plwn.lexical_unit_by_id(11)
+ lex21 = self.__plwn.lexical_unit_by_id(21)
+ lex22 = self.__plwn.lexical_unit_by_id(22)
+ lex31 = self.__plwn.lexical_unit_by_id(31)
+
+ list_ = [lex31, lex22, lex21, lex11]
+ list_.sort()
+ self.assertEqual(list_, [lex11, lex31, lex22, lex21])
+
+
+class 
ToDictTest(ut.TestCase): + + _PLWNClass = None # Override in subclass + + def setUp(self): + self.maxDiff = None + # Try not using the relation type nodes here + self.__plwn = self._PLWNClass.from_reader(( + nd.make_synset_node( + id=1, + definition=u'foo', + related=((u'hiperonimia', 2), (u'hiponimia', 3)), + ), + nd.make_synset_node(id=2), + nd.make_synset_node(id=3), + nd.make_lexical_unit_node( + id=11, + lemma=u'aaa', + pos=en.PoS.v, + variant=1, + synset=1, + unit_index=0, + definition=u'bar', + usage_notes=(u'baz', u'ban'), + external_links=(u'http://a.com',), + examples=(u'nothing',), + examples_sources=(u'none',), + domain=en.Domain.rz, + related=((u'derywacyjność', 21),), + verb_aspect=en.VerbAspect.pred, + emotion_markedness=en.EmotionMarkedness.strong_negative, + emotion_names=(en.EmotionName.surprise,), + emotion_valuations=( + en.EmotionValuation.ugliness, + en.EmotionValuation.error, + ), + emotion_example_1=u'Bad thing.', + ), + nd.make_lexical_unit_node( + id=21, + lemma=u'bbb', + pos=en.PoS.n, + variant=1, + synset=2, + unit_index=1, + domain=en.Domain.bhp, + ), + nd.make_lexical_unit_node( + id=31, + lemma=u'ccc', + pos=en.PoS.n, + variant=1, + synset=3, + unit_index=0, + domain=en.Domain.bhp, + ), + )) + self.__lex11_dict = { + u'id': 11, + u'lemma': u'aaa', + u'pos': u'verb', + u'variant': 1, + u'synset': 1, + u'definition': u'bar', + u'usage_notes': (u'baz', u'ban'), + u'external_links': (u'http://a.com',), + u'sense_examples': (u'nothing',), + u'sense_examples_sources': (u'none',), + u'domain': en.Domain.rz.value, + u'verb_aspect': en.VerbAspect.predicative.value, + u'emotion_markedness': en.EmotionMarkedness.minus_m.value, + u'emotion_names': (en.EmotionName.surprise.value,), + u'emotion_valuations': ( + en.EmotionValuation.error.value, + en.EmotionValuation.ugliness.value, + ), + u'emotion_example': u'Bad thing.', + u'emotion_example_secondary': None, + u'str': six.text_type(self.__plwn.lexical_unit_by_id(11)), + } + self.__lex11_dict_with_rel = { + u'related': { + u'derywacyjność': ( + ( + 21, + six.text_type( + self.__plwn.lexical_unit_by_id(21) + ), + ), + ), + }, + } + self.__lex11_dict_with_rel.update(self.__lex11_dict) + + def tearDown(self): + self.__plwn.close() + + def test_lexunit_dict(self): + lex11 = self.__plwn.lexical_unit_by_id(11) + self.assertEqual( + lex11.to_dict(include_related=False), + self.__lex11_dict, + ) + self.assertEqual(lex11.to_dict(), self.__lex11_dict_with_rel) + + def test_synset_dict(self): + syn1 = self.__plwn.synset_by_id(1) + + self.assertEqual( + syn1.to_dict(include_related=False, include_units_data=False), + { + u'id': 1, + u'str': six.text_type(syn1), + u'definition': u'foo', + u'units': ( + ( + 11, + six.text_type( + self.__plwn.lexical_unit_by_id(11) + ), + ), + ), + u'is_artificial': False, + }, + ) + self.assertEqual( + syn1.to_dict(include_related=False), + { + u'id': 1, + u'str': six.text_type(syn1), + u'definition': u'foo', + u'units': (self.__lex11_dict,), + u'is_artificial': False, + }, + ) + self.assertEqual( + syn1.to_dict(), + { + u'id': 1, + u'str': six.text_type(syn1), + u'definition': u'foo', + u'units': (self.__lex11_dict_with_rel,), + u'related': { + u'hiperonimia': ( + (2, six.text_type(self.__plwn.synset_by_id(2))), + ), + u'hiponimia': ( + (3, six.text_type(self.__plwn.synset_by_id(3))), + ), + }, + u'is_artificial': False, + }, + ) diff --git a/tests/cases/__init__.py b/tests/cases/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 
diff --git a/tests/cases/test_graphmlout.py b/tests/cases/test_graphmlout.py
new file mode 100644
index 0000000000000000000000000000000000000000..b021983945db873502fb8ab0cb40d825cd5eeb1d
--- /dev/null
+++ b/tests/cases/test_graphmlout.py
@@ -0,0 +1,202 @@
+# coding: utf8
+from __future__ import absolute_import, division
+
+
+import unittest as ut
+import io
+
+try:
+ import xml.etree.cElementTree as et
+except ImportError:
+ import xml.etree.ElementTree as et
+
+from plwn.utils.graphmlout import GraphMLWordNet
+
+
+__all__ = 'GraphMLTest',
+
+
+class GraphMLTest(ut.TestCase):
+
+ def setUp(self):
+ self.__gmo = GraphMLWordNet()
+
+ def test_ensure_directed(self):
+ """Graph type should always be directed."""
+ graph = self.__write_and_read()
+ self.assertEqual(
+ graph.find(u'./graph').attrib[u'edgedefault'],
+ u'directed',
+ )
+
+ def test_node(self):
+ """Test adding a node."""
+ self.__gmo.add_node(u'foo')
+ result = self.__write_and_read()
+ nodes = result.findall(u'./graph/node')
+
+ self.assertEqual(len(nodes), 1)
+ self.assertEqual(nodes[0].attrib[u'id'], u'foo')
+
+ def test_edge(self):
+ """Test adding an edge."""
+ self.__gmo.add_edge(u'a-b-c', u'a', u'b')
+ result = self.__write_and_read()
+ edges = result.findall(u'./graph/edge')
+
+ self.assertEqual(len(edges), 1)
+ self.assertEqual(edges[0].attrib[u'id'], u'a-b-c')
+ self.assertEqual(edges[0].attrib[u'source'], u'a')
+ self.assertEqual(edges[0].attrib[u'target'], u'b')
+
+ def test_node_badattr(self):
+ """Add an attribute that was not defined to a node."""
+ self.assertRaises(
+ KeyError,
+ self.__gmo.add_node,
+ u'foo',
+ {u'bar': u'baz'},
+ )
+
+ def test_edge_badattr(self):
+ """Add an attribute that was not defined to an edge."""
+ self.assertRaises(
+ KeyError,
+ self.__gmo.add_edge,
+ u'foo-bar',
+ u'foo',
+ u'bar',
+ {u'bar': u'baz'},
+ )
+
+ def test_str_attribute(self):
+ """Add a string attribute and make sure the key is added."""
+ self.__gmo.add_attribute_type(
+ u'foo',
+ u'foo_param',
+ GraphMLWordNet.DATA_TYPE_STR,
+ )
+ # Add a node carrying the attribute
+ self.__gmo.add_node(u'oof', {u'foo': u'ala'})
+
+ result = self.__write_and_read()
+ keys = result.findall(u'./key')
+
+ self.assertEqual(len(keys), 1)
+ self.assertEqual(keys[0].attrib[u'id'], u'foo')
+ self.assertEqual(keys[0].attrib[u'attr.name'], u'foo_param')
+ self.assertEqual(keys[0].attrib[u'attr.type'], u'string')
+ self.assertEqual(keys[0].attrib[u'for'], u'node')
+
+ nodedata = result.findall(u"./graph/node[@id='oof']/data")
+
+ self.assertEqual(len(nodedata), 1)
+ self.assertEqual(nodedata[0].attrib[u'key'], u'foo')
+ self.assertEqual(nodedata[0].text, u'ala')
+
+ def test_int_attribute(self):
+ """Add an int attribute and make sure the key is added."""
+ self.__gmo.add_attribute_type(
+ u'foo',
+ u'foo_param',
+ GraphMLWordNet.DATA_TYPE_INT,
+ )
+ # Add a node carrying the attribute
+ self.__gmo.add_node(u'oof', {u'foo': 1})
+
+ result = self.__write_and_read()
+ keys = result.findall(u'./key')
+
+ self.assertEqual(len(keys), 1)
+ self.assertEqual(keys[0].attrib[u'id'], u'foo')
+ self.assertEqual(keys[0].attrib[u'attr.name'], u'foo_param')
+ self.assertEqual(keys[0].attrib[u'attr.type'], u'long')
+ self.assertEqual(keys[0].attrib[u'for'], u'node')
+
+ nodedata = result.findall(u"./graph/node[@id='oof']/data")
+
+ self.assertEqual(len(nodedata), 1)
+ self.assertEqual(nodedata[0].attrib[u'key'], u'foo')
+ self.assertEqual(nodedata[0].text, u'1')
+
+ def test_bool_attribute(self):
+ """Add a boolean attribute and make sure the key is added."""
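+ # Judging by the assertions below, boolean data values are expected
+ # to be serialized as lowercase ``true``/``false`` text.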
+ self.__gmo.add_attribute_type(
+ u'foo',
+ u'foo_param',
+ GraphMLWordNet.DATA_TYPE_BOOL,
+ )
+ # Add nodes carrying the attribute
+ self.__gmo.add_node(u'oof', {u'foo': True})
+ self.__gmo.add_node(u'boof', {u'foo': False})
+
+ result = self.__write_and_read()
+ keys = result.findall(u'./key')
+
+ self.assertEqual(len(keys), 1)
+ self.assertEqual(keys[0].attrib[u'id'], u'foo')
+ self.assertEqual(keys[0].attrib[u'attr.name'], u'foo_param')
+ self.assertEqual(keys[0].attrib[u'attr.type'], u'boolean')
+ self.assertEqual(keys[0].attrib[u'for'], u'node')
+
+ nodedata = result.findall(u"./graph/node[@id='oof']/data")
+
+ self.assertEqual(len(nodedata), 1)
+ self.assertEqual(nodedata[0].attrib[u'key'], u'foo')
+ self.assertEqual(nodedata[0].text, u'true')
+
+ nodedata2 = result.findall(u"./graph/node[@id='boof']/data")
+
+ self.assertEqual(len(nodedata2), 1)
+ self.assertEqual(nodedata2[0].attrib[u'key'], u'foo')
+ self.assertEqual(nodedata2[0].text, u'false')
+
+ def test_json_attribute(self):
+ """Add a JSON attribute and make sure the key is added."""
+ self.__gmo.add_attribute_type(
+ u'foo',
+ u'foo_param',
+ GraphMLWordNet.DATA_TYPE_JSON,
+ )
+ # Add a node carrying the attribute
+ self.__gmo.add_node(u'oof', {u'foo': (1, 2, u'3')})
+
+ result = self.__write_and_read()
+ keys = result.findall(u'./key')
+
+ self.assertEqual(len(keys), 1)
+ self.assertEqual(keys[0].attrib[u'id'], u'foo')
+ self.assertEqual(keys[0].attrib[u'attr.name'], u'foo_param')
+ self.assertEqual(keys[0].attrib[u'attr.type'], u'string')
+ self.assertEqual(keys[0].attrib[u'for'], u'node')
+
+ nodedata = result.findall(u"./graph/node[@id='oof']/data")
+
+ self.assertEqual(len(nodedata), 1)
+ self.assertEqual(nodedata[0].attrib[u'key'], u'foo')
+ self.assertEqual(nodedata[0].text, u'[1, 2, "3"]')
+
+ # TODO: No enum tests
+
+ def test_edge_attribute(self):
+ """Add an ordinary string attribute, but to an edge."""
+ self.__gmo.add_attribute_type(
+ u'foo',
+ u'foo_param',
+ GraphMLWordNet.DATA_TYPE_STR,
+ u'edge',
+ )
+ # Add an edge for this attribute
+ self.__gmo.add_edge(u'a-b', u'a', u'b', {u'foo': u'bar'})
+
+ result = self.__write_and_read()
+ edgedata = result.findall(u"./graph/edge[@id='a-b']/data")
+
+ self.assertEqual(len(edgedata), 1)
+ self.assertEqual(edgedata[0].attrib[u'key'], u'foo')
+ self.assertEqual(edgedata[0].text, u'bar')
+
+ def __write_and_read(self):
+ with io.BytesIO() as ss:
+ self.__gmo.write(ss)
+ return et.fromstring(ss.getvalue())
diff --git a/tests/cases/test_sqlite_storage.py b/tests/cases/test_sqlite_storage.py
new file mode 100644
index 0000000000000000000000000000000000000000..d93121bffd99a9bdbf0a411d0be44c8b82ce6a21
--- /dev/null
+++ b/tests/cases/test_sqlite_storage.py
@@ -0,0 +1,9 @@
+from __future__ import absolute_import, division
+
+
+from plwn.storages import sqlite as sq
+from tests.abstract_cases import load_tests_from_abstract
+
+
+def load_tests(loader, tests, pattern):
+ return load_tests_from_abstract(loader, 'SQLite', sq.PLWordNet)
diff --git a/tests/cases/test_ubylmf_reader.py b/tests/cases/test_ubylmf_reader.py
new file mode 100644
index 0000000000000000000000000000000000000000..a8d7aba9944d7b84dab9e87c67291e7d6211ca1b
--- /dev/null
+++ b/tests/cases/test_ubylmf_reader.py
@@ -0,0 +1,563 @@
+# -*- coding: utf-8 -*-
+
+# FIXME Comments below suggest that there are compatibility issues with
+# some tests. Fix and uncomment them.
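+#
+# The reader under test is used as an iterable over node tuples. A rough
+# usage sketch (based only on how the tests below call it):
+#
+#     for node in ur.ubylmf_reader(BytesIO(xml_bytes)):
+#         ...  # yields lexical unit and synset nodes, compared below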
+
+from __future__ import absolute_import, division
+
+
+import unittest
+from plwn.readers import ubylmf as ur
+from plwn.readers.nodes import make_synset_node, make_lexical_unit_node
+from plwn.enums import PoS, Domain
+from plwn import exceptions as exc
+from io import BytesIO
+from xml.etree import ElementTree as et
+# Python2
+try:
+ from itertools import izip
+# Python3
+except ImportError:
+ izip = zip
+
+__all__ = 'UBYLMFReaderTest',
+
+ENCODING = 'UTF-8'
+
+test_xml = u"""<?xml version="1.0" encoding="UTF-8" ?>
+<LexicalResource dtdVersion="ubyDTD_1_0.dtd" name="plWordnet">
+<Lexicon languageIdentifier="pl" id="1" name="Słowosieć 2.2">
+
+<LexicalEntry id="15" partOfSpeech="noun">
+ <Lemma>
+ <FormRepresentation writtenForm="'patafizyka"/>
+ </Lemma>
+ <Sense id="plWN_Sense_628506" index="1" synset="plWN_Synset_396603">
+ <Definition>
+ <Statement statementType="usageNote">
+ <TextRepresentation writtenText="specj."/>
+ </Statement>
+ <Statement statementType="externalReference">
+ <TextRepresentation
+ writtenText="http://pl.wikipedia.org/wiki/Patafizyka"/>
+ </Statement>
+ </Definition>
+ <MonolingualExternalRef
+ externalSystem="WordnetLoom PartOfSpeech And SenseIndex"
+ externalReference="POS[plWN rzeczownik] 0"/>
+ <SemanticLabel label="noun.plWN_umy" type="domain"/>
+ </Sense>
+ <Sense id="plWN_Sense_54584" index="2" synset="plWN_Synset_36078">
+ <SenseRelation target="plWN_Sense_17308" relName="deminutywność"/>
+ <MonolingualExternalRef
+ externalSystem="WordnetLoom PartOfSpeech And SenseIndex"
+ externalReference="POS[plWN rzeczownik] 0"/>
+ <SemanticLabel label="noun.plWN_wytw" type="domain"/>
+ </Sense>
+ <Sense id="plWN_Sense_54584" index="3" synset="plWN_Synset_!#!">
+ <SenseRelation target="plWN_Sense_17308" relName="deminutywność"/>
+ <MonolingualExternalRef
+ externalSystem="WordnetLoom PartOfSpeech And SenseIndex"
+ externalReference="POS[plWN rzeczownik] 0"/>
+ <SemanticLabel label="noun.plWN_wytw" type="domain"/>
+ </Sense>
+</LexicalEntry>
+<Synset id="plWN_Synset_10">
+ <SynsetRelation target="plWN_Synset_9139" relName="hiperonimia"/>
+ <SynsetRelation target="plWN_Synset_19032" relName="wartość_cechy"/>
+ <SynsetRelation target="plWN_Synset_104177" relName="hiperonimia"/>
+ <SynsetRelation target="plWN_Synset_105404" relName="hiperonimia"/>
+ <SynsetRelation target="plWN_Synset_228433" relName="hiperonimia"/>
+</Synset>
+<Synset id="plWN_Synset_246792">
+ <SynsetRelation target="plWN_Synset_245829" relName="hiponimia"/>
+</Synset>
+</Lexicon>
+<SenseAxis id="433581" synsetOne="plWN_Synset_246792"
+ synsetTwo="WordNet 3.1 synset offset: 4730898">
+ <SenseAxisRelation target="433581" relName="Hipo_plWN-PWN"/>
+</SenseAxis>
+</LexicalResource>
+"""
+
+
+class UBYLMFReaderTest(unittest.TestCase):
+ def test_ubylmf_reader(self):
+ filelike = BytesIO(b"")
+ self.assertRaises(
+ et.ParseError,
+ next,
+ ur.ubylmf_reader(filelike),
+ )
+
+ lu1 = make_lexical_unit_node(
+ id=628506,
+ lemma=u"'patafizyka",
+ pos=PoS.n,
+ synset=396603,
+ unit_index=0,
+ usage_notes=(u"specj.",),
+ external_links=(u"http://pl.wikipedia.org/wiki/Patafizyka",),
+ domain=Domain.umy,
+ variant=1,
+ )
+ lu2 = make_lexical_unit_node(
+ id=54584,
+ lemma=u"'patafizyka",
+ pos=PoS.n,
+ synset=36078,
+ unit_index=0,
+ domain=Domain.wytw,
+ related=((u"deminutywność", 17308), ),
+ variant=2,
+ )
+ s1 = make_synset_node(
+ id=10,
+ related=(
+ (u"hiperonimia", 9139),
+ (u"wartość_cechy", 19032),
+ (u"hiperonimia", 104177),
+ (u"hiperonimia", 105404),
+ (u"hiperonimia", 228433)
+ ),
+ )
+ s2 = 
make_synset_node( + id=246792, + related=((u"hiponimia", 245829),), + ) + iter_corr = iter((lu1, lu2, s1, s2)) + filelike = BytesIO(test_xml.encode('UTF-8')) + for (read, correct) in izip(ur.ubylmf_reader(filelike), iter_corr): + self.assertEqual(read, correct) + + def test_make_lexicalunit(self): + # Wrong sense's ID + xml_sense = et.fromstring( + u""" + <Sense id="plWN_Sense_54584?" index="3" synset="plWN_Synset_!#!"> + </Sense> + """.encode(ENCODING) + ) + self.assertRaises( + exc.MalformedIdentifierException, + ur._make_lexicalunit, + et.Element(u"LexicalEntry"), + xml_sense + ) + # Incorrect synset's ID + xml_sense = et.fromstring( + u""" + <Sense id="plWN_Sense_54584" index="3" synset="plWN_Synset_!#!"> + </Sense> + """.encode(ENCODING) + ) + self.assertRaises( + exc.MalformedIdentifierException, + ur._make_lexicalunit, + et.Element(u"LexicalEntry"), + xml_sense + ) + # Incorrect variant (index) + xml_sense = et.fromstring( + u""" + <Sense id="plWN_Sense_54584" index="A" synset="plWN_Synset_33"> + </Sense> + """.encode(ENCODING) + ) + self.assertRaises( + ValueError, + ur._make_lexicalunit, + et.Element(u"LexicalEntry"), + xml_sense + ) + # Missing <Lemma> + xml_lu = et.fromstring( + u""" + <LexicalEntry id="15" partOfSpeech="noun"> + </LexicalEntry> + """.encode(ENCODING) + ) + xml_sense = et.fromstring( + u""" + <Sense id="plWN_Sense_54584" index="0" synset="plWN_Synset_33"> + </Sense> + """.encode(ENCODING) + ) + self.assertRaises( + AttributeError, + ur._make_lexicalunit, + xml_lu, + xml_sense + ) + # Empty <Lemma> + xml_lu = et.fromstring( + u""" + <LexicalEntry id="15" partOfSpeech="noun"> + <Lemma> + <FormRepresentation writtenForm=""/> + </Lemma> + </LexicalEntry> + """.encode(ENCODING) + ) + self.assertRaisesRegexp( + AssertionError, + "Lemma is empty", + ur._make_lexicalunit, + xml_lu, + xml_sense + ) + # Empty PoS + xml_lu = et.fromstring( + u""" + <LexicalEntry id="15" partOfSpeech=""> + <Lemma> + <FormRepresentation writtenForm="'patafizyka"/> + </Lemma> + </LexicalEntry> + """.encode(ENCODING) + ) + self.assertRaisesRegexp( + AssertionError, + "PoS is empty", + ur._make_lexicalunit, + xml_lu, + xml_sense + ) + # Don't check definitions and domain - they are in separate tests + # Incorrect unit index + xml_lu = et.fromstring( + u""" + <LexicalEntry id="15" partOfSpeech="noun"> + <Lemma> + <FormRepresentation writtenForm="'patafizyka"/> + </Lemma> + </LexicalEntry> + """.encode(ENCODING) + ) + xml_sense = et.fromstring( + u""" + <Sense id="plWN_Sense_54584" index="0" synset="plWN_Synset_33"> + <SemanticLabel label="noun.plWN_wytw" type="domain"/> + <MonolingualExternalRef + externalSystem="WordnetLoom PartOfSpeech And SenseIndex" + externalReference="POS[plWN rzeczownik] A"/> + </Sense> + """.encode(ENCODING) + ) + self.assertRaises( + exc.MalformedIdentifierException, + ur._make_lexicalunit, + xml_lu, + xml_sense + ) + # Check LU + xml_sense = et.fromstring( + u""" + <Sense id="plWN_Sense_628506" index="1" + synset="plWN_Synset_396603"> + <Definition> + <TextRepresentation writtenText="ExampleDefinition"/> + </Definition> + <Definition> + <Statement statementType="usageNote"> + <TextRepresentation writtenText="specj."/> + </Statement> + <Statement statementType="usageNote"> + <TextRepresentation writtenText="specj1"/> + </Statement> + <Statement statementType="externalReference"> + <TextRepresentation writtenText="http://"/> + </Statement> + </Definition> + <MonolingualExternalRef + externalSystem="WordnetLoom PartOfSpeech And SenseIndex" + externalReference="POS[plWN 
rzeczownik] 0"/>
+ <SemanticLabel label="noun.plWN_umy" type="domain"/>
+ <SenseRelation target="plWN_Sense_17308"
+ relName="deminutywność"/>
+ <SenseRelation target="plWN_Sense_17309"
+ relName="deminutywność"/>
+ <SenseExample id="0">
+ <TextRepresentation
+ writtenText="1° = (π/180) rad = 60′ = 3600″[##W:]"/>
+ </SenseExample>
+ <SenseExample id="1">
+ <TextRepresentation writtenText="1[##P:]"/>
+ </SenseExample>
+ </Sense>
+ """.encode(ENCODING)
+ )
+ lu = make_lexical_unit_node(
+ id=628506,
+ lemma=u"'patafizyka",
+ pos=PoS.n,
+ synset=396603,
+ unit_index=0,
+ definition=u"ExampleDefinition",
+ usage_notes=(u"specj.", u"specj1",),
+ external_links=(u"http://",),
+ examples=(u"1° = (π/180) rad = 60′ = 3600″", u"1"),
+ examples_sources=(u'W', u'P'),
+ domain=Domain.umy,
+ related=((u"deminutywność", 17308), (u"deminutywność", 17309)),
+ variant=1,
+ )
+ self.assertEqual(lu, ur._make_lexicalunit(xml_lu, xml_sense))
+
+ def test_make_synset(self):
+ xml_synset = et.fromstring(
+ u"""
+ <Synset id="plWN_Synset_246792">
+ </Synset>
+ """.encode(ENCODING)
+ )
+ sn = make_synset_node(id=246792)
+ self.assertEqual(sn, ur._make_synset(xml_synset))
+
+ xml_synset = et.fromstring(
+ u"""
+ <Synset id="plWN_Synset_246792">
+ <Definition>
+ <TextRepresentation writtenText="ExampleDefinition"/>
+ </Definition>
+ </Synset>
+ """.encode(ENCODING)
+ )
+ sn = make_synset_node(id=246792, definition=u"ExampleDefinition")
+ self.assertEqual(sn, ur._make_synset(xml_synset))
+
+ xml_synset = et.fromstring(
+ u"""
+ <Synset id="plWN_Synset_246792">
+ <SynsetRelation target="plWN_Synset_245829"
+ relName="hiponimia"/>
+ </Synset>
+ """.encode(ENCODING)
+ )
+ sn = make_synset_node(id=246792, related=((u"hiponimia", 245829),))
+ self.assertEqual(sn, ur._make_synset(xml_synset))
+
+ xml_synset = et.fromstring(
+ u"""
+ <Synset id="plWN_Synset_246792">
+ <Definition>
+ <TextRepresentation writtenText="ExampleDefinition"/>
+ </Definition>
+ <SynsetRelation target="plWN_Synset_245829" relName="hip"/>
+ <SynsetRelation target="plWN_Synset_245828" relName="ó"/>
+ </Synset>
+ """.encode(ENCODING)
+ )
+ sn = make_synset_node(
+ id=246792,
+ definition=u"ExampleDefinition",
+ related=((u"hip", 245829), (u"ó", 245828)),
+ )
+ self.assertEqual(sn, ur._make_synset(xml_synset))
+
+ xml_synset = et.fromstring(
+ u"""
+ <Synset id="plWN_Synset_??">
+ </Synset>
+ """.encode(ENCODING)
+ )
+ self.assertRaises(
+ exc.MalformedIdentifierException,
+ ur._make_synset,
+ xml_synset
+ )
+
+ xml_synset = et.fromstring(
+ u"""
+ <Synset id="plWN_Synset_246792">
+ <SynsetRelation target="plWN_Synset_245829" relName="hip"/>
+ <SynsetRelation target="plWN_Synset_?" 
relName="hip"/> + </Synset> + """.encode(ENCODING) + ) + sn = make_synset_node(id=246792, related=((u"hip", 245829),)) + self.assertEqual(sn, ur._make_synset(xml_synset)) + + def test_extract_definitions(self): + xml_sense = et.fromstring( + u""" + <Sense id="plWN_Sense_54584" index="3" synset="plWN_Synset_!#!"> + </Sense> + """.encode(ENCODING) + ) + self.assertTupleEqual( + (None, [], []), + ur._extract_definitions(xml_sense) + ) + + xml_sense = et.fromstring( + u""" + <Sense id="plWN_Sense_54584" index="3" synset="plWN_Synset_!#!"> + <Definition> + <TextRepresentation writtenText="ExampleDefinition"/> + </Definition> + </Sense> + """.encode(ENCODING) + ) + self.assertTupleEqual( + (u"ExampleDefinition", [], []), + ur._extract_definitions(xml_sense) + ) + + xml_sense = et.fromstring( + u""" + <Sense id="plWN_Sense_54584" index="3" synset="plWN_Synset_!#!"> + <Definition> + <Statement statementType="?"> + <TextRepresentation writtenText="specj."/> + </Statement> + <Statement statementType="externalReference"> + <TextRepresentation writtenText="http://"/> + </Statement> + <Statement statementType="externalReference"> + <TextRepresentation writtenText="http://1"/> + </Statement> + </Definition> + </Sense> + """.encode(ENCODING) + ) + self.assertTupleEqual( + (None, [], [u"http://", u"http://1"]), + ur._extract_definitions(xml_sense) + ) + + xml_sense = et.fromstring( + u""" + <Sense id="plWN_Sense_54584" index="3" synset="plWN_Synset_!#!"> + <Definition> + <Statement statementType="usageNote"> + <TextRepresentation writtenText="n1"/> + </Statement> + <Statement statementType="usageNote"> + <TextRepresentation writtenText="łóż2"/> + </Statement> + <Statement statementType="?"> + <TextRepresentation writtenText="specj."/> + </Statement> + <Statement statementType="externalReference"> + <TextRepresentation writtenText="http://"/> + </Statement> + <Statement statementType="externalReference"> + <TextRepresentation writtenText="http://1"/> + </Statement> + </Definition> + </Sense> + """.encode(ENCODING) + ) + self.assertTupleEqual( + (None, [u"n1", u"łóż2"], [u"http://", u"http://1"]), + ur._extract_definitions(xml_sense) + ) + + xml_sense = et.fromstring( + u""" + <Sense id="plWN_Sense_54584" index="3" synset="plWN_Synset_!#!"> + <Definition> + </Definition> + <Definition> + </Definition> + <Definition> + </Definition> + </Sense> + """.encode(ENCODING) + ) + self.assertRaisesRegexp( + AssertionError, + r"Too many definitions \(3\)", + ur._extract_definitions, + xml_sense + ) + + def test_get_domain(self): + xml_sense = et.fromstring( + u""" + <Sense id="plWN_Sense_54584" index="3" synset="plWN_Synset_!#!"> + <SemanticLabel label="noun.plWN_wytw" type="domain"/> + </Sense> + """.encode(ENCODING) + ) + self.assertEqual(u"noun.plWN_wytw", ur._get_domain(xml_sense)) + + xml_sense = et.fromstring( + u""" + <Sense id="plWN_Sense_54584" index="3" synset="plWN_Synset_!#!"> + <SemanticLabel label="ółźć" type="domain"/> + </Sense> + """.encode(ENCODING) + ) + self.assertEqual(u"ółźć", ur._get_domain(xml_sense)) + + xml_sense = et.fromstring( + u""" + <Sense id="plWN_Sense_54584" index="3" synset="plWN_Synset_!#!"> + <SemanticLabel label="1" type="domain"/> + <SemanticLabel label="2" type="domain"/> + </Sense> + """.encode(ENCODING) + ) + self.assertRaises(AssertionError, ur._get_domain, xml_sense) + self.assertRaisesRegexp( + AssertionError, + "2 SemanticLabel found, should be 1", + ur._get_domain, + xml_sense + ) + + xml_sense = et.fromstring( + u""" + <Sense id="plWN_Sense_54584" index="3" 
synset="plWN_Synset_!#!"> + <SemanticLabel label="1" type=""/> + </Sense> + """.encode(ENCODING) + ) + self.assertRaises(AssertionError, ur._get_domain, xml_sense) + # Python3 fails here, so leave it... Unless you have an idea how to + # make it in a cleaver way. + # self.assertRaisesRegexp( + # AssertionError, + # "SemanticLabel has type instead of domain", + # ur._get_domain, + # xml_sense + # ) + + xml_sense = et.fromstring( + u""" + <Sense id="plWN_Sense_54584" index="3" synset="plWN_Synset_!#!"> + <SemanticLabel label="1" type="ĄĘÓÅÅ»"/> + </Sense> + """.encode(ENCODING) + ) + self.assertRaises(AssertionError, ur._get_domain, xml_sense) + # assertRaisesRegexp tries to convert unicode to str implicitly + # without checking explicitly which version of Python is in use + # this won't work... + # self.assertRaisesRegexp( + # AssertionError, + # "SemanticLabel has type ĄĘÓÅÅ» instead of domain", + # ur._get_domain, + # xml_sense + # ) + + def test_extract_id(self): + self.assertEqual(ur._extract_id(u"id_1_and_id_2_234562"), 234562) + self.assertEqual(ur._extract_id(u"234562"), 234562) + self.assertEqual(ur._extract_id(u"łóżźć_234562"), 234562) + self.assertRaises( + exc.MalformedIdentifierException, + ur._extract_id, + u"id_" + ) + self.assertRaises( + exc.MalformedIdentifierException, + ur._extract_id, + u"łóżźć" + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/cases/test_wndb_reader.py b/tests/cases/test_wndb_reader.py new file mode 100644 index 0000000000000000000000000000000000000000..e60f631e4d178074bac2a36ab1e056335643a682 --- /dev/null +++ b/tests/cases/test_wndb_reader.py @@ -0,0 +1,667 @@ +# coding: utf8 +from __future__ import absolute_import, division, print_function + +import itertools as itt +import logging +import unittest as ut +import sys + +from plwn import enums as en +from plwn.readers import nodes as nd +from plwn.bases import RelationInfoBase + +_IS_PY2 = sys.version_info.major == 2 + +if _IS_PY2: + from plwn.readers.wndb import WNDBReader + import wndbmockup as wndbm +else: + wndbm = None + WNDBReader = None + + +__all__ = ( + 'GoodTest', + 'NonexistentRelationTest', + 'NonexistentSynsetInRelTest', + 'NonexistentUnitInRelTest', + 'SynsetWithNoUnitTest', + 'SynsetWithNoUnitInRelTest', + 'UnitWithNoSynsetInRelTest', + 'UnitWithNoSynsetTest', + 'UnitWithNonexistentSynsetTest', +) + + +# WNDBReader in general only works with python 2 (for now) +_py2_only = ut.skipIf(not _IS_PY2, 'Python 2 only functionality') + +_NODE_ORDER = [nd.RelationTypeNode, nd.LexicalUnitNode, nd.SynsetNode] + + +@_py2_only +class GoodTest(ut.TestCase): + """A case where everything's correct, touching all relevant tables.""" + + def setUp(self): + self.__dbm = wndbm.WnDbMockup() + + try: + self.__setup_mock() + except BaseException: + self.__dbm.close() + raise + + def __setup_mock(self): + self.__dbm.add_synset(1, u'Duży pies') + self.__dbm.add_synset(2, u'Duży ssak', abstract=1) + self.__dbm.add_synset(3, u'Duży kundel') + + rel_hipo = self.__dbm.add_relation_type(u'hiponimia', u'hipo') + # Make this relation have a fake parent, for testing + rel_par = self.__dbm.add_relation_type(u'foo', u'f') + rel_hiper = self.__dbm.add_relation_type( + u'hiperonimia', + u'hiper', + parent=rel_par, + ) + + self.__dbm.add_synset_relation(1, 2, rel_hipo) + self.__dbm.add_synset_relation(1, 3, rel_hiper) + + self.__dbm.add_lexical_unit( + synid=1, + lemma=u'psisko', + pos=2, + variant=1, + unitindex=1, + id_=11, + domain=1, + comment=u'##K: pot. 
+ )
+ self.__dbm.add_lexical_unit(
+ synid=1,
+ lemma=u'suczysko',
+ pos=2,
+ variant=1,
+ unitindex=2,
+ id_=12,
+ domain=1,
+ comment=u'##D: Samica dużego psa',
+ )
+ self.__dbm.add_lexical_unit(
+ synid=2,
+ lemma=u'ssaczysko',
+ pos=2,
+ variant=1,
+ unitindex=1,
+ id_=21,
+ domain=1,
+ )
+ self.__dbm.add_lexical_unit(
+ synid=3,
+ lemma=u'kundlisko',
+ # Make it a verb even if it isn't, to test it out
+ pos=1,
+ variant=1,
+ unitindex=1,
+ id_=31,
+ domain=2,
+ comment=u'{##L: www.mieszance.pl} {##L: dogpedia.com/mutt}',
+ verb_aspect=4,
+ )
+
+ self.__dbm.add_emotion(
+ lex_id=31,
+ emotions=u'smutek;wstręt',
+ valuations=None,
+ markedness=u'- s',
+ example1=u'bam',
+ unit_status=1,
+ )
+ self.__dbm.add_emotion(
+ lex_id=31,
+ emotions=None,
+ valuations=u'dobro;prawda;bob',
+ markedness=u'-m',
+ example2=u'bim',
+ unit_status=1,
+ super_annotation=1,
+ )
+ # Additional test for coalescing emotion data: emotions and valuations
+ # must be uniq'd.
+ self.__dbm.add_emotion(
+ lex_id=31,
+ emotions=u'radość;wstręt',
+ valuations=u'piękno;prawda',
+ markedness=u'+m',
+ example1=u'uh',
+ unit_status=1,
+ )
+
+ rel_zen = self.__dbm.add_relation_type(
+ u'żeńskość',
+ u'zen',
+ is_syn=False,
+ )
+
+ self.__dbm.add_lexical_relation(11, 12, rel_zen)
+
+ def tearDown(self):
+ self.__dbm.close()
+
+ def runTest(self):
+ with self.__dbm.sqlalchemy_url_file as db_file:
+ nodes = _process_read_nodes(self, WNDBReader(db_file))
+
+ self.assertEqual(
+ nodes,
+ set((
+ nd.make_relation_type_node(
+ kind=en.RelationKind.synset,
+ name=u'hiponimia',
+ aliases=(u'hipo',),
+ ),
+ nd.make_relation_type_node(
+ kind=en.RelationKind.synset,
+ name=u'hiperonimia',
+ aliases=(u'hiper',),
+ parent=u'foo',
+ ),
+ nd.make_relation_type_node(
+ kind=en.RelationKind.lexical,
+ name=u'żeńskość',
+ aliases=(u'zen',),
+ ),
+ nd.make_synset_node(
+ id=1,
+ definition=u'Duży pies',
+ related=(
+ (u'hiponimia', 2),
+ (
+ RelationInfoBase.format_name(
+ u'foo',
+ u'hiperonimia',
+ ),
+ 3,
+ ),
+ ),
+ ),
+ nd.make_synset_node(
+ id=2,
+ definition=u'Duży ssak',
+ is_artificial=True,
+ ),
+ nd.make_synset_node(
+ id=3,
+ definition=u'Duży kundel',
+ ),
+ nd.make_lexical_unit_node(
+ id=11,
+ lemma=u'psisko',
+ pos=en.PoS.n,
+ variant=1,
+ synset=1,
+ unit_index=1,
+ usage_notes=(u'pot.',),
+ examples=(u'Czarne psisko na rogu.',),
+ examples_sources=(u'W',),
+ domain=en.Domain.by_db_number(1),
+ related=((u'żeńskość', 12),),
+ ),
+ nd.make_lexical_unit_node(
+ id=12,
+ lemma=u'suczysko',
+ pos=en.PoS.n,
+ variant=1,
+ synset=1,
+ unit_index=2,
+ definition=u'Samica dużego psa',
+ domain=en.Domain.by_db_number(1),
+ ),
+ nd.make_lexical_unit_node(
+ id=21,
+ lemma=u'ssaczysko',
+ pos=en.PoS.n,
+ variant=1,
+ synset=2,
+ unit_index=1,
+ domain=en.Domain.by_db_number(1),
+ ),
+ nd.make_lexical_unit_node(
+ id=31,
+ lemma=u'kundlisko',
+ pos=en.PoS.v,
+ variant=1,
+ synset=3,
+ unit_index=1,
+ external_links=(u'www.mieszance.pl', u'dogpedia.com/mutt'),
+ domain=en.Domain.by_db_number(2),
+ verb_aspect=en.VerbAspect.two,
+ is_emotional=True,
+ emotion_markedness=en.EmotionMarkedness.strong_negative,
+ emotion_names=(
+ en.EmotionName.radosc,
+ en.EmotionName.smutek,
+ en.EmotionName.wstret,
+ ),
+ emotion_valuations=(
+ en.EmotionValuation.dobro,
+ en.EmotionValuation.piekno,
+ en.EmotionValuation.prawda,
+ ),
+ emotion_example_1=u'bam',
+ emotion_example_2=u'bim',
+ ),
+ )),
+ )
+
+
+@_py2_only
+class UnitWithNoSynsetTest(ut.TestCase):
+ """Try to add a unit belonging to no synset.
+
+ Should skip that unit.
+ """
+
+ def setUp(self):
+ self.__dbm = wndbm.WnDbMockup()
+
+ try:
+ self.__setup_mock()
+ except BaseException:
+ self.__dbm.close()
+ raise
+
+ def __setup_mock(self):
+ self.__dbm.add_synset(1)
+ # A correct unit
+ self.__dbm.add_lexical_unit(1, u'a', 2, 1, id_=1, domain=1)
+ # An incorrect unit
+ with self.__dbm.no_foreign_keys:
+ self.__dbm.add_lexical_unit(None, u'b', 2, 1, id_=2, domain=2)
+
+ def tearDown(self):
+ self.__dbm.close()
+
+ def runTest(self):
+ with self.__dbm.sqlalchemy_url_file as db_file:
+ nodes = _process_read_nodes(self, WNDBReader(db_file))
+
+ self.assertEqual(
+ nodes,
+ set((
+ nd.make_synset_node(id=1),
+ nd.make_lexical_unit_node(
+ id=1,
+ lemma=u'a',
+ pos=en.PoS.n,
+ variant=1,
+ synset=1,
+ unit_index=0,
+ domain=en.Domain.bhp,
+ ),
+ )),
+ )
+
+
+@_py2_only
+class UnitWithNonexistentSynsetTest(ut.TestCase):
+ """Unit has a synset that does not exist."""
+
+ def setUp(self):
+ self.__dbm = wndbm.WnDbMockup()
+
+ try:
+ self.__setup_mock()
+ except BaseException:
+ self.__dbm.close()
+ raise
+
+ def __setup_mock(self):
+ self.__dbm.add_synset(1)
+ # A correct unit
+ self.__dbm.add_lexical_unit(1, u'a', 2, 1, id_=1, domain=1)
+ # An incorrect unit
+ with self.__dbm.no_foreign_keys:
+ self.__dbm.add_lexical_unit(2, u'b', 2, 1, id_=2, domain=1)
+
+ def tearDown(self):
+ self.__dbm.close()
+
+ def runTest(self):
+ with self.__dbm.sqlalchemy_url_file as db_file:
+ nodes = _process_read_nodes(self, WNDBReader(db_file))
+
+ self.assertEqual(
+ nodes,
+ set((
+ nd.make_synset_node(id=1),
+ nd.make_lexical_unit_node(
+ id=1,
+ lemma=u'a',
+ pos=en.PoS.n,
+ variant=1,
+ synset=1,
+ unit_index=0,
+ domain=en.Domain.by_db_number(1),
+ ),
+ )),
+ )
+
+
+@_py2_only
+class SynsetWithNoUnitTest(ut.TestCase):
+ """An empty synset."""
+
+ def setUp(self):
+ self.__dbm = wndbm.WnDbMockup()
+
+ try:
+ self.__setup_mock()
+ except BaseException:
+ self.__dbm.close()
+ raise
+
+ def __setup_mock(self):
+ self.__dbm.add_synset(1)
+ self.__dbm.add_synset(2)
+ # One synset is going to be correct
+ self.__dbm.add_lexical_unit(1, u'a', 2, id_=1, domain=1)
+
+ def tearDown(self):
+ self.__dbm.close()
+
+ def runTest(self):
+ # The empty synset should be skipped
+ with self.__dbm.sqlalchemy_url_file as db_file:
+ nodes = _process_read_nodes(self, WNDBReader(db_file))
+
+ self.assertEqual(
+ nodes,
+ set((
+ nd.make_synset_node(id=1),
+ nd.make_lexical_unit_node(
+ id=1,
+ lemma=u'a',
+ pos=en.PoS.n,
+ variant=1,
+ synset=1,
+ unit_index=0,
+ domain=en.Domain.by_db_number(1),
+ ),
+ )),
+ )
+
+
+@_py2_only
+class NonexistentSynsetInRelTest(ut.TestCase):
+ """Try to add a relation to a bogus synset."""
+
+ def setUp(self):
+ self.__dbm = wndbm.WnDbMockup()
+
+ try:
+ self.__setup_mock()
+ except BaseException:
+ self.__dbm.close()
+ raise
+
+ def __setup_mock(self):
+ self.__dbm.add_synset(1)
+ # A synset needs a unit
+ self.__dbm.add_lexical_unit(1, u'a', 2, id_=1, domain=1)
+ # A relation is needed, any one
+ self.__dbm.add_relation_type(u'rel', u'r', id_=1)
+ # Now, relate to a synset which does not exist
+ with self.__dbm.no_foreign_keys:
+ self.__dbm.add_synset_relation(1, 2, 1)
+
+ def tearDown(self):
+ self.__dbm.close()
+
+ def runTest(self):
+ # Relation should be omitted, but synset and unit kept
+ with self.__dbm.sqlalchemy_url_file as db_file:
+ nodes = _process_read_nodes(self, WNDBReader(db_file))
+
+ self.assertEqual(
+ nodes,
+ set((
+ nd.make_synset_node(id=1),
+ nd.make_lexical_unit_node(
+ id=1,
+ lemma=u'a',
+ pos=en.PoS.n,
+ variant=1,
+ synset=1,
+ unit_index=0,
+ domain=en.Domain.by_db_number(1),
+ ),
+ )),
+ )
+
+
+@_py2_only
+class NonexistentUnitInRelTest(ut.TestCase):
+ """Try to add a lexical relation to a bogus unit."""
+
+ def setUp(self):
+ self.__dbm = wndbm.WnDbMockup()
+
+ try:
+ self.__setup_mock()
+ except BaseException:
+ self.__dbm.close()
+ raise
+
+ def __setup_mock(self):
+ self.__dbm.add_synset(1)
+ self.__dbm.add_lexical_unit(1, u'a', 2, id_=1, domain=1)
+ # Now, use any relation to link to a nonexistent unit
+ self.__dbm.add_relation_type(u'rel', u'r', is_syn=False, id_=1)
+ with self.__dbm.no_foreign_keys:
+ self.__dbm.add_lexical_relation(1, 2, 1)
+
+ def tearDown(self):
+ self.__dbm.close()
+
+ def runTest(self):
+ # As with synset, relation should be ignored, but unit and synset
+ # should be kept
+ with self.__dbm.sqlalchemy_url_file as db_file:
+ nodes = _process_read_nodes(self, WNDBReader(db_file))
+
+ self.assertEqual(
+ nodes,
+ set((
+ nd.make_synset_node(id=1),
+ nd.make_lexical_unit_node(
+ id=1,
+ lemma=u'a',
+ pos=en.PoS.n,
+ variant=1,
+ synset=1,
+ unit_index=0,
+ domain=en.Domain.by_db_number(1),
+ ),
+ )),
+ )
+
+
+@_py2_only
+class NonexistentRelationTest(ut.TestCase):
+ """Synsets / units exist, but the relation between them is undefined."""
+
+ # XXX Only synset relation is checked
+ def setUp(self):
+ self.__dbm = wndbm.WnDbMockup()
+
+ try:
+ self.__setup_mock()
+ except BaseException:
+ self.__dbm.close()
+ raise
+
+ def __setup_mock(self):
+ self.__dbm.add_synset(1)
+ self.__dbm.add_synset(2)
+ self.__dbm.add_lexical_unit(1, u'a', 2, id_=1, domain=1)
+ self.__dbm.add_lexical_unit(2, u'b', 2, id_=2, domain=1)
+ # Now, an undefined relation
+ with self.__dbm.no_foreign_keys:
+ self.__dbm.add_synset_relation(1, 2, 1)
+
+ def tearDown(self):
+ self.__dbm.close()
+
+ def runTest(self):
+ # Both synsets and units should be retained
+ with self.__dbm.sqlalchemy_url_file as db_file:
+ nodes = _process_read_nodes(self, WNDBReader(db_file))
+
+ self.assertEqual(
+ nodes,
+ set((
+ nd.make_synset_node(id=1),
+ nd.make_synset_node(id=2),
+ nd.make_lexical_unit_node(
+ id=1,
+ lemma=u'a',
+ pos=en.PoS.n,
+ variant=1,
+ synset=1,
+ unit_index=0,
+ domain=en.Domain.by_db_number(1),
+ ),
+ nd.make_lexical_unit_node(
+ id=2,
+ lemma=u'b',
+ pos=en.PoS.n,
+ variant=1,
+ synset=2,
+ unit_index=0,
+ domain=en.Domain.by_db_number(1),
+ ),
+ )),
+ )
+
+
+@_py2_only
+class UnitWithNoSynsetInRelTest(ut.TestCase):
+ """Unit belongs to no synset and appears in a lexical relation."""
+
+ def setUp(self):
+ self.__dbm = wndbm.WnDbMockup()
+
+ try:
+ self.__setup_mock()
+ except BaseException:
+ self.__dbm.close()
+ raise
+
+ def __setup_mock(self):
+ self.__dbm.add_synset(1)
+ self.__dbm.add_lexical_unit(1, u'a', 2, id_=1, domain=1)
+ # Now, use any relation to link to a unit created with no synset
+ self.__dbm.add_relation_type(u'rel', u'r', is_syn=False, id_=1)
+ with self.__dbm.no_foreign_keys:
+ self.__dbm.add_lexical_unit(None, u'b', 2, id_=2, domain=1)
+ self.__dbm.add_lexical_relation(1, 2, 1)
+
+ def tearDown(self):
+ self.__dbm.close()
+
+ def runTest(self):
+ # Both the relation and the bogus unit should be ignored
+ with self.__dbm.sqlalchemy_url_file as db_file:
+ nodes = _process_read_nodes(self, WNDBReader(db_file))
+
+ self.assertEqual(
+ nodes,
+ set((
+ nd.make_synset_node(id=1),
+ nd.make_lexical_unit_node(
+ id=1,
+ lemma=u'a',
+ pos=en.PoS.n,
+ variant=1,
+ synset=1,
+ unit_index=0,
+ domain=en.Domain.by_db_number(1),
+ ),
+ )),
+ )
+
+
+@_py2_only
+class SynsetWithNoUnitInRelTest(ut.TestCase):
+ """A synset in a relation exists, but has no units."""
+
+ def setUp(self):
+ self.__dbm = wndbm.WnDbMockup()
+
+ try:
+ self.__setup_mock()
+ except BaseException:
+ self.__dbm.close()
+ raise
+
+ def __setup_mock(self):
+ self.__dbm.add_synset(1)
+ # A synset needs a unit
+ self.__dbm.add_lexical_unit(1, u'a', 2, id_=1, domain=1)
+ # A relation is needed, any one
+ self.__dbm.add_relation_type(u'rel', u'r', id_=1)
+ # Now, create an empty synset and relate to it
+ self.__dbm.add_synset(2)
+ with self.__dbm.no_foreign_keys:
+ self.__dbm.add_synset_relation(1, 2, 1)
+
+ def tearDown(self):
+ self.__dbm.close()
+
+ def runTest(self):
+ # The relation and the empty synset should be omitted, but synset 1
+ # and its unit kept
+ with self.__dbm.sqlalchemy_url_file as db_file:
+ nodes = _process_read_nodes(self, WNDBReader(db_file))
+
+ self.assertEqual(
+ nodes,
+ set((
+ nd.make_synset_node(id=1),
+ nd.make_lexical_unit_node(
+ id=1,
+ lemma=u'a',
+ pos=en.PoS.n,
+ variant=1,
+ synset=1,
+ unit_index=0,
+ domain=en.Domain.by_db_number(1),
+ ),
+ )),
+ )
+
+
+def setUpModule():
+ logging.getLogger('wncomments').addHandler(logging.StreamHandler())
+
+
+def _process_read_nodes(utest, nodeiter):
+ type_order = []
+ all_nodes = set()
+
+ for k, g in itt.groupby(nodeiter, type):
+ type_order.append(k)
+ all_nodes.update(g)
+
+ # Test the order, and return the nodes for further testing.
+ # The order must be: relation types, lexical units and synsets, with no
+ # breaks. The first element (relation types) is optional, so don't count
+ # it if the order list is shorter.
+ utest.assertEqual(
+ type_order,
+ _NODE_ORDER[1:] if len(type_order) < 3 else _NODE_ORDER,
+ )
+ return all_nodes
diff --git a/tests/setuptools_loader.py b/tests/setuptools_loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..a62a2f9b8c783920f288e811c5667ec373b02a4e
--- /dev/null
+++ b/tests/setuptools_loader.py
@@ -0,0 +1,16 @@
+import locale
+import unittest as ut
+
+
+def setuptools_load_tests():
+ """This function should be set as ``test_suite`` in ``setup.py``.
+
+ Setuptools doesn't honor the ``load_tests`` protocol and would import
+ tests from a package many times, if told that the ``test_suite`` is a
+ package, so it needs to be pointed to a function which does the importing.
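+
+ A sketch of the matching ``setup.py`` entry (the dotted path assumes
+ this module lives at ``tests/setuptools_loader.py``)::
+
+ setup(
+ ...,
+ test_suite='tests.setuptools_loader.setuptools_load_tests',
+ )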
+
+ Also, make sure the sorting locale is Polish.
+ """
+ locale.setlocale(locale.LC_COLLATE, ('pl_PL', 'UTF-8'))
+ return ut.defaultTestLoader.discover('tests/cases', top_level_dir='tests')
diff --git a/tox.ini b/tox.ini
index 66929ec087524845e2f13bc3f48c2656aad11adc..c90605fa04a78a39ed9782cba82f246184f154bf 100644
--- a/tox.ini
+++ b/tox.ini
@@ -26,16 +26,19 @@
 max-line-length = 80
 
 [pydocstyle]
+# D100 Missing docstring in public module
 # D101 Missing docstring in public class
 # D102 Missing docstring in public method
 # D103 Missing docstring in public function
 # D104 Missing docstring in public package
 # D105 Missing docstring in magic method
+# D107 Missing docstring in __init__
 # D203 1 blank line required before class docstring
 # D213 Multi-line docstring summary should start at the second line
 # D214 Section is over-indented
 # D215 Section underline is over-indented
 # D401 First line should be in imperative mood; try rephrasing
+# D403 First word of the first line should be properly capitalized
 # D405 Section name should be properly capitalized
 # D406 Section name should end with a newline
 # D407 Missing dashed underline after section
@@ -43,6 +46,6 @@
 # D409 Section underline should match the length of its name
 # D410 Missing blank line after section
 # D411 Missing blank line before section
-ignore = D101,D102,D103,D104,D105,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411
+ignore = D100,D101,D102,D103,D104,D105,D107,D203,D213,D214,D215,D401,D403,D405,D406,D407,D408,D409,D410,D411
 match-dir = ^(?!\.tox|venv).*
 match = ^(?!setup).*\.py