Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
lexworkers
plwordnet
Commits
8d1e87d8
Commit
8d1e87d8
authored
Jul 14, 2020
by
Leszek Szymczak
Committed by
Mateusz Gniewkowski
Jul 14, 2020
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Plwordnet to rancher
parent
12185e6b
Changes
40
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
40 changed files
with
212 additions
and
5393 deletions
+212
-5393
.gitlab-ci.yml
.gitlab-ci.yml
+32
-0
Dockerfile
Dockerfile
+10
-9
README
README
+0
-10
docker-compose.yml
docker-compose.yml
+10
-10
main.py
main.py
+37
-0
requirements.txt
requirements.txt
+2
-1
src/PLWN_API-0.9/MANIFEST.in
src/PLWN_API-0.9/MANIFEST.in
+0
-1
src/PLWN_API-0.9/PKG-INFO
src/PLWN_API-0.9/PKG-INFO
+0
-10
src/PLWN_API-0.9/PLWN_API.egg-info/PKG-INFO
src/PLWN_API-0.9/PLWN_API.egg-info/PKG-INFO
+0
-10
src/PLWN_API-0.9/PLWN_API.egg-info/SOURCES.txt
src/PLWN_API-0.9/PLWN_API.egg-info/SOURCES.txt
+0
-28
src/PLWN_API-0.9/PLWN_API.egg-info/dependency_links.txt
src/PLWN_API-0.9/PLWN_API.egg-info/dependency_links.txt
+0
-1
src/PLWN_API-0.9/PLWN_API.egg-info/requires.txt
src/PLWN_API-0.9/PLWN_API.egg-info/requires.txt
+0
-2
src/PLWN_API-0.9/PLWN_API.egg-info/top_level.txt
src/PLWN_API-0.9/PLWN_API.egg-info/top_level.txt
+0
-1
src/PLWN_API-0.9/README-pl-beta.txt
src/PLWN_API-0.9/README-pl-beta.txt
+0
-72
src/PLWN_API-0.9/plwn/.bases.py.swp
src/PLWN_API-0.9/plwn/.bases.py.swp
+0
-0
src/PLWN_API-0.9/plwn/__init__.py
src/PLWN_API-0.9/plwn/__init__.py
+0
-6
src/PLWN_API-0.9/plwn/_loading.py
src/PLWN_API-0.9/plwn/_loading.py
+0
-146
src/PLWN_API-0.9/plwn/bases.py
src/PLWN_API-0.9/plwn/bases.py
+0
-994
src/PLWN_API-0.9/plwn/enums.py
src/PLWN_API-0.9/plwn/enums.py
+0
-309
src/PLWN_API-0.9/plwn/exceptions.py
src/PLWN_API-0.9/plwn/exceptions.py
+0
-121
src/PLWN_API-0.9/plwn/readers/__init__.py
src/PLWN_API-0.9/plwn/readers/__init__.py
+0
-0
src/PLWN_API-0.9/plwn/readers/comments.py
src/PLWN_API-0.9/plwn/readers/comments.py
+0
-84
src/PLWN_API-0.9/plwn/readers/nodes.py
src/PLWN_API-0.9/plwn/readers/nodes.py
+0
-16
src/PLWN_API-0.9/plwn/readers/ubylmf.py
src/PLWN_API-0.9/plwn/readers/ubylmf.py
+0
-258
src/PLWN_API-0.9/plwn/readers/wndb.py
src/PLWN_API-0.9/plwn/readers/wndb.py
+0
-284
src/PLWN_API-0.9/plwn/readers/wnxml.py
src/PLWN_API-0.9/plwn/readers/wnxml.py
+0
-210
src/PLWN_API-0.9/plwn/relation_aliases.tsv
src/PLWN_API-0.9/plwn/relation_aliases.tsv
+0
-5
src/PLWN_API-0.9/plwn/relresolver.py
src/PLWN_API-0.9/plwn/relresolver.py
+0
-130
src/PLWN_API-0.9/plwn/storages/__init__.py
src/PLWN_API-0.9/plwn/storages/__init__.py
+0
-0
src/PLWN_API-0.9/plwn/storages/objects.py
src/PLWN_API-0.9/plwn/storages/objects.py
+0
-518
src/PLWN_API-0.9/plwn/storages/sqlite.py
src/PLWN_API-0.9/plwn/storages/sqlite.py
+0
-1179
src/PLWN_API-0.9/plwn/utils/__init__.py
src/PLWN_API-0.9/plwn/utils/__init__.py
+0
-0
src/PLWN_API-0.9/plwn/utils/graphmlout.py
src/PLWN_API-0.9/plwn/utils/graphmlout.py
+0
-801
src/PLWN_API-0.9/plwn/utils/sorting.py
src/PLWN_API-0.9/plwn/utils/sorting.py
+0
-22
src/PLWN_API-0.9/plwn/utils/tupwrap.py
src/PLWN_API-0.9/plwn/utils/tupwrap.py
+0
-51
src/PLWN_API-0.9/setup.cfg
src/PLWN_API-0.9/setup.cfg
+0
-5
src/PLWN_API-0.9/setup.py
src/PLWN_API-0.9/setup.py
+0
-28
src/info.json
src/info.json
+8
-0
src/plwordnet_worker.py
src/plwordnet_worker.py
+69
-71
tox.ini
tox.ini
+44
-0
No files found.
.gitlab-ci.yml
0 → 100644
View file @
8d1e87d8
# Default image for the style-check jobs.
image: 'clarinpl/python:3.6'

# Keep the tox environments between pipeline runs.
cache:
  paths:
    - .tox

stages:
  - check_style
  - build

before_script:
  - pip install tox==2.9.1

pep8:
  stage: check_style
  script:
    - tox -v -e pep8

docstyle:
  stage: check_style
  script:
    - tox -v -e docstyle

build_image:
  stage: build
  image: 'docker:18.09.7'
  only:
    - master
  services:
    - 'docker:18.09.7-dind'
  # Overrides the global before_script: the docker image has no pip/tox.
  before_script:
    - ''
  script:
    - docker build -t clarinpl/plwordnet .
    # Pipe the secret straight into docker login: nothing is written to the
    # workspace, so no password file is left behind when the login fails.
    - echo "$DOCKER_PASSWORD" | docker login --username "$DOCKER_USERNAME" --password-stdin
    - docker push clarinpl/plwordnet
Dockerfile
View file @
8d1e87d8
FROM
clarinpl/python:2.7
WORKDIR
/tmp/
COPY
requirements.txt .
RUN
pip
install
-r
requirements.txt
COPY
src/PLWN_API-0.9 PLWN_API-0.9
RUN
cd
PLWN_API-0.9
&&
\
pip
install
.
FROM
clarinpl/python:3.6
WORKDIR
/home/worker
COPY
./src ./src
COPY
./main.py .
COPY
./requirements.txt .
RUN
python3.6
-m
pip
install
-r
requirements.txt
CMD
["python3.6", "main.py", "service"]
README
deleted
100644 → 0
View file @
12185e6b
1. get model
wget -O model/plwn_dump_27-03-2018.sqlite http://minio.clarin-pl.eu/public/models/plwn_dump_27-03-2018.sqlite
2. Build
docker-compose build
docker-compose.yml
View file @
8d1e87d8
version
:
'
3'
services
:
services
:
plwordnet
:
container_name
:
clarin_plwordnet
build
:
./
working_dir
:
/home/worker
entrypoint
:
# - sleep
# - 1d
-
python2
-
plwordnet_worker.py
-
python3.6
-
main.py
-
service
environment
:
-
PYTHONUNBUFFERED=0
volumes
:
-
/samba:/samba
-
./src/plwordnet_worker.py:/home/worker/plwordnet_worker.py
-
./config.ini:/home/worker/config.ini
-
./model/:/home/worker/model/
\ No newline at end of file
-
'
/samba:/samba'
-
'
./config.ini:/home/worker/config.ini'
-
'
./src:/home/worker/src'
-
'
./main.py:/home/worker/main.py'
main.py
0 → 100644
View file @
8d1e87d8
"""Implementation of hask service."""
import
argparse
import
lex_ws
from
src.plwordnet_worker
import
PLWordnetWorker
def get_args():
    """Parse the command line arguments of the worker.

    Exactly one sub-command is required; its name is stored in the
    ``algorithm`` attribute of the returned namespace. ``service`` is the
    only sub-command defined here.

    :return: The parsed command line arguments.
    :rtype: argparse.Namespace
    """
    parser = argparse.ArgumentParser(description="plWordNet worker")
    subparsers = parser.add_subparsers(dest="algorithm")
    # Set as an attribute rather than passed to ``add_subparsers``: the
    # ``required`` keyword is only accepted there from Python 3.7 on.
    subparsers.required = True
    subparsers.add_parser("service", help="Run as a service")
    return parser.parse_args()
def main():
    """Parse the command line and start the selected mode.

    The ``service`` sub-command hands :class:`PLWordnetWorker` over to
    ``lex_ws.LexService.main``; any other value does nothing.
    """
    args = get_args()
    if args.algorithm == "service":
        lex_ws.LexService.main(PLWordnetWorker)


if __name__ == "__main__":
    main()
requirements.txt
View file @
8d1e87d8
lex-ws
pika
==0.12
\ No newline at end of file
pika
==0.12
plwn_api
\ No newline at end of file
src/PLWN_API-0.9/MANIFEST.in
deleted
100644 → 0
View file @
12185e6b
include README-pl-beta.txt
src/PLWN_API-0.9/PKG-INFO
deleted
100644 → 0
View file @
12185e6b
Metadata-Version: 1.0
Name: PLWN_API
Version: 0.9
Summary: Python API to access plWordNet lexicon
Home-page: UNKNOWN
Author: Michał Kaliński
Author-email: michal.kalinski@pwr.edu.pl
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
src/PLWN_API-0.9/PLWN_API.egg-info/PKG-INFO
deleted
100644 → 0
View file @
12185e6b
Metadata-Version: 1.0
Name: PLWN-API
Version: 0.9
Summary: Python API to access plWordNet lexicon
Home-page: UNKNOWN
Author: Michał Kaliński
Author-email: michal.kalinski@pwr.edu.pl
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
src/PLWN_API-0.9/PLWN_API.egg-info/SOURCES.txt
deleted
100644 → 0
View file @
12185e6b
MANIFEST.in
README-pl-beta.txt
setup.py
PLWN_API.egg-info/PKG-INFO
PLWN_API.egg-info/SOURCES.txt
PLWN_API.egg-info/dependency_links.txt
PLWN_API.egg-info/requires.txt
PLWN_API.egg-info/top_level.txt
plwn/__init__.py
plwn/_loading.py
plwn/bases.py
plwn/enums.py
plwn/exceptions.py
plwn/relation_aliases.tsv
plwn/relresolver.py
plwn/readers/__init__.py
plwn/readers/comments.py
plwn/readers/nodes.py
plwn/readers/ubylmf.py
plwn/readers/wndb.py
plwn/readers/wnxml.py
plwn/storages/__init__.py
plwn/storages/objects.py
plwn/storages/sqlite.py
plwn/utils/__init__.py
plwn/utils/graphmlout.py
plwn/utils/sorting.py
plwn/utils/tupwrap.py
\ No newline at end of file
src/PLWN_API-0.9/PLWN_API.egg-info/dependency_links.txt
deleted
100644 → 0
View file @
12185e6b
src/PLWN_API-0.9/PLWN_API.egg-info/requires.txt
deleted
100644 → 0
View file @
12185e6b
six>=1.10
enum34>=1.1.2
src/PLWN_API-0.9/PLWN_API.egg-info/top_level.txt
deleted
100644 → 0
View file @
12185e6b
plwn
src/PLWN_API-0.9/README-pl-beta.txt
deleted
100644 → 0
View file @
12185e6b
******************
**** PlWN API ****
******************
PlWN API umożliwia:
- wyszukiwanie synsetów i jednostek leksykalnych w Słowosieci;
- dostęp do własności synsetów i jednostek leksykalnych, oraz ich relacji;
- eksport całości bądź części Słowosieci do grafu.
To README jest krótką, tymczasową instrukcją do wersji beta PlWN API.
Interfejs oraz funkcjonalność mogą ulec zmianie.
=============
Inicjalizacja
=============
>>> import plwn
>>> wn = plwn.load('plwn-3.0.db', 'sqlite3')
=================
Zrzuty baz danych
=================
Na ten moment, zalecany jest dostęp do bazy danych Słowosieci poprzez zrzuty do
plików SQLite, "plwn-X.db" (gdzie X to wersja Słowosieci). Zrzuty powinny
były zostać udostępnione razem z API.
==============
Funkcjonalność
==============
Opis funkcjonalności jest dostępny poprzez docstringi modułu plwn/bases.py
$ pydoc plwn.bases
Dodatkowo, w plwn/_pos.py znajduje się lista stałych wartości part-of-speech
używanych przez API.
Zgodnie z konwencją przyjętą przez Python 3, większość metod zwracających
kolekcje obiektów zwraca je w postaci generatorów.
>>> wn.lexical_units(lemma=u'pies')
TupWrapper(<generator object <genexpr> at 0x7f1048583410>)
Jeśli celem zapytania jest iteracja po wynikach, nie potrzeba niczego więcej.
>>> for lu in wn.lexical_units(lemma=u'pies'):
...     print(lu.id)
5563
52245
...
Aby otrzymać listę (albo inną kolekcję), należy rzutować otrzymany obiekt.
>>> list(wn.lexical_units(lemma=u'pies'))
[<LexicalUnit id=5563 lemma=u'pies' pos=u'noun' variant=1>,
<LexicalUnit id=52245 lemma=u'pies' pos=u'noun' variant=2>,
...
]
Dla wygody w trybie interaktywnym Pythona, generatory są opakowane w obiekty
"TupWrapper", które umożliwiają rzutowanie generatora do typu krotki
poprzez "wywołanie" obiektu.
>>> wn.lexical_units(lemma=u'pies')()
(<LexicalUnit id=5563 lemma=u'pies' pos=u'noun' variant=1>,
<LexicalUnit id=52245 lemma=u'pies' pos=u'noun' variant=2>,
...
)
Jednak w przypadku pisania programów odwołujących się do API zalecane jest
jawne rzutowanie zwracanych generatorów. "Explicit is better than implicit."
src/PLWN_API-0.9/plwn/.bases.py.swp
deleted
100644 → 0
View file @
12185e6b
File deleted
src/PLWN_API-0.9/plwn/__init__.py
deleted
100644 → 0
View file @
12185e6b
from
._loading
import
*
from
.enums
import
PoS
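# Attach a no-op handler to the package logger, so the package itself
# produces no logging output unless the application configures logging.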
import
logging
as
_logging
_logging
.
getLogger
(
'plwn'
).
addHandler
(
_logging
.
NullHandler
())
src/PLWN_API-0.9/plwn/_loading.py
deleted
100644 → 0
View file @
12185e6b
"""Defines user-facing functions that allow simple construction of
:class:`PLWordnetBase` instances, with selected storages and readers.
"""
from
__future__
import
absolute_import
,
division
,
print_function
from
collections
import
namedtuple
from
importlib
import
import_module
import
textwrap
as
tw
import
six
__all__
=
'read'
,
'load'
,
'show_source_formats'
,
'show_storage_formats'
# Pairs a human readable description of a format with the name of the
# submodule that implements it.
_Info = namedtuple('_Info', ('desc', 'modname'))

# Source formats, keyed by the name accepted as ``source_format``.
_READERS = {
    'uby-lmf': _Info(desc='Discontinued XML-based format', modname='ubylmf'),
    'database': _Info(
        desc=(
            'MySQL database of plWordNet. Only works on python 2 and requires '
            'certain additional libraries. This is meant for internal use only '
            'and will not work for most users. The file should contain one line '
            'with SQLAlchemy URL to the database.'
        ),
        modname='wndb',
    ),
    'xml': _Info(desc='The official PLWN XML format', modname='wnxml'),
}

# Storage formats, keyed by the name accepted as ``storage_format``.
_STORAGES = {
    'sqlite3': _Info(
        desc=(
            'File database format, with a compact schema (compared to internal '
            'PLWN database).'
        ),
        modname='sqlite',
    ),
    'objects': _Info(
        desc=(
            'Stores data in plain python objects, dumping them in pickle format. '
            'Quick to construct, but querying and memory efficiency is not '
            'guaranteed.'
        ),
        modname='objects',
    ),
}

# Defaults for this version: the ``None`` key stands for "format not given".
_READERS[None] = _READERS['xml']
_STORAGES[None] = _STORAGES['sqlite3']
def _imp_reader(modname):
    """Import a submodule of ``plwn.readers`` and return its reader.

    :param str modname: Name of the submodule, relative to ``plwn.readers``.

    :return: The ``_this_reader_`` attribute of the imported submodule.
    """
    # Pre-import the root package - py3 needs this?
    import plwn.readers  # noqa

    reader_module = import_module('.' + modname, 'plwn.readers')
    return reader_module._this_reader_
def _imp_storage(modname):
    """Import a submodule of ``plwn.storages`` and return its storage.

    :param str modname: Name of the submodule, relative to ``plwn.storages``.

    :return: The ``_this_storage_`` attribute of the imported submodule.
    """
    # Pre-import the root package - py3 needs this?
    import plwn.storages  # noqa

    storage_module = import_module('.' + modname, 'plwn.storages')
    return storage_module._this_storage_
def read(source_file,
         source_format=None,
         storage_file=None,
         storage_format=None):
    """Build a :class:`PLWordnetBase` instance from a plWordNet source file.

    The defaults mentioned below may change with each minor version of PLWN
    API; a long running program that depends on a particular format should
    name it explicitly.

    :param str source_file: Path to the file the plWordNet data is read from.
        What the file must contain depends on ``source_format``.

    :param str source_format: Name of the format of ``source_file``. ``None``
        selects the default of the current version.

    :param str storage_file: Path to the file the internal representation of
        the storage is dumped to, so that it can later be opened with
        :func:`load`. ``None`` means nothing is dumped.

    :param str storage_format: Name of the format PLWN API keeps the data in.
        The access methods should be the same for every format, but their
        efficiency may differ. ``None`` selects the default of the current
        version.

    :raises KeyError: If ``storage_format`` or ``source_format`` is not a
        known format name.

    :rtype: PLWordnetBase
    """
    # The storage is resolved before the reader, as in the lookup tables'
    # original usage; an unknown storage format is therefore reported first.
    storage_cls = _imp_storage(_STORAGES[storage_format].modname)
    reader = _imp_reader(_READERS[source_format].modname)
    source = reader(source_file)
    return storage_cls.from_reader(source, storage_file)
def load(storage_file, storage_format=None):
    """Open a file holding the cached, internal PLWN API representation.

    When such a file is available this is much faster than :func:`read`.

    :param str storage_file: Path to the file with the cached data.

    :param str storage_format: Name of the format the data is stored in. It
        has to match the actual format and schema version of the file.
        ``None`` selects the default of the current version.

    :raises KeyError: If ``storage_format`` is not a known format name.

    :rtype: PLWordnetBase
    """
    storage_info = _STORAGES[storage_format]
    storage_cls = _imp_storage(storage_info.modname)
    return storage_cls.from_dump(storage_file)
def show_source_formats():
    """Print the name and a short description of every source file format.

    The listing goes to ``stdout``; the function is mainly an aid for
    interactive shell sessions.
    """
    _show(_READERS)
def show_storage_formats():
    """Print the name and a short description of every storage format.

    The listing goes to ``stdout``; the function is mainly an aid for
    interactive shell sessions.
    """
    _show(_STORAGES)
def _show(dict_):
    """Print every named format of ``dict_`` together with its description.

    Each entry is printed as the format name, an underline of dashes of the
    same length, and the description wrapped by :func:`textwrap.fill`,
    followed by a blank line. Entries are listed alphabetically by name so
    the output does not depend on dictionary ordering.

    :param dict dict_: Maps format names to objects with a ``desc``
        attribute. The ``None`` key (the default format marker) is skipped.
    """
    # Drop the ``None`` key before sorting: it only repeats a named entry and
    # cannot be ordered against strings on Python 3.
    named = [(name, info) for name, info in dict_.items() if name is not None]
    for name, info in sorted(named, key=lambda pair: pair[0]):
        print(name)
        print('-' * len(name))
        print(tw.fill(info.desc), end='\n\n')
src/PLWN_API-0.9/plwn/bases.py
deleted
100644 → 0
View file @
12185e6b
This diff is collapsed.
Click to expand it.
src/PLWN_API-0.9/plwn/enums.py
deleted
100644 → 0
View file @
12185e6b
# coding: utf8
"""
Enumerated values used in plWordNet
"""
from
__future__
import
absolute_import
,
division
import
re
from
enum
import
Enum
import
six
__all__
=
(
'PoS'
,
'VerbAspect'
,
'EmotionMarkedness'
,
'EmotionName'
,
'EmotionValuation'
,
'Domain'
,
'make_values_tuple'
,
)
def _fill_numtrans(enumclass, num2enum, enum2num):
    """Fill the two translation tables between enum members and numbers.

    The members of ``enumclass`` are numbered from 1 in iteration order;
    these are the numbers used to denote them in the plWN database.

    :param enumclass: Iterable of enum members (normally the enum class).
    :param dict num2enum: Updated in place with ``number -> member``.
    :param dict enum2num: Updated in place with ``member -> number``.
    """
    next_number = 1
    for member in enumclass:
        num2enum[next_number] = member
        enum2num[member] = next_number
        next_number += 1
def _get_from_numtrans(numtrans, num, optional):
    """Look ``num`` up in the translation table ``numtrans``.

    :param dict numtrans: Table to search.
    :param num: Key to look up.
    :param bool optional: Whether a missing key is acceptable.

    :return: The value stored under ``num``, or ``None`` when the key is
        missing and ``optional`` is true.

    :raises KeyError: If the key is missing and ``optional`` is false.
    """
    try:
        found = numtrans[num]
    except KeyError:
        if not optional:
            raise
        found = None
    return found
# Explicit ordering is needed only in python 2.
_POS_ORDER = 'verb noun adverb adjective'

# Translation tables between PoS members and their database numbers; both
# start empty and are filled by the _fill_numtrans call below the class.
_POS_NUM2ENUM = {}
_POS_ENUM2NUM = {}


class PoS(Enum):
    """
    Defines **Part of Speech** values used by plWN.

    The order in which the members are defined matters: it decides the
    database number each member is registered under after the class body.
    """

    if six.PY2:
        __order__ = _POS_ORDER

    verb = u'verb'
    noun = u'noun'
    adverb = u'adverb'
    adjective = u'adjective'

    # Short aliases of the members above (same values, no new members).
    v = verb
    n = noun
    adv = adverb
    adj = adjective

    @staticmethod
    def by_db_number(number, optional=False):
        """Return the member registered under the database ``number``.

        For an unknown number, ``None`` is returned when ``optional`` is
        true; otherwise the ``KeyError`` of the lookup propagates.
        """
        return _get_from_numtrans(_POS_NUM2ENUM, number, optional)

    @property
    def db_number(self):
        """The database number this member is registered under."""
        return _POS_ENUM2NUM[self]


# Register the members under consecutive numbers, starting from 1.
_fill_numtrans(PoS, _POS_NUM2ENUM, _POS_ENUM2NUM)
# Member order for python 2, where it has to be given explicitly.
_VA_ORDER = 'perfective imperfective predicative two_aspect'

# Translation tables between VerbAspect members and their database numbers;
# both start empty and are filled by the _fill_numtrans call below the class.
_VA_NUM2ENUM = {}
_VA_ENUM2NUM = {}


class VerbAspect(Enum):
    """
    Defines aspect values used by verbs in plWN.

    The order in which the members are defined matters: it decides the
    database number each member is registered under after the class body.
    """

    if six.PY2:
        __order__ = _VA_ORDER

    perfective = u'perf'
    imperfective = u'imperf'
    predicative = u'pred'
    two_aspect = u'imperf.perf'

    # Short aliases of the members above (same values, no new members).
    perf = perfective
    imperf = imperfective
    pred = predicative
    two = two_aspect

    # Additionally, some Polish abbreviations
    dk = perfective
    ndk = imperfective

    @staticmethod
    def by_db_number(number, optional=False):
        """Return the member registered under the database ``number``.

        For an unknown number, ``None`` is returned when ``optional`` is
        true; otherwise the ``KeyError`` of the lookup propagates.
        """
        return _get_from_numtrans(_VA_NUM2ENUM, number, optional)

    @property
    def db_number(self):
        """The database number this member is registered under."""
        return _VA_ENUM2NUM[self]


# Register the members under consecutive numbers, starting from 1.
_fill_numtrans(VerbAspect, _VA_NUM2ENUM, _VA_ENUM2NUM)
class EmotionMarkedness(Enum):
    """
    Defines markedness of emotions associated with some lexical units.

    Signed values are written as a sign, one space and a letter (``'+ m'``);
    use :meth:`normalized` to accept other spacing.
    """

    strong_positive = u'+ m'
    strong_negative = u'- m'
    weak_positive = u'+ s'
    weak_negative = u'- s'
    ambiguous = u'amb'

    # Aliases named after the literal values (same values, no new members).
    plus_m = strong_positive
    minus_m = strong_negative
    plus_s = weak_positive
    minus_s = weak_negative
    amb = ambiguous

    @classmethod
    def normalized(cls, strvalue):
        """
        Return an instance of this enum with string value normalized with
        regards to whitespace.

        Surrounding whitespace is ignored, and any amount of whitespace
        (including none) is accepted between the sign and the letter.

        :param strvalue: Text form of the markedness, e.g. ``u' +m '``.

        :return: The matching member of this enum.

        :raises ValueError: If the text is not a valid value once the
            whitespace is normalized.
        """
        strvalue = strvalue.strip()

        # Try the one value that won't require matching
        if strvalue == cls.ambiguous.value:
            return cls.ambiguous

        # The pattern is anchored at the end too: without the anchor, text
        # with trailing garbage (u'+ mxyz') was accepted as a valid value.
        match = re.match(r'([+-])\s*([sm])\Z', strvalue, re.U)

        if not match:
            # This can't be a valid string, so let the built-in exception
            # raise.
            return cls(strvalue)

        return cls(match.group(1) + u' ' + match.group(2))
class EmotionName(Enum):
    """
    Possible names of emotions associated with some lexical units.

    The member values are the Polish strings; the member names are their
    English counterparts.
    """

    joy = u'radość'
    trust = u'zaufanie'
    anticipation = u'cieszenie się na coś oczekiwanego'
    surprise = u'zaskoczenie czymś nieprzewidywanym'
    sadness = u'smutek'
    anger = u'złość'
    fear = u'strach'
    disgust = u'wstręt'

    # Aliases of the members above, spelled as ASCII-only forms of the Polish
    # values (same values, no new members).
    radosc = joy
    zaufanie = trust
    cieszenie_sie_na = anticipation
    zaskoczenie = surprise
    smutek = sadness
    zlosc = anger
    strach = fear
    wstret = disgust
class
EmotionValuation
(
Enum
):
"""