Commit c0756ecd authored by Mateusz Gniewkowski's avatar Mateusz Gniewkowski

Merge branch 'adding_download_option' into 'master'

Adding download option

See merge request !4
parents 24d2a446 52267138
Pipeline #1066 failed with stages
in 1 minute and 11 seconds
......@@ -2,3 +2,4 @@ include COPYING
include COPYING.LESSER
include LICENSE-plWN.txt
include LICENSE-PWN.txt
include plwn/config.ini
\ No newline at end of file
......@@ -69,6 +69,19 @@ To load this version at a later date, use `plwn.load(path)` instead of `plwn.loa
>>> api = plwn.load("storage-dumps/plwn-new.db")
Downloading API dumps
=====================
To download one of the dumps available at https://minio.clarin-pl.eu/:
import plwn
plwn.download("optional_name")
The file will be downloaded to the current directory.
If optional_name is not provided, the default dump will be downloaded.
If optional_name is provided but doesn't match the name of any available dump, nothing is
downloaded and the list of possible names is displayed.
Licenses
========
......
......@@ -20,6 +20,7 @@ from ._loading import read
from ._loading import load
from ._loading import show_source_formats
from ._loading import show_storage_formats
from .download import download
# Import the enums that are needed for selecting and filtering
from .enums import PoS, RelationKind
......@@ -35,4 +36,5 @@ __all__ = [
"show_source_formats",
"load_default",
"RelationKind",
"download",
]
[DOWNLOAD]
default_model = https://minio.clarin-pl.eu/public/models/plwn_api_dumps/plwn_dump_27-03-2018.sqlite
\ No newline at end of file
"""Implementation of download method."""
import configparser
import os
import xml.etree.ElementTree as ET
import re
import requests
from six.moves.urllib.request import urlopen
# Parse the config.ini that sits next to this module, once, at import time.
_module_dir = os.path.dirname(os.path.abspath(__file__))
config_path = os.path.join(_module_dir, "config.ini")
config = configparser.ConfigParser()
config.read(config_path)
def get_available_models():
    """Return the names of the API dumps listed in the public bucket.

    Fetches the XML listing at https://minio.clarin-pl.eu/public and collects
    every object key that contains ``models/plwn_api_dumps/``, with that
    prefix removed.

    Returns:
        list of str: dump file names, in the order they appear in the listing.
    """
    from contextlib import closing

    ns = "{http://s3.amazonaws.com/doc/2006-03-01/}"
    prefix = "models/plwn_api_dumps/"

    # closing() guarantees the HTTP response is released even if parsing
    # fails, without relying on the response object being a context manager.
    with closing(urlopen("https://minio.clarin-pl.eu/public")) as response:
        root = ET.parse(response).getroot()

    available_models = []
    for child in root.findall(ns + "Contents"):
        # findtext() yields None for a missing <Key> instead of raising.
        key = child.findtext(ns + "Key")
        if not key or prefix not in key:
            continue
        model_name = key.replace(prefix, "")
        # A key that is only the prefix (directory placeholder) is not a dump.
        if model_name:
            available_models.append(model_name)
    return available_models
def _fetch_to_file(url, path):
    """Stream the resource at *url* into the local file *path*.

    Raises:
        requests.HTTPError: if the server answers with an error status, so an
            error page is never saved as if it were a database dump.
    """
    response = requests.get(url, stream=True)
    try:
        # Check the status before creating the file so a failed request
        # leaves nothing behind on disk.
        response.raise_for_status()
        with open(path, "wb") as out:
            # Write chunk by chunk to avoid holding the whole body in memory.
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                out.write(chunk)
    finally:
        response.close()


def download(name="default_model"):
    """Download a database dump into the current working directory.

    Args:
        name: file name of the dump to fetch. With the default value
            ``"default_model"`` the URL stored under ``default_model`` in the
            ``[DOWNLOAD]`` section of the config is fetched and saved to a
            file named ``default_model``. Any other value must match one of
            the names returned by ``get_available_models()`` and is saved
            under that same name.

    Returns:
        None. If *name* is not an available dump, nothing is downloaded and
        the available names are printed instead.

    Raises:
        requests.HTTPError: if the server responds with an error status.
    """
    default_url = config["DOWNLOAD"]["default_model"]

    if name == "default_model":
        # The default URL comes straight from the config, so the bucket
        # listing is not needed (and cannot block this path if it fails).
        _fetch_to_file(default_url, name)
        return

    models = get_available_models()
    if name not in models:
        print("Cannot download: ",
              name,
              "\n Possible download options: ",
              models)
        return

    # Build the URL from the directory of the default dump rather than by
    # replacing a hard-coded file name, so it keeps working when the
    # configured default changes.
    base_url = default_url.rsplit("/", 1)[0]
    _fetch_to_file(base_url + "/" + name, name)
......@@ -9,7 +9,7 @@ ENVNAME_DIST_NODEFAULT = 'PLWN_API_DIST_NO_DEFAULT_STORAGE'
setup_args = dict(
name='PLWN_API',
version='0.23',
version='0.24',
license='LGPL-3.0+',
description='Python API to access plWordNet lexicon',
......@@ -17,10 +17,11 @@ setup_args = dict(
author_email='michal.kalinski@pwr.edu.pl',
packages=['plwn', 'plwn.readers', 'plwn.storages', 'plwn.utils'],
package_data={'plwn.default': ['*.db']},
package_data={'plwn.default': ['*.db'], 'plwn': ['config.ini']},
test_suite='tests.setuptools_loader.setuptools_load_tests',
install_requires=['six>=1.10', 'enum34>=1.1.2;python_version<"3.4"'],
install_requires=['six>=1.10',
'enum34>=1.1.2;python_version<"3.4"', 'requests'],
zip_safe=False,
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment