Skip to content
Snippets Groups Projects
Commit 1408fe0d authored by leszeks's avatar leszeks
Browse files

merge request changes

parent 7d897cab
1 merge request!4Adding download option
Pipeline #1046 failed with stage
in 38 seconds
......@@ -69,6 +69,18 @@ To load this version at a later date, use `plwn.load(path)` instead of `plwn.loa
>>> api = plwn.load("storage-dumps/plwn-new.db")
Downloading API dumps
=====================
To download one of the dumps available at https://minio.clarin-pl.eu/:
import plwn
plwn.download("optional_name")
The file will be downloaded to the current directory.
If optional_name is not provided, the default dump will be downloaded.
If optional_name is provided but doesn't match the name of any available dump, the process will fail
and display the possible names.
Licenses
========
......
......@@ -20,6 +20,7 @@ from ._loading import read
from ._loading import load
from ._loading import show_source_formats
from ._loading import show_storage_formats
from .download import download
# Import the enums that are needed for selecting and filtering
from .enums import PoS, RelationKind
......@@ -35,4 +36,5 @@ __all__ = [
"show_source_formats",
"load_default",
"RelationKind",
"download",
]
[DOWNLOAD]
model = https://minio.clarin-pl.eu/public/models/plwn_api_dumps/plwn_dump_27-03-2018.sqlite
\ No newline at end of file
default_model = https://minio.clarin-pl.eu/public/models/plwn_api_dumps/plwn_dump_27-03-2018.sqlite
\ No newline at end of file
"""Implementation of download method."""
import configparser
import os
import xml.etree.ElementTree as ET
import re
import requests
from six.moves.urllib.request import urlopen
# Module-level set of dump identifiers. download() binds its own local
# variable called ``models``, so it does not read this set.
models = {"model"}

# config.ini is looked up in the same directory as this module.
config_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "config.ini",
)

# Parsed once at import time; ConfigParser.read() silently skips a file
# that does not exist, so a missing config.ini does not fail here.
config = configparser.ConfigParser()
config.read(config_path)
def get_available_models():
    """Return the names of the API dumps listed by the storage server.

    The bucket listing at ``https://minio.clarin-pl.eu/public`` is fetched
    and parsed as XML in the ``http://s3.amazonaws.com/doc/2006-03-01/``
    namespace. Every ``Contents/Key`` value containing
    ``models/plwn_api_dumps/`` is kept, with that substring removed.

    Returns:
        A list of dump names, in the order the listing reports them.

    Raises:
        Any error raised by ``urlopen`` (network failure) or by
        ``ET.parse`` (malformed XML) propagates to the caller.
    """
    namespace = "{http://s3.amazonaws.com/doc/2006-03-01/}"
    prefix = "models/plwn_api_dumps/"

    response = urlopen("https://minio.clarin-pl.eu/public")
    try:
        root = ET.parse(response).getroot()
    finally:
        # ET.parse() does not close a file object it was handed, so the
        # HTTP response has to be released explicitly. try/finally is used
        # instead of ``with`` because urlopen comes from six.moves and the
        # Python 2 response object is not a context manager.
        response.close()

    available_models = []
    for entry in root.findall(namespace + "Contents"):
        key = entry.find(namespace + "Key").text
        # str() keeps the containment test safe when the Key element is
        # empty (text is None).
        if prefix in str(key):
            available_models.append(key.replace(prefix, ""))
    return available_models
# NOTE(review): two signatures appear back to back and the body carries no
# indentation -- this looks like the old and new lines of a diff shown
# together. Confirm against the real file before relying on this text.
def download(name):
def download(name = "default_model"):
"""Download a database dump and write it to a file called ``name``.

With ``name == "default_model"`` the URL is read from the ``DOWNLOAD``
section of the module-level ``config``. Otherwise, when ``name`` is one of
the names returned by ``get_available_models()``, the URL is derived from
the default URL by replacing its file name with ``name``.

The output path is ``name`` itself, so a relative name is resolved
against the current working directory.
"""
# Ask the server which dumps exist; this runs on every call, including
# the default-model path below.
models = get_available_models()
# Default dump: the URL comes straight from the configuration.
if name == "default_model":
url = config["DOWNLOAD"][name]
r = requests.get(url)
with open(name, "wb") as f:
f.write(r.content)
# NOTE(review): if this call sits inside the ``with`` block above, it is
# redundant -- the context manager already closes the file. Confirm.
f.close()
return
# Named dump: only handled when the server listing contains it.
# NOTE(review): no branch for an unknown name is visible in this excerpt;
# presumably it follows later in the file -- verify.
if name in models:
# NOTE(review): the next five lines re-read config.ini and assign ``url``,
# which is then overwritten immediately below -- presumably these are the
# pre-change lines of the diff. Confirm.
cfg = configparser.ConfigParser()
config_path = os.path.join(os.path.dirname(
os.path.abspath(__file__)), "config.ini")
cfg.read(config_path)
url = cfg["DOWNLOAD"][name]
# Build the URL by swapping the default dump's file name for the
# requested one; this relies on the default URL containing exactly
# "plwn_dump_27-03-2018.sqlite".
url = config["DOWNLOAD"]["default_model"]
url = url.replace("plwn_dump_27-03-2018.sqlite",name)
# The whole response body is held in memory (r.content) and then written
# out in binary mode.
r = requests.get(url)
with open(name, "wb") as f:
f.write(r.content)
......
......@@ -9,7 +9,7 @@ ENVNAME_DIST_NODEFAULT = 'PLWN_API_DIST_NO_DEFAULT_STORAGE'
setup_args = dict(
name='PLWN_API',
version='0.23',
version='0.24',
license='LGPL-3.0+',
description='Python API to access plWordNet lexicon',
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment