merge request changes

1408fe0d · leszeks · 7d897cab · 1408fe0d · 1408fe0d · 1408fe0d
Commit 1408fe0d authored 4 years ago by leszeks
--- a/README.md
+++ b/README.md
@@ -69,6 +69,18 @@ To load this version at a later date, use `plwn.load(path)` instead of `plwn.loa
    >>> api = plwn.load("storage-dumps/plwn-new.db")


+Downloading API dumps
+=====================
+
+To download one of the dumps available at https://minio.clarin-pl.eu/ :
+
+    import plwn
+    plwn.download("optional_name")
+
+The file will be downloaded to the current directory.
+If `optional_name` is not provided, the default dump will be downloaded.
+If `optional_name` is provided but does not match the name of any available dump,
+the process will fail and display the possible names.
+
+
 Licenses
 ========


--- a/plwn/__init__.py
+++ b/plwn/__init__.py
@@ -20,6 +20,7 @@ from ._loading import read
 from ._loading import load
 from ._loading import show_source_formats
 from ._loading import show_storage_formats
+from .download import download
 # Import the enums that are needed for selecting and filtering
 from .enums import PoS, RelationKind

@@ -35,4 +36,5 @@ __all__ = [
    "show_source_formats",
    "load_default",
    "RelationKind",
+    "download",
 ]
--- a/plwn/config.ini
+++ b/plwn/config.ini
 [DOWNLOAD]
-model = https://minio.clarin-pl.eu/public/models/plwn_api_dumps/plwn_dump_27-03-2018.sqlite
\ No newline at end of file
+default_model = https://minio.clarin-pl.eu/public/models/plwn_api_dumps/plwn_dump_27-03-2018.sqlite
\ No newline at end of file
--- a/plwn/download.py
+++ b/plwn/download.py
 """Implementation of download method."""
 import configparser
 import os
+import xml.etree.ElementTree as ET
+import re

 import requests
+from six.moves.urllib.request import urlopen

-models = {
-    "model",
-}

+config = configparser.ConfigParser()
+config_path = os.path.join(os.path.dirname(
+            os.path.abspath(__file__)), "config.ini")
+config.read(config_path)
+
+
+def get_available_models():
+    root = ET.parse(urlopen("https://minio.clarin-pl.eu/public")).getroot()
+    available_models = []
+    for child in root.findall(
+            "{http://s3.amazonaws.com/doc/2006-03-01/}Contents"):
+        if "models/plwn_api_dumps/" in str(
+                child.find(
+                    "{http://s3.amazonaws.com/doc/2006-03-01/}Key").text):
+            string = child.find(
+                "{http://s3.amazonaws.com/doc/2006-03-01/}Key").text
+            substring = r"models/plwn_api_dumps/"
+            available_models.append(re.sub(substring, r'', string))
+    return available_models

-def download(name):
+
+def download(name = "default_model"):
    """After called it downloads a specified database model.

    Currently only one model available.
    """
+    models = get_available_models()
+    if name == "default_model":
+        url = config["DOWNLOAD"][name]
+        r = requests.get(url)
+        with open(name, "wb") as f:
+            f.write(r.content)
+            f.close()
+        return
    if name in models:
-        cfg = configparser.ConfigParser()
-        config_path = os.path.join(os.path.dirname(
-            os.path.abspath(__file__)), "config.ini")
-        cfg.read(config_path)
-        url = cfg["DOWNLOAD"][name]
+        url = config["DOWNLOAD"]["default_model"]
+        url = url.replace("plwn_dump_27-03-2018.sqlite",name)
        r = requests.get(url)
        with open(name, "wb") as f:
            f.write(r.content)

--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@ ENVNAME_DIST_NODEFAULT = 'PLWN_API_DIST_NO_DEFAULT_STORAGE'

 setup_args = dict(
    name='PLWN_API',
-    version='0.23',
+    version='0.24',
    license='LGPL-3.0+',
    description='Python API to access plWordNet lexicon',