From 1408fe0d207c3594ad44c50efe1c27afced633d5 Mon Sep 17 00:00:00 2001
From: leszeks <leszeks@e-science.pl>
Date: Wed, 29 Jul 2020 16:09:02 +0200
Subject: [PATCH] merge request changes

---
 README.md        | 12 ++++++++++++
 plwn/__init__.py |  2 ++
 plwn/config.ini  |  2 +-
 plwn/download.py | 43 ++++++++++++++++++++++++++++++++++---------
 setup.py         |  2 +-
 5 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index fed3cba..8e2eaa2 100644
--- a/README.md
+++ b/README.md
@@ -69,6 +69,18 @@ To load this version at a later date, use `plwn.load(path)` instead of `plwn.loa
     >>> api = plwn.load("storage-dumps/plwn-new.db")
 
 
+Downloading API dumps
+=====================
+
+To download one of the dumps available at https://minio.clarin-pl.eu/, run:
+    import plwn
+    plwn.download("optional_name")
+The file will be downloaded to the current directory.
+If `optional_name` is not provided, the default dump will be downloaded.
+If `optional_name` is provided but doesn't match the name of any available dump, the process will fail
+and display the possible names.
+
+
 Licenses
 ========
 
diff --git a/plwn/__init__.py b/plwn/__init__.py
index fe9b1f0..83159a5 100644
--- a/plwn/__init__.py
+++ b/plwn/__init__.py
@@ -20,6 +20,7 @@ from ._loading import read
 from ._loading import load
 from ._loading import show_source_formats
 from ._loading import show_storage_formats
+from .download import download
 # Import the enums that are needed for selecting and filtering
 from .enums import PoS, RelationKind
 
@@ -35,4 +36,5 @@ __all__ = [
     "show_source_formats",
     "load_default",
     "RelationKind",
+    "download",
 ]
diff --git a/plwn/config.ini b/plwn/config.ini
index 04c4bf2..ec3c6ff 100644
--- a/plwn/config.ini
+++ b/plwn/config.ini
@@ -1,2 +1,2 @@
 [DOWNLOAD]
-model = https://minio.clarin-pl.eu/public/models/plwn_api_dumps/plwn_dump_27-03-2018.sqlite
\ No newline at end of file
+default_model = https://minio.clarin-pl.eu/public/models/plwn_api_dumps/plwn_dump_27-03-2018.sqlite
\ No newline at end of file
diff --git a/plwn/download.py b/plwn/download.py
index 9699eb5..21378e1 100644
--- a/plwn/download.py
+++ b/plwn/download.py
@@ -1,25 +1,50 @@
 """Implementation of download method."""
 import configparser
 import os
+import xml.etree.ElementTree as ET
+import re
 
 import requests
+from six.moves.urllib.request import urlopen
 
-models = {
-    "model",
-}
 
+config = configparser.ConfigParser()
+config_path = os.path.join(os.path.dirname(
+            os.path.abspath(__file__)), "config.ini")
+config.read(config_path)
+
+
+def get_available_models():
+    root = ET.parse(urlopen("https://minio.clarin-pl.eu/public")).getroot()
+    available_models = []
+    for child in root.findall(
+            "{http://s3.amazonaws.com/doc/2006-03-01/}Contents"):
+        if "models/plwn_api_dumps/" in str(
+                child.find(
+                    "{http://s3.amazonaws.com/doc/2006-03-01/}Key").text):
+            string = child.find(
+                "{http://s3.amazonaws.com/doc/2006-03-01/}Key").text
+            substring = r"models/plwn_api_dumps/"
+            available_models.append(re.sub(substring, r'', string))
+    return available_models
 
-def download(name):
+
+def download(name = "default_model"):
     """After called it downloads a specified database model.
 
     Currently only one model available.
     """
+    models = get_available_models()
+    if name == "default_model":
+        url = config["DOWNLOAD"][name]
+        r = requests.get(url)
+        with open(name, "wb") as f:
+            f.write(r.content)
+            f.close()
+        return
     if name in models:
-        cfg = configparser.ConfigParser()
-        config_path = os.path.join(os.path.dirname(
-            os.path.abspath(__file__)), "config.ini")
-        cfg.read(config_path)
-        url = cfg["DOWNLOAD"][name]
+        url = config["DOWNLOAD"]["default_model"]
+        url = url.replace("plwn_dump_27-03-2018.sqlite",name)
         r = requests.get(url)
         with open(name, "wb") as f:
             f.write(r.content)
diff --git a/setup.py b/setup.py
index f4f5bef..fad4287 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@ ENVNAME_DIST_NODEFAULT = 'PLWN_API_DIST_NO_DEFAULT_STORAGE'
 
 setup_args = dict(
     name='PLWN_API',
-    version='0.23',
+    version='0.24',
     license='LGPL-3.0+',
     description='Python API to access plWordNet lexicon',
 
-- 
GitLab