Skip to content

Commit

Permalink
Support downloading XLSX file inside Zip file, to comply with DMP+Zenodo
Browse files Browse the repository at this point in the history
  • Loading branch information
rnebot committed May 7, 2020
1 parent 79038c1 commit 8786457
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 1 deletion.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,12 +187,13 @@ FADN_FILES_LOCATION | Directory where FADN datasets are downloaded and cached |
CACHE_FILE_LOCATION | Directory where SDMX datasets are downloaded and cached | "/srv/sdmx_datasets_cache" |
REDIS_HOST_FILESYSTEM_DIR | If REDIS_HOST='filesystem:local_session', directory where sessions are stored | "/srv/sessions" |
SSP_FILES_DIR | Not used | "" |
NIS_FILES_LIST | A comma-separated list of URLs to CSV files where NIS case studies or parts of them are enumerated. Each CSV file starts with a header, with four columns: name, url, description and example (True if it is an example) | "" |
REDIS_HOST | "localhost" expects a REDIS server available at localhost:6379; "redis-local" creates a local REDIS instance; "filesystem:local_session" uses filesystem to store sessions (a good option for execution in PC/laptop) | "" |
TESTING | "True"| "True" |
SELF_SCHEMA | Name of the host where Backend RESTful service responds, preceded by the protocol (http or https) | "https://one.nis.magic-nexus.eu/" |
FS_TYPE | "Webdav" | "Webdav" |
FS_SERVER | Host name of the WebDAV server | "nextcloud.data.magic-nexus.eu" |
FS_USER | User name used. Files and folders must be readable and writeable by this user | "<webdav user>" |
FS_USER | User name used. Files and folders must be readable and writable by this user | "<webdav user>" |
FS_PASSWORD | Password for the previous user | "<password in clear>" |
GAPI_CREDENTIALS_FILE | Path to a file obtained from Google API management web, to directly access a NIS workbook file in Google Sheets | "/srv/credentials.json" |
GAPI_TOKEN_FILE | Path to a file used to store the authorization token | "/srv/tocken.pickle" |
Expand Down
43 changes: 43 additions & 0 deletions nexinfosys/common/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from nexinfosys.ie_imports.google_drive import download_xlsx_file_id
from nexinfosys.models import log_level
import os
from zipfile import ZipFile

logger = logging.getLogger(__name__)
logger.setLevel(log_level)
Expand Down Expand Up @@ -1196,6 +1197,48 @@ def download_file(location, wv_user=None, wv_password=None, wv_host_name=None):
data = urllib.request.urlopen(location).read()
data = io.BytesIO(data)

# If it is a ZIP file...
if pr.path.lower().endswith(".zip"):
zipfile = ZipFile(data)
# If no anchor, return the XLSX file (find it and return it)
file_to_extract = None
candidate_files_to_extract = []
if pr.fragment:
file_to_extract = pr.fragment
if "#" in file_to_extract:
pos = file_to_extract.find("#")
sub_fragment = file_to_extract[pos + 1:]
file_to_extract = file_to_extract[:pos]
if file_to_extract.startswith("/"):
file_to_extract = file_to_extract[1:]
found = False
for name in zipfile.namelist():
if not file_to_extract:
if name.lower().endswith(".xlsx"):
candidate_files_to_extract.append(name)
found = True
else:
if strcmp(file_to_extract, name):
found = True
break
if found:
if not file_to_extract:
# Try to find best option from the list of candidates
file_to_extract = candidate_files_to_extract[0]
for f in candidate_files_to_extract:
if f.lower().startswith("msm/cs"):
file_to_extract = f
break # Best option (DMP): a file named "cs_<whatever>.xlsx" in "msm" folder
if f.lower().startswith("msm/"):
file_to_extract = f # Second best option: an XLSX file inside "msm" folder

if file_to_extract:
data = io.BytesIO(zipfile.read(file_to_extract))
else:
data = None
else:
data = None

return data


Expand Down

0 comments on commit 8786457

Please sign in to comment.