Skip to content
Snippets Groups Projects
Commit be971a66 authored by Bienchen's avatar Bienchen
Browse files

Download associated archive from internet for validation.

parent a9a85e19
No related branches found
No related tags found
No related merge requests found
......@@ -29,6 +29,7 @@ LABEL vendor3="Biozentrum - University of Basel (biozentrum.unibas.ch)"
## Install the RCSB CPP Dict Suite (only the binaries we need)
WORKDIR ${SRC_DIR}
COPY requirements.txt ${SRC_DIR}
RUN set -e pipefail; \
export DICT_PACK_SRC_DIR="${SRC_DIR}/cpp-dict-pack.git"; \
apk update; \
......@@ -56,6 +57,7 @@ RUN set -e pipefail; \
/usr/local/bin/python -m pip install --upgrade pip; \
/usr/local/bin/python -m pip install mmcif==${VERSION_PY_MMCIF} \
python-rapidjson; \
/usr/local/bin/python -m pip install -r requirements.txt; \
#
## Clean up/ remove unnecessary stuff
apk del abuild binutils bison build-base cmake flex git gcc \
......
python-rapidjson==1.9
validators==0.20.0
......@@ -20,7 +20,9 @@ import sys
import tempfile
import zipfile
from validators import url as is_url
import rapidjson as json
import requests
from mmcif.api.DataCategory import DataCategory
......@@ -215,12 +217,51 @@ def _get_entry_id(cif_datablock, entry_id_map, datablock_idx):
entry_id_map[row[eidx]] = datablock_idx
def _download_file(file_url):
    """Download a file into an anonymous temporary file.

    :param file_url: URL of the file to fetch.
    :returns: Open binary temporary file holding the downloaded data,
              rewound to the start. The caller is responsible for closing
              it; a ``tempfile.TemporaryFile`` is destroyed as soon as it is
              closed.
    :raises RuntimeError: If the server answers with a status other than 200.
    """
    # With stream=True the connection stays checked out until the body is
    # consumed or the response is closed, so use the response as a context
    # manager to release it on every exit path (including the error below).
    with requests.get(file_url, stream=True, timeout=600) as rspns:
        if rspns.status_code != 200:
            raise RuntimeError(f"File not found by URL '{file_url}'.")
        dlf = tempfile.TemporaryFile()
        try:
            for chunk in rspns.iter_content(chunk_size=1024):
                dlf.write(chunk)
            dlf.seek(0)
        except BaseException:
            # Do not leave a half-written temporary file open on failure.
            dlf.close()
            raise
    return dlf
def _get_assoc_obj(file_or_url, assoc_dir):
    """Resolve an associated file to something that can be opened.

    :param file_or_url: File name relative to ``assoc_dir``, or a URL.
    :param assoc_dir: Directory holding associated files, or ``None``.
    :returns: The joined path (``str``) if the file exists inside
              ``assoc_dir``. Otherwise, if ``file_or_url`` is a URL, the
              open temporary file object returned by ``_download_file``;
              the caller should close that object when done.
    :raises RuntimeError: If the file neither exists locally nor is a URL.
    """
    if assoc_dir is not None:
        local_path = os.path.join(assoc_dir, file_or_url)
        if os.path.exists(local_path):
            return local_path
    else:
        # Without a directory there is nothing to join; report the bare name
        # in the error message instead of a misleading 'None/<name>'.
        local_path = file_or_url
    if is_url(file_or_url):
        return _download_file(file_or_url)
    raise RuntimeError(
        "Associated file path does not point to actual file or URL: "
        + f"'{local_path}'"
    )
def _unzip_arc_cif(arc_file, cif_file, assoc_dir):
    """Extract a cif file from a ZIP archive.

    :param arc_file: Archive file name relative to ``assoc_dir``, or a URL
                     the archive is downloaded from.
    :param cif_file: Name of the cif member inside the archive.
    :param assoc_dir: Directory holding associated files, or ``None``.
    :returns: The result of ``_read_mmcif`` for the extracted cif member.
    :raises RuntimeError: Propagated from ``_get_assoc_obj`` if the archive
                          is neither a local file nor a URL.
    """
    # The archive is resolved exclusively via _get_assoc_obj; opening
    # os.path.join(assoc_dir, arc_file) directly would fail for URLs and for
    # assoc_dir=None.
    assoc_obj = _get_assoc_obj(arc_file, assoc_dir)
    try:
        with zipfile.ZipFile(assoc_obj) as arc_zip:
            with TextIOWrapper(
                arc_zip.open(cif_file), encoding="utf-8"
            ) as cif_fh:
                return _read_mmcif(cif_fh)
    finally:
        # A downloaded archive arrives as an open temporary file which must
        # be closed even if reading fails; a local archive is a plain path
        # string and has no close().
        close = getattr(assoc_obj, "close", None)
        if close is not None:
            close()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment