Skip to content
Snippets Groups Projects
Commit be971a66 authored by Bienchen's avatar Bienchen
Browse files

Download associated archive from internet for validation.

parent a9a85e19
Branches
No related tags found
No related merge requests found
...@@ -29,6 +29,7 @@ LABEL vendor3="Biozentrum - University of Basel (biozentrum.unibas.ch)" ...@@ -29,6 +29,7 @@ LABEL vendor3="Biozentrum - University of Basel (biozentrum.unibas.ch)"
## Install the RCSB CPP Dict Suite (only the binaries we need) ## Install the RCSB CPP Dict Suite (only the binaries we need)
WORKDIR ${SRC_DIR} WORKDIR ${SRC_DIR}
COPY requirements.txt ${SRC_DIR}
RUN set -e pipefail; \ RUN set -e pipefail; \
export DICT_PACK_SRC_DIR="${SRC_DIR}/cpp-dict-pack.git"; \ export DICT_PACK_SRC_DIR="${SRC_DIR}/cpp-dict-pack.git"; \
apk update; \ apk update; \
...@@ -56,6 +57,7 @@ RUN set -e pipefail; \ ...@@ -56,6 +57,7 @@ RUN set -e pipefail; \
/usr/local/bin/python -m pip install --upgrade pip; \ /usr/local/bin/python -m pip install --upgrade pip; \
/usr/local/bin/python -m pip install mmcif==${VERSION_PY_MMCIF} \ /usr/local/bin/python -m pip install mmcif==${VERSION_PY_MMCIF} \
python-rapidjson; \ python-rapidjson; \
/usr/local/bin/python -m pip install -r requirements.txt; \
# #
## Clean up/ remove unnecessary stuff ## Clean up/ remove unnecessary stuff
apk del abuild binutils bison build-base cmake flex git gcc \ apk del abuild binutils bison build-base cmake flex git gcc \
......
python-rapidjson==1.9
validators==0.20.0
...@@ -20,7 +20,9 @@ import sys ...@@ -20,7 +20,9 @@ import sys
import tempfile import tempfile
import zipfile import zipfile
from validators import url as is_url
import rapidjson as json import rapidjson as json
import requests
from mmcif.api.DataCategory import DataCategory from mmcif.api.DataCategory import DataCategory
...@@ -215,12 +217,51 @@ def _get_entry_id(cif_datablock, entry_id_map, datablock_idx): ...@@ -215,12 +217,51 @@ def _get_entry_id(cif_datablock, entry_id_map, datablock_idx):
entry_id_map[row[eidx]] = datablock_idx entry_id_map[row[eidx]] = datablock_idx
def _download_file(file_url, timeout=600, chunk_size=65536):
    """Download a file from a URL into an anonymous temporary file.

    The response body is streamed to disk in chunks, so the whole file is
    never held in memory at once.

    Args:
        file_url: URL to fetch with an HTTP GET request.
        timeout: Timeout in seconds handed to ``requests.get``.
        chunk_size: Number of bytes requested per chunk while streaming.

    Returns:
        A binary file object created by ``tempfile.TemporaryFile``,
        rewound to offset 0. The caller is responsible for closing it;
        closing the object discards the temporary file.

    Raises:
        RuntimeError: If the server answers with a status code other
            than 200.

    Errors raised by ``requests`` itself (connection problems, timeouts)
    are not caught here.
    """
    # Using the response as a context manager releases the streamed
    # connection on every exit path, including the error ones.
    with requests.get(file_url, stream=True, timeout=timeout) as rspns:
        if rspns.status_code != 200:
            raise RuntimeError(f"File not found by URL '{file_url}'.")
        dlf = tempfile.TemporaryFile()
        try:
            for chunk in rspns.iter_content(chunk_size=chunk_size):
                dlf.write(chunk)
            dlf.seek(0)
        except BaseException:
            # Do not leave a half-written temporary file open when the
            # transfer is interrupted; re-raise the original error.
            dlf.close()
            raise
    return dlf
def _get_assoc_obj(file_or_url, assoc_dir):
    """Resolve an associated file to a local path or a downloaded file.

    Lookup order:

    1. If ``assoc_dir`` is given and ``assoc_dir/file_or_url`` exists, that
       path is returned.
    2. Otherwise, if ``file_or_url`` validates as a URL, it is downloaded
       via ``_download_file`` and the resulting file object is returned.

    Args:
        file_or_url: File name relative to ``assoc_dir``, or a URL.
        assoc_dir: Directory holding associated files, or ``None`` if there
            is no such directory.

    Returns:
        Either a path (``str``) to an existing local file, or whatever
        ``_download_file`` returns for a URL. Callers have to deal with
        both kinds of result.

    Raises:
        RuntimeError: If neither a local file nor a valid URL is found.
    """
    if assoc_dir is not None:
        # Build the candidate path once and reuse it for test and return.
        local_path = os.path.join(assoc_dir, file_or_url)
        if os.path.exists(local_path):
            return local_path

    if is_url(file_or_url):
        return _download_file(file_or_url)

    # Without a directory, report the bare name instead of "None/<name>".
    if assoc_dir is None:
        shown = file_or_url
    else:
        shown = f"{assoc_dir}/{file_or_url}"
    raise RuntimeError(
        "Associated file path does not point to actual file or URL: "
        + f"'{shown}'"
    )
def _unzip_arc_cif(arc_file, cif_file, assoc_dir):
    """Extract a cif file from a ZIP archive.

    The archive is located through ``_get_assoc_obj``, so it may be a local
    path or a file object holding a downloaded archive.

    Args:
        arc_file: Name of the ZIP archive relative to ``assoc_dir``, or a
            URL pointing to the archive.
        cif_file: Name of the member inside the archive to read.
        assoc_dir: Directory with associated files, may be ``None``.

    Returns:
        The result of ``_read_mmcif`` applied to the UTF-8 decoded member.
    """
    assoc_obj = _get_assoc_obj(arc_file, assoc_dir)
    try:
        with zipfile.ZipFile(assoc_obj) as arc_zip:
            with TextIOWrapper(
                arc_zip.open(cif_file), encoding="utf-8"
            ) as cif_fh:
                return _read_mmcif(cif_fh)
    finally:
        # A downloaded archive is a file object that must be closed, even
        # when reading the archive fails. A plain path has no close().
        close = getattr(assoc_obj, "close", None)
        if close is not None:
            close()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment