Skip to content
Snippets Groups Projects
Commit af54e24c authored by Bienchen's avatar Bienchen
Browse files

Merge branch 'validation-tool' into develop

parents d911872a e427a626
No related branches found
No related tags found
No related merge requests found
...@@ -29,6 +29,7 @@ LABEL vendor3="Biozentrum - University of Basel (biozentrum.unibas.ch)" ...@@ -29,6 +29,7 @@ LABEL vendor3="Biozentrum - University of Basel (biozentrum.unibas.ch)"
## Install the RCSB CPP Dict Suite (only the binaries we need) ## Install the RCSB CPP Dict Suite (only the binaries we need)
WORKDIR ${SRC_DIR} WORKDIR ${SRC_DIR}
COPY requirements.txt ${SRC_DIR}
RUN set -e pipefail; \ RUN set -e pipefail; \
export DICT_PACK_SRC_DIR="${SRC_DIR}/cpp-dict-pack.git"; \ export DICT_PACK_SRC_DIR="${SRC_DIR}/cpp-dict-pack.git"; \
apk update; \ apk update; \
...@@ -56,6 +57,7 @@ RUN set -e pipefail; \ ...@@ -56,6 +57,7 @@ RUN set -e pipefail; \
/usr/local/bin/python -m pip install --upgrade pip; \ /usr/local/bin/python -m pip install --upgrade pip; \
/usr/local/bin/python -m pip install mmcif==${VERSION_PY_MMCIF} \ /usr/local/bin/python -m pip install mmcif==${VERSION_PY_MMCIF} \
python-rapidjson; \ python-rapidjson; \
/usr/local/bin/python -m pip install -r requirements.txt; \
# #
## Clean up/ remove unnecessary stuff ## Clean up/ remove unnecessary stuff
apk del abuild binutils bison build-base cmake flex git gcc \ apk del abuild binutils bison build-base cmake flex git gcc \
......
python-rapidjson==1.9
validators==0.20.0
...@@ -20,7 +20,9 @@ import sys ...@@ -20,7 +20,9 @@ import sys
import tempfile import tempfile
import zipfile import zipfile
from validators import url as is_url
import rapidjson as json import rapidjson as json
import requests
from mmcif.api.DataCategory import DataCategory from mmcif.api.DataCategory import DataCategory
...@@ -215,12 +217,51 @@ def _get_entry_id(cif_datablock, entry_id_map, datablock_idx): ...@@ -215,12 +217,51 @@ def _get_entry_id(cif_datablock, entry_id_map, datablock_idx):
entry_id_map[row[eidx]] = datablock_idx entry_id_map[row[eidx]] = datablock_idx
def _download_file(file_url):
    """Download a file into an anonymous temporary file.

    The content is streamed in chunks so the whole payload never has to be
    held in memory at once.

    :param file_url: URL to fetch.
    :returns: Binary file object, rewound to position 0. The underlying
              temporary file is removed as soon as the object is closed, so
              the caller is responsible for closing it.
    :raises RuntimeError: If the server does not answer with HTTP status 200.
    """
    # Using the response as a context manager makes sure the streamed
    # connection is released, even if we bail out early or the download fails.
    with requests.get(file_url, stream=True, timeout=600) as rspns:
        if rspns.status_code != 200:
            raise RuntimeError(f"File not found by URL '{file_url}'.")
        dlf = tempfile.TemporaryFile()
        try:
            for chunk in rspns.iter_content(chunk_size=1024):
                dlf.write(chunk)
            dlf.seek(0)
        except Exception:
            # do not leave a half-written temporary file open behind us
            dlf.close()
            raise
    return dlf
def _get_assoc_obj(file_or_url, assoc_dir):
"""Get a path to an associated file. Will download from internet if path
is a URL. Downloaded files are automatically hooked up for deletion after
the script terminates."""
if assoc_dir is None or not os.path.exists(
os.path.join(assoc_dir, file_or_url)
):
if is_url(file_or_url):
return _download_file(file_or_url)
raise RuntimeError(
"Associated file path does not point to actual file or URL: "
+ f"'{assoc_dir}/{file_or_url}'"
)
return os.path.join(assoc_dir, file_or_url)
def _unzip_arc_cif(arc_file, cif_file, assoc_dir):
    """Extract a cif file from a ZIP archive.

    :param arc_file: Name of the ZIP archive inside ``assoc_dir``, or a URL
                     to download the archive from.
    :param cif_file: Name of the archive member to read.
    :param assoc_dir: Directory holding associated files, may be ``None``.
    :returns: Whatever ``_read_mmcif`` returns for the archive member.
    """
    assoc_obj = _get_assoc_obj(arc_file, assoc_dir)
    try:
        with zipfile.ZipFile(assoc_obj) as arc_zip:
            with TextIOWrapper(
                arc_zip.open(cif_file), encoding="utf-8"
            ) as cif_fh:
                assoc_data = _read_mmcif(cif_fh)
    finally:
        # In case assoc_obj is a downloaded temporary file, it needs to be
        # closed, also when reading the archive failed. Plain paths (str)
        # have no close() and are left alone.
        close = getattr(assoc_obj, "close", None)
        if close is not None:
            close()
    return assoc_data
...@@ -256,6 +297,9 @@ def _get_associated_files(model_cif_file, assoc_dir): ...@@ -256,6 +297,9 @@ def _get_associated_files(model_cif_file, assoc_dir):
) )
if row[idxs["file_format"]] != "cif": if row[idxs["file_format"]] != "cif":
continue continue
# It should be easy to extend this to read from URLs by using
# _get_assoc_obj(row[idxs["file_url"]], assoc_dir), but so far there has
# been no use case for it (and no web server at hand for testing).
data = _read_mmcif(os.path.join(assoc_dir, row[idxs["file_url"]])) data = _read_mmcif(os.path.join(assoc_dir, row[idxs["file_url"]]))
assoc_files.append((data, row[idxs["entry_id"]])) assoc_files.append((data, row[idxs["entry_id"]]))
# make sure entry_id is matching in associated file! # make sure entry_id is matching in associated file!
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment