diff --git a/validation/Dockerfile b/validation/Dockerfile
index 7ec976fa46679088e7ac4d19cf6cd89245a48892..3f20534318c3205616c989434554e9fb5b4e537f 100644
--- a/validation/Dockerfile
+++ b/validation/Dockerfile
@@ -29,6 +29,7 @@ LABEL vendor3="Biozentrum - University of Basel (biozentrum.unibas.ch)"
 
 ## Install the RCSB CPP Dict Suite (only the binaries we need)
 WORKDIR ${SRC_DIR}
+COPY requirements.txt ${SRC_DIR}
 RUN set -e pipefail; \
     export DICT_PACK_SRC_DIR="${SRC_DIR}/cpp-dict-pack.git"; \
     apk update; \
@@ -56,6 +57,7 @@ RUN set -e pipefail; \
     /usr/local/bin/python -m pip install --upgrade pip; \
     /usr/local/bin/python -m pip install mmcif==${VERSION_PY_MMCIF} \
                                          python-rapidjson; \
+    /usr/local/bin/python -m pip install -r requirements.txt; \
     #
     ## Clean up/ remove unnecessary stuff
     apk del abuild binutils bison build-base cmake flex git gcc \
diff --git a/validation/requirements.txt b/validation/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a5f0a8bb8d844b3fb52fb55f783610e4dc860b75
--- /dev/null
+++ b/validation/requirements.txt
@@ -0,0 +1,2 @@
+python-rapidjson==1.9
+validators==0.20.0
diff --git a/validation/validate-mmcif-file.py b/validation/validate-mmcif-file.py
index ad9fab4f9d35ba78aa461b657725d04f3db7b0fc..252c334281008070caba6eafa2afaad2f1a9cb75 100755
--- a/validation/validate-mmcif-file.py
+++ b/validation/validate-mmcif-file.py
@@ -20,7 +20,9 @@ import sys
 import tempfile
 import zipfile
 
+from validators import url as is_url
 import rapidjson as json
+import requests
 
 
 from mmcif.api.DataCategory import DataCategory
@@ -215,12 +217,51 @@ def _get_entry_id(cif_datablock, entry_id_map, datablock_idx):
                 entry_id_map[row[eidx]] = datablock_idx
 
 
+def _download_file(file_url):
+    """Download a URL into an anonymous temporary file and return the open
+    file object, rewound to its start. The file is deleted once it is closed.
+    Raises RuntimeError unless the server answers with HTTP status 200."""
+    # 'with' releases the streamed connection, also on the error path
+    with requests.get(file_url, stream=True, timeout=600) as rspns:
+        if rspns.status_code != 200:
+            raise RuntimeError(f"File not found by URL '{file_url}'.")
+        dlf = tempfile.TemporaryFile()
+        for chunk in rspns.iter_content(chunk_size=1024):
+            dlf.write(chunk)
+    dlf.seek(0)
+    return dlf
+
+
+def _get_assoc_obj(file_or_url, assoc_dir):
+    """Resolve an associated file. Returns the path inside assoc_dir if the
+    file exists there; otherwise, if file_or_url is a URL, downloads it and
+    returns an open temporary file object (deleted when closed). Raises
+    RuntimeError if it is neither an existing file nor a URL."""
+    local_path = file_or_url
+    if assoc_dir is not None:
+        local_path = os.path.join(assoc_dir, file_or_url)
+        if os.path.exists(local_path):
+            return local_path
+    if is_url(file_or_url):
+        return _download_file(file_or_url)
+    raise RuntimeError(
+        "Associated file path does not point to actual file or URL: "
+        + f"'{local_path}'"
+    )
+
+
 def _unzip_arc_cif(arc_file, cif_file, assoc_dir):
-    """Extract a cif file from a ZIP archive."""
+    """Extract a cif file from a ZIP archive given as local file or URL."""
     assoc_data = []
-    with zipfile.ZipFile(os.path.join(assoc_dir, arc_file)) as arc_zip:
-        with TextIOWrapper(arc_zip.open(cif_file), encoding="utf-8") as cif_fh:
-            assoc_data = _read_mmcif(cif_fh)
+    assoc_obj = _get_assoc_obj(arc_file, assoc_dir)
+    try:
+        with zipfile.ZipFile(assoc_obj) as arc_zip:
+            cif_raw = arc_zip.open(cif_file)
+            with TextIOWrapper(cif_raw, encoding="utf-8") as cif_fh:
+                assoc_data = _read_mmcif(cif_fh)
+    finally:
+        if hasattr(assoc_obj, "close"):  # only downloads are file objects
+            assoc_obj.close()
 
     return assoc_data