From cc3ded3d2757da334df910511ee56e3f01c531cf Mon Sep 17 00:00:00 2001
From: Stefan Bienert <>
Date: Fri, 3 Mar 2023 17:00:08 +0100
Subject: [PATCH] Test example files

 pyproject.toml           |   2 +-
 validation/ | 335 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 336 insertions(+), 1 deletion(-)
 create mode 100644 validation/

diff --git a/pyproject.toml b/pyproject.toml
index 33018da..4805e0a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
diff --git a/validation/ b/validation/
new file mode 100644
index 0000000..9f46a72
--- /dev/null
+++ b/validation/
@@ -0,0 +1,335 @@
+# It's a script, allow a nicely formatted name
+# pylint: disable=invalid-name
+# pylint: enable=invalid-name
+"""Test the validation tool - this is *NOT* a set of unit tests for the
+validation tool but functional tests. The test suite makes sure that the
+validation tool is working as intended, scanning ModelCIF files/ mmCIF files.
+"""
+from argparse import ArgumentParser
+import json
+import os
+import re
+import subprocess
+import sys
+import requests
+# Some global variables
+TST_FLS_DIR = "test_files"
+DCKR = "docker"  # `docker` command
+DCKR_IMG_RPO = "mmcif-dict-suite"  # Docker image "name"
+# collection of docker commads used
+    "build": [DCKR, "build"],
+    "images": [DCKR, "images", "--format", "json"],
+    "inspect": [DCKR, "inspect", "--format", "json"],
+    "run": [DCKR, "run", "--rm"],
+def _parse_args():
+    """Deal with command line arguments."""
+    parser = ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        default=False,
+        action="store_true",
+        help="Print more output while running.",
+    )
+    args = parser.parse_args()
+    return args
+def _check_docker_installed():
+    """Make sure the `docker` command can be executed."""
+    # ToDo: check all Docker commands used in this script here (Add more over
+    #       time).
+    # just check `docker` as command on its own
+    args = [DCKR]
+    try:
+            args,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            check=True,
+        )
+    except FileNotFoundError as exc:
+        if exc.filename == DCKR:
+            _print_abort(
+                "Looks like Docker is not installed, running command "
+                f"`{' '.join(args)}` failed."
+            )
+        raise
+    except subprocess.CalledProcessError as exc:
+        _print_abort(
+            "Looks like Docker does not work properly, test call "
+            f"(`{' '.join(exc.cmd)}`) failed with exit code {exc.returncode} "
+            f'and output:\n"""\n{exc.output.decode()}"""'
+        )
+    # check various docker commands used in this script
+    miss_arg_re = re.compile(r"requires (?:exactly|at least) 1 argument\.$")
+    for args in DCKR_CMDS.values():
+        try:
+                args,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                check=True,
+            )
+        except subprocess.CalledProcessError as exc:
+            pass_ok = False
+            for line in exc.output.decode().splitlines():
+                if
+                    # This seems to be a default message of a working command
+                    # which lacks some arguments.
+                    pass_ok = True
+                    break
+            if not pass_ok:
+                _print_abort(
+                    "Looks like Docker does not work as expected, test call "
+                    f"(`{' '.join(exc.cmd)}`) failed with exit code "
+                    f'{exc.returncode} and output:\n"""\n'
+                    f'{exc.output.decode()}"""'
+                )
+def _get_modelcif_dic_version():
+    """Get the latest versionstring of the ModelCIF dictionary from the
+    official GitHub repo."""
+    rspns = requests.get(
+        "",
+        headers={"accept": "application/vnd.github+json"},
+        timeout=180,
+    )
+    dic_re = re.compile(r"mmcif_ma-v(\d+)\.(\d+)\.(\d+).dic")
+    ltst = (0, 0, 0)
+    for arc_itm in rspns.json():
+        dic_mt = dic_re.match(arc_itm["name"])
+        if dic_mt:
+            mjr = int(
+            mnr = int(
+            htfx = int(
+            if mjr > ltst[0] or mnr > ltst[1] or htfx > ltst[2]:
+                ltst = (mjr, mnr, htfx)
+                continue
+    return f"v{'.'.join([str(x) for x in ltst])}"
+def _find_docker_image(repo_name, image_tag):
+    """Check that the Docker image to run validations is available. If its
+    there, return the name, None otherwise."""
+    dckr_p =
+        DCKR_CMDS["images"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        check=True,
+    )
+    for j_line in dckr_p.stdout.decode().splitlines():
+        img = json.loads(j_line)
+        if img["Repository"] == repo_name and img["Tag"] == image_tag:
+            return f"{repo_name}:{image_tag}"
+    return None
+def _build_docker_image(repo_name, image_tag):
+    """Build the validation image."""
+    uid = os.getuid()
+    image = f"{repo_name}:{image_tag}"
+    args = DCKR_CMDS["build"]
+    args.extend(
+        [
+            "--build-arg",
+            f"MMCIF_USER_ID={uid}",
+            "-t",
+            image,
+            ".",
+        ]
+    )
+        args,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        check=True,
+        env={"DOCKER_BUILDKIT": "1"},
+    )
+    return image
+def _verify_docker_image(image_name, dic_version):
+    """Check certain version numbers inside the Docker image."""
+    lbls2chk = {
+        "org.modelarchive.base-image": "python:3.9-alpine3.17",
+        "org.modelarchive.cpp-dict-pack.version": "v2.500",
+        "org.modelarchive.dict_release": dic_version[1:],
+    }
+    args = DCKR_CMDS["inspect"]
+    args.append(image_name)
+    dckr_p =
+        args,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        check=True,
+        env={"DOCKER_BUILDKIT": "1"},
+    )
+    img_lbls = json.loads(dckr_p.stdout.decode())
+    assert len(img_lbls) == 1
+    img_lbls = img_lbls[0]["Config"]["Labels"]
+    for lbl, val in lbls2chk.items():
+        if lbl not in img_lbls:
+            _print_abort(f"Label '{lbl}' not found in image '{image_name}'.")
+        if img_lbls[lbl] != val:
+            _print_abort(
+                f"Label '{lbl}' ({img_lbls[lbl]}) in image '{image_name}' "
+                + f"does not equal the reference value '{val}'."
+            )
+def _test_file(cif_file, cif_dir, image, expected_results):
+    """Check that a certain mmCIF file validates as expected"""
+    args = DCKR_CMDS["run"]
+    args.extend(
+        [
+            "-v",
+            f"{os.path.abspath(cif_dir)}:/data",
+            image,
+            "validate-mmcif-file",
+            "-a",
+            "/data",
+            f"/data/{cif_file}",
+        ]
+    )
+    # run validation
+    dckr_p =
+        args,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        check=False,
+    )
+    # check output
+    if dckr_p.returncode != expected_results["ret_val"]:
+        _print_abort(
+            f"Exit value for '{cif_file}' not right: {dckr_p.returncode}, "
+            + f"expected: {expected_results['ret_val']}"
+        )
+    vldtn_json = json.loads(dckr_p.stdout.decode())
+    for report_key in ["cifcheck-errors", "status", "diagnosis"]:
+        if vldtn_json[report_key] != expected_results[report_key]:
+            _print_abort(
+                f"Validation report on '{cif_file}', value of '{report_key}' "
+                + f"not as expected, got:\n{vldtn_json[report_key]}\n"
+                + f"expected:\n{expected_results[report_key]}"
+            )
+def _print_abort(*args, **kwargs):
+    """Print an abort message and exit."""
+    print(*args, file=sys.stderr, **kwargs)
+    print("Aborting.", file=sys.stderr)
+    sys.exit(1)
+# This is a dummy function for non-verbose runs of this script; `_main`
+# rebinds the name to `print` for verbose runs. Unused arguments are allowed
+# at this point.
+# pylint: disable=unused-argument
+def _print_verbose(*args, **kwargs):
+    """Do not print anything."""
+# pylint: enable=unused-argument
+def _do_step(func, msg, *args, **kwargs):
+    """Perform next step decorated with a verbose message."""
+    _print_verbose(msg, "...")
+    ret_val = func(*args, **kwargs)
+    if isinstance(ret_val, str):
+        _print_verbose(f"{ret_val} ", end="")
+    _print_verbose("... done", msg)
+    return ret_val
+def _main():
+    """Run as script."""
+    expctd_rslts = {
+        "working.cif": {
+            "ret_val": 0,
+            "cifcheck-errors": [],
+            "status": "completed",
+            "diagnosis": [],
+        }
+    }
+    opts = _parse_args()
+    if opts.verbose:
+        # For verbose printing, a functions redefined sow e do not need to
+        # carry an extra argument around, no special class or logger... simply
+        # 'print'. But in general don't use 'global'.
+        # Name of the variable is allowed so it looks more like an ordinary
+        # function.
+        # pylint: disable=global-statement,invalid-name
+        global _print_verbose
+        _print_verbose = print
+    # Make sure Docker is installed and necessary commands are available.
+    _do_step(_check_docker_installed, "checking Docker installation")
+    # Get expected image tag (latest ModelCIF dic version from GitHub)
+    dic_version = _do_step(
+        _get_modelcif_dic_version,
+        "fetching latest ModelCIF dictionary version",
+    )
+    # Make sure Docker image is present present
+    image = _do_step(
+        _find_docker_image,
+        f"searching for Docker image ({DCKR_IMG_RPO}:{dic_version})",
+        DCKR_IMG_RPO,
+        dic_version,
+    )
+    if image is None:
+        image = _do_step(
+            _build_docker_image,
+            f"building Docker image ({DCKR_IMG_RPO}:{dic_version})",
+            DCKR_IMG_RPO,
+            dic_version,
+        )
+    # Verify some version numbers inside the container
+    _do_step(
+        _verify_docker_image, "verifying Docker image", image, dic_version
+    )
+    # Run the actual tests of the validation script/ validate all files in
+    # test_files/.
+    test_files = os.listdir(TST_FLS_DIR)
+    for cif in test_files:
+        if not cif.endswith(".cif"):
+            continue
+        # check that file is has expected results
+        if cif not in expctd_rslts:
+            raise RuntimeError(
+                f"File '{cif}' does not have expected results to be tested."
+            )
+        _do_step(
+            _test_file,
+            f"checking on file '{cif}'",
+            cif,
+            TST_FLS_DIR,
+            image,
+            expctd_rslts[cif],
+        )
+# Run the functional tests only when executed as a script, not on import.
+if __name__ == "__main__":
+    _main()
+#  LocalWords:  pylint argparse ArgumentParser subprocess sys DCKR args exc
+#  LocalWords:  stdout stderr FileNotFoundError CalledProcessError returncode