Skip to content
Snippets Groups Projects
Commit 33e6b214 authored by Bienchen's avatar Bienchen
Browse files

Merge branch 'develop' into human-heterodimers

parents 9314e371 aa83f1e8
No related branches found
No related tags found
No related merge requests found
[tool.black]
line-length=80
line-length=79
[tool.pylint.REPORTS]
reports='no'
......
ARG VERSION_PYTHON="3.9"
ARG VERSION_BASE_IMAGE="python:${VERSION_PYTHON}-alpine3.16"
ARG VERSION_BASE_IMAGE="python:${VERSION_PYTHON}-alpine3.17"
FROM ${VERSION_BASE_IMAGE}
# We need to declare ARGs again which were declared before the build stage
# (FROM directive), otherwise they won't be available in this stage.
......@@ -80,7 +80,7 @@ COPY --chmod=755 validate-mmcif-file.py /usr/local/bin/validate-mmcif-file
## https://github.com/ihmwg/ModelCIF/blob/master/dist/mmcif_ma.dic.
## Dictionaries do not change that frequently therefore we skip the hassle of
## keeping them in an external volume.
ARG USE_DICT_VERSION="1.4.3"
ARG USE_DICT_VERSION="1.4.5"
ENV USE_DICT_VERSION=${USE_DICT_VERSION}
LABEL org.modelarchive.dict_release="${USE_DICT_VERSION}"
WORKDIR ${SRC_DIR}
......@@ -118,9 +118,7 @@ RUN set -e pipefail; \
-dictSdbFile ${MMCIF_DICTS_DIR}/mmcif_pdbx_v50.dic.sdb; \
#
## Get versions of ModelCIF & PDBx/mmCIF dictionaries
get-mmcif-dict-versions --parent-location ${_GIT_URL}/base/mmcif_pdbx_v50.dic \
--child-location ${_MA_DICT_URL} \
mmcif_ma.dic; \
get-mmcif-dict-versions --child-location ${_MA_DICT_URL} mmcif_ma.dic; \
mv mmcif_ma_version.json ${MMCIF_DICTS_DIR}/; \
#
## Make SDBs readable and keep possible error logs from building them
......
For MA validation:
- theoretical models should not populate _exptl.method, instead use _struct.pdbx_structure_determination_methodology
......@@ -25,15 +25,6 @@ def _parse_command_line():
metavar="<DICTIONARY FILE>",
help="The mmCIF dictionary file to read the versions from.",
)
parser.add_argument(
"--parent",
"-p",
type=str,
metavar="<NAME OF PARENT DICT>",
help="Name of to the 'parent' dictionary. This is the one the other "
+ "dictionary is appended to. This is usually the mmcif_pdbx_v50.dic.",
default="mmcif_pdbx_v50.dic",
)
parser.add_argument(
"--output",
"-o",
......@@ -42,14 +33,6 @@ def _parse_command_line():
help="Path to store the JSON file with the version at.",
default="mmcif_ma_version.json",
)
parser.add_argument(
"--parent-location",
"-u",
type=str,
metavar="<URL OF PARENT DICT FILE>",
help="Download location of the parent dictionary file.",
default=None,
)
parser.add_argument(
"--child-location",
"-l",
......@@ -90,7 +73,7 @@ def _get_data_item(itm, cat, file_name, cat_data):
return val[0]
def _get_versions(dic_file, parent_name, io_adapter):
def _get_versions(dic_file, io_adapter):
"""Fetch the 'category_group_list' object and assemble a version for the
dictionary."""
......@@ -112,24 +95,11 @@ def _get_versions(dic_file, parent_name, io_adapter):
ttl = _get_data_item("title", "dictionary", dic_file, dic)
dic_version = {"title": ttl, "version": vrsn}
cmp = _get_data_cat("pdbx_dictionary_component", dic_file, cntnr)
dc_idx = cmp.getAttributeIndex("dictionary_component_id")
vs_idx = cmp.getAttributeIndex("version")
for row in cmp:
if row[dc_idx] == parent_name:
vrsn = row[vs_idx]
prnt_version = {"title": parent_name, "version": vrsn}
break
return dic_version, prnt_version
return dic_version
def _add_dict_location(parent, child, parent_loc, child_loc):
def _add_dict_location(child, child_loc):
"""Add URLs to the dictionary versions if available."""
if parent_loc is None:
parent["location"] = "."
else:
parent["location"] = parent_loc
if child_loc is None:
child["location"] = "."
else:
......@@ -141,13 +111,11 @@ def _main():
opts = _parse_command_line()
io_adapter = IoAdapterPy(False, sys.stdout)
c_vrsn, p_vrsn = _get_versions(opts.dic_file, opts.parent, io_adapter)
c_vrsn = _get_versions(opts.dic_file, io_adapter)
_add_dict_location(
p_vrsn, c_vrsn, opts.parent_location, opts.child_location
)
_add_dict_location(c_vrsn, opts.child_location)
with open(opts.output, "w", encoding="utf8") as jfh:
json.dump({"versions": [p_vrsn, c_vrsn]}, jfh)
json.dump({"versions": [c_vrsn]}, jfh)
if __name__ == "__main__":
......
# It's a script, allow nicely formatted name
# pylint: disable=invalid-name
# pylint: enable=invalid-name
"""Test the validation tool - this is *NOT* a set of unit tests for the
validation tool but functional tests. The test suite makes sure, that the
validation tool is working as intended, scanning ModelCIF files/ mmCIF files.
"""
from argparse import ArgumentParser
import json
import os
import re
import subprocess
import sys
import requests
# Module-wide settings
TST_FLS_DIR = "test_files"  # directory scanned for `.cif` files to validate
DCKR = "docker"  # `docker` command
# Docker image "name" (repository part, the tag is added at run time)
DCKR_IMG_RPO = (
    "registry.scicore.unibas.ch/schwede/modelcif-converters/"
    "mmcif-dict-suite"
)
# Collection of Docker commands used, keyed by sub-command
DCKR_CMDS = {
    "build": [DCKR, "build"],
    "images": [DCKR, "images", "--format", "json"],
    "inspect": [DCKR, "inspect", "--format", "json"],
    "run": [DCKR, "run", "--rm"],
}
def _parse_args():
    """Parse the command line and return the resulting options.

    The only option is the `-v`/`--verbose` switch, which is off by default.
    """
    cli = ArgumentParser(description=__doc__)
    cli.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        default=False,
        help="Print more output while running.",
    )
    return cli.parse_args()
def _check_docker_installed():
    """Verify that Docker can be called and knows the commands used here.

    First the plain `docker` command is executed, afterwards every command
    listed in `DCKR_CMDS`. A command that only complains about a missing
    argument counts as working, any other failure aborts the script.
    """
    # Plain `docker` call, no sub-command
    bare_cmd = [DCKR]
    try:
        subprocess.run(
            bare_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            check=True,
        )
    except FileNotFoundError as exc:
        if exc.filename == DCKR:
            _print_abort(
                "Looks like Docker is not installed, running command "
                f"`{' '.join(bare_cmd)}` failed."
            )
        # A different file is missing, pass the error on untouched.
        raise
    except subprocess.CalledProcessError as exc:
        _print_abort(
            "Looks like Docker does not work properly, test call "
            f"(`{' '.join(exc.cmd)}`) failed with exit code {exc.returncode} "
            f'and output:\n"""\n{exc.output.decode()}"""'
        )

    # Now the individual Docker commands this script relies on
    miss_arg_re = re.compile(r"requires (?:exactly|at least) 1 argument\.$")
    for sub_cmd in DCKR_CMDS.values():
        try:
            subprocess.run(
                sub_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True,
            )
        except subprocess.CalledProcessError as exc:
            # A complaint about a missing argument seems to be the default
            # message of a working command, so it is accepted.
            out_lines = exc.output.decode().splitlines()
            if any(miss_arg_re.search(line) for line in out_lines):
                continue
            _print_abort(
                "Looks like Docker does not work as expected, test call "
                f"(`{' '.join(exc.cmd)}`) failed with exit code "
                f'{exc.returncode} and output:\n"""\n'
                f'{exc.output.decode()}"""'
            )
def _get_modelcif_dic_version(archive_items=None):
    """Get the latest version string of the ModelCIF dictionary.

    Versions are read from file names of the form
    `mmcif_ma-v<major>.<minor>.<hotfix>.dic` and ranked as
    (major, minor, hotfix) tuples, so 1.4.5 is newer than 1.3.9 and 1.10.0 is
    newer than 1.9.0.

    :param archive_items: Optional iterable of mappings with a "name" entry
        to search. If None (default), the listing of the `archive` directory
        is fetched from the official ModelCIF GitHub repo.
    :return: Latest version as "<major>.<minor>.<hotfix>", "0.0.0" if no
        name matches.
    :raises requests.HTTPError: If the listing had to be fetched and GitHub
        answered with an error status.
    """
    if archive_items is None:
        rspns = requests.get(
            "https://api.github.com/repos/ihmwg/ModelCIF/contents/archive",
            headers={"accept": "application/vnd.github+json"},
            timeout=180,
        )
        # Stop on HTTP errors right here. An error payload is not a list of
        # files and would only fail later with an obscure TypeError.
        rspns.raise_for_status()
        archive_items = rspns.json()

    # Dot in front of 'dic' is escaped to only match the real file extension.
    dic_re = re.compile(r"mmcif_ma-v(\d+)\.(\d+)\.(\d+)\.dic")
    ltst = (0, 0, 0)
    for arc_itm in archive_items:
        dic_mt = dic_re.match(arc_itm["name"])
        if dic_mt:
            # Tuples compare component by component, major number first.
            ltst = max(ltst, tuple(int(x) for x in dic_mt.groups()))

    return ".".join(str(x) for x in ltst)
def _find_docker_image(repo_name, image_tag):
    """Look up the validation image among the images known to Docker.

    Returns "<repo_name>:<image_tag>" if an image with that repository and
    tag is listed, None otherwise.
    """
    listing = subprocess.run(
        DCKR_CMDS["images"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        check=True,
    ).stdout.decode()
    # Every line of the output is parsed as a JSON document of its own.
    for entry in map(json.loads, listing.splitlines()):
        if (entry["Repository"], entry["Tag"]) == (repo_name, image_tag):
            return f"{repo_name}:{image_tag}"

    return None
def _build_docker_image(repo_name, image_tag):
    """Build the validation image from the current directory.

    The image is tagged "<repo_name>:<image_tag>" and gets the ID of the
    current user handed in as build argument `MMCIF_USER_ID`.

    :return: Name of the image, "<repo_name>:<image_tag>".
    :raises subprocess.CalledProcessError: If the build command fails.
    """
    image = f"{repo_name}:{image_tag}"
    # Assemble a new list, extending DCKR_CMDS["build"] in place would alter
    # the shared command for everybody else.
    args = [
        *DCKR_CMDS["build"],
        "--build-arg",
        f"MMCIF_USER_ID={os.getuid()}",
        "-t",
        image,
        ".",
    ]
    subprocess.run(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        check=True,
        # `env` replaces the whole environment of the child process, so add
        # to a copy of the current one to keep PATH, HOME, DOCKER_HOST & Co.
        env={**os.environ, "DOCKER_BUILDKIT": "1"},
    )

    return image
def _verify_docker_image(image_name, dic_version):
    """Check certain version numbers inside the Docker image.

    Labels of the image, as reported by the `inspect` command, are compared
    to reference values. A missing label, a differing value or an unexpected
    number of inspection results aborts the script.

    :param image_name: Name of the image to inspect.
    :param dic_version: Expected value of label
        `org.modelarchive.dict_release`.
    :raises subprocess.CalledProcessError: If the inspect command fails.
    """
    lbls2chk = {
        "org.modelarchive.base-image": "python:3.9-alpine3.17",
        "org.modelarchive.cpp-dict-pack.version": "v2.500",
        "org.modelarchive.dict_release": dic_version,
    }
    # Assemble a new list, appending to DCKR_CMDS["inspect"] in place would
    # alter the shared command for everybody else.
    args = [*DCKR_CMDS["inspect"], image_name]
    dckr_p = subprocess.run(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        check=True,
        # `env` replaces the whole environment of the child process, so add
        # to a copy of the current one to keep PATH, HOME, DOCKER_HOST & Co.
        env={**os.environ, "DOCKER_BUILDKIT": "1"},
    )
    inspected = json.loads(dckr_p.stdout.decode())
    # Explicit check instead of `assert`, which is dropped by `python -O`.
    if len(inspected) != 1:
        _print_abort(
            f"Expected exactly 1 inspection result for image '{image_name}', "
            + f"got {len(inspected)}."
        )
    # NOTE(review): Labels is presumably null for images without any label,
    # treat that like an empty set of labels - confirm.
    img_lbls = inspected[0]["Config"]["Labels"] or {}
    for lbl, val in lbls2chk.items():
        if lbl not in img_lbls:
            _print_abort(f"Label '{lbl}' not found in image '{image_name}'.")
        if img_lbls[lbl] != val:
            _print_abort(
                f"Label '{lbl}' ({img_lbls[lbl]}) in image '{image_name}' "
                + f"does not equal the reference value '{val}'."
            )
def _test_file(cif_file, cif_dir, image, expected_results):
    """Check that a certain mmCIF file validates as expected.

    Runs `validate-mmcif-file` in a container of `image` with `cif_dir`
    mounted as `/data`. The exit code and the "cifcheck-errors", "status" and
    "diagnosis" entries of the JSON report are compared to
    `expected_results`, any difference aborts the script.

    :param cif_file: Name of the file to validate, relative to `cif_dir`.
    :param cif_dir: Directory with the file, mounted into the container.
    :param image: Name of the Docker image to run.
    :param expected_results: Mapping with the reference values, keys
        "ret_val", "cifcheck-errors", "status" and "diagnosis".
    """
    # Assemble a new list for every call. Extending DCKR_CMDS["run"] in place
    # would pile up the arguments of previously tested files in the shared
    # command, breaking every call after the first one.
    args = [
        *DCKR_CMDS["run"],
        "-v",
        f"{os.path.abspath(cif_dir)}:/data",
        image,
        "validate-mmcif-file",
        "-a",
        "/data",
        f"/data/{cif_file}",
    ]
    # run validation, a non-zero exit code may be the expected outcome
    dckr_p = subprocess.run(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        check=False,
    )

    # check output
    if dckr_p.returncode != expected_results["ret_val"]:
        _print_abort(
            f"Exit value for '{cif_file}' not right: {dckr_p.returncode}, "
            + f"expected: {expected_results['ret_val']}"
        )
    vldtn_json = json.loads(dckr_p.stdout.decode())
    for report_key in ["cifcheck-errors", "status", "diagnosis"]:
        if vldtn_json[report_key] != expected_results[report_key]:
            _print_abort(
                f"Validation report on '{cif_file}', value of '{report_key}' "
                + f"not as expected, got:\n{vldtn_json[report_key]}\n"
                + f"expected:\n{expected_results[report_key]}"
            )
def _print_abort(*args, **kwargs):
    """Write the message and a final "Aborting." to stderr, exit with 1.

    Arguments are handed to `print()` as they are.
    """
    err_stream = sys.stderr
    print(*args, file=err_stream, **kwargs)
    print("Aborting.", file=err_stream)
    raise SystemExit(1)
# This is a dummy function for non-verbose runs of this script. Unused
# arguments are allowed at this point.
# pylint: disable=unused-argument
def _print_verbose(*args, **kwargs):
    """Do not print anything.

    Accepts any arguments and ignores them. `_main()` rebinds this name to
    `print` when verbose output is requested.
    """
# pylint: enable=unused-argument
def _do_step(func, msg, *args, **kwargs):
    """Run `func(*args, **kwargs)` framed by verbose progress messages.

    A result of type `str` is echoed to the verbose output. Whatever `func`
    returns is handed back to the caller unchanged.
    """
    _print_verbose(msg, "...")
    outcome = func(*args, **kwargs)
    if isinstance(outcome, str):
        _print_verbose(f"{outcome} ", end="")
    _print_verbose("... done", msg)

    return outcome
def _main():
    """Run as script."""
    # ToDo: add test fetching associated data from the internet
    # ToDo: add test for missing associated data
    # Reference outcome for every `.cif` file expected in TST_FLS_DIR
    expctd_rslts = {
        "working.cif": {
            "ret_val": 0,
            "cifcheck-errors": [],
            "status": "completed",
            "diagnosis": [],
        }
    }

    if _parse_args().verbose:
        # Verbose printing is switched on by rebinding the dummy function to
        # 'print'. That saves carrying an extra argument around and needs no
        # special class or logger. But in general don't use 'global'.
        # The lower-case name is allowed so it looks more like an ordinary
        # function.
        # pylint: disable=global-statement,invalid-name
        global _print_verbose
        _print_verbose = print

    # Docker needs to be installed with all necessary commands available.
    _do_step(_check_docker_installed, "checking Docker installation")

    # The expected image tag is the latest ModelCIF dic version from GitHub.
    dic_version = _do_step(
        _get_modelcif_dic_version,
        "fetching latest ModelCIF dictionary version",
    )
    tagged_image = f"{DCKR_IMG_RPO}:{dic_version}"

    # Use the image if its already there, build it otherwise.
    image = _do_step(
        _find_docker_image,
        f"searching for Docker image ({tagged_image})",
        DCKR_IMG_RPO,
        dic_version,
    )
    if image is None:
        image = _do_step(
            _build_docker_image,
            f"building Docker image ({tagged_image})",
            DCKR_IMG_RPO,
            dic_version,
        )

    # Some version numbers inside the image are verified.
    _do_step(
        _verify_docker_image, "verifying Docker image", image, dic_version
    )

    # The actual tests of the validation script: validate all `.cif` files
    # in test_files/, everything else in there is ignored.
    for cif in os.listdir(TST_FLS_DIR):
        if cif.endswith(".cif"):
            # A file without expected results can not be tested.
            if cif not in expctd_rslts:
                raise RuntimeError(
                    f"File '{cif}' does not have expected results to be "
                    + "tested."
                )
            _do_step(
                _test_file,
                f"checking on file '{cif}'",
                cif,
                TST_FLS_DIR,
                image,
                expctd_rslts[cif],
            )
if __name__ == "__main__":
_main()
# LocalWords: pylint argparse ArgumentParser subprocess sys DCKR args exc
# LocalWords: stdout stderr FileNotFoundError CalledProcessError returncode
This diff is collapsed.
File added
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment