# syntax=docker/dockerfile:1
# Python version used to select the official base image. It has to be declared
# before it is interpolated into VERSION_BASE_IMAGE below, otherwise it expands
# to an empty string and the resulting image tag is invalid.
# No default is set here; pass it at build time, e.g.
#   docker build --build-arg VERSION_PYTHON=<major.minor> ...
ARG VERSION_PYTHON
# Complete base image reference; may be overridden as a whole via --build-arg.
ARG VERSION_BASE_IMAGE="python:${VERSION_PYTHON}-alpine3.17"
FROM ${VERSION_BASE_IMAGE}
# ARGs declared before the build stage (FROM directive) are out of scope inside
# the stage. Re-declaring them without a value makes them available again.
ARG VERSION_PYTHON
ARG VERSION_BASE_IMAGE
# Git branch/tag of https://github.com/rcsb/cpp-dict-pack that gets cloned and
# built in the install step.
ARG VERSION_CPP_DICT_PACK="v2.500"
# Version of the "mmcif" Python package installed via pip (mmcif==<version>).
# It must be declared as ARG, otherwise --build-arg VERSION_PY_MMCIF=... never
# reaches the ENV instruction below. No default is set; pass it at build time.
ARG VERSION_PY_MMCIF
## Set up environment
ENV MMCIF_DICTS_DIR="/usr/local/share/mmcif-dict-suite" \
    SRC_DIR="/tmp" \
    VERSION_CPP_DICT_PACK=${VERSION_CPP_DICT_PACK} \
    VERSION_BASE_IMAGE=${VERSION_BASE_IMAGE} \
    VERSION_PYTHON=${VERSION_PYTHON} \
    VERSION_PY_MMCIF=${VERSION_PY_MMCIF} \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1
## Image metadata
LABEL org.modelarchive.base-image="${VERSION_BASE_IMAGE}" \
      org.modelarchive.cpp-dict-pack.version="${VERSION_CPP_DICT_PACK}" \
      maintainer="Stefan Bienert <stefan.bienert@unibas.ch>" \
      vendor1="Schwede Group (schwedelab.org)" \
      vendor2="SIB - Swiss Institute of Bioinformatics (sib.swiss)" \
      vendor3="Biozentrum - University of Basel (biozentrum.unibas.ch)"
## Install the RCSB CPP Dict Suite (only the binaries we need) and the Python
## dependencies. Everything happens in a single layer so that the build
## toolchain and all package caches never end up in the final image.
WORKDIR ${SRC_DIR}
COPY requirements.txt ${SRC_DIR}/
# "set -eo pipefail": abort on the first failing command, including failures
# inside pipelines ("set -e pipefail" would only set $1, not the option).
RUN set -eo pipefail; \
# Fail fast, before the lengthy C++ build, if the mmcif version is missing.
    : "${VERSION_PY_MMCIF:?VERSION_PY_MMCIF must be set to install mmcif}"; \
    export DICT_PACK_SRC_DIR="${SRC_DIR}/cpp-dict-pack.git"; \
    apk upgrade --no-cache; \
# Runtime library needed by the compiled tools; installed explicitly so it
# survives the removal of the build dependencies.
    apk add --no-cache libstdc++; \
# Build-only toolchain, grouped in a virtual package for one-shot removal.
    apk add --no-cache --virtual .build-deps \
        abuild binutils bison build-base cmake extra-cmake-modules flex \
        gcc git tcsh; \
#
## Install the RCSB mmCIF Dict Suite
    git clone -b "${VERSION_CPP_DICT_PACK}" \
        --single-branch --recurse-submodules \
        https://github.com/rcsb/cpp-dict-pack.git \
        "${DICT_PACK_SRC_DIR}"; \
    mkdir "${DICT_PACK_SRC_DIR}/build"; \
    cd "${DICT_PACK_SRC_DIR}/build"; \
    cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON; \
    make; \
# Keep only the two binaries used for dictionary building and validation.
    for cif_tool in CifCheck DictToSdb; do \
        mv "bin/${cif_tool}" /usr/local/bin; \
    done; \
    cd "${SRC_DIR}"; \
    rm -r "${DICT_PACK_SRC_DIR}"; \
#
## Install the RCSB py-mmcif Python module
    /usr/local/bin/python -m pip install --no-cache-dir --upgrade pip; \
    /usr/local/bin/python -m pip install --no-cache-dir \
        "mmcif==${VERSION_PY_MMCIF}" python-rapidjson; \
    /usr/local/bin/python -m pip install --no-cache-dir -r requirements.txt; \
#
## Clean up/ remove unnecessary stuff
    apk del .build-deps
## Install the container entrypoint and the command line tools; every file is
## copied with mode 755 so it is executable right away.
COPY --chmod=755 entrypoint.sh /
COPY --chmod=755 validate-mmcif-file.py \
     /usr/local/bin/validate-mmcif-file
COPY --chmod=755 get-mmcif-dict-versions.py \
     /usr/local/bin/get-mmcif-dict-versions
## Create dictionaries for validating mmCIF files.
## The version to be created is controlled by build argument USE_DICT_VERSION.
## Use a version available at
## https://github.com/ihmwg/ModelCIF/tree/master/archive or the special word
## "latest", which will use
## https://github.com/ihmwg/ModelCIF/blob/master/dist/mmcif_ma.dic.
## Dictionaries do not change that frequently, therefore we skip the hassle of
## keeping them in an external volume.
# The ARG declaration is required for --build-arg USE_DICT_VERSION=... to take
# effect; without it the ENV/LABEL below would always be empty. No default is
# set, the version has to be chosen explicitly at build time.
ARG USE_DICT_VERSION
ENV USE_DICT_VERSION=${USE_DICT_VERSION}
LABEL org.modelarchive.dict_release="${USE_DICT_VERSION}"
WORKDIR ${SRC_DIR}
# Download the dictionaries, compile them into SDB files with DictToSdb and
# record their versions. curl is only needed during this step and is removed
# again in the same layer.
# "set -eo pipefail": abort on the first failing command, including failures
# inside pipelines ("set -e pipefail" would only set $1, not the option).
RUN set -eo pipefail; \
# An empty version would silently produce a bogus download URL.
    : "${USE_DICT_VERSION:?USE_DICT_VERSION must be set (version or latest)}"; \
    apk add --no-cache curl; \
    export _DICT_DIR="${SRC_DIR}/mmcif_dicts"; \
    export _DICT_URL="https://mmcif.wwpdb.org/dictionaries/ascii"; \
    export _GIT_URL="https://raw.github.com/ihmwg/ModelCIF/master"; \
# Use the path of an actual commit to keep the dict immutable (RCSB refuses
# to use Git tags for versions).
    export _MA_DICT_URL="https://raw.githubusercontent.com/ihmwg/ModelCIF/d18ba38/archive/mmcif_ma-v${USE_DICT_VERSION}.dic"; \
    mkdir -p "${_DICT_DIR}"; \
    mkdir -p "${MMCIF_DICTS_DIR}"; \
    cd "${_DICT_DIR}"; \
#
## Fetch the dictionary definition language
# -f makes curl fail on HTTP errors instead of saving the error page as the
# dictionary; -S still reports errors while -s keeps the progress meter off.
    curl -fsSL -o mmcif_ddl.dic.gz "${_DICT_URL}/mmcif_ddl.dic.gz"; \
    gunzip mmcif_ddl.dic.gz; \
#
## Fetch the merged ModelCIF dictionary
    if [ "${USE_DICT_VERSION}" = "latest" ]; then \
        export _MA_DICT_URL="${_GIT_URL}/dist/mmcif_ma.dic"; \
    fi; \
    curl -fsSL -o mmcif_ma.dic "${_MA_DICT_URL}"; \
#
## Build the ModelCIF SDB
    DictToSdb -ddlFile mmcif_ddl.dic \
        -dictFile mmcif_ma.dic \
        -dictSdbFile "${MMCIF_DICTS_DIR}/mmcif_ma.sdb"; \
#
## Fetch the stable PDBx/mmCIF dictionary
    curl -fsSL -o mmcif_pdbx_v50.dic "${_DICT_URL}/mmcif_pdbx_v50.dic"; \
#
## Build the PDBx/mmCIF SDB
    DictToSdb -ddlFile mmcif_ddl.dic \
        -dictFile mmcif_pdbx_v50.dic \
        -dictSdbFile "${MMCIF_DICTS_DIR}/mmcif_pdbx_v50.sdb"; \
#
## Get versions of ModelCIF & PDBx/mmCIF dictionaries
# NOTE(review): no --output is passed for the ModelCIF dictionary, so the
# location of its version file depends on the script's default - confirm it
# is written somewhere that survives the clean up below.
    get-mmcif-dict-versions --child-location "${_MA_DICT_URL}" mmcif_ma.dic; \
    get-mmcif-dict-versions \
        --child-location "${_DICT_URL}/mmcif_pdbx_v50.dic" \
        --output mmcif_pdbx_v50_version.json \
        mmcif_pdbx_v50.dic; \
    mv mmcif_pdbx_v50_version.json "${MMCIF_DICTS_DIR}/"; \
#
## Make SDBs readable and keep possible error logs from building them
# Moving the logs is best effort: there may be none, which must not fail
# the build.
    mv *.log "${MMCIF_DICTS_DIR}/" 2>/dev/null || :; \
    chmod o+r "${MMCIF_DICTS_DIR}"/*; \
#
## Clean up
    cd "${SRC_DIR}"; \
    rm -r "${_DICT_DIR}"; \
    apk del curl
# Switch to a development image via build-arg: ADD_DEV=yes (case-insensitive)
# additionally installs bash, emacs, pylint and black. Any other value, or
# leaving ADD_DEV unset, skips this step.
ARG ADD_DEV
# "set -eo pipefail": abort on the first failing command, including failures
# inside pipelines ("set -e pipefail" would only set $1, not the option).
RUN set -eo pipefail; \
    if [ "$(echo "${ADD_DEV}" | tr '[:lower:]' '[:upper:]')" = "YES" ]; then \
        apk add --no-cache bash emacs; \
# The compiler toolchain is only needed while pip installs the linters.
        apk add --no-cache --virtual .dev-build-deps build-base gcc; \
        /usr/local/bin/python -m pip install --no-cache-dir pylint black; \
        apk del .dev-build-deps; \
    fi
## Run the validation as a dedicated, unprivileged system user.
## The numeric ID can be overridden with build argument MMCIF_USER_ID, e.g. to
## match the host user and avoid file permission issues in development.
ARG MMCIF_USER_ID="501"
RUN adduser -u ${MMCIF_USER_ID} -S mmcif-vldtr
USER mmcif-vldtr
## Every container start goes through the entrypoint script.
ENTRYPOINT ["/entrypoint.sh"]
# have tool ready
# - entrypoint: validate... just runs validation, celery runs celery, CMD else
# write Python to run & check mmCIF
# - Note dictionary versions in the mmCIF file!
# for Celery:
# - depends_on without implementing the 'waits' in this entrypoint.sh:
# https://marcopeg.com/docker-compose-healthcheck/
# LocalWords: ENV DICTS SRC tmp schwedelab RCSB WORKDIR pipefail apk dev ARG
# LocalWords: ARGs