# Dockerfile for an image that validates mmCIF files against the ModelCIF and
# PDBx/mmCIF dictionaries.
# Python release and the base image derived from it; either one can be
# overridden with --build-arg.
ARG VERSION_PYTHON=3.6.15
ARG VERSION_BASE_IMAGE=python:${VERSION_PYTHON}-alpine3.15
FROM ${VERSION_BASE_IMAGE}
# ARGs declared ahead of FROM live outside of the build stage. Re-declaring
# them here (without a value) makes their values available inside the stage.
ARG VERSION_PYTHON
ARG VERSION_BASE_IMAGE

# Versions of the RCSB tools installed into the image.
ARG VERSION_CPP_DICT_PACK=v2.500
ARG VERSION_PY_MMCIF=0.76

## Set up environment
# Locations: target directory of the compiled dictionaries (SDB files) and
# the scratch directory used as WORKDIR while building.
ENV MMCIF_DICTS_DIR=/usr/local/share/mmcif-dict-suite \
    SRC_DIR=/tmp
# Record the versions the image was built with in its environment.
ENV VERSION_BASE_IMAGE=${VERSION_BASE_IMAGE} \
    VERSION_CPP_DICT_PACK=${VERSION_CPP_DICT_PACK} \
    VERSION_PYTHON=${VERSION_PYTHON} \
    VERSION_PY_MMCIF=${VERSION_PY_MMCIF}
# Python: unbuffered stdout/stderr and no *.pyc files.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1


# Image metadata: build provenance plus maintainer/ vendor details.
LABEL org.modelarchive.base-image="${VERSION_BASE_IMAGE}" \
      org.modelarchive.cpp-dict-pack.version="${VERSION_CPP_DICT_PACK}" \
      maintainer="Stefan Bienert <stefan.bienert@unibas.ch>" \
      vendor1="Schwede Group (schwedelab.org)" \
      vendor2="SIB - Swiss Institute of Bioinformatics (sib.swiss)" \
      vendor3="Biozentrum - University of Basel (biozentrum.unibas.ch)"

## Install the RCSB CPP Dict Suite (only the binaries we need) and the RCSB
## py-mmcif Python module. Everything happens in a single RUN so neither the
## build toolchain nor the sources remain in an image layer.
WORKDIR ${SRC_DIR}
# 'set -eo pipefail': abort on the first failing command. (The former
# 'set -e pipefail' only enabled -e and assigned "pipefail" to $1.)
RUN set -eo pipefail; \
    export DICT_PACK_SRC_DIR="${SRC_DIR}/cpp-dict-pack.git"; \
    #
    ## Bring the base system up to date without keeping the package index
    apk upgrade --no-cache; \
    #
    ## libstdc++ has to stay in the image after the toolchain is gone, so it
    ## is installed explicitly and is not part of the build dependencies
    apk add --no-cache libstdc++; \
    #
    ## Build dependencies, grouped in a virtual package so they can be
    ## removed by a single name later
    apk add --no-cache --virtual .build-deps \
            abuild binutils bison build-base cmake flex git gcc \
            extra-cmake-modules tcsh; \
    #
    ## Install the RCSB mmCIF Dict Suite
    git clone -b ${VERSION_CPP_DICT_PACK} \
              --single-branch --recurse-submodules \
              https://github.com/rcsb/cpp-dict-pack.git \
              ${DICT_PACK_SRC_DIR}; \
    mkdir ${DICT_PACK_SRC_DIR}/build; \
    cd ${DICT_PACK_SRC_DIR}/build; \
    cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON; \
    make; \
    for cif_tool in CifCheck DictToSdb; do \
      mv bin/${cif_tool} /usr/local/bin; \
    done; \
    cd ${SRC_DIR}; \
    rm -r ${DICT_PACK_SRC_DIR}; \
    #
    ## Install the RCSB py-mmcif Python module (while the compiler is still
    ## available); --no-cache-dir keeps pip's download cache out of the layer
    /usr/local/bin/python -m pip install --no-cache-dir --upgrade pip; \
    /usr/local/bin/python -m pip install --no-cache-dir \
                                         mmcif==${VERSION_PY_MMCIF} \
                                         python-rapidjson; \
    #
    ## Clean up/ remove unnecessary stuff
    apk del .build-deps

## Dedicated system user for mmCIF file validation.
## Override MMCIF_USER_ID (--build-arg) to avoid file permission issues in
## development.
ARG MMCIF_USER_ID=501
RUN adduser -u ${MMCIF_USER_ID} -S mmcif-vldtr

## Copy tools: this one is installed early because the dictionary SDB creation
## further down already calls it.
COPY --chmod=755 get-mmcif-dict-versions.py /usr/local/bin/get-mmcif-dict-versions


## Create dictionaries for validating mmCIF files. To rebuild dictionaries,
## rebuild the container with build argument DICT_FETCH_DATE="<DATE>.n" so
## only the RUN command for building the dictionary is triggered. The ".n"
## should be an increasing number to enable simple multiple builds in one
## day, in case something goes wrong.
## Dictionaries do not change that frequently, therefore we skip the hassle of
## keeping them in an external volume.
ARG DICT_FETCH_DATE="2022-05-16.1"
# ToDo: use commit instead of USE_DICT_RELEASE
# NOTE(review): the dictionary build pins its own _PATHSPEC and does not read
# USE_DICT_RELEASE; the value only ends up in ENV and LABEL.
ARG USE_DICT_RELEASE="master"
ENV DICT_FETCH_DATE=${DICT_FETCH_DATE}
ENV USE_DICT_RELEASE=${USE_DICT_RELEASE}
LABEL org.modelarchive.dict-fetch-date="${DICT_FETCH_DATE}"
LABEL org.modelarchive.dict_release="${USE_DICT_RELEASE}"
## Download the dictionaries and compile them into SDB files inside
## ${MMCIF_DICTS_DIR}. curl and the downloaded sources are removed again in
## the same RUN, so they do not remain in the image layer.
WORKDIR ${SRC_DIR}
# 'set -eo pipefail': abort on the first failing command. (The former
# 'set -e pipefail' only enabled -e and assigned "pipefail" to $1.)
RUN set -eo pipefail; \
    apk add --no-cache curl; \
    export _DICT_DIR="${SRC_DIR}/mmcif_dicts"; \
    export _DICT_URL="https://mmcif.wwpdb.org/dictionaries/ascii"; \
    #
    ## Commit of the ModelCIF repository the dictionaries are taken from
    export _PATHSPEC="557bda7"; \
    export _REPO_URL="https://raw.githubusercontent.com/ihmwg/ModelCIF/${_PATHSPEC}"; \
    export _MA_DICT_URL="${_REPO_URL}/dist/mmcif_ma.dic"; \
    #
    ## NOTE(review): _DICT_REPO is not referenced by any command in this RUN;
    ## kept since it is exported to the tools called below - confirm & remove
    export _DICT_REPO="ModelCIF.git"; \
    mkdir ${_DICT_DIR}; \
    mkdir ${MMCIF_DICTS_DIR}; \
    cd ${_DICT_DIR}; \
    #
    ## Fetch the dictionary definition language; -f makes curl fail on HTTP
    ## errors instead of storing the error page as a dictionary
    curl ${_DICT_URL}/mmcif_ddl.dic.gz -fsS -o mmcif_ddl.dic.gz; \
    gunzip *.gz; \
    #
    ## Fetch the merged ModelCIF dictionary (single file from the pinned
    ## commit of the ModelCIF repository)
    curl ${_MA_DICT_URL} -fsS -L -o mmcif_ma.dic; \
    #
    ## Build the ModelCIF SDB
    DictToSdb -ddlFile mmcif_ddl.dic \
              -dictFile mmcif_ma.dic \
              -dictSdbFile ${MMCIF_DICTS_DIR}/mmcif_ma.sdb; \
    #
    ## Fetch the stable PDBx/mmCIF dictionary
    curl ${_DICT_URL}/mmcif_pdbx_v50.dic -fsS -o mmcif_pdbx_v50.dic; \
    #
    ## Build the PDBx/mmCIF SDB
    DictToSdb -ddlFile mmcif_ddl.dic \
              -dictFile mmcif_pdbx_v50.dic \
              -dictSdbFile ${MMCIF_DICTS_DIR}/mmcif_pdbx_v50.dic.sdb; \
    #
    ## Get versions of ModelCIF & PDBx/mmCIF dictionaries
    get-mmcif-dict-versions --parent-location ${_REPO_URL}/base/mmcif_pdbx_v50.dic \
                            --child-location ${_MA_DICT_URL} \
                            mmcif_ma.dic; \
    mv mmcif_ma_version.json ${MMCIF_DICTS_DIR}/; \
    #
    ## Make SDBs readable and keep possible error logs from building them
    ## (there may be no logs at all, hence the '|| :')
    mv *.log ${MMCIF_DICTS_DIR}/ 2>/dev/null || :; \
    chmod o+r ${MMCIF_DICTS_DIR}/*; \
    #
    ## Clean up
    cd ${SRC_DIR}; \
    rm -r ${_DICT_DIR}; \
    apk del curl


## Install the container entrypoint and the validation tool, both executable.
COPY --chmod=755 entrypoint.sh /entrypoint.sh
COPY --chmod=755 validate-mmcif-file.py /usr/local/bin/validate-mmcif-file

# for development
#RUN set -e pipefail; \
#    apk add bash emacs gcc build-base; \
#    /usr/local/bin/python -m pip install pylint black; \
#    apk del gcc build-base

## From here on everything runs as the dedicated validation user, not as root.
USER mmcif-vldtr

ENTRYPOINT ["/entrypoint.sh"]

# have tool ready
# - entrypoint: validate... just runs validation, celery runs celery, CMD else
# write Python to run & check mmCIF
# - Note dictionary versions in the mmCIF file!
# for Celery:
# - depends_on without implementing the 'waits' in this entrypoint.sh:
#   https://marcopeg.com/docker-compose-healthcheck/


# LocalWords:  ENV DICTS SRC tmp schwedelab RCSB WORKDIR pipefail apk dev ARG
# LocalWords:  ARGs