diff --git a/.gitignore b/.gitignore index 60ce4b66d28d3f142abee00425cc53e98dc572f9..959ecc6c5ccb0c684e815d98e6deb24bc23df356 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ \.DS_Store +\.history diff --git a/README.md b/README.md index 2d39a95e7e1ccca4660fcdfdfa6df77827ea1737..3a0cc13bf31db1481fde19c0c279e268378103d3 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,8 @@ A short overview of the directories in this repository and what they do. |Path |Content | |-----------|----------------------------------------------------------------| -|[projects/](projects/) |Collection of model conversions done for various user projects. | +|[projects/](projects/)|Collection of model conversions done for various user projects. | +|[projects/docker](projects/docker)|Docker setup to run the conversion software| |[validation/](validation/)|A tool to check the formatting of ModelCIF files. | <!-- LocalWords: modelcif ModelArchive PDBx diff --git a/projects/.dockerignore b/projects/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..16fa8f992102d0f97e3109cd038748ba7b03bed5 --- /dev/null +++ b/projects/.dockerignore @@ -0,0 +1,13 @@ +# Exclude files from build context that are not used to build the Docker image. +# Helps speed up a build since fewer files are copied. +# Only works when this directory is the build context. + +# Exclude all kinds of README +**/README* + +# Exclude any "test*" directories & files in the individual project directories. +# Test data is not part of the Docker image. If needed, mount at run time.
+**/test* + +# Exclude history files from interactive Docker container sessions +\.history \ No newline at end of file diff --git a/projects/CoFFE-sponge-proteins/translate2modelcif.py b/projects/CoFFE-sponge-proteins/translate2modelcif.py old mode 100644 new mode 100755 diff --git a/projects/README.md b/projects/README.md index 46c4ba9bb06f90548ad6ba1f0ae59ffa07a98e53..8e5fb91c4454b4ab8681172b6269663d0d924cc7 100644 --- a/projects/README.md +++ b/projects/README.md @@ -4,5 +4,15 @@ This directory tree contains tools/ code from past modelling projects converted Each project should come with a small README explaining the modelling project. This will help you checking if your project is of same kind. -<!-- LocalWords: README +The [`docker`](projects/docker/) directory does not host a modelling project. It keeps the setup of a [Docker](https://www.docker.com) image that can be used to run the converter tools from the various projects. + +<!-- +Adding projects: +- file permissions for the translation script (755) so it can be executed by any + user +- mention .dockerignore in projects README.md, like when adding projects consider..., add a reference to it, in the docker/README.md + +--> + +<!-- LocalWords: README dockerignore --> diff --git a/projects/USDA-ASFVG/translate2modelcif.py b/projects/USDA-ASFVG/translate2modelcif.py old mode 100644 new mode 100755 diff --git a/projects/docker/Dockerfile b/projects/docker/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..24b814bed562fee5cae98bc2fb2c9066eaa22980 --- /dev/null +++ b/projects/docker/Dockerfile @@ -0,0 +1,82 @@ +ARG VERSION_OST="2.3.0" +FROM registry.scicore.unibas.ch/schwede/openstructure:${VERSION_OST} +## We need to declare ARGs again which were declared before the build stage +## (FROM directive), otherwise they won't be available in this stage.
+ARG VERSION_OST
+
+
+## Set up environment
+## - SRC_DIR: scratch directory, used below to build the Python dependencies
+## - VERSION_OST: keeps the base image version available inside the container
+## - PYTHONUNBUFFERED/PYTHONDONTWRITEBYTECODE: Python writes its output
+##   unbuffered and does not create .pyc files
+ENV SRC_DIR="/tmp" \
+    VERSION_OST=${VERSION_OST} \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1
+
+
+LABEL org.openstructure.base-image="${VERSION_OST}"
+LABEL maintainer="Stefan Bienert <stefan.bienert@unibas.ch>"
+LABEL vendor1="Schwede Group (schwedelab.org)"
+LABEL vendor2="SIB - Swiss Institute of Bioinformatics (sib.swiss)"
+LABEL vendor3="Biozentrum - University of Basel (biozentrum.unibas.ch)"
+
+
+## Install python-modelcif and python-ihm
+## Everything happens in a single RUN, so the build tools and source trees that
+## are removed at the end do not stay behind in an image layer.
+## "set -e" aborts the build on the first failing command. It is deliberately
+## not written as "set -e pipefail": that form does not enable pipefail, it
+## only sets -e and stores the word "pipefail" as $1. No pipes are used in
+## this step, so -e alone is sufficient.
+COPY docker/requirements.txt ${SRC_DIR}
+WORKDIR ${SRC_DIR}
+RUN set -e; \
+    apt-get update -y; \
+    apt-get install -y git pip; \
+    ## --no-cache-dir keeps pip's download cache out of the image
+    pip install --no-cache-dir -r requirements.txt; \
+    ## use the python-ihm latest
+    git clone https://github.com/ihmwg/python-ihm.git ihm.git; \
+    cd ihm.git; \
+    python3 setup.py build; \
+    python3 setup.py install; \
+    cd ${SRC_DIR}; \
+    rm -rf ${SRC_DIR}/ihm.git; \
+    ## use python-modelcif latest
+    git clone https://github.com/ihmwg/python-modelcif.git modelcif.git; \
+    cd modelcif.git; \
+    python3 setup.py build; \
+    python3 setup.py install; \
+    cd ${SRC_DIR}; \
+    rm -rf ${SRC_DIR}/modelcif.git; \
+    ## drop the apt package lists and the tools only needed for installation
+    ## (NOTE(review): gcc is presumably pulled in as a dependency of pip -
+    ## confirm)
+    rm -rf /var/lib/apt/lists/*; \
+    apt-get purge -y --auto-remove git pip gcc
+
+## Add a dedicated user
+## MMCIF_USER_ID can be used to avoid file permission issues.
+ARG MMCIF_USER_ID=501
+RUN adduser --system -u ${MMCIF_USER_ID} mmcif
+
+
+## Install the entrypoint script, executable for every user
+COPY --chmod=755 docker/entrypoint.sh /
+
+
+## Copy tool(s)
+## CONVERTERSCRIPT is the translation script (relative to the build context)
+## that gets installed as command CONVERTERCMD. CONVERTERCMD is also exported
+## into the container environment.
+ARG CONVERTERSCRIPT=CoFFE-sponge-proteins/translate2modelcif.py
+ARG CONVERTERCMD=convert2modelcif
+ENV CONVERTERCMD=$CONVERTERCMD
+COPY --chmod=755 $CONVERTERSCRIPT /usr/local/bin/$CONVERTERCMD
+
+
+## Add development setup
+## Only active if ADD_DEV is "YES" (compared case-insensitively). The match is
+## done with a shell pattern instead of "echo | tr", so no pipe is involved
+## and "set -e" is enough to abort on errors ("set -e pipefail" would not
+## enable pipefail, it only sets -e and stores "pipefail" as $1).
+## pip is installed here again since the dependency installation step purges
+## it; it is removed once more after installing the Python tools.
+ARG ADD_DEV
+RUN set -e; \
+    case "${ADD_DEV}" in \
+      [Yy][Ee][Ss]) \
+        apt-get update -y; \
+        apt-get install -y emacs pip; \
+        pip install --no-cache-dir pylint black; \
+        apt-get purge -y --auto-remove pip; \
+        rm -rf /var/lib/apt/lists/* ;; \
+    esac
+
+
+## Run as the dedicated, non-root user from here on
+USER mmcif
+
+
+ENTRYPOINT ["/entrypoint.sh"]
+
+# LocalWords: ARG OST ARGs ENV SRC tmp PYTHONUNBUFFERED Schwede schwedelab py
+# LocalWords: PYTHONDONTWRITEBYTECODE Bioinformatics sib swiss Biozentrum ihm
+# LocalWords: modelcif txt WORKDIR pipefail chmod adduser mmcif ENTRYPOINT cd
+# LocalWords: pylint rf entrypoint gcc CONVERTERSCRIPT CoFFE DEV xYES emacs
+# LocalWords: fi esac
diff --git a/projects/docker/README.md b/projects/docker/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6e51beafe7f406f08643b9c86ae8f21fbae18114
--- /dev/null
+++ b/projects/docker/README.md
@@ -0,0 +1,145 @@
+# Docker container (base) for the converter software
+
+This directory contains all the files needed to create the base [Docker](https://www.docker.com) image used for the converter software in [`projects`](projects/).
+
+A specific project's translation script can be executed either in an [app like](#run-a-fixed-converter-from-within-the-docker-container-app-like) manner, calling it directly from within the container, using a [local copy executed by the container](#run-a-local-converter-script-with-the-docker-container), or in an [interactive shell](#run-the-converter-command-in-an-interactive-shell-from-within-the-docker-container) within the container.
+ +[[_TOC_]] + + +## Building & running the Docker container + +This is a quick tour on how to build and run the Docker container through different scenarios. This is not a lecture on containerisation in general, nor Linux/ Unix, shell scripting or programming. But if you encounter a specific problem, feel free to ping the [MA team](https://modelarchive.org/contact). + +This section describes four use cases of the Docker container (including build instructions per use case) but starts with a short primer of what is common to all scenarios described here. + + +### Prerequisites + +For building the Docker image, you need a local copy of the [Git repository](/.). After that, this guide assumes you are in the [`projects`](projects/) subdirectory (we skip the output of the commands here): + +```terminal +$ git clone https://git.scicore.unibas.ch/schwede/modelcif-converters.git modelcif-converters.git +$ cd modelcif-converters.git/projects +$ +``` + +Since the Docker container will run a dedicated, non-root user internally, it is advisable to create this user with the ID of your local user. That way, file permission issues will be avoided. Get your user ID with the following command and write it down - it will be needed in the build steps: + +```terminal +$ whoami +localuser +$ id +uid=1234(localuser) ... +$ +``` + +Look for the `uid` in the output of `id`. In the example above, `1234` is the ID of user `localuser`, currently logged in and executing the commands. + +One last thing that is needed for the example runs of the Docker container is data. For simplicity, we assume that a directory `/home/user/models` exists on the local computer executing a converter, full of modelling data. 
+ +### Run a fixed converter from within the Docker container (app-like) + +This use case comes closest to having a Docker container that works like a [ModelCIF](https://mmcif.wwpdb.org/dictionaries/mmcif_ma.dic/Index/) converter app, that you can + +- hand over to others + +- send to a compute cluster + +- turn into a [Singularity](https://sylabs.io/singularity/) image + +and the conversion to ModelCIF works out of the box. + +The idea is to copy a translation script from one of the [projects](projects/) into the Docker image along with all the software needed to run it. That enables you to start the script as a command with [`docker run`](https://docs.docker.com/engine/reference/run/). + +The whole build of the Docker image, including installing necessary software and copying the translation script, is covered by our [Dockerfile](./Dockerfile). You just need to specify the translation script by [build time argument](https://docs.docker.com/engine/reference/commandline/build/#set-build-time-variables---build-arg) `CONVERTERSCRIPT` during [`docker build`](https://docs.docker.com/engine/reference/commandline/build/). By default, the translation script is renamed to `convert2modelcif` to be called as a command. This can be overwritten using build time argument `CONVERTERCMD`. There is also an alias `2cif` to the converter command, which is immutable. + +The following command will build a Docker image named `converter` (with tag `latest`). The translation script will be copied from [`USDA-ASFVG/translate2modelcif.py`](projects/USDA-ASFVG/translate2modelcif.py) and made available as `convert2modelcif` in the Docker image/ `docker run`. By `MMCIF_USER_ID`, we use UID `1234` for the internal user of the Docker image, so mounted files have the right owner inside and outside of the Docker container (assuming the example user from above). 
Pay attention to the alternative Dockerfile location specified by `-f docker/Dockerfile`, as we are calling `docker build` from the [projects](projects/) subdirectory to get the right build context: + +```terminal +$ # DOCKER_BUILDKIT=1 is only needed for older versions of Docker. +$ DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile --build-arg MMCIF_USER_ID=1234 --build-arg CONVERTERSCRIPT=USDA-ASFVG/translate2modelcif.py -t converter:latest . +$ +``` + +After building the Docker image, it's time to run the translation command. To do so, we need to make the data available inside the Docker container. This is achieved by [mounting](https://docs.docker.com/storage/bind-mounts/) the model-data directory into the Docker container. In the following example, the local `/home/user/models` directory of the host machine is made available as `/data` inside the container. So the command sent to `docker run` has to use `/data` as it is executed inside the container: + +```terminal +$ docker run --rm -v /home/user/models:/data -t converter:latest convert2modelcif /data/ /data/proteome_accessions.csv +$ +``` + + +### Run a local converter script with the Docker container + +Instead of using the script which is statically copied inside the Docker image, you can also use the Docker container as run time environment executing a translation script from disk. This comes in handy when converting many different modelling project types to ModelCIF. Rather than building individual Docker images per modelling variant, use a single one (that guarantees that all ModelCIF files are built with exactly the same software stack) and iterate through the various translation scripts. + +That already works with the Docker image built for [app-like](#run-a-fixed-converter-from-within-the-docker-container-app-like) execution. But in the build example here, we use a file that cannot be executed, to be copied as `convert2modelcif`.
This makes sure that the Docker container is not accidentally run without declaring a specific script (*Note*: don't try to use `docker/README.md` for `CONVERTERSCRIPT`, it is excluded from the build context by `.dockerignore`.): + +```terminal +$ # DOCKER_BUILDKIT=1 is only needed for older versions of Docker. +$ DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile --build-arg MMCIF_USER_ID=1234 --build-arg CONVERTERSCRIPT=docker/requirements.txt -t converter:latest . +$ +``` + +The dedicated translation script is made available inside the Docker container as a direct [bind mount](https://docs.docker.com/storage/bind-mounts/) to the installed converter command. The remaining parameters are the same as for the [app-like](#run-a-fixed-converter-from-within-the-docker-container-app-like) `docker run` command: + +```terminal +$ docker run --rm -v /home/user/models:/data -v $(pwd)/USDA-ASFVG/translate2modelcif.py:/usr/local/bin/convert2modelcif -t converter:latest convert2modelcif /data/ /data/proteome_accessions.csv +$ +``` + + +### Run the converter command in an interactive shell from within the Docker container + +As the Docker image comes with a shell ([bash](https://tiswww.case.edu/php/chet/bash/bashtop.html)) installed, the translation script can be started from an interactive session within the Docker container. + +The Docker image does not need a special build command to allow interactive sessions, any from the [app-like](#run-a-fixed-converter-from-within-the-docker-container-app-like) and the [local](#run-a-local-converter-script-with-the-docker-container) variant will work. The magic comes in with the `docker run` command. + +A drawback of running bash inside a Docker container is the lack of your personal configuration which stays outside of the shell by default. 
That can be mended with a bind mount and with the example call, we also add a bash history file to not lose complex command lines: + +```terminal +$ touch .history +$ docker run --rm -i -t -v /home/user/models:/data -v $HOME/.bashrc:/home/mmcif/.bashrc -v $(pwd)/.history:/home/mmcif/.bash_history -t converter:latest bash +$ +``` + +In the interactive shell, the `convert2modelcif` command is available, as well as any script/ data that is mounted by `docker run -v ...`. + +Be aware of the `touch .history` command before `docker run`. This makes sure a file `.history` exists before starting the Docker container. If the file does not exist, Docker will create a directory `.history` itself but that does not record the bash command history. + + +### Build the development Docker container + +The [Dockerfile](./Dockerfile) has an additional build argument, `ADD_DEV`. If set to `YES`, the following development tools are added to the Docker image: + +- [Emacs](https://www.gnu.org/software/emacs/) + +- [Black](https://black.readthedocs.io/en/stable/) + +- [Pylint](https://pylint.org) + +None of these are needed to run a translation script. + +The build argument is just added to the `docker build` call: + +```terminal +$ # DOCKER_BUILDKIT=1 is only needed for older versions of Docker. +$ DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile --build-arg MMCIF_USER_ID=1234 --build-arg ADD_DEV=YES -t converter:latest . +$ +``` + +For working on a translation script, it is convenient to mount the complete Git repository when running the Docker container interactively. This makes sure `pyproject.toml` is available from the repository root to `black` and `pylint`: + +```terminal +$ touch .history +$ docker run --rm -i -t -v /home/user/models:/data -v $HOME/.bashrc:/home/mmcif/.bashrc -v $(pwd)/.history:/home/mmcif/.bash_history -v $(pwd)/../:/develop -t converter:latest bash +$ +``` + +In the session, the Git repository can be found in `/develop`. 
+ +<!-- LocalWords: TOC modelcif cd whoami localuser uid arg CONVERTERSCRIPT + LocalWords: CONVERTERCMD cif ASFVG BUILDKIT pwd DEV Pylint pyproject + LocalWords: toml pylint + --> diff --git a/projects/docker/entrypoint.sh b/projects/docker/entrypoint.sh new file mode 100644 index 0000000000000000000000000000000000000000..d651570ace7433f04d3e088901bc8fb1b2d14a81 --- /dev/null +++ b/projects/docker/entrypoint.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +## exit immediately on commands with a non-zero exit status. +set -euo pipefail + +## When started without any arguments, "-h", "--help", "-help" or "help", print +## usage. +if [ $# -eq 0 ] || [ x$1 == x"-h" ] || [ x$1 == x"--help" ] || + [ x$1 == x"-help" ] || [ x$1 == x"help" ]; then + echo " ModelCIF file converter" + echo "----------------------------------------" + echo "Provided by SWISS-MODEL / Schwede group" + echo "(swissmodel.expasy.org / schwedelab.org)" + echo "" + /usr/local/bin/convert2modelcif --help + exit 1 +fi +if [ x$1 == x"convert2modelcif" ] || [ x$1 == x"2cif" ]; then + shift + # take over the process, make translate2modelcif run on PID 1 + exec /usr/local/bin/convert2modelcif $@ + exit $? +fi + +exec "$@" + +# LocalWords: euo pipefail eq Schwede schwedelab mmcif fi diff --git a/projects/docker/requirements.txt b/projects/docker/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..2abd7d3a956e774cb5b3d3765de5e9b9d66f2d96 --- /dev/null +++ b/projects/docker/requirements.txt @@ -0,0 +1,2 @@ +requests +ujson