Skip to content
Snippets Groups Projects
Commit d6b74272 authored by B13nch3n's avatar B13nch3n
Browse files

Add Docker files and script

parent d8936de9
No related branches found
No related tags found
No related merge requests found
# Version tag of the OpenStructure base image (build argument with default).
ARG VERSION_OST="2.3.0"
FROM registry.scicore.unibas.ch/schwede/openstructure:${VERSION_OST}

# An ARG declared ahead of FROM is only visible to FROM itself; re-declare it
# (without a value) so the value is available inside this build stage.
ARG VERSION_OST

## Environment
ENV SRC_DIR="/tmp" \
    VERSION_OST=${VERSION_OST} \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

## Image metadata
LABEL org.openstructure.base-image="${VERSION_OST}" \
      org.openstructure.translate2modelcif="2022-04-08.1" \
      maintainer="Stefan Bienert <stefan.bienert@unibas.ch>" \
      vendor1="Schwede Group (schwedelab.org)" \
      vendor2="SIB - Swiss Institute of Bioinformatics (sib.swiss)" \
      vendor3="Biozentrum - University of Basel (biozentrum.unibas.ch)"
## Install python-modelcif and python-ihm
COPY docker/requirements.txt ${SRC_DIR}
WORKDIR ${SRC_DIR}
# NOTE: plain `set -e` is used on purpose. `set -e pipefail` does not enable
# pipefail: it switches on -e and assigns the word "pipefail" to $1. This step
# contains no pipes, so -e alone gives the intended abort-on-first-error
# behaviour with any POSIX shell.
RUN set -e; \
    apt-get update -y; \
    # for development, install venv
    apt-get install -y git pip python3.8-venv; \
    # --no-cache-dir keeps pip's download cache out of the image layer
    pip install --no-cache-dir -r requirements.txt; \
    # for development, install black & PyLint
    pip install --no-cache-dir black pylint; \
    git clone https://github.com/ihmwg/python-ihm.git ihm.git; \
    cd ihm.git; \
    python3 setup.py build; \
    python3 setup.py install; \
    cd ${SRC_DIR}; \
    rm -rf ${SRC_DIR}/ihm.git; \
    git clone https://github.com/ihmwg/python-modelcif.git modelcif.git; \
    cd modelcif.git; \
    python3 setup.py build; \
    python3 setup.py install; \
    cd ${SRC_DIR}; \
    # drop the second checkout too, so no build sources stay in the layer
    rm -rf ${SRC_DIR}/modelcif.git; \
    rm -rf /var/lib/apt/lists/*; \
    apt-get purge -y --auto-remove git pip gcc
## Dedicated system account the container runs as.
## MMCIF_USER_ID is a build argument; overriding it can be used to avoid file
## permission issues.
ARG MMCIF_USER_ID=501
RUN adduser --system -u ${MMCIF_USER_ID} mmcif

## Entrypoint script, installed executable at the filesystem root
COPY --chmod=755 docker/entrypoint.sh /

## Converter tool; CONVERTERSCRIPT selects which script gets installed as
## /usr/local/bin/convert2modelcif
ARG CONVERTERSCRIPT=projects/pointmutations-haddock/convert2modelcif.py
COPY --chmod=755 ${CONVERTERSCRIPT} /usr/local/bin/convert2modelcif

## Everything from here on, including the running container, uses the
## unprivileged account
USER mmcif
ENTRYPOINT ["/entrypoint.sh"]
# LocalWords: ARG OST ARGs ENV SRC tmp PYTHONUNBUFFERED Schwede schwedelab py
# LocalWords: PYTHONDONTWRITEBYTECODE Bioinformatics sib swiss Biozentrum ihm
# LocalWords: modelcif txt WORKDIR pipefail chmod adduser mmcif ENTRYPOINT cd
# LocalWords: pylint rf entrypoint
#!/bin/bash
## Container entrypoint: print usage, run the converter, or execute any other
## command given on the command line.
## NOTE: this script needs Bash ('set -o pipefail' is used below).

## Exit immediately on commands with a non-zero exit status, on expansion of
## unset variables and on failures inside pipelines.
set -euo pipefail

## Print the banner followed by the converter's own help text, then exit with
## status 1.
_usage() {
    echo " ModelCIF file converter"
    echo "----------------------------------------"
    echo "Provided by SWISS-MODEL / Schwede group"
    echo "(swissmodel.expasy.org / schwedelab.org)"
    echo ""
    /usr/local/bin/convert2modelcif --help
    exit 1
}

## When started without any arguments, "-h", "--help", "-help" or "help", print
## usage.
if [ $# -eq 0 ]; then
    _usage
fi

## "$1" is quoted so arguments containing whitespace or glob characters are
## compared literally instead of breaking the test.
case "$1" in
    -h|--help|-help|help)
        _usage
        ;;
    convert2modelcif|2cif)
        shift
        # take over the process, make convert2modelcif run on PID 1; "$@" is
        # quoted so file names with spaces reach the tool as single arguments
        exec /usr/local/bin/convert2modelcif "$@"
        ;;
esac

## Anything else is executed as given.
exec "$@"
# LocalWords: euo pipefail eq Schwede schwedelab mmcif fi
requests
ujson
#! /usr/local/bin/ost
"""Convert point mutation/ HADDOCK models (PDB + extra data into ModelCIF)."""
import argparse
import os
import sys
import ujson as json
import modelcif.dumper
def _parse_args():
    """Parse and validate the command line.

    One or more model files are expected as positional arguments. Each of
    them must exist and must have a JSON file next to it (same path with the
    extension replaced by ``.json``); otherwise ``_abort_msg`` is called.

    Returns the ``argparse.Namespace`` with the ``model_list`` attribute.
    """
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument(
        "model_list",
        metavar="<MODEL FILE>",
        nargs="+",
        help="Model PDB file to be converted.",
    )
    parsed = cli.parse_args()

    # every model needs its PDB file plus the JSON file sharing its prefix
    for pdb_path in parsed.model_list:
        if not os.path.isfile(pdb_path):
            _abort_msg(f"Model PDB file '{pdb_path}' does not exist.")
        stem, _ = os.path.splitext(pdb_path)
        json_path = f"{stem}.json"
        if os.path.isfile(json_path):
            continue
        _abort_msg(
            f"JSON file '{json_path}' for model "
            f"'{os.path.basename(stem)}' does not exist."
        )

    return parsed
def _abort_msg(msg, exit_code=1):
    """Report an error on stderr and terminate.

    ``msg`` is written to ``sys.stderr`` followed by a line reading
    "Aborting.", then ``SystemExit`` is raised with ``exit_code``.
    """
    sys.stderr.write(f"{msg}\nAborting.\n")
    raise SystemExit(exit_code)
def _get_model_json(file_prfx):
    """Load the JSON file corresponding to a model.

    ``file_prfx`` is the model file path without its extension; ".json" is
    appended to locate the metadata file. Returns the decoded JSON content.

    The file is read as UTF-8, the standard encoding for JSON and a superset
    of ASCII. Plain ASCII files load exactly as before, while files with raw
    non-ASCII characters no longer fail with ``UnicodeDecodeError``.
    """
    with open(f"{file_prfx}.json", encoding="utf-8") as jfh:
        return json.load(jfh)
def _get_modelcif_entities(target_entities):
    """Create ModelCIF entities.

    At present this only prints the length of the "target_sequence" value of
    each entry in ``target_entities``; nothing is created or returned.
    """
    # NOTE(review): an entry without a "target_sequence" key raises KeyError
    # here - confirm the key name against the JSON files fed to this script.
    seq_lengths = (len(entity["target_sequence"]) for entity in target_entities)
    for seq_len in seq_lengths:
        print(seq_len)
def _store_as_modelcif(name, data_json, file_prfx):
    """Write the metadata of one model as a ModelCIF file.

    A ``modelcif.System`` is created from the "title" and "model_details"
    entries of ``data_json`` with the upper-cased ``name`` as its ID. The
    "target_entities" entry is handed to ``_get_modelcif_entities``, and the
    system is finally dumped to ``<file_prfx>.cif``.
    """
    # the system object gathers all data of the model
    system_args = {
        "title": data_json["title"],
        "id": name.upper(),
        "model_details": data_json["model_details"],
    }
    mcif_system = modelcif.System(**system_args)

    # source: syn/ nat, can be changed on object
    _get_modelcif_entities(data_json["target_entities"])

    # last step: dump everything into the ModelCIF file
    with open(f"{file_prfx}.cif", "w", encoding="ascii") as out_fh:
        modelcif.dumper.write(out_fh, [mcif_system])
def _main():
    """Script entry point: convert every model file named on the command line.

    For each model the path without extension serves as prefix for the JSON
    input and the ModelCIF output; its basename is used as the model name.
    """
    cli_opts = _parse_args()
    for pdb_file in cli_opts.model_list:
        prefix, _ = os.path.splitext(pdb_file)
        label = os.path.basename(prefix)
        print(f"Working on {label}...", prefix)
        metadata = _get_model_json(prefix)
        _store_as_modelcif(label, metadata, prefix)
        print(f"... done working on {label}.")


if __name__ == "__main__":
    _main()
{
"audit_authors": [
"Bartolec T",
"V\u00e1zquez-Campos X",
"Johnson M",
"Norman A",
"Payne R",
"Wilkins M",
"Mackay J",
"Low J"
],
"max_pae": 31.75,
"model_details": "_struct.pdbx_model_details",
"model_group_name": "Crosslinked Heterodimer AlphaFold-Multimer v2 Models",
"protocol": [
{
"details": "Model using AlphaFold-Multimer (AlphaFold v2.2.0), without amber relaxation and producing 5 models with up to 3 recycles each, starting from paired and unpaired MSAs for the dimers using MMseqs2.",
"input": "target_sequences",
"method_type": "modeling",
"name": "ma_protocol_step.step_name",
"output": "model",
"software": [
{
"citation": {
"authors": [
"Mirdita M",
"Sch\u00fctze K",
"Moriwaki Y",
"Heo L",
"Ovchinnikov S",
"Steinegger M"
],
"doi": "10.1101\/2021.08.15.456425",
"journal": "bioRxiv",
"page_range": null,
"pmid": null,
"title": "ColabFold - Making protein folding accessible to all",
"volume": null,
"year": 2022
},
"classification": "model building",
"description": "software.description",
"location": "https:\/\/github.com\/sokrypton\/ColabFold",
"name": "ColabFold",
"type": "package",
"version": "1.2.0"
},
{
"citation": {
"authors": [
"Mirdita M",
"Steinegger M",
"S\u00f6ding J"
],
"doi": "10.1093\/bioinformatics\/bty1057",
"journal": "Bioinformatics",
"page_range": [
2856,
2858
],
"pmid": "30615063",
"title": "MMseqs2 desktop and local web server app for fast, interactive sequence searches",
"volume": 35,
"year": 2019
},
"classification": "data collection",
"description": "Many-against-Many sequence searching",
"location": "https:\/\/github.com\/soedinglab\/mmseqs2",
"name": "MMseqs2",
"type": "package",
"version": null
},
{
"citation": {
"authors": [
"Evans R",
"O'Neill M",
"Pritzel A",
"Antropova N",
"Senior A",
"Green T",
"\u017d\u00eddek A",
"Bates R",
"Blackwell S",
"Yim J",
"Ronneberger O",
"Bodenstein S",
"Zielinski M",
"Bridgland A",
"Potapenko A",
"Cowie A",
"Tunyasuvunakool K",
"Jain R",
"Clancy E",
"Kohli P",
"Jumper J",
"Hassabis D"
],
"doi": "10.1101\/2021.10.04.463034",
"journal": "bioRxiv",
"page_range": null,
"pmid": null,
"title": "Protein complex prediction with AlphaFold-Multimer.",
"volume": null,
"year": 2021
},
"classification": "model building",
"description": "Structure prediction",
"location": "https:\/\/github.com\/deepmind\/alphafold",
"name": "AlphaFold-Multimer",
"type": "package",
"version": "2.1.1"
}
],
"software_parameters": {
"commit": "b532e910b15434f707f0b7460abc25c70fcb9b26",
"host_url": "https:\/\/api.colabfold.com",
"keep_existing_results": true,
"model_order": [
3,
4,
5,
1,
2
],
"model_type": "AlphaFold2-multimer-v2",
"msa_mode": "MMseqs2 (UniRef+Environmental)",
"num_models": 5,
"num_queries": 1,
"num_recycles": 3,
"pair_mode": "unpaired+paired",
"rank_by": "multimer",
"recompile_all_models": true,
"recompile_padding": 1.1,
"stop_at_score": 100,
"use_amber": false,
"use_templates": false,
"version": "1.2.0"
}
},
{
"details": "Select best model, which is either the top-ranked model as determined by the ColabFold pipeline (iptmscore*0.8+ptmscore*0.2), or else the model with best congruence with crosslinks reported in the related study.",
"input": "model",
"method_type": "model selection",
"name": "ma_protocol_step.step_name",
"output": "model",
"software": [],
"software_parameters": {}
}
],
"ptm": 0.24,
"target_entities": [
{
"description": "Model of ZC3H11B (A0A1B0GTU1)",
"pdb_chain_id": "B",
"pdb_sequence": "MPNQGEDCYFFFYSTCTKGDSCPFRHCEAALGNETVCTLWQEGRCFRRVCRFRHMEIDKKRSEIPCYWENQPTGCQKLNCVFHHNRGRYVDGLFLPPSKSVLPTVPESPEEEVKASQLSVQQNKLSVQSNTSPQLRSVMKVESSENVPSPKHPPVVINAADDDEDDDDQFSEEGDETKTPTLQPTPEVHNGLRVTSVRKPAVNIKQGECLHFGIKTLEEIKSKKMKEKSEEQGEGSSGVSSLLLHPEPVPGPEKENVRTVVRTVTLSTKQGEEPLVRLGLTETLGKRKFSTGGDSDPPLKRSLAQRLGKKVEAPETNTDETPKKAQVSKSLKERLGMSADPNNEDATDKVNKVGEIHVKTLEEMLLERASQKHGESQTKLKTEGPSKTDDSTSGARSSSTIRIKTFSEVLAEEEHRQQEAERQKSKKDTTCIKLKTDSEIKKTVVLPPIVASKGQSEEPAGKTKSMQEVHMKTVEEIKLEKALRVQQSSESSTSSPSQHEATPGARLLLRITKRTWRKEEKKLQEGNEVDFLSRVRMEATEASVETTGVDITKIQVKRCEIMRETRMQKQQEREKSVLTPLQGDVASCNTQVAEKPVLTAVPGITWHLTKQLPTKSSQKVEVETSGIADSLLNVKWSAQTLEKRGEAKPTVNVKQSVVKVVSSPKLAPKRKAVEMHPAVTAAVKPLSSSSVLQEPPAKKAAVDAVVLLDSEDKSVTVPEAENPRDSLVLPPTQSSSDSSPPEVSGPSSSQMSMKTRRLSSASTGKPPLSVEDDFEKLTWEISGGKLEAEIDLDPGKDEDDLPLEL",
"up_ac": "A0A1B0GTU1",
"up_crc64": "2F9E1922DA454BCD",
"up_gn": "ZC3H11B",
"up_id": "ZC11B_HUMAN",
"up_isoform": null,
"up_last_mod": "2016-10-05",
"up_ncbi_taxid": "9606",
"up_organism": "Homo sapiens (Human)",
"up_seqlen": 805,
"up_sequence": "MPNQGEDCYFFFYSTCTKGDSCPFRHCEAALGNETVCTLWQEGRCFRRVCRFRHMEIDKKRSEIPCYWENQPTGCQKLNCVFHHNRGRYVDGLFLPPSKSVLPTVPESPEEEVKASQLSVQQNKLSVQSNTSPQLRSVMKVESSENVPSPKHPPVVINAADDDEDDDDQFSEEGDETKTPTLQPTPEVHNGLRVTSVRKPAVNIKQGECLHFGIKTLEEIKSKKMKEKSEEQGEGSSGVSSLLLHPEPVPGPEKENVRTVVRTVTLSTKQGEEPLVRLGLTETLGKRKFSTGGDSDPPLKRSLAQRLGKKVEAPETNTDETPKKAQVSKSLKERLGMSADPNNEDATDKVNKVGEIHVKTLEEMLLERASQKHGESQTKLKTEGPSKTDDSTSGARSSSTIRIKTFSEVLAEEEHRQQEAERQKSKKDTTCIKLKTDSEIKKTVVLPPIVASKGQSEEPAGKTKSMQEVHMKTVEEIKLEKALRVQQSSESSTSSPSQHEATPGARLLLRITKRTWRKEEKKLQEGNEVDFLSRVRMEATEASVETTGVDITKIQVKRCEIMRETRMQKQQEREKSVLTPLQGDVASCNTQVAEKPVLTAVPGITWHLTKQLPTKSSQKVEVETSGIADSLLNVKWSAQTLEKRGEAKPTVNVKQSVVKVVSSPKLAPKRKAVEMHPAVTAAVKPLSSSSVLQEPPAKKAAVDAVVLLDSEDKSVTVPEAENPRDSLVLPPTQSSSDSSPPEVSGPSSSQMSMKTRRLSSASTGKPPLSVEDDFEKLTWEISGGKLEAEIDLDPGKDEDDLPLEL"
},
{
"description": "Model of ZC3H11A (O75152)",
"pdb_chain_id": "C",
"pdb_sequence": "MPNQGEDCYFFFYSTCTKGDSCPFRHCEAAIGNETVCTLWQEGRCFRQVCRFRHMEIDKKRSEIPCYWENQPTGCQKLNCAFHHNRGRYVDGLFLPPSKTVLPTVPESPEEEVKASQLSVQQNKLSVQSNPSPQLRSVMKVESSENVPSPTHPPVVINAADDDEDDDDQFSEEGDETKTPTLQPTPEVHNGLRVTSVRKPAVNIKQGECLNFGIKTLEEIKSKKMKEKSKKQGEGSSGVSSLLLHPEPVPGPEKENVRTVVRTVTLSTKQGEEPLVRLSLTERLGKRKFSAGGDSDPPLKRSLAQRLGKKVEAPETNIDKTPKKAQVSKSLKERLGMSADPDNEDATDKVNKVGEIHVKTLEEILLERASQKRGELQTKLKTEGPSKTDDSTSGARSSSTIRIKTFSEVLAEKKHRQQEAERQKSKKDTTCIKLKIDSEIKKTVVLPPIVASRGQSEEPAGKTKSMQEVHIKTLEEIKLEKALRVQQSSESSTSSPSQHEATPGARRLLRITKRTGMKEEKNLQEGNEVDSQSSIRTEAKEASGETTGVDITKIQVKRCETMREKHMQKQQEREKSVLTPLRGDVASCNTQVAEKPVLTAVPGITRHLTKRLPTKSSQKVEVETSGIGDSLLNVKCAAQTLEKRGKAKPKVNVKPSVVKVVSSPKLAPKRKAVEMHAAVIAAVKPLSSSSVLQEPPAKKAAVAVVPLVSEDKSVTVPEAENPRDSLVLPPTQSSSDSSPPEVSGPSSSQMSMKTRRLSSASTGKPPLSVEDDFEKLIWEISGGKLEAEIDLDPGKDEDDLLLELSEMIDS",
"up_ac": "O75152",
"up_crc64": "9048ABC7F4A372FB",
"up_gn": "ZC3H11A",
"up_id": "ZC11A_HUMAN",
"up_isoform": null,
"up_last_mod": "2016-10-05",
"up_ncbi_taxid": "9606",
"up_organism": "Homo sapiens (Human)",
"up_seqlen": 810,
"up_sequence": "MPNQGEDCYFFFYSTCTKGDSCPFRHCEAAIGNETVCTLWQEGRCFRQVCRFRHMEIDKKRSEIPCYWENQPTGCQKLNCAFHHNRGRYVDGLFLPPSKTVLPTVPESPEEEVKASQLSVQQNKLSVQSNPSPQLRSVMKVESSENVPSPTHPPVVINAADDDEDDDDQFSEEGDETKTPTLQPTPEVHNGLRVTSVRKPAVNIKQGECLNFGIKTLEEIKSKKMKEKSKKQGEGSSGVSSLLLHPEPVPGPEKENVRTVVRTVTLSTKQGEEPLVRLSLTERLGKRKFSAGGDSDPPLKRSLAQRLGKKVEAPETNIDKTPKKAQVSKSLKERLGMSADPDNEDATDKVNKVGEIHVKTLEEILLERASQKRGELQTKLKTEGPSKTDDSTSGARSSSTIRIKTFSEVLAEKKHRQQEAERQKSKKDTTCIKLKIDSEIKKTVVLPPIVASRGQSEEPAGKTKSMQEVHIKTLEEIKLEKALRVQQSSESSTSSPSQHEATPGARRLLRITKRTGMKEEKNLQEGNEVDSQSSIRTEAKEASGETTGVDITKIQVKRCETMREKHMQKQQEREKSVLTPLRGDVASCNTQVAEKPVLTAVPGITRHLTKRLPTKSSQKVEVETSGIGDSLLNVKCAAQTLEKRGKAKPKVNVKPSVVKVVSSPKLAPKRKAVEMHAAVIAAVKPLSSSSVLQEPPAKKAAVAVVPLVSEDKSVTVPEAENPRDSLVLPPTQSSSDSSPPEVSGPSSSQMSMKTRRLSSASTGKPPLSVEDDFEKLIWEISGGKLEAEIDLDPGKDEDDLLLELSEMIDS"
}
],
"title": "_struct.title"
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment