diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..cc3c2b143c81f29db6f7d553968cab0831b90f33
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,67 @@
+ARG VERSION_OST="2.3.0"
+FROM registry.scicore.unibas.ch/schwede/openstructure:${VERSION_OST}
+# We need to declare ARGs again which were declared before the build stage
+# (FROM directive), otherwise they won't be available in this stage.
+ARG VERSION_OST
+
+## Set up environment
+ENV SRC_DIR="/tmp" \
+    VERSION_OST=${VERSION_OST} \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1
+
+
+LABEL org.openstructure.base-image="${VERSION_OST}"
+LABEL org.openstructure.translate2modelcif="2022-04-08.1"
+LABEL maintainer="Stefan Bienert <stefan.bienert@unibas.ch>"
+LABEL vendor1="Schwede Group (schwedelab.org)"
+LABEL vendor2="SIB - Swiss Institute of Bioinformatics (sib.swiss)"
+LABEL vendor3="Biozentrum - University of Basel (biozentrum.unibas.ch)"
+
+
+## Install python-modelcif and python-ihm
+COPY docker/requirements.txt ${SRC_DIR}
+WORKDIR ${SRC_DIR}
+# Stop at the first failing command (no pipes are used, so no 'pipefail').
+RUN set -e; \
+    apt-get update -y; \
+    # for development, install venv
+    apt-get install -y git pip python3.8-venv; \
+    pip install -r requirements.txt; \
+    # for development, install black & PyLint
+    pip install black pylint; \
+    git clone https://github.com/ihmwg/python-ihm.git ihm.git; \
+    cd ihm.git; \
+    python3 setup.py build; \
+    python3 setup.py install; \
+    rm -rf ${SRC_DIR}/ihm.git; \
+    cd ${SRC_DIR}; \
+    git clone https://github.com/ihmwg/python-modelcif.git modelcif.git; \
+    cd modelcif.git; \
+    python3 setup.py build; \
+    python3 setup.py install; \
+    cd ${SRC_DIR}; \
+    rm -rf ${SRC_DIR}/modelcif.git; \
+    rm -rf /var/lib/apt/lists/*; \
+    apt-get purge -y --auto-remove git pip gcc
+
+## Add a dedicated user
+## MMCIF_USER_ID can be used to avoid file permission issues.
+ARG MMCIF_USER_ID=501
+RUN adduser --system -u ${MMCIF_USER_ID} mmcif
+
+
+COPY --chmod=755 docker/entrypoint.sh /
+
+## Copy tool(s)
+ARG CONVERTERSCRIPT=projects/pointmutations-haddock/convert2modelcif.py
+COPY --chmod=755 $CONVERTERSCRIPT /usr/local/bin/convert2modelcif
+
+USER mmcif
+
+ENTRYPOINT ["/entrypoint.sh"]
+
+# LocalWords: ARG OST ARGs ENV SRC tmp PYTHONUNBUFFERED Schwede schwedelab py
+# LocalWords: PYTHONDONTWRITEBYTECODE Bioinformatics sib swiss Biozentrum ihm
+# LocalWords: modelcif txt WORKDIR pipefail chmod adduser mmcif ENTRYPOINT cd
+# LocalWords: pylint rf entrypoint
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
new file mode 100644
index 0000000000000000000000000000000000000000..20874f8b156d3b6b168a2fb40d7ec8a13a7ac4a8
--- /dev/null
+++ b/docker/entrypoint.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+## (Bash is required: 'pipefail' and '==' in '[ ]' are not portable to sh.)
+
+
+## exit immediately on commands with a non-zero exit status.
+set -euo pipefail
+
+## When started without any arguments, "-h", "--help", "-help" or "help", print
+## usage.
+if [ $# -eq 0 ] || [ "$1" == "-h" ] || [ "$1" == "--help" ] ||
+   [ "$1" == "-help" ] || [ "$1" == "help" ]; then
+    echo " ModelCIF file converter"
+    echo "----------------------------------------"
+    echo "Provided by SWISS-MODEL / Schwede group"
+    echo "(swissmodel.expasy.org / schwedelab.org)"
+    echo ""
+    /usr/local/bin/convert2modelcif --help
+    exit 1
+fi
+if [ "$1" == "convert2modelcif" ] || [ "$1" == "2cif" ]; then
+    shift
+    # take over the process, make convert2modelcif run on PID 1
+    exec /usr/local/bin/convert2modelcif "$@"
+fi
+
+## Anything else is executed as given, replacing this shell.
+exec "$@"
+
+# LocalWords: euo pipefail eq Schwede schwedelab mmcif fi
diff --git a/docker/requirements.txt b/docker/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2abd7d3a956e774cb5b3d3765de5e9b9d66f2d96
--- /dev/null
+++ b/docker/requirements.txt
@@ -0,0 +1,2 @@
+requests
+ujson
diff --git a/projects/pointmutations-haddock/convert2modelcif.py b/projects/pointmutations-haddock/convert2modelcif.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ccafd331823670334ea496bfba83ca49cc2f5c4
--- /dev/null
+++ b/projects/pointmutations-haddock/convert2modelcif.py
@@ -0,0 +1,105 @@
+#! /usr/local/bin/ost
+"""Convert point mutation/ HADDOCK models (PDB + extra data) into ModelCIF."""
+
+import argparse
+import os
+import sys
+import ujson as json
+
+import modelcif.dumper
+
+
+def _parse_args():
+    """Parse command line arguments.
+
+    Every model file must exist and must be accompanied by a JSON file that
+    shares its path minus the extension; otherwise the script aborts.
+    """
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=__doc__,
+    )
+
+    parser.add_argument(
+        "model_list",
+        nargs="+",
+        metavar="<MODEL FILE>",
+        help="Model PDB file to be converted.",
+    )
+
+    opts = parser.parse_args()
+
+    # check arguments
+    for mfle in opts.model_list:
+        if not os.path.isfile(mfle):
+            _abort_msg(f"Model PDB file '{mfle}' does not exist.")
+        mfp = os.path.splitext(mfle)[0]
+        if not os.path.isfile(f"{mfp}.json"):
+            _abort_msg(
+                f"JSON file '{mfp}.json' for model "
+                + f"'{os.path.basename(mfp)}' does not exist."
+            )
+
+    return opts
+
+
+def _abort_msg(msg, exit_code=1):
+    """Write error message and exit with exit_code."""
+    print(f"{msg}\nAborting.", file=sys.stderr)
+    sys.exit(exit_code)
+
+
+def _get_model_json(file_prfx):
+    """Load the JSON file corresponding to a model.
+
+    The file is read as UTF-8 (a superset of ASCII), so metadata containing
+    non-ASCII characters does not abort the conversion.
+    """
+    with open(f"{file_prfx}.json", encoding="utf-8") as jfh:
+        return json.load(jfh)
+
+
+def _get_modelcif_entities(target_entities):
+    """Create ModelCIF entities.
+
+    NOTE(review): for now this only prints the length of each entity's
+    "target_sequence"; template.json lists "pdb_sequence" and "up_sequence"
+    instead - confirm which key the input JSON is expected to provide.
+    """
+    for cif_ent in target_entities:
+        print(len(cif_ent["target_sequence"]))
+
+
+def _store_as_modelcif(name, data_json, file_prfx):
+    """Mix metadata and coordinates into a ModelCIF file."""
+    # create system to gather all data
+    system = modelcif.System(
+        title=data_json["title"],
+        id=name.upper(),
+        model_details=data_json["model_details"],
+    )
+
+    # source: syn/ nat, can be changed on object
+    _get_modelcif_entities(data_json["target_entities"])
+
+    # finally, write the ModelCIF file
+    with open(f"{file_prfx}.cif", "w", encoding="ascii") as mmcif_fh:
+        modelcif.dumper.write(mmcif_fh, [system])
+
+
+def _main():
+    """Run as script."""
+    opts = _parse_args()
+
+    for model_file in opts.model_list:
+        model_prfx = os.path.splitext(model_file)[0]
+        model_name = os.path.basename(model_prfx)
+        print(f"Working on {model_name}...", model_prfx)
+        model_json = _get_model_json(model_prfx)
+        _store_as_modelcif(model_name, model_json, model_prfx)
+
+        print(f"... done working on {model_name}.")
+
+
+if __name__ == "__main__":
+    _main()
diff --git a/projects/pointmutations-haddock/template.json b/projects/pointmutations-haddock/template.json
new file mode 100644
index 0000000000000000000000000000000000000000..a404f34d99fbb79a3593359ae2976b1150320204
--- /dev/null
+++ b/projects/pointmutations-haddock/template.json
@@ -0,0 +1,185 @@
+{
+  "audit_authors": [
+    "Bartolec T",
+    "V\u00e1zquez-Campos X",
+    "Johnson M",
+    "Norman A",
+    "Payne R",
+    "Wilkins M",
+    "Mackay J",
+    "Low J"
+  ],
+  "max_pae": 31.75,
+  "model_details": "_struct.pdbx_model_details",
+  "model_group_name": "Crosslinked Heterodimer AlphaFold-Multimer v2 Models",
+  "protocol": [
+    {
+      "details": "Model using AlphaFold-Multimer (AlphaFold v2.2.0), without amber relaxation and producing 5 models with up to 3 recycles each, starting from paired and unpaired MSAs for the dimers using MMseqs2.",
+      "input": "target_sequences",
+      "method_type": "modeling",
+      "name": "ma_protocol_step.step_name",
+      "output": "model",
+      "software": [
+        {
+          "citation": {
+            "authors": [
+              "Mirdita M",
+              "Sch\u00fctze K",
+              "Moriwaki Y",
+              "Heo L",
+              "Ovchinnikov S",
+              "Steinegger M"
+            ],
+            "doi": "10.1101\/2021.08.15.456425",
+            "journal": "bioRxiv",
+            "page_range": null,
+            "pmid": null,
+            "title": "ColabFold - Making protein folding accessible to all",
+            "volume": null,
+            "year": 2022
+          },
+          "classification": "model building",
+          "description": "software.description",
+          "location": "https:\/\/github.com\/sokrypton\/ColabFold",
+          "name": "ColabFold",
+          "type": "package",
+          "version": "1.2.0"
+        },
+        {
+          "citation": {
+            "authors": [
+              "Mirdita M",
+              "Steinegger M",
+              "S\u00f6ding J"
+            ],
+            "doi": "10.1093\/bioinformatics\/bty1057",
+            "journal": "Bioinformatics",
+            "page_range": [
+              2856,
+              2858
+            ],
+            "pmid": "30615063",
+            "title": "MMseqs2 desktop and local web server app for fast, interactive sequence searches",
+            "volume": 35,
+            "year": 2019
+          },
+          "classification": "data collection",
"description": "Many-against-Many sequence searching", + "location": "https:\/\/github.com\/soedinglab\/mmseqs2", + "name": "MMseqs2", + "type": "package", + "version": null + }, + { + "citation": { + "authors": [ + "Evans R", + "O'Neill M", + "Pritzel A", + "Antropova N", + "Senior A", + "Green T", + "\u017d\u00eddek A", + "Bates R", + "Blackwell S", + "Yim J", + "Ronneberger O", + "Bodenstein S", + "Zielinski M", + "Bridgland A", + "Potapenko A", + "Cowie A", + "Tunyasuvunakool K", + "Jain R", + "Clancy E", + "Kohli P", + "Jumper J", + "Hassabis D" + ], + "doi": "10.1101\/2021.10.04.463034", + "journal": "bioRxiv", + "page_range": null, + "pmid": null, + "title": "Protein complex prediction with AlphaFold-Multimer.", + "volume": null, + "year": 2021 + }, + "classification": "model building", + "description": "Structure prediction", + "location": "https:\/\/github.com\/deepmind\/alphafold", + "name": "AlphaFold-Multimer", + "type": "package", + "version": "2.1.1" + } + ], + "software_parameters": { + "commit": "b532e910b15434f707f0b7460abc25c70fcb9b26", + "host_url": "https:\/\/api.colabfold.com", + "keep_existing_results": true, + "model_order": [ + 3, + 4, + 5, + 1, + 2 + ], + "model_type": "AlphaFold2-multimer-v2", + "msa_mode": "MMseqs2 (UniRef+Environmental)", + "num_models": 5, + "num_queries": 1, + "num_recycles": 3, + "pair_mode": "unpaired+paired", + "rank_by": "multimer", + "recompile_all_models": true, + "recompile_padding": 1.1, + "stop_at_score": 100, + "use_amber": false, + "use_templates": false, + "version": "1.2.0" + } + }, + { + "details": "Select best model, which is either the top-ranked model as determined by the ColabFold pipeline (iptmscore*0.8+ptmscore*0.2), or else the model with best congruence with crosslinks reported in the related study.", + "input": "model", + "method_type": "model selection", + "name": "ma_protocol_step.step_name", + "output": "model", + "software": [], + "software_parameters": {} + } + ], + "ptm": 0.24, + 
"target_entities": [ + { + "description": "Model of ZC3H11B (A0A1B0GTU1)", + "pdb_chain_id": "B", + "pdb_sequence": "MPNQGEDCYFFFYSTCTKGDSCPFRHCEAALGNETVCTLWQEGRCFRRVCRFRHMEIDKKRSEIPCYWENQPTGCQKLNCVFHHNRGRYVDGLFLPPSKSVLPTVPESPEEEVKASQLSVQQNKLSVQSNTSPQLRSVMKVESSENVPSPKHPPVVINAADDDEDDDDQFSEEGDETKTPTLQPTPEVHNGLRVTSVRKPAVNIKQGECLHFGIKTLEEIKSKKMKEKSEEQGEGSSGVSSLLLHPEPVPGPEKENVRTVVRTVTLSTKQGEEPLVRLGLTETLGKRKFSTGGDSDPPLKRSLAQRLGKKVEAPETNTDETPKKAQVSKSLKERLGMSADPNNEDATDKVNKVGEIHVKTLEEMLLERASQKHGESQTKLKTEGPSKTDDSTSGARSSSTIRIKTFSEVLAEEEHRQQEAERQKSKKDTTCIKLKTDSEIKKTVVLPPIVASKGQSEEPAGKTKSMQEVHMKTVEEIKLEKALRVQQSSESSTSSPSQHEATPGARLLLRITKRTWRKEEKKLQEGNEVDFLSRVRMEATEASVETTGVDITKIQVKRCEIMRETRMQKQQEREKSVLTPLQGDVASCNTQVAEKPVLTAVPGITWHLTKQLPTKSSQKVEVETSGIADSLLNVKWSAQTLEKRGEAKPTVNVKQSVVKVVSSPKLAPKRKAVEMHPAVTAAVKPLSSSSVLQEPPAKKAAVDAVVLLDSEDKSVTVPEAENPRDSLVLPPTQSSSDSSPPEVSGPSSSQMSMKTRRLSSASTGKPPLSVEDDFEKLTWEISGGKLEAEIDLDPGKDEDDLPLEL", + "up_ac": "A0A1B0GTU1", + "up_crc64": "2F9E1922DA454BCD", + "up_gn": "ZC3H11B", + "up_id": "ZC11B_HUMAN", + "up_isoform": null, + "up_last_mod": "2016-10-05", + "up_ncbi_taxid": "9606", + "up_organism": "Homo sapiens (Human)", + "up_seqlen": 805, + "up_sequence": "MPNQGEDCYFFFYSTCTKGDSCPFRHCEAALGNETVCTLWQEGRCFRRVCRFRHMEIDKKRSEIPCYWENQPTGCQKLNCVFHHNRGRYVDGLFLPPSKSVLPTVPESPEEEVKASQLSVQQNKLSVQSNTSPQLRSVMKVESSENVPSPKHPPVVINAADDDEDDDDQFSEEGDETKTPTLQPTPEVHNGLRVTSVRKPAVNIKQGECLHFGIKTLEEIKSKKMKEKSEEQGEGSSGVSSLLLHPEPVPGPEKENVRTVVRTVTLSTKQGEEPLVRLGLTETLGKRKFSTGGDSDPPLKRSLAQRLGKKVEAPETNTDETPKKAQVSKSLKERLGMSADPNNEDATDKVNKVGEIHVKTLEEMLLERASQKHGESQTKLKTEGPSKTDDSTSGARSSSTIRIKTFSEVLAEEEHRQQEAERQKSKKDTTCIKLKTDSEIKKTVVLPPIVASKGQSEEPAGKTKSMQEVHMKTVEEIKLEKALRVQQSSESSTSSPSQHEATPGARLLLRITKRTWRKEEKKLQEGNEVDFLSRVRMEATEASVETTGVDITKIQVKRCEIMRETRMQKQQEREKSVLTPLQGDVASCNTQVAEKPVLTAVPGITWHLTKQLPTKSSQKVEVETSGIADSLLNVKWSAQTLEKRGEAKPTVNVKQSVVKVVSSPKLAPKRKAVEMHPAVTAAVKPLSSSSVLQEPPAKKAAVDAVVLLDSEDKSVTVPEAENPRDSLVLPPTQSSSDSSPPEVSGPSSSQMSMKTRRLSSASTGKPPLSVEDDFEKLTWEISGGKLEAEIDLDPGKDEDDLPLEL" + 
}, + { + "description": "Model of ZC3H11A (O75152)", + "pdb_chain_id": "C", + "pdb_sequence": "MPNQGEDCYFFFYSTCTKGDSCPFRHCEAAIGNETVCTLWQEGRCFRQVCRFRHMEIDKKRSEIPCYWENQPTGCQKLNCAFHHNRGRYVDGLFLPPSKTVLPTVPESPEEEVKASQLSVQQNKLSVQSNPSPQLRSVMKVESSENVPSPTHPPVVINAADDDEDDDDQFSEEGDETKTPTLQPTPEVHNGLRVTSVRKPAVNIKQGECLNFGIKTLEEIKSKKMKEKSKKQGEGSSGVSSLLLHPEPVPGPEKENVRTVVRTVTLSTKQGEEPLVRLSLTERLGKRKFSAGGDSDPPLKRSLAQRLGKKVEAPETNIDKTPKKAQVSKSLKERLGMSADPDNEDATDKVNKVGEIHVKTLEEILLERASQKRGELQTKLKTEGPSKTDDSTSGARSSSTIRIKTFSEVLAEKKHRQQEAERQKSKKDTTCIKLKIDSEIKKTVVLPPIVASRGQSEEPAGKTKSMQEVHIKTLEEIKLEKALRVQQSSESSTSSPSQHEATPGARRLLRITKRTGMKEEKNLQEGNEVDSQSSIRTEAKEASGETTGVDITKIQVKRCETMREKHMQKQQEREKSVLTPLRGDVASCNTQVAEKPVLTAVPGITRHLTKRLPTKSSQKVEVETSGIGDSLLNVKCAAQTLEKRGKAKPKVNVKPSVVKVVSSPKLAPKRKAVEMHAAVIAAVKPLSSSSVLQEPPAKKAAVAVVPLVSEDKSVTVPEAENPRDSLVLPPTQSSSDSSPPEVSGPSSSQMSMKTRRLSSASTGKPPLSVEDDFEKLIWEISGGKLEAEIDLDPGKDEDDLLLELSEMIDS", + "up_ac": "O75152", + "up_crc64": "9048ABC7F4A372FB", + "up_gn": "ZC3H11A", + "up_id": "ZC11A_HUMAN", + "up_isoform": null, + "up_last_mod": "2016-10-05", + "up_ncbi_taxid": "9606", + "up_organism": "Homo sapiens (Human)", + "up_seqlen": 810, + "up_sequence": "MPNQGEDCYFFFYSTCTKGDSCPFRHCEAAIGNETVCTLWQEGRCFRQVCRFRHMEIDKKRSEIPCYWENQPTGCQKLNCAFHHNRGRYVDGLFLPPSKTVLPTVPESPEEEVKASQLSVQQNKLSVQSNPSPQLRSVMKVESSENVPSPTHPPVVINAADDDEDDDDQFSEEGDETKTPTLQPTPEVHNGLRVTSVRKPAVNIKQGECLNFGIKTLEEIKSKKMKEKSKKQGEGSSGVSSLLLHPEPVPGPEKENVRTVVRTVTLSTKQGEEPLVRLSLTERLGKRKFSAGGDSDPPLKRSLAQRLGKKVEAPETNIDKTPKKAQVSKSLKERLGMSADPDNEDATDKVNKVGEIHVKTLEEILLERASQKRGELQTKLKTEGPSKTDDSTSGARSSSTIRIKTFSEVLAEKKHRQQEAERQKSKKDTTCIKLKIDSEIKKTVVLPPIVASRGQSEEPAGKTKSMQEVHIKTLEEIKLEKALRVQQSSESSTSSPSQHEATPGARRLLRITKRTGMKEEKNLQEGNEVDSQSSIRTEAKEASGETTGVDITKIQVKRCETMREKHMQKQQEREKSVLTPLRGDVASCNTQVAEKPVLTAVPGITRHLTKRLPTKSSQKVEVETSGIGDSLLNVKCAAQTLEKRGKAKPKVNVKPSVVKVVSSPKLAPKRKAVEMHAAVIAAVKPLSSSSVLQEPPAKKAAVAVVPLVSEDKSVTVPEAENPRDSLVLPPTQSSSDSSPPEVSGPSSSQMSMKTRRLSSASTGKPPLSVEDDFEKLIWEISGGKLEAEIDLDPGKDEDDLLLELSEMIDS" + } + ], + 
"title": "_struct.title" +}