diff --git a/CHANGELOG.md b/CHANGELOG.md index 2fb5707cc3c6e35bdd02906f715fd4502ce4fd39..57a5d76e751d4336e4926b2695530d98d99355be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +Changes in Release 2.0.0 +================================================================================= + +* `run_af2` Python package: + * Prepare to run AF2 pipeline 2.1 + * rename command line options `--pdb70_database_path` and + `--obsolete_pdbs_path` to `--pdb70-database-path` and `--obsolete-pdbs-path` + + Changes in Release 1.0.5 ================================================================================= diff --git a/run_af2/bin/submit-af2 b/run_af2/bin/submit-af2 index 7d5b1d075d14991bf964d401dafeda682cf5bd0a..014b8b8ff3d4e094d4f0a4c77144c676bb668532 100755 --- a/run_af2/bin/submit-af2 +++ b/run_af2/bin/submit-af2 @@ -28,7 +28,8 @@ if test $# -lt 2; then echo "usage: submit-af2 [--use-gpu]" echo " [--db-preset {reduced_dbs,full_dbs}]" - echo " [model-preset {monomer,monomer_casp14,monomer_ptm,multimer}]" + echo " [--model-preset {monomer,monomer_casp14,monomer_ptm,multimer}]" + echo " [--is-prokaryote-list <bool>,<bool>,...]" echo " [--max-template-date YYYY-MM-DD]" echo " <OUTPUT DIR> <FASTA FILE> [<FASTA FILE> ...]" exit 1 @@ -36,9 +37,9 @@ fi # There are only two non-positional arguments, simply loop over candidates. # This should be turned into switch..case if argument extension goes on. 
-SLURM_AF_PARAMS="--exclude=lii[02-28]" +SLURM_AF_PARAMS="" #"--exclude=lii[02-28]" AF_PIPELINE_PARAM="" -for i in 1 2 3 4; do +for i in 1 2 3 4 5; do if test x"${1}" = x"--use-gpu"; then shift SLURM_AF_PARAMS="--partition=a100,rtx8000 --gres=gpu:1" @@ -54,6 +55,12 @@ for i in 1 2 3 4; do else if test x"${1}" = x"--model-preset"; then shift AF_PIPELINE_PARAM="${AF_PIPELINE_PARAM} --model-preset ${1}" + shift + else if test x"${1}" = x"--is-prokaryote-list"; then + shift + AF_PIPELINE_PARAM="${AF_PIPELINE_PARAM} --is-prokaryote-list ${1}" + shift + fi fi fi fi diff --git a/run_af2/src/run_af2/_shared.py b/run_af2/src/run_af2/_shared.py index 5d7f22c75261a5320142ae8b6e69550fdc399616..27aa8e1ce24b726aa2b13e957b844950464e5797 100644 --- a/run_af2/src/run_af2/_shared.py +++ b/run_af2/src/run_af2/_shared.py @@ -14,8 +14,8 @@ functionality to be external, we should create a proper module for that. # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -110,6 +110,16 @@ def parse_af2_arguments(parser): + "AF2 pipeline.", default="monomer", ) + af2_group.add_argument( + "-i", + "--is-prokaryote-list", + type=str, + help="For the multimer model, declare sequences to be prokaryotic or " + + "not. 
Goes by true/ false, needs to be listed for every input FASTA " + "as a list.", + metavar="<true|false>,<true|false>,...", + default=None, + ) af2_group.add_argument( "--version", action="version", @@ -161,12 +171,26 @@ def parse_af2_arguments(parser): default=None, ) af2_group.add_argument( - "--pdb70_database_path", + "--uniprot-database-path", + type=str, + help="Path to the UniProt database, for the JackHMMer call.", + metavar="<PATH>", + default=None, + ) + af2_group.add_argument( + "--pdb70-database-path", type=str, help="Path to the PDB70 database, prefix for the HHsearch call.", metavar="<PATH/ PREFIX>", default=None, ) + af2_group.add_argument( + "--pdb-seqres-database-path", + type=str, + help="Path to the PDB seqres database, for the hmmsearch call.", + metavar="<PATH>", + default=None, + ) af2_group.add_argument( "--template-mmcif-dir", type=str, @@ -175,7 +199,7 @@ def parse_af2_arguments(parser): default=None, ) af2_group.add_argument( - "--obsolete_pdbs_path", + "--obsolete-pdbs-path", type=str, help="File mapping obsoleted PDB entries to their replacements.", metavar="<File>", @@ -200,6 +224,26 @@ def parse_af2_arguments(parser): flush=True, ) + # check that model_preset settings go along with databases + if opts.model_preset == "multimer": + if opts.pdb70_database_path is not None: + print( + f"WARNING: '--model-preset {opts.model_preset}' disables use " + + "of '--pdb70-database-path'.", + file=sys.stderr, + flush=True, + ) + + # check that --is-prokaryote-list has as many values as sequence files + if opts.is_prokaryote_list is not None: + if opts.is_prokaryote_list.count(",") + 1 != len(opts.fasta_files): + print( + "Number of values for '--is-prokaryote-list' does not match " + + "the number of FASTA files. 
Aborting.", + file=sys.stderr, + ) + sys.exit(1) + if opts.singularity_image is not None: if not os.path.exists(opts.singularity_image): print( diff --git a/run_af2/src/run_af2/run_singularity.py b/run_af2/src/run_af2/run_singularity.py index 5d87fa3b990016e6304fd932c54e442cb3aea9a5..de18bb1f407c9fbaa4bd69a1ee07fe4980e79679 100644 --- a/run_af2/src/run_af2/run_singularity.py +++ b/run_af2/src/run_af2/run_singularity.py @@ -116,6 +116,7 @@ def _assemble_singularity_call( # pylint: disable=too-many-arguments use_gpu, db_preset, model_preset, + is_prokaryote_list, data_paths, ): """Assemble the command to run AF2 from the Singularity image.""" @@ -151,6 +152,9 @@ def _assemble_singularity_call( # pylint: disable=too-many-arguments "--logtostderr", ] ) + if is_prokaryote_list is not None: + snglrty_cmd.append(f"--is_prokaryote_list={is_prokaryote_list}") + for flag, path in data_paths.items(): snglrty_cmd.append(f"--{flag}={path}") @@ -189,6 +193,7 @@ def run_af2_singularity_image( # pylint: disable=too-many-arguments use_gpu=False, db_preset="full_dbs", model_preset="monomer", + is_prokaryote_list=None, af2_image_file=None, # "/export/soft/singularity-containers/alphafold", af2_image_dir="/scicore/home/schwede/GROUP/alphafold_data/", @@ -205,7 +210,11 @@ def run_af2_singularity_image( # pylint: disable=too-many-arguments + "frozen_210812T135116/bfd-first_non_consensus_sequences.fasta", uniclust30_database_path="/scicore/data/managed/Uniclust/" + "frozen_181015T170110/uniclust30_2018_08/uniclust30_2018_08", + uniprot_database_path="/scicore/home/schwede/scicore/home/schwede/GROUP/" + + "alphafold_data/uniprot/uniprot.fasta", pdb70_database_path="/scicore/data/managed/PDB70/frozen_210805T142857/pdb70", + pdb_seqres_database_path="/scicore/home/schwede/scicore/home/schwede/GROUP/" + + "alphafold_data/pdb_seqres/pdb_seqres.txt", template_mmcif_dir="/scicore/data/managed/PDB/latest/data/structures/all/" + "mmcif_files/", 
obsolete_pdbs_path="/scicore/data/managed/PDB/latest/data/status/" @@ -240,6 +249,11 @@ def run_af2_singularity_image( # pylint: disable=too-many-arguments :param model_preset: Model configuration - corresponds to the model_preset parameter in AF2. :type model_preset: :class:`str` + :param is_prokaryote_list: For the multimer model, mark sequences as + prokaryotic. Corresponds to the + is_prokaryote_list parameter in AF2. + :type is_prokaryote_list: :class:`str` like needed by AF2, e.g. + "true,false,true" :param af2_image_file: Declare a Singularity image to run the AF2 pipeline from. If None, an image from af2_image_dir will be used. @@ -276,8 +290,17 @@ def run_af2_singularity_image( # pylint: disable=too-many-arguments :param uniclust30_database_path: Uniclust30 database prefix. Corresponds to uniclust30_database_path in AF2. :type uniclust30_database_path: :class:`str` + :param uniprot_database_path: UniProt sequence file. Corresponds to + uniprot_database_path in AF2. Needed for + model_preset == multimer. + :type uniprot_database_path: :class:`str` :param pdb70_database_path: PDB70 database prefix. Corresponds to - pdb70_database_path in AF2. + pdb70_database_path in AF2. Will be disabled if + model_preset == multimer. + :param pdb_seqres_database_path: PDB seqres file. Corresponds to + pdb_seqres_database_path in AF2. Needed for + model_preset == multimer. + :type pdb_seqres_database_path: :class:`str` :type pdb70_database_path: :class:`str` :param template_mmcif_dir: Path to the template structures. Corresponds to template_mmcif_dir in AF2. 
@@ -303,7 +326,7 @@ def run_af2_singularity_image( # pylint: disable=too-many-arguments ) if model_preset not in _shared.MODEL_PRESET_CHOICES: raise ValueError( - f"model_reset {model_preset} not allowed, known " + f"model_preset {model_preset} not allowed, known " + f"values: {', '.join(_shared.MODEL_PRESET_CHOICES)}" ) @@ -321,6 +344,10 @@ def run_af2_singularity_image( # pylint: disable=too-many-arguments data_paths["uniclust30_database_path"] = uniclust30_database_path elif db_preset == "reduced_dbs": data_paths["small_bfd_database_path"] = small_bfd_database_path + if model_preset == "multimer": + del data_paths["pdb70_database_path"] + data_paths["pdb_seqres_database_path"] = pdb_seqres_database_path + data_paths["uniprot_database_path"] = uniprot_database_path # Collect bind mountpoints for singularity, mountpoints for input data plus # adjusting input file paths, deal with the output directory. @@ -348,6 +375,7 @@ def run_af2_singularity_image( # pylint: disable=too-many-arguments use_gpu, db_preset, model_preset, + is_prokaryote_list, data_paths, ) @@ -396,8 +424,12 @@ def main(): data_paths["small_bfd_database_path"] = opts.bfd_database_path if opts.uniclust30_database_path is not None: data_paths["uniclust30_database_path"] = opts.uniclust30_database_path + if opts.uniprot_database_path is not None: + data_paths["uniprot_database_path"] = opts.uniprot_database_path if opts.pdb70_database_path is not None: data_paths["pdb70_database_path"] = opts.pdb70_database_path + if opts.pdb_seqres_database_path is not None: + data_paths["pdb_seqres_database_path"] = opts.pdb_seqres_database_path if opts.template_mmcif_dir is not None: data_paths["template_mmcif_dir"] = opts.template_mmcif_dir if opts.obsolete_pdbs_path is not None: @@ -411,6 +443,7 @@ def main(): opts.use_gpu, opts.db_preset, opts.model_preset, + opts.is_prokaryote_list, af2_image_file=opts.singularity_image, **data_paths, ) @@ -422,4 +455,5 @@ def main(): # LocalWords: AlphaFold Slurm sciCORE param 
FASTA Basenames RuntimeException # LocalWords: str dir snglrty tmpdir tmp argparse os ArgumentParser metavar # LocalWords: RawDescriptionHelpFormatter nargs getenv startswith ValueError -# LocalWords: RuntimeError bfd BFD dbs +# LocalWords: RuntimeError bfd BFD dbs mgnify HHblits uniclust Uniclust pdb +# LocalWords: multimer seqres uniprot