Skip to content
Snippets Groups Projects
Commit ef96944c authored by Bienchen's avatar Bienchen
Browse files

Add arguments needed for model_preset multimer

parent 322ec56c
Branches
Tags
No related merge requests found
Changes in Release 2.0.0
=================================================================================
* `run_af2` Python package:
* Prepare to run AF2 pipeline 2.1
* Rename command line options `--pdb70_database_path` and
`--obsolete_pdbs_path` to `--pdb70-database-path` and `--obsolete-pdbs-path`
Changes in Release 1.0.5
=================================================================================
......
......@@ -28,7 +28,8 @@
if test $# -lt 2; then
echo "usage: submit-af2 [--use-gpu]"
echo " [--db-preset {reduced_dbs,full_dbs}]"
echo " [model-preset {monomer,monomer_casp14,monomer_ptm,multimer}]"
echo " [--model-preset {monomer,monomer_casp14,monomer_ptm,multimer}]"
echo " [--is-prokaryote-list <bool>,<bool>,...]"
echo " [--max-template-date YYYY-MM-DD]"
echo " <OUTPUT DIR> <FASTA FILE> [<FASTA FILE> ...]"
exit 1
......@@ -36,9 +37,9 @@ fi
# There are only a few optional (non-positional) arguments, so simply loop
# over the candidates, once per possible option.
# This should be turned into a case statement if more arguments get added.
SLURM_AF_PARAMS="--exclude=lii[02-28]"
SLURM_AF_PARAMS="" #"--exclude=lii[02-28]"
AF_PIPELINE_PARAM=""
for i in 1 2 3 4; do
for i in 1 2 3 4 5; do
if test x"${1}" = x"--use-gpu"; then
shift
SLURM_AF_PARAMS="--partition=a100,rtx8000 --gres=gpu:1"
......@@ -54,6 +55,12 @@ for i in 1 2 3 4; do
else if test x"${1}" = x"--model-preset"; then
shift
AF_PIPELINE_PARAM="${AF_PIPELINE_PARAM} --model-preset ${1}"
shift
else if test x"${1}" = x"--is-prokaryote-list"; then
shift
AF_PIPELINE_PARAM="${AF_PIPELINE_PARAM} --is-prokaryote-list ${1}"
shift
fi
fi
fi
fi
......
......@@ -14,8 +14,8 @@ functionality to be external, we should create a proper module for that.
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
......@@ -110,6 +110,16 @@ def parse_af2_arguments(parser):
+ "AF2 pipeline.",
default="monomer",
)
af2_group.add_argument(
"-i",
"--is-prokaryote-list",
type=str,
help="For the multimer model, declare sequences to by prokaryotic or "
+ "not. Goes by true/ false, needs to be listed for every input FASTA "
+ "as a list.",
metavar="<true|false>,<true|false>,...",
default=None,
)
af2_group.add_argument(
"--version",
action="version",
......@@ -161,12 +171,26 @@ def parse_af2_arguments(parser):
default=None,
)
af2_group.add_argument(
"--pdb70_database_path",
"--uniprot-database-path",
type=str,
help="Path to the UniProt database, for the JackHMMer call.",
metavar="<PATH>",
default=None,
)
af2_group.add_argument(
"--pdb70-database-path",
type=str,
help="Path to the PDB70 database, prefix for the HHsearch call.",
metavar="<PATH/ PREFIX>",
default=None,
)
af2_group.add_argument(
"--pdb-seqres-database-path",
type=str,
help="Path to the PDB seqres database, for the hhmsearch call.",
metavar="<PATH>",
default=None,
)
af2_group.add_argument(
"--template-mmcif-dir",
type=str,
......@@ -175,7 +199,7 @@ def parse_af2_arguments(parser):
default=None,
)
af2_group.add_argument(
"--obsolete_pdbs_path",
"--obsolete-pdbs-path",
type=str,
help="File mapping obsoleted PDB entries to their replacements.",
metavar="<File>",
......@@ -200,6 +224,26 @@ def parse_af2_arguments(parser):
flush=True,
)
# check that model_preset settings go along with databases
if opts.model_preset == "multimer":
if opts.pdb70_database_path is not None:
print(
f"WARNING: '--model-preset {opts.model_preset}' disables use "
+ "of '--pdb70_database_path'.",
file=sys.stderr,
flush=True,
)
# check that --is-prokaryote-list has as many values as sequence files
if opts.is_prokaryote_list is not None:
if opts.is_prokaryote_list.count(",") + 1 != len(opts.fasta_files):
print(
"Number of values for '--is-prokaryote-list' does not match "
+ "the number of FASTA files. Aborting.",
file=sys.stderr,
)
sys.exit(1)
if opts.singularity_image is not None:
if not os.path.exists(opts.singularity_image):
print(
......
......@@ -116,6 +116,7 @@ def _assemble_singularity_call( # pylint: disable=too-many-arguments
use_gpu,
db_preset,
model_preset,
is_prokaryote_list,
data_paths,
):
"""Assemble the command to run AF2 from the Singularity image."""
......@@ -151,6 +152,9 @@ def _assemble_singularity_call( # pylint: disable=too-many-arguments
"--logtostderr",
]
)
if is_prokaryote_list is not None:
snglrty_cmd.append(f"--is_prokaryote_list={is_prokaryote_list}")
for flag, path in data_paths.items():
snglrty_cmd.append(f"--{flag}={path}")
......@@ -189,6 +193,7 @@ def run_af2_singularity_image( # pylint: disable=too-many-arguments
use_gpu=False,
db_preset="full_dbs",
model_preset="monomer",
is_prokaryote_list=None,
af2_image_file=None,
# "/export/soft/singularity-containers/alphafold",
af2_image_dir="/scicore/home/schwede/GROUP/alphafold_data/",
......@@ -205,7 +210,11 @@ def run_af2_singularity_image( # pylint: disable=too-many-arguments
+ "frozen_210812T135116/bfd-first_non_consensus_sequences.fasta",
uniclust30_database_path="/scicore/data/managed/Uniclust/"
+ "frozen_181015T170110/uniclust30_2018_08/uniclust30_2018_08",
uniprot_database_path="/scicore/home/schwede/scicore/home/schwede/GROUP/"
+ "alphafold_data/uniprot/uniprot.fasta",
pdb70_database_path="/scicore/data/managed/PDB70/frozen_210805T142857/pdb70",
pdb_seqres_database_path="/scicore/home/schwede/scicore/home/schwede/GROUP/"
+ "alphafold_data/pdb_seqres/pdb_seqres.txt",
template_mmcif_dir="/scicore/data/managed/PDB/latest/data/structures/all/"
+ "mmcif_files/",
obsolete_pdbs_path="/scicore/data/managed/PDB/latest/data/status/"
......@@ -240,6 +249,11 @@ def run_af2_singularity_image( # pylint: disable=too-many-arguments
:param model_preset: Model configuration - corresponds to the model_preset
parameter in AF2.
:type model_preset: :class:`str`
:param is_prokaryote_list: For the multimer model, mark sequences as
prokaryotic. Corresponds to the
is_prokaryote_list parameter in AF2.
:type is_prokaryote_list: :class:`str` in the format expected by AF2, e.g.
"true,false,true"
:param af2_image_file: Declare a Singularity image to run the AF2 pipeline
from. If None, an image from af2_image_dir will be
used.
......@@ -276,8 +290,17 @@ def run_af2_singularity_image( # pylint: disable=too-many-arguments
:param uniclust30_database_path: Uniclust30 database prefix. Corresponds to
uniclust30_database_path in AF2.
:type uniclust30_database_path: :class:`str`
:param uniprot_database_path: UniProt sequence file. Corresponds to
uniprot_database_path in AF2. Needed for
model_preset == multimer.
:type uniprot_database_path: :class:`str`
:param pdb70_database_path: PDB70 database prefix. Corresponds to
pdb70_database_path in AF2.
pdb70_database_path in AF2. Will be disabled if
model_preset == multimer.
:param pdb_seqres_database_path: PDB seqres file. Corresponds to
pdb_seqres_database_path in AF2. Needed for
model_preset == multimer.
:type pdb_seqres_database_path: :class:`str`
:type pdb70_database_path: :class:`str`
:param template_mmcif_dir: Path to the template structures. Corresponds to
template_mmcif_dir in AF2.
......@@ -303,7 +326,7 @@ def run_af2_singularity_image( # pylint: disable=too-many-arguments
)
if model_preset not in _shared.MODEL_PRESET_CHOICES:
raise ValueError(
f"model_reset {model_preset} not allowed, known "
f"model_preset {model_preset} not allowed, known "
+ f"values: {', '.join(_shared.MODEL_PRESET_CHOICES)}"
)
......@@ -321,6 +344,10 @@ def run_af2_singularity_image( # pylint: disable=too-many-arguments
data_paths["uniclust30_database_path"] = uniclust30_database_path
elif db_preset == "reduced_dbs":
data_paths["small_bfd_database_path"] = small_bfd_database_path
if model_preset == "multimer":
del data_paths["pdb70_database_path"]
data_paths["pdb_seqres_database_path"] = pdb_seqres_database_path
data_paths["uniprot_database_path"] = uniprot_database_path
# Collect bind mountpoints for singularity, mountpoints for input data plus
# adjusting input file paths, deal with the output directory.
......@@ -348,6 +375,7 @@ def run_af2_singularity_image( # pylint: disable=too-many-arguments
use_gpu,
db_preset,
model_preset,
is_prokaryote_list,
data_paths,
)
......@@ -396,8 +424,12 @@ def main():
data_paths["small_bfd_database_path"] = opts.bfd_database_path
if opts.uniclust30_database_path is not None:
data_paths["uniclust30_database_path"] = opts.uniclust30_database_path
if opts.uniprot_database_path is not None:
data_paths["uniprot_database_path"] = opts.uniprot_database_path
if opts.pdb70_database_path is not None:
data_paths["pdb70_database_path"] = opts.pdb70_database_path
if opts.pdb_seqres_database_path is not None:
data_paths["pdb_seqres_database_path"] = opts.pdb_seqres_database_path
if opts.template_mmcif_dir is not None:
data_paths["template_mmcif_dir"] = opts.template_mmcif_dir
if opts.obsolete_pdbs_path is not None:
......@@ -411,6 +443,7 @@ def main():
opts.use_gpu,
opts.db_preset,
opts.model_preset,
opts.is_prokaryote_list,
af2_image_file=opts.singularity_image,
**data_paths,
)
......@@ -422,4 +455,5 @@ def main():
# LocalWords: AlphaFold Slurm sciCORE param FASTA Basenames RuntimeException
# LocalWords: str dir snglrty tmpdir tmp argparse os ArgumentParser metavar
# LocalWords: RawDescriptionHelpFormatter nargs getenv startswith ValueError
# LocalWords: RuntimeError bfd BFD dbs
# LocalWords: RuntimeError bfd BFD dbs mgnify HHblits uniclust Uniclust pdb
# LocalWords: multimer seqres uniprot
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment