Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
O
openstructure
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container registry
Model registry
Analyze
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
openstructure
Commits
5c61238e
Unverified
Commit
5c61238e
authored
1 year ago
by
Xavier Robin
Browse files
Options
Downloads
Patches
Plain Diff
feat: handle biounits in ligand scorer.
This is made possible now that we have a CreateBU function.
parent
258f724f
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
actions/ost-compare-ligand-structures
+125
-38
125 additions, 38 deletions
actions/ost-compare-ligand-structures
with
125 additions
and
38 deletions
actions/ost-compare-ligand-structures
+
125
−
38
View file @
5c61238e
...
@@ -8,8 +8,7 @@ Example: ost compare-ligand-structures \\
...
@@ -8,8 +8,7 @@ Example: ost compare-ligand-structures \\
--lddt-pli --rmsd
--lddt-pli --rmsd
Structures of polymer entities (proteins and nucleotides) can be given in PDB
Structures of polymer entities (proteins and nucleotides) can be given in PDB
or mmCIF format. If the structure is given in mmCIF format, only the asymmetric
or mmCIF format.
unit (AU) is used for scoring.
Ligands can be given as path to SDF files containing the ligand for both model
Ligands can be given as path to SDF files containing the ligand for both model
(--model-ligands/-ml) and reference (--reference-ligands/-rl). If omitted,
(--model-ligands/-ml) and reference (--reference-ligands/-rl). If omitted,
...
@@ -117,18 +116,46 @@ def _ParseArgs():
...
@@ -117,18 +116,46 @@ def _ParseArgs():
"--mdl-format",
"--mdl-format",
"--model-format",
"--model-format",
dest="model_format",
dest="model_format",
default="auto",
required=False,
choices=["pdb", "mmcif", "cif"],
default=None,
help=("Format of model file. Inferred from path if not given."))
choices=["pdb", "cif", "mmcif"],
help=("Format of model file. pdb reads pdb but also pdb.gz, same "
"applies to cif/mmcif. Inferred from filepath if not given."))
parser.add_argument(
parser.add_argument(
"-rf",
"-rf",
"--reference-format",
"--reference-format",
"--ref-format",
"--ref-format",
dest="reference_format",
dest="reference_format",
default="auto",
required=False,
choices=["pdb", "mmcif", "cif"],
default=None,
help=("Format of reference file. Inferred from path if not given."))
choices=["pdb", "cif", "mmcif"],
help=("Format of reference file. pdb reads pdb but also pdb.gz, same "
"applies to cif/mmcif. Inferred from filepath if not given."))
parser.add_argument(
"-mb",
"--model-biounit",
dest="model_biounit",
required=False,
default=None,
type=int,
help=("Only has an effect if model is in mmcif format. By default, "
"the asymmetric unit (AU) is used for scoring. If there are "
"biounits defined in the mmcif file, you can specify the "
"(0-based) index of the one which should be used."))
parser.add_argument(
"-rb",
"--reference-biounit",
dest="reference_biounit",
required=False,
default=None,
type=int,
help=("Only has an effect if reference is in mmcif format. By default, "
"the asymmetric unit (AU) is used for scoring. If there are "
"biounits defined in the mmcif file, you can specify the "
"(0-based) index of the one which should be used."))
parser.add_argument(
parser.add_argument(
"-ft",
"-ft",
...
@@ -268,16 +295,11 @@ def _ParseArgs():
...
@@ -268,16 +295,11 @@ def _ParseArgs():
return parser.parse_args()
return parser.parse_args()
def _LoadStructure(structure_path, format="auto", fault_tolerant=False):
def _GetStructureFormat(structure_path, sformat=None):
"""Read OST entity either from mmCIF or PDB.
"""Get the structure format and return it as "pdb" or "mmcif".
The returned structure has structure_path attached as structure name
"""
"""
if not os.path.exists(structure_path):
if sformat is None:
raise Exception(f"file not found: {structure_path}")
if format == "auto":
# Determine file format from suffix.
# Determine file format from suffix.
ext = structure_path.split(".")
ext = structure_path.split(".")
if ext[-1] == "gz":
if ext[-1] == "gz":
...
@@ -285,26 +307,60 @@ def _LoadStructure(structure_path, format="auto", fault_tolerant=False):
...
@@ -285,26 +307,60 @@ def _LoadStructure(structure_path, format="auto", fault_tolerant=False):
if len(ext) <= 1:
if len(ext) <= 1:
raise Exception(f"Could not determine format of file "
raise Exception(f"Could not determine format of file "
f"{structure_path}.")
f"{structure_path}.")
format = ext[-1].lower()
sformat = ext[-1].lower()
if sformat in ["mmcif", "cif"]:
return "mmcif"
elif sformat == "pdb":
return sformat
else:
raise Exception(f"Unknown/unsupported file format found for "
f"file {structure_path}.")
def _LoadStructure(structure_path, sformat, fault_tolerant, bu_idx):
"""Read OST entity either from mmCIF or PDB.
The returned structure has structure_path attached as structure name
"""
if not os.path.exists(structure_path):
raise Exception(f"file not found: {structure_path}")
# increase loglevel, as we would pollute the info log with weird stuff
ost.PushVerbosityLevel(ost.LogLevel.Error)
# Load the structure
# Load the structure
if format in ["mmcif", "cif"]:
if sformat == "mmcif":
entity = io.LoadMMCIF(structure_path, fault_tolerant=fault_tolerant)
if bu_idx is not None:
cif_entity, cif_seqres, cif_info = \
io.LoadMMCIF(structure_path, info=True, seqres=True,
fault_tolerant=fault_tolerant)
if len(cif_info.biounits) == 0:
raise RuntimeError(f"No biounit found - requested index"
f" {bu_idx}.")
elif bu_idx < 0:
raise RuntimeError(f"Invalid biounit - requested index {bu_idx}, "
f"must be a positive integer or 0.")
elif bu_idx >= len(cif_info.biounits):
raise RuntimeError(f"Invalid biounit - requested index {bu_idx}, "
f"must be < {len(cif_info.biounits)}.")
biounit = cif_info.biounits[bu_idx]
entity = ost.mol.alg.CreateBU(cif_entity, biounit)
if not entity.IsValid():
raise IOError(
"Provided file does not contain valid entity.")
else:
entity = io.LoadMMCIF(structure_path,
fault_tolerant = fault_tolerant)
if len(entity.residues) == 0:
if len(entity.residues) == 0:
raise Exception(f"No residues found in file: {structure_path}")
raise Exception(f"No residues found in file: {structure_path}")
el
if format == "pdb"
:
el
se
:
entity = io.LoadPDB(structure_path, fault_tolerant
=
fault_tolerant)
entity = io.LoadPDB(structure_path, fault_tolerant
=
fault_tolerant)
if len(entity.residues) == 0:
if len(entity.residues) == 0:
raise Exception(f"No residues found in file: {structure_path}")
raise Exception(f"No residues found in file: {structure_path}")
else:
raise Exception(f"Unknown/ unsupported file extension found for "
f"file {structure_path}.")
# Remove hydrogens
# restore old loglevel and return
cleaned_entity = ost.mol.CreateEntityFromView(entity.Select(
ost.PopVerbosityLevel()
"ele != H and ele != D"), include_exlusive_atoms=False)
entity.SetName(structure_path)
cleaned_entity.SetName(structure_path)
return entity
return cleaned_entity
def _LoadLigands(ligands):
def _LoadLigands(ligands):
...
@@ -322,9 +378,24 @@ def _LoadLigand(file):
...
@@ -322,9 +378,24 @@ def _LoadLigand(file):
"""
"""
Load a single ligand from file names. Return an entity.
Load a single ligand from file names. Return an entity.
"""
"""
ligand_ent = ost.io.LoadEntity(file, format="sdf")
return ost.io.LoadEntity(file, format="sdf")
ligand_view = ligand_ent.Select("ele != H")
return ost.mol.CreateEntityFromView(ligand_view, False)
def _CleanupStructure(entity):
"""Cleans up the structure.
Currently only removes hydrogens (and deuterium atoms).
"""
return ost.mol.CreateEntityFromView(entity.Select(
"ele != H and ele != D"), include_exlusive_atoms=False)
def _CleanupLigands(ligands):
"""Clean up a list of structures.
"""
if ligands is None:
return None
else:
return [_CleanupStructure(lig) for lig in ligands]
def _Validate(structure, ligands, legend, fault_tolerant=False):
def _Validate(structure, ligands, legend, fault_tolerant=False):
...
@@ -333,7 +404,8 @@ def _Validate(structure, ligands, legend, fault_tolerant=False):
...
@@ -333,7 +404,8 @@ def _Validate(structure, ligands, legend, fault_tolerant=False):
If fault_tolerant is True, only warns in case of problems. If False,
If fault_tolerant is True, only warns in case of problems. If False,
raise them as ValueErrors.
raise them as ValueErrors.
At the moment this chiefly checks for ligands in polymers
At the moment this chiefly checks if ligands are in the structure and are
given explicitly at the same time.
"""
"""
if ligands is not None:
if ligands is not None:
for residue in structure.residues:
for residue in structure.residues:
...
@@ -518,23 +590,38 @@ def _Main():
...
@@ -518,23 +590,38 @@ def _Main():
ost.PushVerbosityLevel(args.verbosity)
ost.PushVerbosityLevel(args.verbosity)
try:
try:
# Load structures
# Load structures
reference_format = _GetStructureFormat(args.reference,
sformat=args.reference_format)
reference = _LoadStructure(args.reference,
reference = _LoadStructure(args.reference,
format=args.reference_format,
sformat=reference_format,
bu_idx=args.reference_biounit,
fault_tolerant = args.fault_tolerant)
fault_tolerant = args.fault_tolerant)
model = _LoadStructure(args.model, format=args.model_format,
model_format = _GetStructureFormat(args.model,
sformat=args.model_format)
model = _LoadStructure(args.model,
sformat=model_format,
bu_idx=args.model_biounit,
fault_tolerant = args.fault_tolerant)
fault_tolerant = args.fault_tolerant)
# Load ligands
# Load ligands
model_ligands = _LoadLigands(args.model_ligands)
model_ligands = _LoadLigands(args.model_ligands)
reference_ligands = _LoadLigands(args.reference_ligands)
reference_ligands = _LoadLigands(args.reference_ligands)
# Cleanup
cleaned_reference = _CleanupStructure(reference)
cleaned_model = _CleanupStructure(model)
cleaned_reference_ligands = _CleanupLigands(reference_ligands)
cleaned_model_ligands = _CleanupLigands(model_ligands)
# Validate
# Validate
_Validate(
model,
model_ligands, "model",
_Validate(
cleaned_model, cleaned_
model_ligands, "model",
fault_tolerant = args.fault_tolerant)
fault_tolerant = args.fault_tolerant)
_Validate(reference, reference_ligands, "reference",
_Validate(
cleaned_
reference,
cleaned_
reference_ligands, "reference",
fault_tolerant = args.fault_tolerant)
fault_tolerant = args.fault_tolerant)
out = _Process(model, model_ligands, reference, reference_ligands, args)
out = _Process(cleaned_model, cleaned_model_ligands,
cleaned_reference, cleaned_reference_ligands,
args)
out["status"] = "SUCCESS"
out["status"] = "SUCCESS"
with open(args.output, 'w') as fh:
with open(args.output, 'w') as fh:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment