Skip to content
Snippets Groups Projects
Unverified Commit be93bcc5 authored by Xavier Robin's avatar Xavier Robin
Browse files

fix: SCHWED-6001: only consider residues in chain mapper

This should prevent any further "substructure has residue [...] which is not
in self.target" errors. Add test and doc to clarify what we ignore.
parent 8b581de8
No related branches found
No related tags found
No related merge requests found
......@@ -29,6 +29,13 @@ class LigandScorer:
onto the model, symmetry-correction, and finally assignment (mapping)
of model and target ligands, as described in (Manuscript in preparation).
The binding site is defined based on a radius around the target ligand
in the reference structure only. It only contains protein and nucleic
acid chains that pass the criteria for the
:class:`chain mapping <ost.mol.alg.chain_mapping>`. This means ignoring
other ligands, waters, short polymers as well as any incorrectly connected
chains that may be in proximity.
Results are available as matrices (`(lddt_pli|rmsd)_matrix`), where every
target-model score is reported in a matrix; as `(lddt_pli|rmsd)` where
a model-target assignment has been determined (see below) and reported in
......@@ -186,9 +193,9 @@ class LigandScorer:
:type chain_mapper: :class:`ost.mol.alg.chain_mapping.ChainMapper`
:param substructure_match: Set this to True to allow partial target ligand.
:type substructure_match: :class:`bool`
:param radius: Inclusion radius for the binding site. Any residue with
atoms within this distance of the ligand will be included
in the binding site.
:param radius: Inclusion radius for the binding site. Residues with
atoms within this distance of the ligand will be considered
for inclusion in the binding site.
:type radius: :class:`float`
:param lddt_pli_radius: lDDT inclusion radius for lDDT-PLI.
:type lddt_pli_radius: :class:`float`
......@@ -456,7 +463,10 @@ class LigandScorer:
def _get_binding_sites(self, ligand):
"""Find representations of the binding site of *ligand* in the model.
Ignore other ligands and waters that may be in proximity.
Only consider protein and nucleic acid chains that pass the criteria
for the :class:`ost.mol.alg.chain_mapping`. This means ignoring other
ligands, waters, short polymers as well as any incorrectly connected
chain that may be in proximity.
:param ligand: Defines the binding site to identify.
:type ligand: :class:`~ost.mol.ResidueHandle`
......@@ -465,18 +475,29 @@ class LigandScorer:
# create view of reference binding site
ref_residues_hashes = set() # helper to keep track of added residues
ignored_residue_hashes = {ligand.hash_code}
for ligand_at in ligand.atoms:
close_atoms = self.target.FindWithin(ligand_at.GetPos(), self.radius)
for close_at in close_atoms:
# Skip other ligands and waters.
# This assumes that .IsLigand() is properly set on the entity's
# residues.
# Skip any residue not in the chain mapping target
ref_res = close_at.GetResidue()
if not (ref_res.is_ligand or
ref_res.chem_type == mol.ChemType.WATERS):
h = ref_res.handle.GetHashCode()
if h not in ref_residues_hashes:
h = ref_res.handle.GetHashCode()
if h not in ref_residues_hashes and \
h not in ignored_residue_hashes:
if self.chain_mapper.target.ViewForHandle(ref_res).IsValid():
h = ref_res.handle.GetHashCode()
ref_residues_hashes.add(h)
elif ref_res.is_ligand:
LogWarning("Ignoring ligand %s in binding site of %s" % (
ref_res.qualified_name, ligand.qualified_name))
ignored_residue_hashes.add(h)
elif ref_res.chem_type == mol.ChemType.WATERS:
pass # That's ok, no need to warn
else:
LogWarning("Ignoring residue %s in binding site of %s" % (
ref_res.qualified_name, ligand.qualified_name))
ignored_residue_hashes.add(h)
# reason for doing that separately is to guarantee same ordering of
# residues as in underlying entity. (Reorder by ResNum seems only
......
......@@ -3,6 +3,7 @@ from functools import lru_cache
import numpy as np
import ost
from ost import io, mol, geom
# check if we can import: fails if numpy or scipy not available
try:
......@@ -407,6 +408,19 @@ class TestLigandScoring(unittest.TestCase):
sc = LigandScorer(mdl, trg, None, None, rmsd_assignment=True)
assert sc.rmsd_details["L_2"][1]["target_ligand"] == sc.lddt_pli_details["L_2"][1]["target_ligand"]
def test_ignore_binding_site(self):
"""Test that we ignore non polymer stuff in the binding site.
NOTE: we should consider changing this behavior in the future and take
other ligands, peptides and short oligomers into account for superposition.
When that's the case this test should be adapter
"""
trg = _LoadMMCIF("1SSP.cif.gz")
sc = LigandScorer(trg, trg, None, None)
expected_bs_ref_res = ['C.GLY62', 'C.GLN63', 'C.ASP64', 'C.PRO65', 'C.TYR66', 'C.CYS76', 'C.PHE77', 'C.ASN123', 'C.HIS187']
ost.PushVerbosityLevel(ost.LogLevel.Error)
assert [str(r) for r in sc.rmsd_details["D"][1]["bs_ref_res"]] == expected_bs_ref_res
ost.PopVerbosityLevel()
if __name__ == "__main__":
from ost import testutils
......
File added
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment