diff --git a/modules/mol/alg/pymod/ligand_scoring.py b/modules/mol/alg/pymod/ligand_scoring.py index 6b567c358bcc48c1d3353e0e955f49a80722dc21..cc54c2f5f96715765af43456b0a1ef133c74a9e9 100644 --- a/modules/mol/alg/pymod/ligand_scoring.py +++ b/modules/mol/alg/pymod/ligand_scoring.py @@ -29,6 +29,13 @@ class LigandScorer: onto the model, symmetry-correction, and finally assignment (mapping) of model and target ligands, as described in (Manuscript in preparation). + The binding site is defined based on a radius around the target ligand + in the reference structure only. It only contains protein and nucleic + acid chains that pass the criteria for the + :class:`chain mapping <ost.mol.alg.chain_mapping>`. This means ignoring + other ligands, waters, short polymers as well as any incorrectly connected + chains that may be in proximity. + Results are available as matrices (`(lddt_pli|rmsd)_matrix`), where every target-model score is reported in a matrix; as `(lddt_pli|rmsd)` where a model-target assignment has been determined (see below) and reported in @@ -186,9 +193,9 @@ class LigandScorer: :type chain_mapper: :class:`ost.mol.alg.chain_mapping.ChainMapper` :param substructure_match: Set this to True to allow partial target ligand. :type substructure_match: :class:`bool` - :param radius: Inclusion radius for the binding site. Any residue with - atoms within this distance of the ligand will be included - in the binding site. + :param radius: Inclusion radius for the binding site. Residues with + atoms within this distance of the ligand will be considered + for inclusion in the binding site. :type radius: :class:`float` :param lddt_pli_radius: lDDT inclusion radius for lDDT-PLI. :type lddt_pli_radius: :class:`float` @@ -456,7 +463,10 @@ class LigandScorer: def _get_binding_sites(self, ligand): """Find representations of the binding site of *ligand* in the model. - Ignore other ligands and waters that may be in proximity. + Only consider protein and nucleic acid chains that pass the criteria + for the :class:`ost.mol.alg.chain_mapping`. This means ignoring other + ligands, waters, short polymers as well as any incorrectly connected + chain that may be in proximity. :param ligand: Defines the binding site to identify. :type ligand: :class:`~ost.mol.ResidueHandle` @@ -465,18 +475,29 @@ class LigandScorer: # create view of reference binding site ref_residues_hashes = set() # helper to keep track of added residues + ignored_residue_hashes = {ligand.hash_code} for ligand_at in ligand.atoms: close_atoms = self.target.FindWithin(ligand_at.GetPos(), self.radius) for close_at in close_atoms: - # Skip other ligands and waters. - # This assumes that .IsLigand() is properly set on the entity's - # residues. + # Skip any residue not in the chain mapping target ref_res = close_at.GetResidue() - if not (ref_res.is_ligand or - ref_res.chem_type == mol.ChemType.WATERS): - h = ref_res.handle.GetHashCode() - if h not in ref_residues_hashes: + h = ref_res.handle.GetHashCode() + if h not in ref_residues_hashes and \ + h not in ignored_residue_hashes: + if self.chain_mapper.target.ViewForHandle(ref_res).IsValid(): + h = ref_res.handle.GetHashCode() ref_residues_hashes.add(h) + elif ref_res.is_ligand: + LogWarning("Ignoring ligand %s in binding site of %s" % ( + ref_res.qualified_name, ligand.qualified_name)) + ignored_residue_hashes.add(h) + elif ref_res.chem_type == mol.ChemType.WATERS: + pass # That's ok, no need to warn + else: + LogWarning("Ignoring residue %s in binding site of %s" % ( + ref_res.qualified_name, ligand.qualified_name)) + ignored_residue_hashes.add(h) + # reason for doing that separately is to guarantee same ordering of # residues as in underlying entity. (Reorder by ResNum seems only diff --git a/modules/mol/alg/tests/test_ligand_scoring.py b/modules/mol/alg/tests/test_ligand_scoring.py index 2cd83eca03817052e318cec9de8d7419f046e748..0ec50a4b687f19f7dbf0bdabd474a06d0cc2e77c 100644 --- a/modules/mol/alg/tests/test_ligand_scoring.py +++ b/modules/mol/alg/tests/test_ligand_scoring.py @@ -3,6 +3,7 @@ from functools import lru_cache import numpy as np +import ost from ost import io, mol, geom # check if we can import: fails if numpy or scipy not available try: @@ -407,6 +408,19 @@ class TestLigandScoring(unittest.TestCase): sc = LigandScorer(mdl, trg, None, None, rmsd_assignment=True) assert sc.rmsd_details["L_2"][1]["target_ligand"] == sc.lddt_pli_details["L_2"][1]["target_ligand"] + def test_ignore_binding_site(self): + """Test that we ignore non polymer stuff in the binding site. + NOTE: we should consider changing this behavior in the future and take + other ligands, peptides and short oligomers into account for superposition. + When that's the case this test should be adapter + """ + trg = _LoadMMCIF("1SSP.cif.gz") + sc = LigandScorer(trg, trg, None, None) + expected_bs_ref_res = ['C.GLY62', 'C.GLN63', 'C.ASP64', 'C.PRO65', 'C.TYR66', 'C.CYS76', 'C.PHE77', 'C.ASN123', 'C.HIS187'] + ost.PushVerbosityLevel(ost.LogLevel.Error) + assert [str(r) for r in sc.rmsd_details["D"][1]["bs_ref_res"]] == expected_bs_ref_res + ost.PopVerbosityLevel() + if __name__ == "__main__": from ost import testutils diff --git a/modules/mol/alg/tests/testfiles/1SSP.cif.gz b/modules/mol/alg/tests/testfiles/1SSP.cif.gz new file mode 100644 index 0000000000000000000000000000000000000000..be272f4d8a027f58f99e3256ed0ca63e88a7f8c0 Binary files /dev/null and b/modules/mol/alg/tests/testfiles/1SSP.cif.gz differ