diff --git a/modules/mol/alg/pymod/ligand_scoring.py b/modules/mol/alg/pymod/ligand_scoring.py index ac1e1fb1f80f6eced54a832ddc7e0b93812914cb..1bfca88ee98391c899b8e04dcfcf5cf2869336f0 100644 --- a/modules/mol/alg/pymod/ligand_scoring.py +++ b/modules/mol/alg/pymod/ligand_scoring.py @@ -301,6 +301,15 @@ class LigandScorer: self._binding_sites = {} self.__model_mapping = None + # Bookkeeping of unassigned ligands + self._unassigned_target_ligands = {} + self._unassigned_model_ligands = {} + # Keep track of symmetries/isomorphisms + # 0: no isomorphism + # 1: isomorphic + # np.nan: not assessed yet + self._assignment_isomorphisms = None + if custom_mapping is not None: self._set_custom_mapping(custom_mapping) @@ -508,19 +517,32 @@ class LigandScorer: if r.handle.GetHashCode() in ref_residues_hashes: ref_bs.AddResidue(r, mol.ViewAddFlag.INCLUDE_ALL) if len(ref_bs.residues) == 0: - LogWarning("No residue in proximity of target ligand " - "%s" % str(ligand)) + raise RuntimeError("Failed to add proximity residues to " + "the reference binding site entity") # Find the representations if self.global_chain_mapping: self._binding_sites[ligand.hash_code] = self.chain_mapper.GetRepr( ref_bs, self.model, inclusion_radius=self.lddt_lp_radius, - global_mapping=self._model_mapping) + global_mapping = self._model_mapping) else: self._binding_sites[ligand.hash_code] = self.chain_mapper.GetRepr( ref_bs, self.model, inclusion_radius=self.lddt_lp_radius, topn=self.binding_sites_topn) + # Flag empty representation + if not self._binding_sites[ligand.hash_code]: + self._unassigned_target_ligands[ligand] = ( + "model_representation", + "No representation of the reference binding site was " + "found in the model") + + else: # if ref_residues_hashes + # Flag missing binding site + self._unassigned_target_ligands[ligand] = ("binding_site", + "No residue in proximity of the target ligand") + self._binding_sites[ligand.hash_code] = [] + return self._binding_sites[ligand.hash_code] @staticmethod @@ -583,6 +605,9 @@ class LigandScorer: (len(self.target_ligands), len(self.model_ligands)), dtype=dict) lddt_pli_full_matrix = np.empty( (len(self.target_ligands), len(self.model_ligands)), dtype=dict) + self._assignment_isomorphisms = np.full( + (len(self.target_ligands), len(self.model_ligands)), fill_value=np.nan) + for target_i, target_ligand in enumerate(self.target_ligands): LogVerbose("Analyzing target ligand %s" % target_ligand) @@ -611,10 +636,12 @@ class LigandScorer: by_atom_index=True) LogVerbose("Ligands %s and %s symmetry match" % ( str(model_ligand), str(target_ligand))) + self._assignment_isomorphisms[target_i, model_i] = 1 except NoSymmetryError: # Ligands are different - skip LogVerbose("No symmetry between %s and %s" % ( str(model_ligand), str(target_ligand))) + self._assignment_isomorphisms[target_i, model_i] = 0 continue substructure_match = len(symmetries[0][0]) != len( model_ligand.atoms) @@ -814,8 +841,12 @@ class LigandScorer: assignments = self._find_ligand_assignment(mat1, mat2) out_main = {} out_details = {} + assigned_trg = [False] * len(self.target_ligands) + assigned_mdl = [False] * len(self.model_ligands) for assignment in assignments: trg_idx, mdl_idx = assignment + assigned_mdl[mdl_idx] = True + assigned_trg[trg_idx] = True mdl_lig = self.model_ligands[mdl_idx] mdl_cname = mdl_lig.chain.name mdl_resnum = mdl_lig.number @@ -1019,6 +1050,13 @@ class LigandScorer: Note: more binding site mappings may be explored during scoring, but only inconsistencies in the selected mapping are reported. + Unassigned ligands are reported as well and contain the following data: + + * `rmsd`: None. + * `target_ligand`: None + * `model_ligand`: residue handle of the model ligand. + * `error`: the likely reason why the ligand could not be assigned. + :rtype: :class:`dict` """ if self._rmsd_details is None: @@ -1190,6 +1228,94 @@ class LigandScorer: chem_mapping, final_mapping, alns) + def _find_unassigned_model_ligand_reason(self, ligand, assignment="lddt_pli"): + # Is this a model ligand? + try: + ligand_idx = self.model_ligands.index(ligand) + except ValueError: + # Raise with a better error message + raise ValueError("Ligand %s is not in self.model_ligands" % ligand) + + # Ensure we are unassigned + details = getattr(self, assignment + "_details") + if ligand.chain.name in details and ligand.number in details[ligand.chain.name]: + raise RuntimeError("Ligand %s is mapped to %s" % (ligand, details[ligand.chain.name][ligand.number]["target_ligand"])) + + # Do we have isomorphisms with the target? + for trg_lig_idx, assigned in enumerate(self._assignment_isomorphisms[:, ligand_idx]): + if np.isnan(assigned): + try: + _ComputeSymmetries( + self.model_ligands[ligand_idx], + self.target_ligands[trg_lig_idx], + substructure_match=self.substructure_match, + by_atom_index=True) + except NoSymmetryError: + assigned = 0. + else: + assigned = 1. + self._assignment_isomorphisms[trg_lig_idx,ligand_idx] = assigned + if assigned == 1.: + # Could have been assigned + # So what's up with this target ligand? + assignment_matrix = getattr(self, assignment + "_matrix") + all_nan = np.all(np.isnan(assignment_matrix[:, ligand_idx])) + if all_nan: + # The assignment matrix is all nans so we have a problem + # with the binding site or the representation + trg_ligand = self.target_ligands[trg_lig_idx] + return self._unassigned_target_ligands[trg_ligand] + else: + # Ligand was assigned to an other + return ("stoichiometry", "Ligand was assigned to an other " + "target ligand (different stoichiometry)") + + # Could not be assigned to any ligand - must be different + return ("identity", "Ligand was not found in the target (by isomorphism)") + + def _find_unassigned_target_ligand_reason(self, ligand, assignment="lddt_pli"): + # Is this a target ligand? + try: + ligand_idx = self.target_ligands.index(ligand) + except ValueError: + # Raise with a better error message + raise ValueError("Ligand %s is not in self.target_ligands" % ligand) + + # Ensure we are unassigned + details = getattr(self, assignment + "_details") + for cname, chain_ligands in details.items(): + for rnum, details in chain_ligands.items(): + if details['target_ligand'] == ligand: + raise RuntimeError("Ligand %s is mapped to %s.%s" % ( + ligand, cname, rnum)) + + # Is it because there was no valid binding site or no representation? + if ligand in self._unassigned_target_ligands: + return self._unassigned_target_ligands[ligand] + # Or because no symmetry? + for model_lig_idx, assigned in enumerate( + self._assignment_isomorphisms[ligand_idx, :]): + if np.isnan(assigned): + try: + _ComputeSymmetries( + self.model_ligands[model_lig_idx], + self.target_ligands[ligand_idx], + substructure_match=self.substructure_match, + by_atom_index=True) + except NoSymmetryError: + assigned = 0. + else: + assigned = 1. + self._assignment_isomorphisms[ligand_idx,model_lig_idx] = assigned + if assigned: + # Could have been assigned but was assigned to a different ligand + return ("stoichiometry", "Ligand was assigned to an other " + "model ligand (different stoichiometry)") + # Could not be assigned to any ligand - must be different + return ("identity", "Ligand was not found in the model (by isomorphism)") + + + def _ResidueToGraph(residue, by_atom_index=False): """Return a NetworkX graph representation of the residue. diff --git a/modules/mol/alg/tests/test_ligand_scoring.py b/modules/mol/alg/tests/test_ligand_scoring.py index 948941c69a06bdc945cfe7f16bf2a3b7648d8c48..e7c7a1bba2307e6b7eb5aa744845307ae130d1cb 100644 --- a/modules/mol/alg/tests/test_ligand_scoring.py +++ b/modules/mol/alg/tests/test_ligand_scoring.py @@ -420,6 +420,113 @@ class TestLigandScoring(unittest.TestCase): assert [str(r) for r in sc.rmsd_details["D"][1]["bs_ref_res"]] == expected_bs_ref_res ost.PopVerbosityLevel() + def test_unassigned_reasons(self): + """Test reasons for being unassigned.""" + trg = _LoadMMCIF("1r8q.cif.gz") + mdl = _LoadMMCIF("P84080_model_02.cif.gz") + + def _AppendResidueWithBonds(ed, chain, old_res): + new_res = ed.AppendResidue(chain, old_res.name) + for old_atom in old_res.atoms: + ed.InsertAtom(new_res, old_atom.name, old_atom.pos, old_atom.element, + old_atom.occupancy, old_atom.b_factor, old_atom.is_hetatom) + for old_bond in old_atom.bonds: + new_first = new_res.FindAtom(old_bond.first.name) + new_second = new_res.FindAtom(old_bond.second.name) + ed.Connect(new_first, new_second) + return new_res + + # Add interesting ligands to model and target + mdl_ed = mdl.EditXCS() + trg_ed = trg.EditXCS() + + # Add ZN: representation in the model (chain missing in model) + new_chain = mdl_ed.InsertChain("L_ZN") + mdl_ed.SetChainType(new_chain, mol.ChainType.CHAINTYPE_NON_POLY) + new_res = _AppendResidueWithBonds(mdl_ed, new_chain, trg.Select("rname=ZN").residues[0].handle) + new_res.is_ligand = True + + # Add NA: not in contact with target + new_chain = trg_ed.InsertChain("L_NA") + trg_ed.SetChainType(new_chain, mol.ChainType.CHAINTYPE_NON_POLY) + new_res = trg_ed.AppendResidue(new_chain, "NA") + new_atom = trg_ed.InsertAtom(new_res, "NA", geom.Vec3(100, 100, 100), "NA") + new_res.is_ligand = True + new_chain = mdl_ed.InsertChain("L_NA") + mdl_ed.SetChainType(new_chain, mol.ChainType.CHAINTYPE_NON_POLY) + new_res = mdl_ed.AppendResidue(new_chain, "NA") + new_atom = mdl_ed.InsertAtom(new_res, "NA", geom.Vec3(100, 100, 100), "NA") + new_res.is_ligand = True + + # Add OXY: no symmetry/ not identical - + new_chain = mdl_ed.InsertChain("L_OXY") + mdl_ed.SetChainType(new_chain, mol.ChainType.CHAINTYPE_NON_POLY) + new_res = mdl_ed.AppendResidue(new_chain, "OXY") + new_atom1 = mdl_ed.InsertAtom(new_res, "O1", geom.Vec3(0, 0, 0), "O") + new_atom2 = mdl_ed.InsertAtom(new_res, "O2", geom.Vec3(1, 1, 1), "O") + mdl_ed.Connect(new_atom1, new_atom2) + new_res.is_ligand = True + + # Add 3 MG in model: assignment/stoichiometry + mg_pos = [ + mdl.geometric_center, + mdl.geometric_center + 1, + mdl.geometric_center + 100 + ] + for i in range(3): + new_chain = mdl_ed.InsertChain("L_MG_%d" % i) + mdl_ed.SetChainType(new_chain, mol.ChainType.CHAINTYPE_NON_POLY) + new_res = mdl_ed.AppendResidue(new_chain, "MG") + new_atom = mdl_ed.InsertAtom(new_res, "MG", mg_pos[i], "MG") + new_res.is_ligand = True + + mdl_ed.UpdateICS() + trg_ed.UpdateICS() + + sc = LigandScorer(mdl, trg, None, None) + + # Check unassigned targets + # NA: not in contact with target + trg_na = sc.target.FindResidue("L_NA", 1) + assert sc._find_unassigned_target_ligand_reason(trg_na)[0] == "binding_site" + # ZN: no representation + trg_zn = sc.target.FindResidue("H", 1) + assert sc._find_unassigned_target_ligand_reason(trg_zn)[0] == "model_representation" + # CMO: not identical to anything in the model + trg_afb = sc.target.FindResidue("G", 1) + assert sc._find_unassigned_target_ligand_reason(trg_afb)[0] == "identity" + # F.G3D1: J.G3D1 assigned instead + trg_fg3d1 = sc.target.FindResidue("F", 1) + assert sc._find_unassigned_target_ligand_reason(trg_fg3d1)[0] == "stoichiometry" + # J.G3D1: assigned to L_2.G3D1 => error + trg_jg3d1 = sc.target.FindResidue("J", 1) + with self.assertRaises(RuntimeError): + sc._find_unassigned_target_ligand_reason(trg_jg3d1) + # Raises with an invalid ligand + with self.assertRaises(ValueError): + sc._find_unassigned_target_ligand_reason(sc.model_ligands[0]) + + # Check unassigned models + # OXY: not identical to anything in the model + mdl_oxy = sc.model.FindResidue("L_OXY", 1) + assert sc._find_unassigned_model_ligand_reason(mdl_oxy)[0] == "identity" + # NA: not in contact with target + mdl_na = sc.model.FindResidue("L_NA", 1) + assert sc._find_unassigned_model_ligand_reason(mdl_na)[0] == "binding_site" + # ZN: no representation + mdl_zn = sc.model.FindResidue("L_ZN", 1) + assert sc._find_unassigned_model_ligand_reason(mdl_zn)[0] == "model_representation" + # MG in L_MG_2 has stupid coordinates and is not assigned + mdl_mg_2 = sc.model.FindResidue("L_MG_2", 1) + assert sc._find_unassigned_model_ligand_reason(mdl_mg_2)[0] == "stoichiometry" + # MG in L_MG_0: assigned to I.MG1 => error + mdl_mg_0 = sc.model.FindResidue("L_MG_0", 1) + with self.assertRaises(RuntimeError): + sc._find_unassigned_model_ligand_reason(mdl_mg_0) + # Raises with an invalid ligand + with self.assertRaises(ValueError): + sc._find_unassigned_model_ligand_reason(sc.target_ligands[0]) + if __name__ == "__main__": from ost import testutils