diff --git a/actions/ost-compare-ligand-structures b/actions/ost-compare-ligand-structures index d8e55d6b5a22bdd332bf46192b9e839fb56adeec..c05c678d10df72a85116227a1e83ec226faa8d57 100644 --- a/actions/ost-compare-ligand-structures +++ b/actions/ost-compare-ligand-structures @@ -442,6 +442,8 @@ def _Process(model, model_ligands, reference, reference_ligands, args): trg_lig = scorer.target.FindResidue(chain, resnum) out["unassigned_reference_ligands"][reference_ligands_map[ trg_lig.hash_code]] = unassigned + out["unassigned_model_ligand_descriptions"] = scorer.unassigned_model_ligand_descriptions + out["unassigned_reference_ligand_descriptions"] = scorer.unassigned_target_ligand_descriptions if args.lddt_pli: diff --git a/modules/mol/alg/pymod/ligand_scoring.py b/modules/mol/alg/pymod/ligand_scoring.py index e4b37ed65f2a279c466569c603bfba80eba6d079..072f60573ee67e4e0f42a3d1497740b11511c0e6 100644 --- a/modules/mol/alg/pymod/ligand_scoring.py +++ b/modules/mol/alg/pymod/ligand_scoring.py @@ -313,6 +313,10 @@ class LigandScorer: self._unassigned_target_ligands = None self._unassigned_model_ligands = None self._unassigned_target_ligands_reason = {} + self._unassigned_target_ligand_short = None + self._unassigned_model_ligand_short = None + self._unassigned_target_ligand_descriptions = None + self._unassigned_model_ligand_descriptions = None # Keep track of symmetries/isomorphisms # 0.0: no isomorphism # 1.0: isomorphic @@ -1145,7 +1149,7 @@ class LigandScorer: `unassigned=True`: `False` If the scoring object was instantiated with `unassigned=True`, in - addition the unmapped ligands will be reported with a score of `None` + addition the unassigned ligands will be reported with a score of `None` and the following information: * `unassigned`: `True`, @@ -1253,11 +1257,13 @@ class LigandScorer: """Get a dictionary of target ligands not assigned to any model ligand, keyed by target ligand (chain name, :class:`~ost.mol.ResNum`). - Assignment is the same as for the lDDT-PLI score (and is controlled + The assignment for the lDDT-PLI score is used (and is controlled by the `rmsd_assignment` argument). - Each sub-dictionary contains a string from a controlled dictionary + Each item contains a string from a controlled dictionary about the reason for the absence of assignment. + A human-readable description can be obtained from the + :attr:`unassigned_target_ligand_descriptions` property. Currently, the following reasons are reported: @@ -1280,24 +1286,42 @@ class LigandScorer: :rtype: :class:`dict` """ - if self._unassigned_target_ligands is None: + if self._unassigned_target_ligand_short is None: if self.rmsd_assignment: self._assign_ligands_rmsd_only() else: self._assign_ligands_lddt_pli() - return self._unassigned_target_ligands + self._unassigned_target_ligand_short = {} + self._unassigned_target_ligand_descriptions = {} + for cname, res in self._unassigned_target_ligands.items(): + self._unassigned_target_ligand_short[cname] = {} + for resnum, val in res.items(): + self._unassigned_target_ligand_short[cname][resnum] = val[0] + self._unassigned_target_ligand_descriptions[val[0]] = val[1] + return self._unassigned_target_ligand_short + + @property + def unassigned_target_ligand_descriptions(self): + """Get a human-readable description of why target ligands were + unassigned, as a dictionary keyed by the controlled dictionary + from :attr:`unassigned_target_ligands`. + """ + if self._unassigned_target_ligand_descriptions is None: + _ = self.unassigned_target_ligands # assigned there + return self._unassigned_target_ligand_descriptions @property def unassigned_model_ligands(self): """Get a dictionary of model ligands not assigned to any target ligand, keyed by model ligand (chain name, :class:`~ost.mol.ResNum`). - Assignment is the same as for the lDDT-PLI score (and is controlled + The assignment for the lDDT-PLI score is used (and is controlled by the `rmsd_assignment` argument). - Each sub-dictionary contains a tuple with information about the reason - for the absence of assignment, in short and long format. - + Each item contains a string from a controlled dictionary + about the reason for the absence of assignment. + A human-readable description can be obtained from the + :attr:`unassigned_model_ligand_descriptions` property. Currently, the following reasons are reported: * `no_ligand`: there was no ligand in the target. @@ -1322,12 +1346,29 @@ class LigandScorer: :rtype: :class:`dict` """ - if self._unassigned_model_ligands is None: + if self._unassigned_model_ligand_short is None: if self.rmsd_assignment: self._assign_ligands_rmsd_only() else: self._assign_ligands_lddt_pli() - return self._unassigned_model_ligands + self._unassigned_model_ligand_short = {} + self._unassigned_model_ligand_descriptions = {} + for cname, res in self._unassigned_model_ligands.items(): + self._unassigned_model_ligand_short[cname] = {} + for resnum, val in res.items(): + self._unassigned_model_ligand_short[cname][resnum] = val[0] + self._unassigned_model_ligand_descriptions[val[0]] = val[1] + return self._unassigned_model_ligand_short + + @property + def unassigned_model_ligand_descriptions(self): + """Get a human-readable description of why model ligands were + unassigned, as a dictionary keyed by the controlled dictionary + from :attr:`unassigned_model_ligands`. + """ + if self._unassigned_model_ligand_descriptions is None: + _ = self.unassigned_model_ligands # assigned there + return self._unassigned_model_ligand_descriptions def _set_custom_mapping(self, mapping): diff --git a/modules/mol/alg/tests/test_ligand_scoring.py b/modules/mol/alg/tests/test_ligand_scoring.py index 9a279594c5dbe60927986e4e901ac936d8e8e66e..3896aa2fdd9d3a2e85deee047ac1e48ac7b5dc7f 100644 --- a/modules/mol/alg/tests/test_ligand_scoring.py +++ b/modules/mol/alg/tests/test_ligand_scoring.py @@ -502,24 +502,19 @@ class TestLigandScoring(unittest.TestCase): # Check unassigned targets # NA: not in contact with target trg_na = sc.target.FindResidue("L_NA", 1) - assert sc._find_unassigned_target_ligand_reason(trg_na)[0] == "binding_site" - assert sc.unassigned_target_ligands["L_NA"][1][0] == "binding_site" + assert sc.unassigned_target_ligands["L_NA"][1] == "binding_site" # ZN: no representation trg_zn = sc.target.FindResidue("H", 1) - assert sc._find_unassigned_target_ligand_reason(trg_zn)[0] == "model_representation" - assert sc.unassigned_target_ligands["H"][1][0] == "model_representation" + assert sc.unassigned_target_ligands["H"][1] == "model_representation" # AFB: not identical to anything in the model trg_afb = sc.target.FindResidue("G", 1) - assert sc._find_unassigned_target_ligand_reason(trg_afb)[0] == "identity" - assert sc.unassigned_target_ligands["G"][1][0] == "identity" + assert sc.unassigned_target_ligands["G"][1] == "identity" # F.G3D1: J.G3D1 assigned instead trg_fg3d1 = sc.target.FindResidue("F", 1) - assert sc._find_unassigned_target_ligand_reason(trg_fg3d1)[0] == "stoichiometry" - assert sc.unassigned_target_ligands["F"][1][0] == "stoichiometry" + assert sc.unassigned_target_ligands["F"][1] == "stoichiometry" # CMO: disconnected trg_cmo1 = sc.target.FindResidue("L_CMO", 1) - sc._find_unassigned_target_ligand_reason(trg_cmo1)[0] == "disconnected" - assert sc.unassigned_target_ligands["L_CMO"][1][0] == "disconnected" + assert sc.unassigned_target_ligands["L_CMO"][1] == "disconnected" # J.G3D1: assigned to L_2.G3D1 => error trg_jg3d1 = sc.target.FindResidue("J", 1) with self.assertRaises(RuntimeError): @@ -532,23 +527,19 @@ class TestLigandScoring(unittest.TestCase): # Check unassigned models # OXY: not identical to anything in the model mdl_oxy = sc.model.FindResidue("L_OXY", 1) - assert sc._find_unassigned_model_ligand_reason(mdl_oxy)[0] == "identity" - assert sc.unassigned_model_ligands["L_OXY"][1][0] == "identity" + assert sc.unassigned_model_ligands["L_OXY"][1] == "identity" assert sc.lddt_pli["L_OXY"][1] is None # NA: not in contact with target mdl_na = sc.model.FindResidue("L_NA", 1) - assert sc._find_unassigned_model_ligand_reason(mdl_na)[0] == "binding_site" - assert sc.unassigned_model_ligands["L_NA"][1][0] == "binding_site" + assert sc.unassigned_model_ligands["L_NA"][1] == "binding_site" assert sc.lddt_pli["L_NA"][1] is None # ZN: no representation mdl_zn = sc.model.FindResidue("L_ZN", 1) - assert sc._find_unassigned_model_ligand_reason(mdl_zn)[0] == "model_representation" - assert sc.unassigned_model_ligands["L_ZN"][1][0] == "model_representation" + assert sc.unassigned_model_ligands["L_ZN"][1] == "model_representation" assert sc.lddt_pli["L_ZN"][1] is None # MG in L_MG_2 has stupid coordinates and is not assigned mdl_mg_2 = sc.model.FindResidue("L_MG_2", 1) - assert sc._find_unassigned_model_ligand_reason(mdl_mg_2)[0] == "stoichiometry" - assert sc.unassigned_model_ligands["L_MG_2"][1][0] == "stoichiometry" + assert sc.unassigned_model_ligands["L_MG_2"][1] == "stoichiometry" assert sc.lddt_pli["L_MG_2"][1] is None # MG in L_MG_0: assigned to I.MG1 => error mdl_mg_0 = sc.model.FindResidue("L_MG_0", 1) @@ -557,8 +548,7 @@ class TestLigandScoring(unittest.TestCase): assert "L_MG_0" not in sc.unassigned_model_ligands # CMO: disconnected mdl_cmo1 = sc.model.FindResidue("L_CMO", 1) - sc._find_unassigned_model_ligand_reason(mdl_cmo1)[0] == "disconnected" - assert sc.unassigned_model_ligands["L_CMO"][1][0] == "disconnected" + assert sc.unassigned_model_ligands["L_CMO"][1] == "disconnected" # Raises with an invalid ligand with self.assertRaises(ValueError): sc._find_unassigned_model_ligand_reason(sc.target_ligands[0]) @@ -567,47 +557,36 @@ class TestLigandScoring(unittest.TestCase): sc = LigandScorer(mdl, trg, None, None, unassigned=True, rmsd_assignment=True) assert sc.unassigned_model_ligands == { - 'L_ZN': {1: ('model_representation', - 'No representation of the reference binding site was found in the model')}, - 'L_NA': {1: ('binding_site', - 'No residue in proximity of the target ligand')}, - 'L_OXY': {1: ('identity', - 'Ligand was not found in the target (by full graph isomorphism)')}, - 'L_MG_2': {1: ('stoichiometry', - 'Ligand was already assigned to an other model ligand (different stoichiometry)')}, - "L_CMO": {1: ('disconnected', - 'Ligand graph is disconnected')} + 'L_ZN': {1: 'model_representation'}, + 'L_NA': {1: 'binding_site'}, + 'L_OXY': {1: 'identity'}, + 'L_MG_2': {1: 'stoichiometry'}, + "L_CMO": {1: 'disconnected'} } assert sc.unassigned_target_ligands == { - 'G': {1: ('identity', - 'Ligand was not found in the model (by full graph isomorphism)')}, - 'H': {1: ('model_representation', - 'No representation of the reference binding site was found in the model')}, - 'J': {1: ('stoichiometry', - 'Ligand was already assigned to an other target ligand (different stoichiometry)')}, - 'K': {1: ('identity', - 'Ligand was not found in the model (by full graph isomorphism)')}, - 'L_NA': {1: ('binding_site', - 'No residue in proximity of the target ligand')}, - "L_CMO": {1: ('disconnected', - 'Ligand graph is disconnected')} + 'G': {1: 'identity'}, + 'H': {1: 'model_representation'}, + 'J': {1: 'stoichiometry'}, + 'K': {1: 'identity'}, + 'L_NA': {1: 'binding_site'}, + "L_CMO": {1: 'disconnected'} } assert sc.lddt_pli["L_OXY"][1] is None # With missing ligands sc = LigandScorer(mdl.Select("cname=A"), trg, None, None) - assert sc.unassigned_target_ligands["E"][1] == ('no_ligand', 'No ligand in the model') + assert sc.unassigned_target_ligands["E"][1] == 'no_ligand' sc = LigandScorer(mdl, trg.Select("cname=A"), None, None) - assert sc.unassigned_model_ligands["L_2"][1] == ('no_ligand', 'No ligand in the target') + assert sc.unassigned_model_ligands["L_2"][1] == 'no_ligand' sc = LigandScorer(mdl.Select("cname=A"), trg, None, None, unassigned=True, rmsd_assignment=True) - assert sc.unassigned_target_ligands["E"][1] == ('no_ligand', 'No ligand in the model') + assert sc.unassigned_target_ligands["E"][1] == 'no_ligand' sc = LigandScorer(mdl, trg.Select("cname=A"), None, None, unassigned=True, rmsd_assignment=True) - assert sc.unassigned_model_ligands["L_2"][1] == ('no_ligand', 'No ligand in the target') + assert sc.unassigned_model_ligands["L_2"][1] == 'no_ligand' # However not everything must be missing with self.assertRaises(ValueError):