diff --git a/modules/mol/alg/pymod/ligand_scoring_base.py b/modules/mol/alg/pymod/ligand_scoring_base.py index d9ad9003978a5ccaf6fdecbb6576f80d9f2e318c..0c3c74db930143c5f849b62c1b8d85f4bc893191 100644 --- a/modules/mol/alg/pymod/ligand_scoring_base.py +++ b/modules/mol/alg/pymod/ligand_scoring_base.py @@ -221,7 +221,7 @@ class LigandScorer: # keep track of states # simple integers instead of enums - documentation of property describes # encoding - self._states_matrix = None + self._state_matrix = None # score matrices self._score_matrix = None @@ -233,31 +233,42 @@ class LigandScorer: self._score_dict = None self._aux_dict = None + # human readable description of states - child class must extend + # with child class specific states + # each state code comes with a tuple of two elements: + # 1) short description 2) human readable description + # The actual states are set in _compute_scores in :class:`LigandScorer` + # or _compute of the child class. + if self.substructure_match: + iso = "subgraph isomorphism" + else: + iso = "full graph isomorphism" + self.state_decoding = \ + {0: ("OK", "OK"), + 1: ("identity", f"Ligands could not be matched (by {iso})"), + 2: ("symmetries", "Too many symmetries between ligand atoms were " + "found - increasing max_symmetries might help"), + 3: ("no_iso", "No fully isomorphic match could be found - enabling " + "substructure_match might allow a match"), + 4: ("disconnected", "Ligand graph is disconnected"), + 9: ("unknown", "An unknown error occured in LigandScorer")} + @property - def states_matrix(self): + def state_matrix(self): """ Encodes states of ligand pairs - Expect a valid score if respective location in this matrix is 0. + Ligand pairs can be matched and a valid score can be expected if + respective location in this matrix is 0. Target ligands are in rows, model ligands in columns. States are encoded as integers <= 9. Larger numbers encode errors for child classes. 
- - * 0: Ligand pair can be matched and valid score is computed. - * 1: Ligand pair has no valid symmetry - cannot be matched. - * 2: Ligand pair has too many symmetries - cannot be matched. - You might be able to get a match by increasing *max_symmetries*. - * 3: Ligand pair has no isomorphic symmetries - cannot be matched. - Target ligand is subgraph of model ligand. This error only occurs - if *substructure_match* is False. These cases may become - 0 if this flag is enabled. - * 4: Disconnected graph error - cannot be matched. - Either target ligand or model ligand has disconnected graph. - * 9: Unknown Error - cannot be matched + + Human readable description is accessible as `scorer.state_decoding[2]` :rtype: :class:`~numpy.ndarray` """ - if self._states_matrix is None: + if self._state_matrix is None: self._compute_scores() - return self._states_matrix + return self._state_matrix @property def score_matrix(self): @@ -267,7 +278,7 @@ class LigandScorer: NaN values indicate that no value could be computed (i.e. different ligands). In other words: values are only valid if respective location - :attr:`~states` is 0. + :attr:`~state_matrix` is 0. :rtype: :class:`~numpy.ndarray` """ @@ -283,7 +294,7 @@ class LigandScorer: NaN values indicate that no value could be computed (i.e. different ligands). In other words: values are only valid if respective location - :attr:`~states` is 0. If `substructure_match=False`, only full + :attr:`~state_matrix` is 0. If `substructure_match=False`, only full match isomorphisms are considered, and therefore only values of 1.0 can be observed. @@ -304,7 +315,7 @@ class LigandScorer: empty dictionaries indicate that the child class simply didn't return anything or that no value could be computed (e.g. different ligands). In other words: values are only valid if respective location - :attr:`~states` is 0. + :attr:`~state_matrix` is 0. 
:rtype: :class:`~numpy.ndarray` """ @@ -318,7 +329,7 @@ class LigandScorer: Implements a greedy algorithm to assign target and model ligands with each other. Starts from each valid ligand pair as indicated - by a state of 0 in :attr:`states_matrix`. Each iteration first selects + by a state of 0 in :attr:`state_matrix`. Each iteration first selects high coverage pairs. Given max_coverage defined as the highest coverage observed in the available pairs, all pairs with coverage in [max_coverage-*coverage_delta*, max_coverage] are selected. @@ -336,7 +347,7 @@ class LigandScorer: tmp = list() for trg_idx in range(self.score_matrix.shape[0]): for mdl_idx in range(self.score_matrix.shape[1]): - if self.states_matrix[trg_idx, mdl_idx] == 0: + if self.state_matrix[trg_idx, mdl_idx] == 0: tmp.append((self.score_matrix[trg_idx, mdl_idx], self.coverage_matrix[trg_idx, mdl_idx], trg_idx, mdl_idx)) @@ -565,7 +576,7 @@ class LigandScorer: shape = (len(self.target_ligands), len(self.model_ligands)) self._score_matrix = np.full(shape, np.nan, dtype=np.float32) self._coverage_matrix = np.full(shape, np.nan, dtype=np.float32) - self._states_matrix = np.full(shape, -1, dtype=np.int32) + self._state_matrix = np.full(shape, -1, dtype=np.int32) self._aux_matrix = np.empty(shape, dtype=dict) for target_id, target_ligand in enumerate(self.target_ligands): @@ -588,24 +599,24 @@ class LigandScorer: # Ligands are different - skip LogVerbose("No symmetry between %s and %s" % ( str(model_ligand), str(target_ligand))) - self._states_matrix[target_id, model_id] = 1 + self._state_matrix[target_id, model_id] = 1 continue except TooManySymmetriesError: # Ligands are too symmetrical - skip LogVerbose("Too many symmetries between %s and %s" % ( str(model_ligand), str(target_ligand))) - self._states_matrix[target_id, model_id] = 2 + self._state_matrix[target_id, model_id] = 2 continue except NoIsomorphicSymmetryError: # Ligands are different - skip LogVerbose("No isomorphic symmetry between %s and %s" 
% ( str(model_ligand), str(target_ligand))) - self._states_matrix[target_id, model_id] = 3 + self._state_matrix[target_id, model_id] = 3 continue except DisconnectedGraphError: LogVerbose("Disconnected graph observed for %s and %s" % ( str(model_ligand), str(target_ligand))) - self._states_matrix[target_id, model_id] = 4 + self._state_matrix[target_id, model_id] = 4 continue ##################################################### @@ -622,7 +633,18 @@ class LigandScorer: if state <= 9: raise RuntimeError("Child returned reserved err. state") - self._states_matrix[target_id, model_id] = state + # Ensure that returned state is associated with a + # description. This is a requirement when subclassing + # LigandScorer => state_decoding dict from base class must + # be modified in subclass constructor + if state not in self.state_decoding: + raise RuntimeError(f"Subclass returned state " + f"\"{state}\" for which no " + f"description is available. Point " + f"the developer of the used scorer " + f"to this error message.") + + self._state_matrix[target_id, model_id] = state if state == 0: if score is None or np.isnan(score): raise RuntimeError("LigandScorer returned invalid " diff --git a/modules/mol/alg/pymod/ligand_scoring_lddtpli.py b/modules/mol/alg/pymod/ligand_scoring_lddtpli.py index 031ebc6a865f24ca51e9d1062e90bde5ae7d987e..26e2ba130373f4e5ff7f6384ca926ddb5adb2891 100644 --- a/modules/mol/alg/pymod/ligand_scoring_lddtpli.py +++ b/modules/mol/alg/pymod/ligand_scoring_lddtpli.py @@ -45,12 +45,6 @@ class LDDTPLIScorer(ligand_scoring_base.LigandScorer): respective atoms can be mapped there, the contact is considered not fulfilled and added as penalty. 
- Populates :attr:`LigandScorer.states` matrix with the following additional - error states: - - * 10: No contact observed - * 20: Unknown error - Populates :attr:`LigandScorer.aux_data` with following :class:`dict` keys: * lddt_pli: The score @@ -139,6 +133,13 @@ class LDDTPLIScorer(ligand_scoring_base.LigandScorer): self.__ref_mdl_alns = None self.__chain_mapping_mdl = None + # update state decoding from parent with subclass specific stuff + self.state_decoding[10] = ("no_contact", + "There were no lDDT contacts between the " + "binding site and the ligand, and lDDT-PLI " + "is undefined.") + self.state_decoding[20] = ("unknown", + "Unknown error occured in LDDTPLIScorer") def _compute(self, symmetries, target_ligand, model_ligand): """ Implements interface from parent diff --git a/modules/mol/alg/pymod/ligand_scoring_scrmsd.py b/modules/mol/alg/pymod/ligand_scoring_scrmsd.py index 75488947b84906db6443b9a7e0f30630b4de8175..4718a2c51eaef2cea535023fddaeef759574182d 100644 --- a/modules/mol/alg/pymod/ligand_scoring_scrmsd.py +++ b/modules/mol/alg/pymod/ligand_scoring_scrmsd.py @@ -34,16 +34,6 @@ class SCRMSDScorer(ligand_scoring_base.LigandScorer): each symmetry, i.e. atom-atom assignments of the ligand as given by :class:`LigandScorer`. The lowest RMSD value is returned - Populates :attr:`LigandScorer.states` matrix with the following additional - error states: - - * 10: binding_site - no residues were in proximity of the target ligand - * 11: model_representation - no representation of the reference binding site - was found in the model, i.e. 
the binding site was not modeled, or the - model ligand was positioned too far in combination with - *full_bs_search*=False - * 20: Unknown error - Populates :attr:`LigandScorer.aux_data` with following :class:`dict` keys: * rmsd: The score @@ -155,6 +145,19 @@ class SCRMSDScorer(ligand_scoring_base.LigandScorer): self.__chain_mapping_mdl = None self._get_repr_input = dict() + # update state decoding from parent with subclass specific stuff + self.state_decoding[10] = ("binding_site", + "No residues were in proximity of the " + "target ligand.") + self.state_decoding[11] = ("model_representation", "No representation " + "of the reference binding site was found in " + "the model, i.e. the binding site was not " + "modeled or the model ligand was positioned " + "too far in combination with " + "full_bs_search=False.") + self.state_decoding[20] = ("unknown", + "Unknown error occured in SCRMSDScorer") + def _compute(self, symmetries, target_ligand, model_ligand): """ Implements interface from parent """ diff --git a/modules/mol/alg/tests/test_ligand_scoring_fancy.py b/modules/mol/alg/tests/test_ligand_scoring_fancy.py index 08ad3d9a417178c51a2a6be0a79b373e8f58f34d..1301522cef12f3650662a41f27580783eaed655e 100644 --- a/modules/mol/alg/tests/test_ligand_scoring_fancy.py +++ b/modules/mol/alg/tests/test_ligand_scoring_fancy.py @@ -536,7 +536,7 @@ class TestLigandScoringFancy(unittest.TestCase): substructure_match=False) self.assertEqual(sc.coverage_matrix.shape, (1,1)) self.assertTrue(np.isnan(sc.coverage_matrix[0,0])) - self.assertEqual(sc.states_matrix[0,0], 3) # error encoding for that particular issue + self.assertEqual(sc.state_matrix[0,0], 3) # error encoding for that particular issue # Substructure matches sc = ligand_scoring_scrmsd.SCRMSDScorer(mdl.Select("protein=True"), trg.Select("protein=True"), @@ -544,7 +544,7 @@ class TestLigandScoringFancy(unittest.TestCase): substructure_match=True) self.assertEqual(sc.coverage_matrix.shape, (1,1)) 
self.assertEqual(sc.coverage_matrix[0,0], 0.75) - self.assertEqual(sc.states_matrix[0,0], 0) # no error encoded in state + self.assertEqual(sc.state_matrix[0,0], 0) # no error encoded in state def test_6jyf(self): """6JYF initially caused issues in the CASP15-CAMEO/LIGATE paper where