diff --git a/modules/mol/alg/pymod/ligand_scoring.py b/modules/mol/alg/pymod/ligand_scoring.py index 992bb6ea686f4242a6751dbbef00fc24a58db489..00d2365db7288deab75e568475d51b095e76f1d3 100644 --- a/modules/mol/alg/pymod/ligand_scoring.py +++ b/modules/mol/alg/pymod/ligand_scoring.py @@ -23,7 +23,7 @@ class LigandScorer: * lDDT-PLI, that looks at the conservation of protein-ligand contacts with :class:`lDDT <ost.mol.alg.lddt.lDDTScorer>`. * Binding-site superposed, symmetry-corrected RMSD that assesses the - accuracy of the ligand pose. + accuracy of the ligand pose (BiSyRMSD, hereinafter referred to as RMSD). Both scores involve local chain mapping of the reference binding site onto the model, symmetry-correction, and finally assignment (mapping) @@ -31,11 +31,33 @@ class LigandScorer: Results are available as matrices (`(lddt_pli|rmsd)_matrix`), where every target-model score is reported in a matrix; as `(lddt_pli|rmsd)` where - a model-target assignment has been determined, starting from the "best" - possible mapping and using each target and model ligand in a single - assignment, and the results are reported in a dictionary; and as - (`(lddt_pli|rmsd)_details`) methods, which report additional details - about different aspects of the scoring such as chain mapping. + a model-target assignment has been determined (see below) and reported in + a dictionary; and as (`(lddt_pli|rmsd)_details`) methods, which report + additional details about different aspects of the scoring such as chain + mapping. + + The behavior of chain mapping and ligand assignment can be controlled + with the `global_chain_mapping` and `rmsd_assignment` arguments. + + By default, chain mapping is performed locally, ie. only within the + binding site. As a result, different ligand scores can correspond to + different chain mappings. This tends to produce more favorable scores, + especially in large, partially regular oligomeric complexes. + Setting `global_chain_mapping=True` enforces a single global chain mapping, + as per :meth:`ost.mol.alg.chain_mapping.ChainMapper.GetMapping`. + Note that this global chain mapping currently ignores non polymer entities + such as small ligands, and may result in overly pessimistic scores. + + By defaults, target-model ligand assignments are computed independently + for the RMSD and lDDT-PLI scores. For RMSD, each model ligand is uniquely + assigned to a target ligand, starting from the "best" possible mapping + (lowest RMSD) and using each target and model ligand in a single + assignment. Ties are resolved by best (highest) lDDT-PLI. Similarly, + for lDDT-PLI, the assignment is based on the highest lDDT-PLI, and ties + broken by lowest RMSD. Setting `rmsd_assignment=True` forces a single + ligand assignment, based on RMSD only. Ties are broken arbitrarily. + + Assumptions: The class generally assumes that the :attr:`~ost.mol.ResidueHandle.is_ligand` property is properly set on all @@ -182,13 +204,19 @@ class LigandScorer: chain mappings are allowed (where different ligand may be scored against different chain mappings). + :type global_chain_mapping: :class:`bool` + :param rmsd_assignment: assign ligands based on RMSD only. The default + (False) is to use a combination of lDDT-PLI and + RMSD for the assignment. + :type rmsd_assignment: :class:`bool` """ def __init__(self, model, target, model_ligands=None, target_ligands=None, resnum_alignments=False, check_resnames=True, rename_ligand_chain=False, chain_mapper=None, substructure_match=False, radius=4.0, lddt_pli_radius=6.0, lddt_lp_radius=10.0, - binding_sites_topn=100000, global_chain_mapping=False): + binding_sites_topn=100000, global_chain_mapping=False, + rmsd_assignment=False): if isinstance(model, mol.EntityView): self.model = mol.CreateEntityFromView(model, False) @@ -234,6 +262,7 @@ class LigandScorer: self.lddt_lp_radius = lddt_lp_radius self.binding_sites_topn = binding_sites_topn self.global_chain_mapping = global_chain_mapping + self.rmsd_assignment = rmsd_assignment # scoring matrices self._rmsd_matrix = None @@ -623,15 +652,21 @@ class LigandScorer: self._lddt_pli_full_matrix = lddt_pli_full_matrix @staticmethod - def _find_ligand_assignment(mat1, mat2): - """ Find the ligand assignment based on mat1 + def _find_ligand_assignment(mat1, mat2=None): + """ Find the ligand assignment based on mat1. If mat2 is provided, it + will be used to break ties in mat1. If mat2 is not provided, ties will + be resolved by taking the first match arbitrarily. Both mat1 and mat2 should "look" like RMSD - ie be between inf (bad) and 0 (good). """ # We will modify mat1 and mat2, so make copies of it first mat1 = np.copy(mat1) - mat2 = np.copy(mat2) + if mat2 is None: + mat2 = np.copy(mat1) + mat2[~np.isnan(mat2)] = np.inf + else: + mat2 = np.copy(mat1) assignments = [] min_mat1 = LigandScorer._nanmin_nowarn(mat1) while not np.isnan(min_mat1): @@ -641,6 +676,7 @@ class LigandScorer: # Get the values of mat2 at these positions best_mat2_match = [mat2[tuple(x)] for x in best_mat1] # Find the index of the best mat2 + # Note: argmin returns the first value which is min. best_mat2_idx = np.array(best_mat2_match).argmin() # Now get the original indices max_i_trg, max_i_mdl = best_mat1[best_mat2_idx] @@ -681,7 +717,7 @@ class LigandScorer: def _assign_ligands_rmsd(self): """Assign (map) ligands between model and target. - Sets self._rmsd, self._rmsd_assignment and self._rmsd_details. + Sets self._rmsd and self._rmsd_details. """ mat2 = self._reverse_lddt(self.lddt_pli_matrix) @@ -736,8 +772,73 @@ class LigandScorer: trg_idx, mdl_idx] return out_main, out_details + def _assign_matrix(self, mat, data1, main_key1, data2, main_key2): + """ + Perform the ligand assignment, ie find the mapping between model and + target ligands, based on a single matrix + + The algorithm starts by assigning the "best" mapping, and then discards + the target and model ligands (row, column) so that every model ligand + can be assigned to a single target ligand, and every target ligand + is only assigned to a single model ligand. Repeat until there is + nothing left to assign. + + This algorithm doesn't guarantee a globally optimal assignment. + + `mat` should contain values between 0 and infinity, + with lower values representing better scores. Use the + :meth:`_reverse_lddt` method to convert lDDT values to such a score. + + :param mat: the ligand assignment criteria (RMSD or lDDT-PLI) + :param data1: the first data (either self._rmsd_full_matrix or self._lddt_pli_matrix) + :param main_key1: the first key of data (dictionnaries within `data`) to + assign into out_main. + :param data2: the second data (either self._rmsd_full_matrix or self._lddt_pli_matrix) + :param main_key2: the second key of data (dictionnaries within `data`) to + assign into out_main. + :return: a tuple with 4 dictionaries of matrices containing the main + data1, details1, main data2 and details2, respectively. + """ + assignments = self._find_ligand_assignment(mat) + out_main1 = {} + out_details1 = {} + out_main2 = {} + out_details2 = {} + for assignment in assignments: + trg_idx, mdl_idx = assignment + mdl_lig = self.model_ligands[mdl_idx] + mdl_cname = mdl_lig.chain.name + mdl_resnum = mdl_lig.number + # Data 1 + if mdl_cname not in out_main1: + out_main1[mdl_cname] = {} + out_details1[mdl_cname] = {} + out_main1[mdl_cname][mdl_resnum] = data1[ + trg_idx, mdl_idx][main_key1] + out_details1[mdl_cname][mdl_resnum] = data1[ + trg_idx, mdl_idx] + out_main1[mdl_cname][mdl_resnum] = data2[ + trg_idx, mdl_idx][main_key2] + out_details1[mdl_cname][mdl_resnum] = data2[ + trg_idx, mdl_idx] + # Data2 + if mdl_cname not in out_main2: + out_main2[mdl_cname] = {} + out_details2[mdl_cname] = {} + out_main2[mdl_cname][mdl_resnum] = data1[ + trg_idx, mdl_idx][main_key1] + out_details2[mdl_cname][mdl_resnum] = data1[ + trg_idx, mdl_idx] + out_main2[mdl_cname][mdl_resnum] = data2[ + trg_idx, mdl_idx][main_key2] + out_details2[mdl_cname][mdl_resnum] = data2[ + trg_idx, mdl_idx] + return out_main1, out_details1, out_main2, out_details2 + def _assign_ligands_lddt_pli(self): """ Assign ligands based on lDDT-PLI. + + Sets self._lddt_pli and self._lddt_pli_details. """ mat1 = self._reverse_lddt(self.lddt_pli_matrix) @@ -748,6 +849,22 @@ class LigandScorer: self._lddt_pli = mat_tuple[0] self._lddt_pli_details = mat_tuple[1] + def _assign_ligands_rmsd_only(self): + """Assign (map) ligands between model and target based on RMSD only. + + Sets self._rmsd, self._rmsd_details, self._lddt_pli and + self._lddt_pli_details. + """ + mat_tuple = self._assign_matrix(self.rmsd_matrix, + self._rmsd_full_matrix, + "rmsd", + self._lddt_pli_full_matrix, + "lddt_pli") + self._rmsd = mat_tuple[0] + self._rmsd_details = mat_tuple[1] + self._lddt_pli = mat_tuple[2] + self._lddt_pli_details = mat_tuple[3] + @property def rmsd_matrix(self): """ Get the matrix of RMSD values. @@ -802,7 +919,10 @@ class LigandScorer: :rtype: :class:`dict` """ if self._rmsd is None: - self._assign_ligands_rmsd() + if self.rmsd_assignment: + self._assign_ligands_rmsd_only() + else: + self._assign_ligands_rmsd() return self._rmsd @property @@ -844,7 +964,10 @@ class LigandScorer: :rtype: :class:`dict` """ if self._rmsd_details is None: - self._assign_ligands_rmsd() + if self.rmsd_assignment: + self._assign_ligands_rmsd_only() + else: + self._assign_ligands_rmsd() return self._rmsd_details @property @@ -855,7 +978,10 @@ class LigandScorer: :rtype: :class:`dict` """ if self._lddt_pli is None: - self._assign_ligands_lddt_pli() + if self.rmsd_assignment: + self._assign_ligands_rmsd_only() + else: + self._assign_ligands_lddt_pli() return self._lddt_pli @property @@ -906,7 +1032,10 @@ class LigandScorer: :rtype: :class:`dict` """ if self._lddt_pli_details is None: - self._assign_ligands_lddt_pli() + if self.rmsd_assignment: + self._assign_ligands_rmsd_only() + else: + self._assign_ligands_lddt_pli() return self._lddt_pli_details diff --git a/modules/mol/alg/tests/test_ligand_scoring.py b/modules/mol/alg/tests/test_ligand_scoring.py index e06e64cfbe59ffa1039ad5d3be26d4bc5bed6fbf..61f70d8b4507d631759541dc6c2256f1c6e54f77 100644 --- a/modules/mol/alg/tests/test_ligand_scoring.py +++ b/modules/mol/alg/tests/test_ligand_scoring.py @@ -377,6 +377,25 @@ class TestLigandScoring(unittest.TestCase): assert sc.rmsd_details["L_2"][1]["chain_mapping"] == {'C': 'A'} assert sc.lddt_pli_details["L_2"][1]["chain_mapping"] == {'C': 'A'} + def test_rmsd_assignment(self): + """Test that the RMSD-based assignment works. + + For RMSD, A: A results in a better chain mapping. However, C: A is a + better global chain mapping from an lDDT perspective (and lDDT-PLI). + """ + trg = _LoadMMCIF("1r8q.cif.gz") + mdl = _LoadMMCIF("P84080_model_02.cif.gz") + + # By default, assignment differs between RMSD and lDDT-PLI in this + # specific test case, so we can first ensure it does. + # For now we skip as this is slow + # sc = LigandScorer(mdl, trg, None, None) + # assert sc.rmsd_details["L_2"][1]["target_ligand"] != sc.lddt_pli_details["L_2"][1]["target_ligand"] + + # RMSD assignment forces the same assignment + sc = LigandScorer(mdl, trg, None, None, rmsd_assignment=True) + assert sc.rmsd_details["L_2"][1]["target_ligand"] == sc.lddt_pli_details["L_2"][1]["target_ligand"] + if __name__ == "__main__": from ost import testutils