From 98ebf05e210265b04ff515fe44e93474429603c5 Mon Sep 17 00:00:00 2001 From: Xavier Robin <xavier.robin@unibas.ch> Date: Mon, 13 Mar 2023 16:54:47 +0100 Subject: [PATCH] refactor: SCHWED-5481 -ec instead of -cr, report inconsistencies --- actions/ost-compare-ligand-structures | 28 +++++++++++++++------- modules/mol/alg/pymod/chain_mapping.py | 32 +++++++++++++++++++++++-- modules/mol/alg/pymod/ligand_scoring.py | 16 +++++++++++++ 3 files changed, 66 insertions(+), 10 deletions(-) diff --git a/actions/ost-compare-ligand-structures b/actions/ost-compare-ligand-structures index b04f1be17..ce0decf6a 100644 --- a/actions/ost-compare-ligand-structures +++ b/actions/ost-compare-ligand-structures @@ -148,13 +148,19 @@ def _ParseArgs(): "a global BLOSUM62-based alignment (NUC44 for nucleotides).")) parser.add_argument( - "-cr", - "--check-resnames", - dest="check_resnames", - default=True, - action="store_false", - help=("Enforce residue name matches between mapped model and target" - "residues.")) + "-ec", + "--enforce-consistency", + dest="enforce_consistency", + default=False, + action="store_true", + help=("Enforce consistency of residue names between the reference " + "binding site and the model. By default residue name " + "discrepancies are reported but the program proceeds. " + "If this is set to True, the program will fail with an error " + "message if the residues names differ. 
" + "Note: more binding site mappings may be explored during " + "scoring, but only inconsistencies in the selected mapping are " + "reported.")) parser.add_argument( "-sm", @@ -309,7 +315,7 @@ def _Process(model, model_ligands, reference, reference_ligands, args): model_ligands=model_ligands, target_ligands=reference_ligands, resnum_alignments=args.residue_number_alignment, - check_resnames=args.check_resnames, + check_resnames=args.enforce_consistency, substructure_match=args.substructure_match, radius=args.radius, lddt_pli_radius=args.lddt_pli_radius, @@ -360,6 +366,9 @@ def _Process(model, model_ligands, reference, reference_ligands, args): lddt_pli["bs_ref_res_mapped"]] lddt_pli["bs_mdl_res_mapped"] = [r.qualified_name for r in lddt_pli["bs_mdl_res_mapped"]] + lddt_pli["inconsistent_residues"] = ["%s-%s" %( + x.qualified_name, y.qualified_name) for x,y in lddt_pli[ + "inconsistent_residues"]] out["lddt_pli"][model_key] = lddt_pli if args.rmsd: @@ -379,6 +388,9 @@ def _Process(model, model_ligands, reference, reference_ligands, args): rmsd["bs_ref_res_mapped"]] rmsd["bs_mdl_res_mapped"] = [r.qualified_name for r in rmsd["bs_mdl_res_mapped"]] + rmsd["inconsistent_residues"] = ["%s-%s" %( + x.qualified_name, y.qualified_name) for x,y in rmsd[ + "inconsistent_residues"]] out["rmsd"][model_key] = rmsd return out diff --git a/modules/mol/alg/pymod/chain_mapping.py b/modules/mol/alg/pymod/chain_mapping.py index 37c36a132..74dfc115f 100644 --- a/modules/mol/alg/pymod/chain_mapping.py +++ b/modules/mol/alg/pymod/chain_mapping.py @@ -197,6 +197,7 @@ class ReprResult: self._gdt_1 = None self._ost_query = None self._flat_mapping = None + self._inconsistent_residues = None @property def lDDT(self): @@ -228,7 +229,7 @@ class ReprResult: @property def mdl_view(self): - """ The :attr:`ref_view` represention in the model + """ The :attr:`ref_view` representation in the model :type: :class:`ost.mol.EntityView` """ @@ -249,7 +250,22 @@ class ReprResult: :type: 
:class:`mol.ResidueViewList` """ return self.mdl_view.residues - + + @property + def inconsistent_residues(self): + """ A list of mapped residues whose names do not match (e.g. ALA in the + reference and LEU in the model). + + Each mismatch is reported as a tuple of :class:`~ost.mol.ResidueView` + (reference, model); the list is empty if all the residue names match. + + :type: :class:`list` + """ + if self._inconsistent_residues is None: + self._inconsistent_residues = self._GetInconsistentResidues( + self.ref_residues, self.mdl_residues) + return self._inconsistent_residues + @property def ref_bb_pos(self): """ Representative backbone positions for reference residues. @@ -475,6 +491,18 @@ class ReprResult: bb_pos.append(at.GetPos()) return bb_pos + def _GetInconsistentResidues(self, ref_residues, mdl_residues): + """ Helper to extract a list of inconsistent residues. + """ + if len(ref_residues) != len(mdl_residues): + raise ValueError("Something terrible happened... Reference and " + "model lengths differ... 
RUN...") + inconsistent_residues = list() + for ref_residue, mdl_residue in zip(ref_residues, mdl_residues): + if ref_residue.name != mdl_residue.name: + inconsistent_residues.append((ref_residue, mdl_residue)) + return inconsistent_residues + class ChainMapper: """ Class to compute chain mappings diff --git a/modules/mol/alg/pymod/ligand_scoring.py b/modules/mol/alg/pymod/ligand_scoring.py index ac7dc060d..2298bbbbd 100644 --- a/modules/mol/alg/pymod/ligand_scoring.py +++ b/modules/mol/alg/pymod/ligand_scoring.py @@ -488,6 +488,7 @@ class LigandScorer: "chain_mapping": binding_site.GetFlatChainMapping(), "transform": binding_site.transform, "substructure_match": substructure_match, + "inconsistent_residues": binding_site.inconsistent_residues, } LogDebug("Saved RMSD") @@ -548,6 +549,7 @@ class LigandScorer: "chain_mapping": binding_site.GetFlatChainMapping(), "transform": binding_site.transform, "substructure_match": substructure_match, + "inconsistent_residues": binding_site.inconsistent_residues, } LogDebug("Saved lDDT-PLI") @@ -765,6 +767,13 @@ class LigandScorer: (substructure) match. A value of `True` indicates that the target ligand covers only part of the model, while `False` indicates a perfect match. + * `inconsistent_residues`: a list of tuples of mapped residue views + (:class:`~ost.mol.ResidueView`) with residue names that differ + between the reference and the model, respectively. + The list is empty if all residue names match, which is guaranteed + if `check_resnames=True`. + Note: more binding site mappings may be explored during scoring, + but only inconsistencies in the selected mapping are reported. :rtype: :class:`dict` """ @@ -820,6 +829,13 @@ class LigandScorer: (substructure) match. A value of `True` indicates that the target ligand covers only part of the model, while `False` indicates a perfect match. 
+ * `inconsistent_residues`: a list of tuples of mapped residue views + (:class:`~ost.mol.ResidueView`) with residue names that differ + between the reference and the model, respectively. + The list is empty if all residue names match, which is guaranteed + if `check_resnames=True`. + Note: more binding site mappings may be explored during scoring, + but only inconsistencies in the selected mapping are reported. :rtype: :class:`dict` """ -- GitLab