From 0015bac7895b53b86bd6baa5fd9772f15a5f6d4a Mon Sep 17 00:00:00 2001 From: Gabriel Studer <gabriel.studer@unibas.ch> Date: Thu, 27 Feb 2025 14:20:37 +0100 Subject: [PATCH] scoring/chain mapping: rename unmapped_mdl_chains to mdl_chains_without_chem_mapping While I agree that mdl_chains_without_chem_mapping is the longest variable name ever, unmapped_mdl_chains suggests that is was not mapped in the actual mapping procedure. However, with this variable we're reporting the chains that didnt even make it into the mapping procedure due to unsuccessful chem mapping. --- actions/ost-compare-ligand-structures | 10 ++-- actions/ost-compare-structures | 13 +++--- modules/doc/actions.rst | 6 +-- modules/mol/alg/pymod/chain_mapping.py | 46 +++++++++---------- modules/mol/alg/pymod/ligand_scoring_base.py | 16 +++---- .../mol/alg/pymod/ligand_scoring_scrmsd.py | 2 +- modules/mol/alg/pymod/scoring.py | 4 +- modules/mol/alg/tests/test_chain_mapping.py | 10 ++-- 8 files changed, 54 insertions(+), 53 deletions(-) diff --git a/actions/ost-compare-ligand-structures b/actions/ost-compare-ligand-structures index 2801c7319..cb5527b9e 100644 --- a/actions/ost-compare-ligand-structures +++ b/actions/ost-compare-ligand-structures @@ -68,10 +68,10 @@ keys: in chain mapping. That is 1) pass the same size threshold as for chem_groups 2) can be aligned to any of the chem groups with a sequence identity threshold that can be controlled by --chem-map-seqid-thresh. - * "unmapped_mdl_chains": Model chains that could be considered in chain mapping, - i.e. are long enough, but could not be mapped to any chem group. - Depends on --chem-map-seqid-thresh. A mapping for each model chain can be - enforced by setting it to 0. + * "mdl_chains_without_chem_mapping": Model chains that could be considered in + chain mapping, i.e. are long enough, but could not be mapped to any chem + group. Depends on --chem-map-seqid-thresh. A mapping for each model chain can + be enforced by setting it to 0. * "status": SUCCESS if everything ran through. In case of failure, the only content of the JSON output will be \"status\" set to FAILURE and an additional key: "traceback". @@ -903,7 +903,7 @@ def _Process(model, model_ligands, reference, reference_ligands, args): # add info relevant for chain mapping and cleanup out["chem_groups"] = scorer._chain_mapper.chem_groups out["chem_mapping"] = scorer._chem_mapping - out["unmapped_mdl_chains"] = scorer._unmapped_mdl_chains + out["mdl_chains_without_chem_mapping"] = scorer._mdl_chains_without_chem_mapping out["model_cleanup_log"] = scorer.model_cleanup_log out["reference_cleanup_log"] = scorer.target_cleanup_log diff --git a/actions/ost-compare-structures b/actions/ost-compare-structures index 025a3737f..ec843dba9 100644 --- a/actions/ost-compare-structures +++ b/actions/ost-compare-structures @@ -33,14 +33,14 @@ comparison: in chain mapping. That is 1) pass the same size threshold as fo chem_groups 2) can be aligned to any of the chem groups with a sequence identity threshold that can be controlled by --chem-map-seqid-thresh. - * "unmapped_mdl_chains": Model chains that could be considered in chain mapping, - i.e. are long enough, but could not be mapped to any chem group. - Depends on --chem-map-seqid-thresh. A mapping for each model chain can be - enforced by setting it to 0. + * "mdl_chains_without_chem_mapping": Model chains that could be considered in + chain mapping, i.e. are long enough, but could not be mapped to any chem + group. Depends on --chem-map-seqid-thresh. A mapping for each model chain can + be enforced by setting it to 0. * "chain_mapping": A dictionary with reference chain names as keys and the mapped model chain names as values. Missing chains are either not mapped (but present in "chem_groups", "chem_mapping"), were not mapped to any chem - group (present in "unmapped_mdl_chains") or were not considered in + group (present in "mdl_chains_without_chem_mapping") or were not considered in chain mapping (short peptides etc.) * "aln": Pairwise sequence alignment for each pair of mapped chains in fasta format. @@ -994,7 +994,8 @@ def _Process(model, reference, args, model_format, reference_format): out["model_chains"] = [ch.GetName() for ch in scorer.model.chains] out["chem_groups"] = scorer.chain_mapper.chem_groups out["chem_mapping"] = scorer.mapping.chem_mapping - out["unmapped_mdl_chains"] = scorer.mapping.unmapped_mdl_chains + out["mdl_chains_without_chem_mapping"] = \ + scorer.mapping.mdl_chains_without_chem_mapping out["chain_mapping"] = scorer.mapping.GetFlatMapping() out["aln"] = [_AlnToFastaStr(aln) for aln in scorer.aln] out["inconsistent_residues"] = ir diff --git a/modules/doc/actions.rst b/modules/doc/actions.rst index 09913c44a..161073843 100644 --- a/modules/doc/actions.rst +++ b/modules/doc/actions.rst @@ -92,14 +92,14 @@ Details on the usage (output of ``ost compare-structures --help``): in chain mapping. That is 1) pass the same size threshold as fo chem_groups 2) can be aligned to any of the chem groups with a sequence identity threshold that can be controlled by --chem-map-seqid-thresh. - * "unmapped_mdl_chains": Model chains that could be considered in chain mapping, + * "mdl_chains_without_chem_mapping": Model chains that could be considered in chain mapping, i.e. are long enough, but could not be mapped to any chem group. Depends on --chem-map-seqid-thresh. A mapping for each model chain can be enforced by setting it to 0. * "chain_mapping": A dictionary with reference chain names as keys and the mapped model chain names as values. Missing chains are either not mapped (but present in "chem_groups", "chem_mapping"), were not mapped to any chem - group (present in "unmapped_mdl_chains") or were not considered in + group (present in "mdl_chains_without_chem_mapping") or were not considered in chain mapping (short peptides etc.) * "aln": Pairwise sequence alignment for each pair of mapped chains in fasta format. @@ -613,7 +613,7 @@ Details on the usage (output of ``ost compare-ligand-structures --help``): in chain mapping. That is 1) pass the same size threshold as for chem_groups 2) can be aligned to any of the chem groups with a sequence identity threshold that can be controlled by --chem-map-seqid-thresh. - * "unmapped_mdl_chains": Model chains that could be considered in chain mapping, + * "mdl_chains_without_chem_mapping": Model chains that could be considered in chain mapping, i.e. are long enough, but could not be mapped to any chem group. Depends on --chem-map-seqid-thresh. A mapping for each model chain can be enforced by setting it to 0. diff --git a/modules/mol/alg/pymod/chain_mapping.py b/modules/mol/alg/pymod/chain_mapping.py index 16691471e..dec705c52 100644 --- a/modules/mol/alg/pymod/chain_mapping.py +++ b/modules/mol/alg/pymod/chain_mapping.py @@ -37,12 +37,12 @@ class MappingResult: such objects yourself. """ def __init__(self, target, model, chem_groups, chem_mapping, - unmapped_mdl_chains, mapping, alns, opt_score=None): + mdl_chains_without_chem_mapping, mapping, alns, opt_score=None): self._target = target self._model = model self._chem_groups = chem_groups self._chem_mapping = chem_mapping - self._unmapped_mdl_chains = unmapped_mdl_chains + self._mdl_chains_without_chem_mapping = mdl_chains_without_chem_mapping self._mapping = mapping self._alns = alns self._opt_score = opt_score @@ -85,14 +85,14 @@ class MappingResult: return self._chem_mapping @property - def unmapped_mdl_chains(self): + def mdl_chains_without_chem_mapping(self): """ Model chains that cannot be mapped to :attr:`chem_groups` Depends on parameterization of :class:`ChainMapper` :class:`list` of class:`str` (chain names) """ - return self._unmapped_mdl_chains + return self._mdl_chains_without_chem_mapping @property def mapping(self): @@ -776,7 +776,7 @@ class ChainMapper: """ mdl, mdl_pep_seqs, mdl_nuc_seqs = self.ProcessStructure(model) mapping = [list() for x in self.chem_groups] - unmapped_mdl_chains = list() + mdl_chains_without_chem_mapping = list() alns = [seq.AlignmentList() for x in self.chem_groups] for s in mdl_pep_seqs: @@ -786,7 +786,7 @@ class ChainMapper: seq_id_thr = self.mdl_map_pep_seqid_thr, min_aln_length = self.min_pep_length) if idx is None: - unmapped_mdl_chains.append(s.GetName()) + mdl_chains_without_chem_mapping.append(s.GetName()) else: mapping[idx].append(s.GetName()) alns[idx].append(aln) @@ -798,12 +798,12 @@ class ChainMapper: seq_id_thr = self.mdl_map_nuc_seqid_thr, min_aln_length = self.min_nuc_length) if idx is None: - unmapped_mdl_chains.append(s.GetName()) + mdl_chains_without_chem_mapping.append(s.GetName()) else: mapping[idx].append(s.GetName()) alns[idx].append(aln) - return (mapping, alns, unmapped_mdl_chains, mdl) + return (mapping, alns, mdl_chains_without_chem_mapping, mdl) def GetlDDTMapping(self, model, inclusion_radius=15.0, @@ -893,10 +893,10 @@ class ChainMapper: raise RuntimeError(f"Strategy must be in {strategies}") if chem_mapping_result is None: - chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \ + chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \ self.GetChemMapping(model) else: - chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \ + chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \ chem_mapping_result ref_mdl_alns = _GetRefMdlAlns(self.chem_groups, @@ -914,7 +914,7 @@ class ChainMapper: aln = ref_mdl_alns[(ref_ch, mdl_ch)] alns[(ref_ch, mdl_ch)] = aln return MappingResult(self.target, mdl, self.chem_groups, chem_mapping, - unmapped_mdl_chains, one_to_one, alns) + mdl_chains_without_chem_mapping, one_to_one, alns) if strategy == "heuristic": if _NMappingsWithin(self.chem_groups, chem_mapping, @@ -956,7 +956,7 @@ class ChainMapper: alns[(ref_ch, mdl_ch)] = aln return MappingResult(self.target, mdl, self.chem_groups, chem_mapping, - unmapped_mdl_chains, mapping, alns, + mdl_chains_without_chem_mapping, mapping, alns, opt_score = opt_lddt) @@ -1031,10 +1031,10 @@ class ChainMapper: raise RuntimeError(f"strategy must be {strategies}") if chem_mapping_result is None: - chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \ + chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \ self.GetChemMapping(model) else: - chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \ + chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \ chem_mapping_result ref_mdl_alns = _GetRefMdlAlns(self.chem_groups, self.chem_group_alignments, @@ -1050,7 +1050,7 @@ class ChainMapper: aln = ref_mdl_alns[(ref_ch, mdl_ch)] alns[(ref_ch, mdl_ch)] = aln return MappingResult(self.target, mdl, self.chem_groups, chem_mapping, - unmapped_mdl_chains, one_to_one, alns) + mdl_chains_without_chem_mapping, one_to_one, alns) if strategy == "heuristic": if _NMappingsWithin(self.chem_groups, chem_mapping, @@ -1093,7 +1093,7 @@ class ChainMapper: alns[(ref_ch, mdl_ch)] = aln return MappingResult(self.target, mdl, self.chem_groups, chem_mapping, - unmapped_mdl_chains, mapping, alns, + mdl_chains_without_chem_mapping, mapping, alns, opt_score = opt_qsscore) def GetRMSDMapping(self, model, strategy = "heuristic", subsampling=50, @@ -1146,10 +1146,10 @@ class ChainMapper: raise RuntimeError(f"strategy must be {strategies}") if chem_mapping_result is None: - chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \ + chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \ self.GetChemMapping(model) else: - chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \ + chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \ chem_mapping_result ref_mdl_alns = _GetRefMdlAlns(self.chem_groups, self.chem_group_alignments, @@ -1166,7 +1166,7 @@ class ChainMapper: aln = ref_mdl_alns[(ref_ch, mdl_ch)] alns[(ref_ch, mdl_ch)] = aln return MappingResult(self.target, mdl, self.chem_groups, chem_mapping, - unmapped_mdl_chains, one_to_one, alns) + mdl_chains_without_chem_mapping, one_to_one, alns) trg_group_pos, mdl_group_pos = _GetRefPos(self.target, mdl, self.chem_group_alignments, @@ -1237,7 +1237,7 @@ class ChainMapper: alns[(ref_ch, mdl_ch)] = aln return MappingResult(self.target, mdl, self.chem_groups, chem_mapping, - unmapped_mdl_chains, final_mapping, alns) + mdl_chains_without_chem_mapping, final_mapping, alns) def GetMapping(self, model, n_max_naive = 40320): """ Convenience function to get mapping with currently preferred method @@ -1365,10 +1365,10 @@ class ChainMapper: # perform mapping and alignments on full structures if chem_mapping_result is None: - chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \ + chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \ self.GetChemMapping(model) else: - chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \ + chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \ chem_mapping_result ref_mdl_alns = _GetRefMdlAlns(self.chem_groups, self.chem_group_alignments, @@ -1529,7 +1529,7 @@ class ChainMapper: :attr:`chem_groups` :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle` """ - chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \ + chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \ self.GetChemMapping(model) return _NMappings(self.chem_groups, chem_mapping) diff --git a/modules/mol/alg/pymod/ligand_scoring_base.py b/modules/mol/alg/pymod/ligand_scoring_base.py index f676463bd..cf0a73f49 100644 --- a/modules/mol/alg/pymod/ligand_scoring_base.py +++ b/modules/mol/alg/pymod/ligand_scoring_base.py @@ -502,7 +502,7 @@ class LigandScorer: self.__chem_mapping = None self.__chem_group_alns = None self.__ref_mdl_alns = None - self.__unmapped_mdl_chains = None + self.__mdl_chains_without_chem_mapping = None self.__chain_mapping_mdl = None # keep track of states @@ -1165,7 +1165,7 @@ class LigandScorer: def _chem_mapping(self): if self.__chem_mapping is None: self.__chem_mapping, self.__chem_group_alns, \ - self.__unmapped_mdl_chains, self.__chain_mapping_mdl = \ + self.__mdl_chains_without_chem_mapping, self.__chain_mapping_mdl = \ self._chain_mapper.GetChemMapping(self.model) return self.__chem_mapping @@ -1173,7 +1173,7 @@ class LigandScorer: def _chem_group_alns(self): if self.__chem_group_alns is None: self.__chem_mapping, self.__chem_group_alns, \ - self.__unmapped_mdl_chains, self.__chain_mapping_mdl = \ + self.__mdl_chains_without_chem_mapping, self.__chain_mapping_mdl = \ self._chain_mapper.GetChemMapping(self.model) return self.__chem_group_alns @@ -1192,17 +1192,17 @@ class LigandScorer: if self.__chain_mapping_mdl is None: with _SinkVerbosityLevel(): self.__chem_mapping, self.__chem_group_alns, \ - self.__unmapped_mdl_chains, self.__chain_mapping_mdl = \ + self.__mdl_chains_without_chem_mapping, self.__chain_mapping_mdl = \ self._chain_mapper.GetChemMapping(self.model) return self.__chain_mapping_mdl @property - def _unmapped_mdl_chains(self): - if self.__unmapped_mdl_chains is None: + def _mdl_chains_without_chem_mapping(self): + if self.__mdl_chains_without_chem_mapping is None: self.__chem_mapping, self.__chem_group_alns, \ - self.__unmapped_mdl_chains, self.__chain_mapping_mdl = \ + self.__mdl_chains_without_chem_mapping, self.__chain_mapping_mdl = \ self._chain_mapper.GetChemMapping(self.model) - return self.__unmapped_mdl_chains + return self.__mdl_chains_without_chem_mapping def _compute_scores(self): """ diff --git a/modules/mol/alg/pymod/ligand_scoring_scrmsd.py b/modules/mol/alg/pymod/ligand_scoring_scrmsd.py index b14c68b7c..699642c8c 100644 --- a/modules/mol/alg/pymod/ligand_scoring_scrmsd.py +++ b/modules/mol/alg/pymod/ligand_scoring_scrmsd.py @@ -369,7 +369,7 @@ class SCRMSDScorer(ligand_scoring_base.LigandScorer): return (self._get_repr_input[mdl_ligand.hash_code][1], self._chem_group_alns, - self._unmapped_mdl_chains, + self._mdl_chains_without_chem_mapping, self._get_repr_input[mdl_ligand.hash_code][0]) diff --git a/modules/mol/alg/pymod/scoring.py b/modules/mol/alg/pymod/scoring.py index 62a0d92be..9f88d18fc 100644 --- a/modules/mol/alg/pymod/scoring.py +++ b/modules/mol/alg/pymod/scoring.py @@ -3224,7 +3224,7 @@ class Scorer: LogInfo("Setting custom chain mapping") chain_mapper = self.chain_mapper - chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \ + chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \ chain_mapper.GetChemMapping(self.model) # now that we have a chem mapping, lets do consistency checks @@ -3308,7 +3308,7 @@ class Scorer: return chain_mapping.MappingResult(chain_mapper.target, mdl, chain_mapper.chem_groups, chem_mapping, - unmapped_mdl_chains, + mdl_chains_without_chem_mapping, final_mapping, alns) def _compute_tmscore(self): diff --git a/modules/mol/alg/tests/test_chain_mapping.py b/modules/mol/alg/tests/test_chain_mapping.py index 03a0bb6bc..8cca758f1 100644 --- a/modules/mol/alg/tests/test_chain_mapping.py +++ b/modules/mol/alg/tests/test_chain_mapping.py @@ -176,7 +176,7 @@ class TestChainMapper(unittest.TestCase): mapper = ChainMapper(ref) - chem_mapping, alns, unmapped_mdl_chains, mdl_view = mapper.GetChemMapping(mdl) + chem_mapping, alns, mdl_chains_without_chem_mapping, mdl_view = mapper.GetChemMapping(mdl) self.assertEqual(len(mapper.chem_groups), 3) self.assertEqual(len(chem_mapping), len(mapper.chem_groups)) @@ -201,12 +201,12 @@ class TestChainMapper(unittest.TestCase): mdl = _LoadFile("mdl_different_chain_mdl.pdb") ref = _LoadFile("mdl_different_chain_ref.pdb") mapper = ChainMapper(ref) - chem_mapping, alns, unmapped_mdl_chains, mdl_view = mapper.GetChemMapping(mdl) - self.assertTrue(len(unmapped_mdl_chains)==0) + chem_mapping, alns, mdl_chains_without_chem_mapping, mdl_view = mapper.GetChemMapping(mdl) + self.assertTrue(len(mdl_chains_without_chem_mapping)==0) mapper = ChainMapper(ref, mdl_map_pep_seqid_thr=70) - chem_mapping, alns, unmapped_mdl_chains, mdl_view = mapper.GetChemMapping(mdl) - self.assertEqual(unmapped_mdl_chains, ["A"]) + chem_mapping, alns, mdl_chains_without_chem_mapping, mdl_view = mapper.GetChemMapping(mdl) + self.assertEqual(mdl_chains_without_chem_mapping, ["A"]) def test_chain_mapping(self): ref = _LoadFile("3l1p.1.pdb") -- GitLab