From 0015bac7895b53b86bd6baa5fd9772f15a5f6d4a Mon Sep 17 00:00:00 2001
From: Gabriel Studer <gabriel.studer@unibas.ch>
Date: Thu, 27 Feb 2025 14:20:37 +0100
Subject: [PATCH] scoring/chain mapping: rename unmapped_mdl_chains to
 mdl_chains_without_chem_mapping

While I agree that mdl_chains_without_chem_mapping is the longest
variable name ever, unmapped_mdl_chains suggests that is was not
mapped in the actual mapping procedure. However, with this variable
we're reporting the chains that didnt even make it into the
mapping procedure due to unsuccessful chem mapping.
---
 actions/ost-compare-ligand-structures         | 10 ++--
 actions/ost-compare-structures                | 13 +++---
 modules/doc/actions.rst                       |  6 +--
 modules/mol/alg/pymod/chain_mapping.py        | 46 +++++++++----------
 modules/mol/alg/pymod/ligand_scoring_base.py  | 16 +++----
 .../mol/alg/pymod/ligand_scoring_scrmsd.py    |  2 +-
 modules/mol/alg/pymod/scoring.py              |  4 +-
 modules/mol/alg/tests/test_chain_mapping.py   | 10 ++--
 8 files changed, 54 insertions(+), 53 deletions(-)

diff --git a/actions/ost-compare-ligand-structures b/actions/ost-compare-ligand-structures
index 2801c7319..cb5527b9e 100644
--- a/actions/ost-compare-ligand-structures
+++ b/actions/ost-compare-ligand-structures
@@ -68,10 +68,10 @@ keys:
    in chain mapping. That is 1) pass the same size threshold as for chem_groups
    2) can be aligned to any of the chem groups with a sequence identity
    threshold that can be controlled by --chem-map-seqid-thresh.
- * "unmapped_mdl_chains": Model chains that could be considered in chain mapping,
-   i.e. are long enough, but could not be mapped to any chem group.
-   Depends on --chem-map-seqid-thresh. A mapping for each model chain can be
-   enforced by setting it to 0.
+ * "mdl_chains_without_chem_mapping": Model chains that could be considered in
+   chain mapping, i.e. are long enough, but could not be mapped to any chem
+   group. Depends on --chem-map-seqid-thresh. A mapping for each model chain can
+   be enforced by setting it to 0.
  * "status": SUCCESS if everything ran through. In case of failure, the only
    content of the JSON output will be \"status\" set to FAILURE and an
    additional key: "traceback".
@@ -903,7 +903,7 @@ def _Process(model, model_ligands, reference, reference_ligands, args):
     # add info relevant for chain mapping and cleanup
     out["chem_groups"] = scorer._chain_mapper.chem_groups
     out["chem_mapping"] = scorer._chem_mapping
-    out["unmapped_mdl_chains"] = scorer._unmapped_mdl_chains
+    out["mdl_chains_without_chem_mapping"] = scorer._mdl_chains_without_chem_mapping
     out["model_cleanup_log"] = scorer.model_cleanup_log
     out["reference_cleanup_log"] = scorer.target_cleanup_log
 
diff --git a/actions/ost-compare-structures b/actions/ost-compare-structures
index 025a3737f..ec843dba9 100644
--- a/actions/ost-compare-structures
+++ b/actions/ost-compare-structures
@@ -33,14 +33,14 @@ comparison:
    in chain mapping. That is 1) pass the same size threshold as fo chem_groups
    2) can be aligned to any of the chem groups with a sequence identity
    threshold that can be controlled by --chem-map-seqid-thresh.
- * "unmapped_mdl_chains": Model chains that could be considered in chain mapping,
-   i.e. are long enough, but could not be mapped to any chem group.
-   Depends on --chem-map-seqid-thresh. A mapping for each model chain can be
-   enforced by setting it to 0.
+ * "mdl_chains_without_chem_mapping": Model chains that could be considered in
+   chain mapping, i.e. are long enough, but could not be mapped to any chem
+   group. Depends on --chem-map-seqid-thresh. A mapping for each model chain can
+   be enforced by setting it to 0.
  * "chain_mapping": A dictionary with reference chain names as keys and the
    mapped model chain names as values. Missing chains are either not mapped
    (but present in "chem_groups", "chem_mapping"), were not mapped to any chem
-   group (present in "unmapped_mdl_chains") or were not considered in
+   group (present in "mdl_chains_without_chem_mapping") or were not considered in
    chain mapping (short peptides etc.)
  * "aln": Pairwise sequence alignment for each pair of mapped chains in fasta
    format.
@@ -994,7 +994,8 @@ def _Process(model, reference, args, model_format, reference_format):
     out["model_chains"] = [ch.GetName() for ch in scorer.model.chains]
     out["chem_groups"] = scorer.chain_mapper.chem_groups
     out["chem_mapping"] = scorer.mapping.chem_mapping
-    out["unmapped_mdl_chains"] = scorer.mapping.unmapped_mdl_chains
+    out["mdl_chains_without_chem_mapping"] = \
+    scorer.mapping.mdl_chains_without_chem_mapping
     out["chain_mapping"] = scorer.mapping.GetFlatMapping()
     out["aln"] = [_AlnToFastaStr(aln) for aln in scorer.aln]
     out["inconsistent_residues"] = ir
diff --git a/modules/doc/actions.rst b/modules/doc/actions.rst
index 09913c44a..161073843 100644
--- a/modules/doc/actions.rst
+++ b/modules/doc/actions.rst
@@ -92,14 +92,14 @@ Details on the usage (output of ``ost compare-structures --help``):
      in chain mapping. That is 1) pass the same size threshold as fo chem_groups
      2) can be aligned to any of the chem groups with a sequence identity
      threshold that can be controlled by --chem-map-seqid-thresh.
-   * "unmapped_mdl_chains": Model chains that could be considered in chain mapping,
+   * "mdl_chains_without_chem_mapping": Model chains that could be considered in chain mapping,
      i.e. are long enough, but could not be mapped to any chem group.
      Depends on --chem-map-seqid-thresh. A mapping for each model chain can be
      enforced by setting it to 0.
    * "chain_mapping": A dictionary with reference chain names as keys and the
      mapped model chain names as values. Missing chains are either not mapped
      (but present in "chem_groups", "chem_mapping"), were not mapped to any chem
-     group (present in "unmapped_mdl_chains") or were not considered in
+     group (present in "mdl_chains_without_chem_mapping") or were not considered in
      chain mapping (short peptides etc.)
    * "aln": Pairwise sequence alignment for each pair of mapped chains in fasta
      format.
@@ -613,7 +613,7 @@ Details on the usage (output of ``ost compare-ligand-structures --help``):
      in chain mapping. That is 1) pass the same size threshold as for chem_groups
      2) can be aligned to any of the chem groups with a sequence identity
      threshold that can be controlled by --chem-map-seqid-thresh.
-   * "unmapped_mdl_chains": Model chains that could be considered in chain mapping,
+   * "mdl_chains_without_chem_mapping": Model chains that could be considered in chain mapping,
      i.e. are long enough, but could not be mapped to any chem group.
      Depends on --chem-map-seqid-thresh. A mapping for each model chain can be
      enforced by setting it to 0.
diff --git a/modules/mol/alg/pymod/chain_mapping.py b/modules/mol/alg/pymod/chain_mapping.py
index 16691471e..dec705c52 100644
--- a/modules/mol/alg/pymod/chain_mapping.py
+++ b/modules/mol/alg/pymod/chain_mapping.py
@@ -37,12 +37,12 @@ class MappingResult:
     such objects yourself.
     """
     def __init__(self, target, model, chem_groups, chem_mapping,
-                 unmapped_mdl_chains, mapping, alns, opt_score=None):
+                 mdl_chains_without_chem_mapping, mapping, alns, opt_score=None):
         self._target = target
         self._model = model
         self._chem_groups = chem_groups
         self._chem_mapping = chem_mapping
-        self._unmapped_mdl_chains = unmapped_mdl_chains
+        self._mdl_chains_without_chem_mapping = mdl_chains_without_chem_mapping
         self._mapping = mapping
         self._alns = alns
         self._opt_score = opt_score
@@ -85,14 +85,14 @@ class MappingResult:
         return self._chem_mapping
 
     @property
-    def unmapped_mdl_chains(self):
+    def mdl_chains_without_chem_mapping(self):
         """ Model chains that cannot be mapped to :attr:`chem_groups`
 
         Depends on parameterization of :class:`ChainMapper`
 
         :class:`list` of class:`str` (chain names)
         """
-        return self._unmapped_mdl_chains
+        return self._mdl_chains_without_chem_mapping
 
     @property
     def mapping(self):
@@ -776,7 +776,7 @@ class ChainMapper:
         """
         mdl, mdl_pep_seqs, mdl_nuc_seqs = self.ProcessStructure(model)
         mapping = [list() for x in self.chem_groups]
-        unmapped_mdl_chains = list()
+        mdl_chains_without_chem_mapping = list()
         alns = [seq.AlignmentList() for x in self.chem_groups]
 
         for s in mdl_pep_seqs:
@@ -786,7 +786,7 @@ class ChainMapper:
                                          seq_id_thr = self.mdl_map_pep_seqid_thr,
                                          min_aln_length = self.min_pep_length)
             if idx is None:
-                unmapped_mdl_chains.append(s.GetName())
+                mdl_chains_without_chem_mapping.append(s.GetName())
             else:
                 mapping[idx].append(s.GetName())
                 alns[idx].append(aln)
@@ -798,12 +798,12 @@ class ChainMapper:
                                          seq_id_thr = self.mdl_map_nuc_seqid_thr,
                                          min_aln_length = self.min_nuc_length)
             if idx is None:
-                unmapped_mdl_chains.append(s.GetName())
+                mdl_chains_without_chem_mapping.append(s.GetName())
             else:
                 mapping[idx].append(s.GetName())
                 alns[idx].append(aln)
 
-        return (mapping, alns, unmapped_mdl_chains, mdl)
+        return (mapping, alns, mdl_chains_without_chem_mapping, mdl)
 
 
     def GetlDDTMapping(self, model, inclusion_radius=15.0,
@@ -893,10 +893,10 @@ class ChainMapper:
             raise RuntimeError(f"Strategy must be in {strategies}")
 
         if chem_mapping_result is None:
-            chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
+            chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \
             self.GetChemMapping(model)
         else:
-            chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
+            chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \
             chem_mapping_result
 
         ref_mdl_alns =  _GetRefMdlAlns(self.chem_groups,
@@ -914,7 +914,7 @@ class ChainMapper:
                         aln = ref_mdl_alns[(ref_ch, mdl_ch)]
                         alns[(ref_ch, mdl_ch)] = aln
             return MappingResult(self.target, mdl, self.chem_groups, chem_mapping,
-                                 unmapped_mdl_chains, one_to_one, alns)
+                                 mdl_chains_without_chem_mapping, one_to_one, alns)
 
         if strategy == "heuristic":
             if _NMappingsWithin(self.chem_groups, chem_mapping,
@@ -956,7 +956,7 @@ class ChainMapper:
                     alns[(ref_ch, mdl_ch)] = aln
 
         return MappingResult(self.target, mdl, self.chem_groups, chem_mapping,
-                             unmapped_mdl_chains, mapping, alns,
+                             mdl_chains_without_chem_mapping, mapping, alns,
                              opt_score = opt_lddt)
 
 
@@ -1031,10 +1031,10 @@ class ChainMapper:
             raise RuntimeError(f"strategy must be {strategies}")
 
         if chem_mapping_result is None:
-            chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
+            chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \
             self.GetChemMapping(model)
         else:
-            chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
+            chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \
             chem_mapping_result
         ref_mdl_alns =  _GetRefMdlAlns(self.chem_groups,
                                        self.chem_group_alignments,
@@ -1050,7 +1050,7 @@ class ChainMapper:
                         aln = ref_mdl_alns[(ref_ch, mdl_ch)]
                         alns[(ref_ch, mdl_ch)] = aln
             return MappingResult(self.target, mdl, self.chem_groups, chem_mapping,
-                                 unmapped_mdl_chains, one_to_one, alns)
+                                 mdl_chains_without_chem_mapping, one_to_one, alns)
 
         if strategy == "heuristic":
             if _NMappingsWithin(self.chem_groups, chem_mapping,
@@ -1093,7 +1093,7 @@ class ChainMapper:
                     alns[(ref_ch, mdl_ch)] = aln
 
         return MappingResult(self.target, mdl, self.chem_groups, chem_mapping,
-                             unmapped_mdl_chains, mapping, alns,
+                             mdl_chains_without_chem_mapping, mapping, alns,
                              opt_score = opt_qsscore)
 
     def GetRMSDMapping(self, model, strategy = "heuristic", subsampling=50,
@@ -1146,10 +1146,10 @@ class ChainMapper:
             raise RuntimeError(f"strategy must be {strategies}")
 
         if chem_mapping_result is None:
-            chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
+            chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \
             self.GetChemMapping(model)
         else:
-            chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
+            chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \
             chem_mapping_result
         ref_mdl_alns =  _GetRefMdlAlns(self.chem_groups,
                                        self.chem_group_alignments,
@@ -1166,7 +1166,7 @@ class ChainMapper:
                         aln = ref_mdl_alns[(ref_ch, mdl_ch)]
                         alns[(ref_ch, mdl_ch)] = aln
             return MappingResult(self.target, mdl, self.chem_groups, chem_mapping,
-                                 unmapped_mdl_chains, one_to_one, alns)
+                                 mdl_chains_without_chem_mapping, one_to_one, alns)
 
         trg_group_pos, mdl_group_pos = _GetRefPos(self.target, mdl,
                                                   self.chem_group_alignments,
@@ -1237,7 +1237,7 @@ class ChainMapper:
                     alns[(ref_ch, mdl_ch)] = aln
 
         return MappingResult(self.target, mdl, self.chem_groups, chem_mapping,
-                             unmapped_mdl_chains, final_mapping, alns)
+                             mdl_chains_without_chem_mapping, final_mapping, alns)
 
     def GetMapping(self, model, n_max_naive = 40320):
         """ Convenience function to get mapping with currently preferred method
@@ -1365,10 +1365,10 @@ class ChainMapper:
 
         # perform mapping and alignments on full structures
         if chem_mapping_result is None:
-            chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
+            chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \
             self.GetChemMapping(model)
         else:
-            chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
+            chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \
             chem_mapping_result
         ref_mdl_alns =  _GetRefMdlAlns(self.chem_groups,
                                        self.chem_group_alignments,
@@ -1529,7 +1529,7 @@ class ChainMapper:
                       :attr:`chem_groups`
         :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
         """
-        chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
+        chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \
         self.GetChemMapping(model)
         return _NMappings(self.chem_groups, chem_mapping)
 
diff --git a/modules/mol/alg/pymod/ligand_scoring_base.py b/modules/mol/alg/pymod/ligand_scoring_base.py
index f676463bd..cf0a73f49 100644
--- a/modules/mol/alg/pymod/ligand_scoring_base.py
+++ b/modules/mol/alg/pymod/ligand_scoring_base.py
@@ -502,7 +502,7 @@ class LigandScorer:
         self.__chem_mapping = None
         self.__chem_group_alns = None
         self.__ref_mdl_alns = None
-        self.__unmapped_mdl_chains = None
+        self.__mdl_chains_without_chem_mapping = None
         self.__chain_mapping_mdl = None
 
         # keep track of states
@@ -1165,7 +1165,7 @@ class LigandScorer:
     def _chem_mapping(self):
         if self.__chem_mapping is None:
             self.__chem_mapping, self.__chem_group_alns, \
-            self.__unmapped_mdl_chains, self.__chain_mapping_mdl = \
+            self.__mdl_chains_without_chem_mapping, self.__chain_mapping_mdl = \
             self._chain_mapper.GetChemMapping(self.model)
         return self.__chem_mapping
 
@@ -1173,7 +1173,7 @@ class LigandScorer:
     def _chem_group_alns(self):
         if self.__chem_group_alns is None:   
             self.__chem_mapping, self.__chem_group_alns, \
-            self.__unmapped_mdl_chains, self.__chain_mapping_mdl = \
+            self.__mdl_chains_without_chem_mapping, self.__chain_mapping_mdl = \
             self._chain_mapper.GetChemMapping(self.model)
         return self.__chem_group_alns
 
@@ -1192,17 +1192,17 @@ class LigandScorer:
         if self.__chain_mapping_mdl is None:
             with _SinkVerbosityLevel():
                 self.__chem_mapping, self.__chem_group_alns, \
-                self.__unmapped_mdl_chains, self.__chain_mapping_mdl = \
+                self.__mdl_chains_without_chem_mapping, self.__chain_mapping_mdl = \
                 self._chain_mapper.GetChemMapping(self.model)
         return self.__chain_mapping_mdl
 
     @property
-    def _unmapped_mdl_chains(self):
-        if self.__unmapped_mdl_chains is None:
+    def _mdl_chains_without_chem_mapping(self):
+        if self.__mdl_chains_without_chem_mapping is None:
             self.__chem_mapping, self.__chem_group_alns, \
-            self.__unmapped_mdl_chains, self.__chain_mapping_mdl = \
+            self.__mdl_chains_without_chem_mapping, self.__chain_mapping_mdl = \
             self._chain_mapper.GetChemMapping(self.model)
-        return self.__unmapped_mdl_chains
+        return self.__mdl_chains_without_chem_mapping
 
     def _compute_scores(self):
         """
diff --git a/modules/mol/alg/pymod/ligand_scoring_scrmsd.py b/modules/mol/alg/pymod/ligand_scoring_scrmsd.py
index b14c68b7c..699642c8c 100644
--- a/modules/mol/alg/pymod/ligand_scoring_scrmsd.py
+++ b/modules/mol/alg/pymod/ligand_scoring_scrmsd.py
@@ -369,7 +369,7 @@ class SCRMSDScorer(ligand_scoring_base.LigandScorer):
 
         return (self._get_repr_input[mdl_ligand.hash_code][1],
                 self._chem_group_alns,
-                self._unmapped_mdl_chains,
+                self._mdl_chains_without_chem_mapping,
                 self._get_repr_input[mdl_ligand.hash_code][0])
 
 
diff --git a/modules/mol/alg/pymod/scoring.py b/modules/mol/alg/pymod/scoring.py
index 62a0d92be..9f88d18fc 100644
--- a/modules/mol/alg/pymod/scoring.py
+++ b/modules/mol/alg/pymod/scoring.py
@@ -3224,7 +3224,7 @@ class Scorer:
         LogInfo("Setting custom chain mapping")
 
         chain_mapper = self.chain_mapper
-        chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
+        chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \
         chain_mapper.GetChemMapping(self.model)
 
         # now that we have a chem mapping, lets do consistency checks
@@ -3308,7 +3308,7 @@ class Scorer:
         return chain_mapping.MappingResult(chain_mapper.target, mdl,
                                            chain_mapper.chem_groups,
                                            chem_mapping,
-                                           unmapped_mdl_chains,
+                                           mdl_chains_without_chem_mapping,
                                            final_mapping, alns)
 
     def _compute_tmscore(self):
diff --git a/modules/mol/alg/tests/test_chain_mapping.py b/modules/mol/alg/tests/test_chain_mapping.py
index 03a0bb6bc..8cca758f1 100644
--- a/modules/mol/alg/tests/test_chain_mapping.py
+++ b/modules/mol/alg/tests/test_chain_mapping.py
@@ -176,7 +176,7 @@ class TestChainMapper(unittest.TestCase):
 
     mapper = ChainMapper(ref)
 
-    chem_mapping, alns, unmapped_mdl_chains, mdl_view = mapper.GetChemMapping(mdl)
+    chem_mapping, alns, mdl_chains_without_chem_mapping, mdl_view = mapper.GetChemMapping(mdl)
 
     self.assertEqual(len(mapper.chem_groups), 3)
     self.assertEqual(len(chem_mapping), len(mapper.chem_groups))
@@ -201,12 +201,12 @@ class TestChainMapper(unittest.TestCase):
     mdl = _LoadFile("mdl_different_chain_mdl.pdb")
     ref = _LoadFile("mdl_different_chain_ref.pdb")
     mapper = ChainMapper(ref)
-    chem_mapping, alns, unmapped_mdl_chains, mdl_view = mapper.GetChemMapping(mdl)
-    self.assertTrue(len(unmapped_mdl_chains)==0)
+    chem_mapping, alns, mdl_chains_without_chem_mapping, mdl_view = mapper.GetChemMapping(mdl)
+    self.assertTrue(len(mdl_chains_without_chem_mapping)==0)
 
     mapper = ChainMapper(ref, mdl_map_pep_seqid_thr=70)
-    chem_mapping, alns, unmapped_mdl_chains, mdl_view = mapper.GetChemMapping(mdl)
-    self.assertEqual(unmapped_mdl_chains, ["A"])
+    chem_mapping, alns, mdl_chains_without_chem_mapping, mdl_view = mapper.GetChemMapping(mdl)
+    self.assertEqual(mdl_chains_without_chem_mapping, ["A"])
 
   def test_chain_mapping(self):
     ref = _LoadFile("3l1p.1.pdb")
-- 
GitLab