enable backbone only lDDT in Scorer and compare-structures

c98c30d0 · Studer Gabriel · bd4b59bb · c98c30d0 · c98c30d0 · c98c30d0
Commit c98c30d0 authored 2 years ago by Studer Gabriel
--- a/actions/ost-compare-structures
+++ b/actions/ost-compare-structures
@@ -224,6 +224,28 @@ def _ParseArgs():
              "\"model_clashes\", \"model_bad_bonds\", \"model_bad_angles\" "
              "and the respective reference counterparts."))

+    parser.add_argument(
+        "--bb-lddt",
+        dest="bb_lddt",
+        default=False,
+        action="store_true",
+        help=("Compute global lDDT score with default parameterization and "
+              "store as key \"bb_lddt\". lDDT in this case is only computed on "
+              "backbone atoms: CA for peptides and C3' for nucleotides"))
+
+    parser.add_argument(
+        "--bb-local-lddt",
+        dest="bb_local_lddt",
+        default=False,
+        action="store_true",
+        help=("Compute per-residue lDDT scores with default parameterization "
+              "and store as key \"bb_local_lddt\". lDDT in this case is only "
+              "computed on backbone atoms: CA for peptides and C3' for "
+              "nucleotides. Score for model residue with "
+              "number 42 in chain X can be extracted with: "
+              "data[\"local_lddt\"][\"X\"][\"42\"]. If there is an insertion "
+              "code, lets say A, the last key becomes \"42A\""))
+
    parser.add_argument(
        "--cad-score",
        dest="cad_score",
@@ -490,6 +512,12 @@ def _Process(model, reference, args):
        out["reference_bad_bonds"] = [x.ToJSON() for x in scorer.target_bad_bonds]
        out["reference_bad_angles"] = [x.ToJSON() for x in scorer.target_bad_angles]

+    if args.bb_lddt:
+        out["bb_lddt"] = scorer.bb_lddt
+
+    if args.bb_local_lddt:
+        out["bb_local_lddt"] = _LocalScoresToJSONDict(scorer.bb_local_lddt)
+
    if args.cad_score:
        out["cad_score"] = scorer.cad_score


--- a/modules/doc/actions.rst
+++ b/modules/doc/actions.rst
@@ -38,7 +38,8 @@ Details on the usage (output of ``ost compare-structures --help``):
                                [-mb MODEL_BIOUNIT] [-rb REFERENCE_BIOUNIT]
                                [-rna] [-ec] [-d] [-ds DUMP_SUFFIX] [-ft]
                                [-c CHAIN_MAPPING [CHAIN_MAPPING ...]] [--lddt]
-                                [--local-lddt] [--cad-score] [--local-cad-score]
+                                [--local-lddt] [--bb-lddt] [--bb-local-lddt]
+                                [--cad-score] [--local-cad-score]
                                [--cad-exec CAD_EXEC] [--qs-score]
                                [--rigid-scores] [--interface-scores]
                                [--patch-scores]
@@ -48,7 +49,7 @@ Details on the usage (output of ``ost compare-structures --help``):
  Example: ost compare-structures -m model.pdb -r reference.cif
  
  Loads the structures and performs basic cleanup:
-
+  
   * Assign elements according to the PDB Chemical Component Dictionary
   * Map nonstandard residues to their parent residues as defined by the PDB
     Chemical Component Dictionary, e.g. phospho-serine => serine
@@ -57,12 +58,12 @@ Details on the usage (output of ``ost compare-structures --help``):
   * Remove unknown atoms, i.e. atoms that are not expected according to the PDB
     Chemical Component Dictionary
   * Select for peptide/nucleotide residues
-
+  
  The cleaned structures are optionally dumped using -d/--dump-structures
  
  Output is written in JSON format (default: out.json). In case of no additional
  options, this is a dictionary with 8 keys:
-  
+
   * "reference_chains": Chain names of reference
   * "model_chains": Chain names of model
   * "chem_groups": Groups of polypeptides/polynucleotides from reference that
@@ -92,17 +93,17 @@ Details on the usage (output of ``ost compare-structures --help``):
  BLOSUM62 (NUC44 for nucleotides). Many benchmarking scenarios preprocess the
  structures to ensure matching residue numbers (CASP/CAMEO). In these cases,
  enabling -rna/--residue-number-alignment is recommended.
-  
+
  Each score is opt-in and can be enabled with optional arguments.
-  
+
  Example to compute global and per-residue lDDT values as well as QS-score:
-  
+
  ost compare-structures -m model.pdb -r reference.cif --lddt --local-lddt --qs-score
-  
+
  Example to inject custom chain mapping
-  
+
  ost compare-structures -m model.pdb -r reference.cif -c A:B B:A
-  
+
  optional arguments:
    -h, --help            show this help message and exit
    -m MODEL, --model MODEL
@@ -122,13 +123,13 @@ Details on the usage (output of ``ost compare-structures --help``):
                          filepath if not given.
    -mb MODEL_BIOUNIT, --model-biounit MODEL_BIOUNIT
                          Only has an effect if model is in mmcif format. By
-                          default, the assymetric unit (AU) is used for scoring.
+                          default, the asymmetric unit (AU) is used for scoring.
                          If there are biounits defined in the mmcif file, you
                          can specify the (0-based) index of the one which
                          should be used.
    -rb REFERENCE_BIOUNIT, --reference-biounit REFERENCE_BIOUNIT
                          Only has an effect if reference is in mmcif format. By
-                          default, the assymetric unit (AU) is used for scoring.
+                          default, the asymmetric unit (AU) is used for scoring.
                          If there are biounits defined in the mmcif file, you
                          can specify the (0-based) index of the one which
                          should be used.
@@ -170,6 +171,18 @@ Details on the usage (output of ``ost compare-structures --help``):
                          lDDT are reported as keys "model_clashes",
                          "model_bad_bonds", "model_bad_angles" and the
                          respective reference counterparts.
+    --bb-lddt             Compute global lDDT score with default
+                          parameterization and store as key "bb_lddt". lDDT in
+                          this case is only computed on backbone atoms: CA for
+                          peptides and C3' for nucleotides
+    --bb-local-lddt       Compute per-residue lDDT scores with default
+                          parameterization and store as key "bb_local_lddt".
+                          lDDT in this case is only computed on backbone atoms:
+                          CA for peptides and C3' for nucleotides. Score for
+                          model residue with number 42 in chain X can be
+                          extracted with: data["local_lddt"]["X"]["42"]. If
+                          there is an insertion code, lets say A, the last key
+                          becomes "42A"
    --cad-score           Compute global CAD's atom-atom (AA) score and store as
                          key "cad_score". --residue-number-alignment must be
                          enabled to compute this score. Requires

--- a/modules/mol/alg/pymod/scoring.py
+++ b/modules/mol/alg/pymod/scoring.py
@@ -214,11 +214,14 @@ class Scorer:

        # lazily constructed scorer objects
        self._lddt_scorer = None
+        self._bb_lddt_scorer = None
        self._qs_scorer = None

        # lazily computed scores
        self._lddt = None
        self._local_lddt = None
+        self._bb_lddt = None
+        self._bb_local_lddt = None

        self._qs_global = None
        self._qs_best = None
@@ -463,6 +466,19 @@ class Scorer:
            self._lddt_scorer = lDDTScorer(self.stereochecked_target)
        return self._lddt_scorer

+    @property
+    def bb_lddt_scorer(self):
+        """ Backbone only lDDT scorer for :attr:`~target`
+
+        No stereochecks applied for bb only lDDT which considers CA atoms
+        for peptides and C3' atoms for nucleotides.
+
+        :type: :class:`ost.mol.alg.lddt.lDDTScorer`
+        """
+        if self._bb_lddt_scorer is None:
+            self._bb_lddt_scorer = lDDTScorer(self.target, bb_only=True)
+        return self._bb_lddt_scorer
+
    @property
    def qs_scorer(self):
        """ QS scorer constructed from :attr:`~mapping`
@@ -506,6 +522,36 @@ class Scorer:
            self._compute_lddt()
        return self._local_lddt

+    @property
+    def bb_lddt(self):
+        """ Backbone only global lDDT score in range [0.0, 1.0]
+
+        Computed based on :attr:`~model` on backbone atoms only. This is CA for
+        peptides and C3' for nucleotides. No stereochecks are performed. In case
+        of oligomers, :attr:`~mapping` is used.
+
+        :type: :class:`float`
+        """
+        if self._bb_lddt is None:
+            self._compute_bb_lddt()
+        return self._bb_lddt
+    
+    @property
+    def bb_local_lddt(self):
+        """ Backbone only per residue lDDT scores in range [0.0, 1.0]
+
+        Computed based on :attr:`~model` on backbone atoms only. This is CA for
+        peptides and C3' for nucleotides. No stereochecks are performed. If a
+        residue is not covered by the target or is in a chain skipped by the
+        chain mapping procedure (happens for super short chains), the respective
+        score is set to None. In case of oligomers, :attr:`~mapping` is used.
+
+        :type: :class:`dict`
+        """
+        if self._bb_local_lddt is None:
+            self._compute_bb_lddt()
+        return self._bb_local_lddt
+
    @property
    def qs_global(self):
        """  Global QS-score
@@ -1022,6 +1068,42 @@ class Scorer:
        self._lddt = lddt_score
        self._local_lddt = local_lddt

+    def _compute_bb_lddt(self):
+
+        # make alignments accessible by mdl seq name
+        alns = dict()
+        for aln in self.aln:
+            mdl_seq = aln.GetSequence(1)
+            alns[mdl_seq.name] = aln
+
+        # lDDT requires a flat mapping with mdl_ch as key and trg_ch as value
+        flat_mapping = self.mapping.GetFlatMapping(mdl_as_key=True)
+        lddt_chain_mapping = dict()
+        for mdl_ch, trg_ch in flat_mapping.items():
+            if mdl_ch in alns:
+                lddt_chain_mapping[mdl_ch] = trg_ch
+
+        lddt_score = self.bb_lddt_scorer.lDDT(self.model,
+                                              chain_mapping = lddt_chain_mapping,
+                                              residue_mapping = alns,
+                                              check_resnames=False,
+                                              local_lddt_prop="bb_lddt")[0]
+        local_lddt = dict()
+        for r in self.model.residues:
+            cname = r.GetChain().GetName()
+            if cname not in local_lddt:
+                local_lddt[cname] = dict()
+            if r.HasProp("bb_lddt"):
+                score = round(r.GetFloatProp("bb_lddt"), 3)
+                local_lddt[cname][r.GetNumber()] = score
+            else:
+                # not covered by trg or skipped in chain mapping procedure
+                # the latter happens if its part of a super short chain
+                local_lddt[cname][r.GetNumber()] = None
+
+        self._bb_lddt = lddt_score
+        self._bb_local_lddt = local_lddt
+
    def _compute_qs(self):
        qs_score_result = self.qs_scorer.Score(self.mapping.mapping)
        self._qs_global = qs_score_result.QS_global