diff --git a/actions/ost-compare-structures b/actions/ost-compare-structures index e88068505e10a7c1b1293b2570dafc0fc995933f..f94d79bc10168f7fe698af8c7e8f82eb34b5608a 100644 --- a/actions/ost-compare-structures +++ b/actions/ost-compare-structures @@ -224,6 +224,28 @@ def _ParseArgs(): "\"model_clashes\", \"model_bad_bonds\", \"model_bad_angles\" " "and the respective reference counterparts.")) + parser.add_argument( + "--bb-lddt", + dest="bb_lddt", + default=False, + action="store_true", + help=("Compute global lDDT score with default parameterization and " + "store as key \"bb_lddt\". lDDT in this case is only computed on " + "backbone atoms: CA for peptides and C3' for nucleotides")) + + parser.add_argument( + "--bb-local-lddt", + dest="bb_local_lddt", + default=False, + action="store_true", + help=("Compute per-residue lDDT scores with default parameterization " + "and store as key \"bb_local_lddt\". lDDT in this case is only " + "computed on backbone atoms: CA for peptides and C3' for " + "nucleotides. Score for model residue with " + "number 42 in chain X can be extracted with: " + "data[\"local_lddt\"][\"X\"][\"42\"]. If there is an insertion " + "code, lets say A, the last key becomes \"42A\"")) + parser.add_argument( "--cad-score", dest="cad_score", @@ -490,6 +512,12 @@ def _Process(model, reference, args): out["reference_bad_bonds"] = [x.ToJSON() for x in scorer.target_bad_bonds] out["reference_bad_angles"] = [x.ToJSON() for x in scorer.target_bad_angles] + if args.bb_lddt: + out["bb_lddt"] = scorer.bb_lddt + + if args.bb_local_lddt: + out["bb_local_lddt"] = _LocalScoresToJSONDict(scorer.bb_local_lddt) + if args.cad_score: out["cad_score"] = scorer.cad_score diff --git a/modules/doc/actions.rst b/modules/doc/actions.rst index 08276326718da6b622169ca1d02dcd28699e5edd..c8ddb6da55fd59742a6c9f728228f1f36f8757ed 100644 --- a/modules/doc/actions.rst +++ b/modules/doc/actions.rst @@ -38,7 +38,8 @@ Details on the usage (output of ``ost compare-structures --help``): [-mb MODEL_BIOUNIT] [-rb REFERENCE_BIOUNIT] [-rna] [-ec] [-d] [-ds DUMP_SUFFIX] [-ft] [-c CHAIN_MAPPING [CHAIN_MAPPING ...]] [--lddt] - [--local-lddt] [--cad-score] [--local-cad-score] + [--local-lddt] [--bb-lddt] [--bb-local-lddt] + [--cad-score] [--local-cad-score] [--cad-exec CAD_EXEC] [--qs-score] [--rigid-scores] [--interface-scores] [--patch-scores] @@ -48,7 +49,7 @@ Details on the usage (output of ``ost compare-structures --help``): Example: ost compare-structures -m model.pdb -r reference.cif Loads the structures and performs basic cleanup: - + * Assign elements according to the PDB Chemical Component Dictionary * Map nonstandard residues to their parent residues as defined by the PDB Chemical Component Dictionary, e.g. phospho-serine => serine @@ -57,12 +58,12 @@ Details on the usage (output of ``ost compare-structures --help``): * Remove unknown atoms, i.e. atoms that are not expected according to the PDB Chemical Component Dictionary * Select for peptide/nucleotide residues - + The cleaned structures are optionally dumped using -d/--dump-structures Output is written in JSON format (default: out.json). In case of no additional options, this is a dictionary with 8 keys: - + * "reference_chains": Chain names of reference * "model_chains": Chain names of model * "chem_groups": Groups of polypeptides/polynucleotides from reference that @@ -92,17 +93,17 @@ Details on the usage (output of ``ost compare-structures --help``): BLOSUM62 (NUC44 for nucleotides). Many benchmarking scenarios preprocess the structures to ensure matching residue numbers (CASP/CAMEO). In these cases, enabling -rna/--residue-number-alignment is recommended. - + Each score is opt-in and can be enabled with optional arguments. - + Example to compute global and per-residue lDDT values as well as QS-score: - + ost compare-structures -m model.pdb -r reference.cif --lddt --local-lddt --qs-score - + Example to inject custom chain mapping - + ost compare-structures -m model.pdb -r reference.cif -c A:B B:A - + optional arguments: -h, --help show this help message and exit -m MODEL, --model MODEL @@ -122,13 +123,13 @@ Details on the usage (output of ``ost compare-structures --help``): filepath if not given. -mb MODEL_BIOUNIT, --model-biounit MODEL_BIOUNIT Only has an effect if model is in mmcif format. By - default, the assymetric unit (AU) is used for scoring. + default, the asymmetric unit (AU) is used for scoring. If there are biounits defined in the mmcif file, you can specify the (0-based) index of the one which should be used. -rb REFERENCE_BIOUNIT, --reference-biounit REFERENCE_BIOUNIT Only has an effect if reference is in mmcif format. By - default, the assymetric unit (AU) is used for scoring. + default, the asymmetric unit (AU) is used for scoring. If there are biounits defined in the mmcif file, you can specify the (0-based) index of the one which should be used. @@ -170,6 +171,18 @@ Details on the usage (output of ``ost compare-structures --help``): lDDT are reported as keys "model_clashes", "model_bad_bonds", "model_bad_angles" and the respective reference counterparts. + --bb-lddt Compute global lDDT score with default + parameterization and store as key "bb_lddt". lDDT in + this case is only computed on backbone atoms: CA for + peptides and C3' for nucleotides + --bb-local-lddt Compute per-residue lDDT scores with default + parameterization and store as key "bb_local_lddt". + lDDT in this case is only computed on backbone atoms: + CA for peptides and C3' for nucleotides. Score for + model residue with number 42 in chain X can be + extracted with: data["local_lddt"]["X"]["42"]. If + there is an insertion code, lets say A, the last key + becomes "42A" --cad-score Compute global CAD's atom-atom (AA) score and store as key "cad_score". --residue-number-alignment must be enabled to compute this score. Requires diff --git a/modules/mol/alg/pymod/scoring.py b/modules/mol/alg/pymod/scoring.py index 97423299d0702509af5fc292a46f651b34ebb089..79fd4a29fa583116f2f3f0c0158873177d574f9d 100644 --- a/modules/mol/alg/pymod/scoring.py +++ b/modules/mol/alg/pymod/scoring.py @@ -214,11 +214,14 @@ class Scorer: # lazily constructed scorer objects self._lddt_scorer = None + self._bb_lddt_scorer = None self._qs_scorer = None # lazily computed scores self._lddt = None self._local_lddt = None + self._bb_lddt = None + self._bb_local_lddt = None self._qs_global = None self._qs_best = None @@ -463,6 +466,19 @@ class Scorer: self._lddt_scorer = lDDTScorer(self.stereochecked_target) return self._lddt_scorer + @property + def bb_lddt_scorer(self): + """ Backbone only lDDT scorer for :attr:`~target` + + No stereochecks applied for bb only lDDT which considers CA atoms + for peptides and C3' atoms for nucleotides. + + :type: :class:`ost.mol.alg.lddt.lDDTScorer` + """ + if self._bb_lddt_scorer is None: + self._bb_lddt_scorer = lDDTScorer(self.target, bb_only=True) + return self._bb_lddt_scorer + @property def qs_scorer(self): """ QS scorer constructed from :attr:`~mapping` @@ -506,6 +522,36 @@ class Scorer: self._compute_lddt() return self._local_lddt + @property + def bb_lddt(self): + """ Backbone only global lDDT score in range [0.0, 1.0] + + Computed based on :attr:`~model` on backbone atoms only. This is CA for + peptides and C3' for nucleotides. No stereochecks are performed. In case + of oligomers, :attr:`~mapping` is used. + + :type: :class:`float` + """ + if self._bb_lddt is None: + self._compute_bb_lddt() + return self._bb_lddt + + @property + def bb_local_lddt(self): + """ Backbone only per residue lDDT scores in range [0.0, 1.0] + + Computed based on :attr:`~model` on backbone atoms only. This is CA for + peptides and C3' for nucleotides. No stereochecks are performed. If a + residue is not covered by the target or is in a chain skipped by the + chain mapping procedure (happens for super short chains), the respective + score is set to None. In case of oligomers, :attr:`~mapping` is used. + + :type: :class:`dict` + """ + if self._bb_local_lddt is None: + self._compute_bb_lddt() + return self._bb_local_lddt + @property def qs_global(self): """ Global QS-score @@ -1022,6 +1068,42 @@ class Scorer: self._lddt = lddt_score self._local_lddt = local_lddt + def _compute_bb_lddt(self): + + # make alignments accessible by mdl seq name + alns = dict() + for aln in self.aln: + mdl_seq = aln.GetSequence(1) + alns[mdl_seq.name] = aln + + # lDDT requires a flat mapping with mdl_ch as key and trg_ch as value + flat_mapping = self.mapping.GetFlatMapping(mdl_as_key=True) + lddt_chain_mapping = dict() + for mdl_ch, trg_ch in flat_mapping.items(): + if mdl_ch in alns: + lddt_chain_mapping[mdl_ch] = trg_ch + + lddt_score = self.bb_lddt_scorer.lDDT(self.model, + chain_mapping = lddt_chain_mapping, + residue_mapping = alns, + check_resnames=False, + local_lddt_prop="bb_lddt")[0] + local_lddt = dict() + for r in self.model.residues: + cname = r.GetChain().GetName() + if cname not in local_lddt: + local_lddt[cname] = dict() + if r.HasProp("bb_lddt"): + score = round(r.GetFloatProp("bb_lddt"), 3) + local_lddt[cname][r.GetNumber()] = score + else: + # not covered by trg or skipped in chain mapping procedure + # the latter happens if its part of a super short chain + local_lddt[cname][r.GetNumber()] = None + + self._bb_lddt = lddt_score + self._bb_local_lddt = local_lddt + def _compute_qs(self): qs_score_result = self.qs_scorer.Score(self.mapping.mapping) self._qs_global = qs_score_result.QS_global