Skip to content
Snippets Groups Projects
Commit c98c30d0 authored by Studer Gabriel's avatar Studer Gabriel
Browse files

enable backbone only lDDT in Scorer and compare-structures

parent bd4b59bb
No related branches found
No related tags found
No related merge requests found
......@@ -224,6 +224,28 @@ def _ParseArgs():
"\"model_clashes\", \"model_bad_bonds\", \"model_bad_angles\" "
"and the respective reference counterparts."))
parser.add_argument(
"--bb-lddt",
dest="bb_lddt",
default=False,
action="store_true",
help=("Compute global lDDT score with default parameterization and "
"store as key \"bb_lddt\". lDDT in this case is only computed on "
"backbone atoms: CA for peptides and C3' for nucleotides"))
parser.add_argument(
"--bb-local-lddt",
dest="bb_local_lddt",
default=False,
action="store_true",
help=("Compute per-residue lDDT scores with default parameterization "
"and store as key \"bb_local_lddt\". lDDT in this case is only "
"computed on backbone atoms: CA for peptides and C3' for "
"nucleotides. Score for model residue with "
"number 42 in chain X can be extracted with: "
"data[\"local_lddt\"][\"X\"][\"42\"]. If there is an insertion "
"code, lets say A, the last key becomes \"42A\""))
parser.add_argument(
"--cad-score",
dest="cad_score",
......@@ -490,6 +512,12 @@ def _Process(model, reference, args):
out["reference_bad_bonds"] = [x.ToJSON() for x in scorer.target_bad_bonds]
out["reference_bad_angles"] = [x.ToJSON() for x in scorer.target_bad_angles]
if args.bb_lddt:
out["bb_lddt"] = scorer.bb_lddt
if args.bb_local_lddt:
out["bb_local_lddt"] = _LocalScoresToJSONDict(scorer.bb_local_lddt)
if args.cad_score:
out["cad_score"] = scorer.cad_score
......
......@@ -38,7 +38,8 @@ Details on the usage (output of ``ost compare-structures --help``):
[-mb MODEL_BIOUNIT] [-rb REFERENCE_BIOUNIT]
[-rna] [-ec] [-d] [-ds DUMP_SUFFIX] [-ft]
[-c CHAIN_MAPPING [CHAIN_MAPPING ...]] [--lddt]
[--local-lddt] [--cad-score] [--local-cad-score]
[--local-lddt] [--bb-lddt] [--bb-local-lddt]
[--cad-score] [--local-cad-score]
[--cad-exec CAD_EXEC] [--qs-score]
[--rigid-scores] [--interface-scores]
[--patch-scores]
......@@ -48,7 +49,7 @@ Details on the usage (output of ``ost compare-structures --help``):
Example: ost compare-structures -m model.pdb -r reference.cif
Loads the structures and performs basic cleanup:
* Assign elements according to the PDB Chemical Component Dictionary
* Map nonstandard residues to their parent residues as defined by the PDB
Chemical Component Dictionary, e.g. phospho-serine => serine
......@@ -57,12 +58,12 @@ Details on the usage (output of ``ost compare-structures --help``):
* Remove unknown atoms, i.e. atoms that are not expected according to the PDB
Chemical Component Dictionary
* Select for peptide/nucleotide residues
The cleaned structures are optionally dumped using -d/--dump-structures
Output is written in JSON format (default: out.json). In case of no additional
options, this is a dictionary with 8 keys:
* "reference_chains": Chain names of reference
* "model_chains": Chain names of model
* "chem_groups": Groups of polypeptides/polynucleotides from reference that
......@@ -92,17 +93,17 @@ Details on the usage (output of ``ost compare-structures --help``):
BLOSUM62 (NUC44 for nucleotides). Many benchmarking scenarios preprocess the
structures to ensure matching residue numbers (CASP/CAMEO). In these cases,
enabling -rna/--residue-number-alignment is recommended.
Each score is opt-in and can be enabled with optional arguments.
Example to compute global and per-residue lDDT values as well as QS-score:
ost compare-structures -m model.pdb -r reference.cif --lddt --local-lddt --qs-score
Example to inject custom chain mapping
ost compare-structures -m model.pdb -r reference.cif -c A:B B:A
optional arguments:
-h, --help show this help message and exit
-m MODEL, --model MODEL
......@@ -122,13 +123,13 @@ Details on the usage (output of ``ost compare-structures --help``):
filepath if not given.
-mb MODEL_BIOUNIT, --model-biounit MODEL_BIOUNIT
Only has an effect if model is in mmcif format. By
default, the assymetric unit (AU) is used for scoring.
default, the asymmetric unit (AU) is used for scoring.
If there are biounits defined in the mmcif file, you
can specify the (0-based) index of the one which
should be used.
-rb REFERENCE_BIOUNIT, --reference-biounit REFERENCE_BIOUNIT
Only has an effect if reference is in mmcif format. By
default, the assymetric unit (AU) is used for scoring.
default, the asymmetric unit (AU) is used for scoring.
If there are biounits defined in the mmcif file, you
can specify the (0-based) index of the one which
should be used.
......@@ -170,6 +171,18 @@ Details on the usage (output of ``ost compare-structures --help``):
lDDT are reported as keys "model_clashes",
"model_bad_bonds", "model_bad_angles" and the
respective reference counterparts.
--bb-lddt Compute global lDDT score with default
parameterization and store as key "bb_lddt". lDDT in
this case is only computed on backbone atoms: CA for
peptides and C3' for nucleotides
--bb-local-lddt Compute per-residue lDDT scores with default
parameterization and store as key "bb_local_lddt".
lDDT in this case is only computed on backbone atoms:
CA for peptides and C3' for nucleotides. Score for
model residue with number 42 in chain X can be
extracted with: data["local_lddt"]["X"]["42"]. If
there is an insertion code, lets say A, the last key
becomes "42A"
--cad-score Compute global CAD's atom-atom (AA) score and store as
key "cad_score". --residue-number-alignment must be
enabled to compute this score. Requires
......
......@@ -214,11 +214,14 @@ class Scorer:
# lazily constructed scorer objects
self._lddt_scorer = None
self._bb_lddt_scorer = None
self._qs_scorer = None
# lazily computed scores
self._lddt = None
self._local_lddt = None
self._bb_lddt = None
self._bb_local_lddt = None
self._qs_global = None
self._qs_best = None
......@@ -463,6 +466,19 @@ class Scorer:
self._lddt_scorer = lDDTScorer(self.stereochecked_target)
return self._lddt_scorer
@property
def bb_lddt_scorer(self):
""" Backbone only lDDT scorer for :attr:`~target`
No stereochecks applied for bb only lDDT which considers CA atoms
for peptides and C3' atoms for nucleotides.
:type: :class:`ost.mol.alg.lddt.lDDTScorer`
"""
if self._bb_lddt_scorer is None:
self._bb_lddt_scorer = lDDTScorer(self.target, bb_only=True)
return self._bb_lddt_scorer
@property
def qs_scorer(self):
""" QS scorer constructed from :attr:`~mapping`
......@@ -506,6 +522,36 @@ class Scorer:
self._compute_lddt()
return self._local_lddt
@property
def bb_lddt(self):
""" Backbone only global lDDT score in range [0.0, 1.0]
Computed based on :attr:`~model` on backbone atoms only. This is CA for
peptides and C3' for nucleotides. No stereochecks are performed. In case
of oligomers, :attr:`~mapping` is used.
:type: :class:`float`
"""
if self._bb_lddt is None:
self._compute_bb_lddt()
return self._bb_lddt
@property
def bb_local_lddt(self):
""" Backbone only per residue lDDT scores in range [0.0, 1.0]
Computed based on :attr:`~model` on backbone atoms only. This is CA for
peptides and C3' for nucleotides. No stereochecks are performed. If a
residue is not covered by the target or is in a chain skipped by the
chain mapping procedure (happens for super short chains), the respective
score is set to None. In case of oligomers, :attr:`~mapping` is used.
:type: :class:`dict`
"""
if self._bb_local_lddt is None:
self._compute_bb_lddt()
return self._bb_local_lddt
@property
def qs_global(self):
""" Global QS-score
......@@ -1022,6 +1068,42 @@ class Scorer:
self._lddt = lddt_score
self._local_lddt = local_lddt
def _compute_bb_lddt(self):
# make alignments accessible by mdl seq name
alns = dict()
for aln in self.aln:
mdl_seq = aln.GetSequence(1)
alns[mdl_seq.name] = aln
# lDDT requires a flat mapping with mdl_ch as key and trg_ch as value
flat_mapping = self.mapping.GetFlatMapping(mdl_as_key=True)
lddt_chain_mapping = dict()
for mdl_ch, trg_ch in flat_mapping.items():
if mdl_ch in alns:
lddt_chain_mapping[mdl_ch] = trg_ch
lddt_score = self.bb_lddt_scorer.lDDT(self.model,
chain_mapping = lddt_chain_mapping,
residue_mapping = alns,
check_resnames=False,
local_lddt_prop="bb_lddt")[0]
local_lddt = dict()
for r in self.model.residues:
cname = r.GetChain().GetName()
if cname not in local_lddt:
local_lddt[cname] = dict()
if r.HasProp("bb_lddt"):
score = round(r.GetFloatProp("bb_lddt"), 3)
local_lddt[cname][r.GetNumber()] = score
else:
# not covered by trg or skipped in chain mapping procedure
# the latter happens if its part of a super short chain
local_lddt[cname][r.GetNumber()] = None
self._bb_lddt = lddt_score
self._bb_local_lddt = local_lddt
def _compute_qs(self):
qs_score_result = self.qs_scorer.Score(self.mapping.mapping)
self._qs_global = qs_score_result.QS_global
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment