Skip to content
Snippets Groups Projects
Commit ce53cc57 authored by Studer Gabriel's avatar Studer Gabriel
Browse files

simple plDDT transfer heuristic in promod3.modelling.AFDBModel

parent 4d9ba7a7
Branches
Tags
No related merge requests found
......@@ -21,13 +21,27 @@ import numpy as np
import time
import tarfile
import glob
import re
import ost
from ost import io
from ost import seq
from ._raw_model import BuildRawModel
from ._pipeline import BuildFromRawModel
from ._modelling import *
class _AFDBLogSink(ost.LogSink):
""" To capture all log messages of ProMod3 when modelling
"""
def __init__(self):
ost.LogSink.__init__(self)
self.messages = list()
self.severities = list()
def LogMessage(self, message, severity):
self.messages.append(message)
self.severities.append(severity)
class FSStructureServer:
""" FSStructureServer - A filesystem based AFDB structure server
......@@ -564,8 +578,80 @@ def AFDBTPLSearch(fs_server, pentamatch, trg_seq, pentamatch_n = 100,
return_list.append((item[0], aln))
return return_list
def AFDBModel(fs_server, pentamatch, trg_seq):
def _TransferBFactors(messages, aln, model):
""" Simple heuristic to transfer plDDT from AFDB model
Assumes *model* to be monomer. In case of an aligned residue, bfactor
(i.e. plDDT) gets simply transferred. Gaps are treated with a heuristic.
This operates on the full stretch of remodelled amino acids and not solely
on the gaps indicated in the alignment. We first derive a minimum plDDT
which is 0.5*min(n_stem_plddt, c_stem_plddt). The plDDT values of the
processed amino acids then linearly decreases from the stem towards that
minimum with a slope of 0.25 (i.e. reach the minimum value when they're 4
residues away).
:param messages: List of log messages that you derived during logging
of :func:`modelling.BuildFromRawModel`.
:type messages: :class:`list` of :class:`str`
:param aln: Alignment
:type aln: :class:`ost.seq.AlignmentHandle`
:param model: Model
:type model: :class:`ost.mol.EntityHandle`
"""
assert(model.chain_count == 1)
bfactors = [0.0] * len(aln)
for col_idx, col in enumerate(aln):
if col[0] != '-' and col[1] != '-':
r = col.GetResidue(1)
bfactors[col_idx] = np.mean([a.GetBFactor() for a in r.atoms])
# regular expression that finds stuff like: filling A.PRO59-(ESRQG)-A.ILE65
# and directly makes stem residue numbers (59 and 65) available as groups
cname = model.chains[0].GetName()
pat = f"filling {cname}\.[A-Z]+([0-9]*)-\([A-Z]+\)-{cname}\.[A-Z]+([0-9]+)"
for m in messages:
if m.startswith("Resolved"):
match = re.search(pat, m)
assert(match is not None)
groups = match.groups()
assert(len(groups) == 2)
n_stem = int(groups[0]) - 1 # rnum to idx
c_stem = int(groups[1]) - 1 # rnum to idx
# we have no guarantee that these stems were aligned from the
# very beginning. Lets move towards the termini and find the first
# non-zero bfactors
while n_stem > 0:
if bfactors[n_stem] != 0.0:
break
n_stem -= 1
while c_stem < len(bfactors):
if bfactors[c_stem] != 0.0:
break
c_stem += 1
n_stem_bfac = bfactors[n_stem]
c_stem_bfac = bfactors[c_stem]
min_bfac = 0.5*(min(n_stem_bfac, c_stem_bfac))
for idx in range(n_stem+1, c_stem):
n_stem_d = idx - n_stem
c_stem_d = c_stem - idx
if n_stem_d < c_stem_d:
# closer to n stem
n_stem_d = min(4, n_stem_d)
weight = 0.25 * n_stem_d
bfactors[idx] = weight * min_bfac + (1-weight)*n_stem_bfac
else:
# closer to c stem (or same d...)
c_stem_d = min(4, c_stem_d)
weight = 0.25*c_stem_d
bfactors[idx] = weight * min_bfac + (1-weight)*c_stem_bfac
for r in model.residues:
rnum = r.GetNumber().GetNum()
bfac = bfactors[rnum-1]
for a in r.atoms:
a.SetBFactor(bfac)
def AFDBModel(fs_server, pentamatch, trg_seq, transfer_bfactors=False):
""" Build model with AFDB as template library
:param fs_server: Structure database - The AFDB
......@@ -574,6 +660,18 @@ def AFDBModel(fs_server, pentamatch, trg_seq):
:type pentamatch: :class:`PentaMatch`
:param trg_seq: Target sequence
:type trg_seq: :class:`ost.seq.SequenceHandle`/:class:`str`
:param transfer_bfactors: Simple heuristic to transfer bfactors (plDDT) to
model. In case of an aligned residue, bfactor
(i.e. plDDT) gets simply transferred.
Gaps are treated with a heuristic. This operates
on the full stretch of remodelled amino acids and
not solely on the gap indicated by the alignment.
We first derive a minimum plDDT which is
0.5*min(n_stem_plddt, c_stem_plddt). The plDDT
values of the processed amino acids then linearly
decreases from the stem towards that minimum with
a slope of 0.25 (i.e. reach the minimum value when
they're 4 residues away).
:returns: :class:`tuple` with 4 elements. 1: The model 2: The model score
based on plDDT 3: Pairwise alignment (first seq: *trg_seq*,
second seq: tpl seq) 4: Template name (formatted as
......@@ -586,7 +684,23 @@ def AFDBModel(fs_server, pentamatch, trg_seq):
score = tpl_list[0][0]
aln = tpl_list[0][1]
mhandle = BuildRawModel(aln)
model = BuildFromRawModel(mhandle)
if transfer_bfactors:
# setup custom logger to fish out logging messages
log_sink = _AFDBLogSink()
ost.PushLogSink(log_sink)
ost.PushVerbosityLevel(3)
model = BuildFromRawModel(mhandle)
_TransferBFactors(log_sink.messages, aln, model)
ost.PopLogSink()
ost.PopVerbosityLevel()
# let the world know in original log sink
orig_log_sink = ost.GetCurrentLogSink()
if orig_log_sink:
for m,s in zip(log_sink.messages, log_sink.severities):
orig_log_sink.LogMessage(m, s)
else:
model = BuildFromRawModel(mhandle)
name = aln.GetSequence(1).GetAttachedView().GetName()
return (model, score, name, aln)
return (None, None, None, None)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment