diff --git a/modelling/pymod/_afdb_modelling.py b/modelling/pymod/_afdb_modelling.py index 7ff8761a582ba67e8796ef0deb1bdc9a5592c8ae..ae12b4db66cef1cf10efae230542ff919ebdbe43 100644 --- a/modelling/pymod/_afdb_modelling.py +++ b/modelling/pymod/_afdb_modelling.py @@ -21,13 +21,27 @@ import numpy as np import time import tarfile import glob +import re +import ost from ost import io from ost import seq from ._raw_model import BuildRawModel from ._pipeline import BuildFromRawModel from ._modelling import * +class _AFDBLogSink(ost.LogSink): + """ To capture all log messages of ProMod3 when modelling + """ + def __init__(self): + ost.LogSink.__init__(self) + self.messages = list() + self.severities = list() + + def LogMessage(self, message, severity): + self.messages.append(message) + self.severities.append(severity) + class FSStructureServer: """ FSStructureServer - A filesystem based AFDB structure server @@ -564,8 +578,80 @@ def AFDBTPLSearch(fs_server, pentamatch, trg_seq, pentamatch_n = 100, return_list.append((item[0], aln)) return return_list - -def AFDBModel(fs_server, pentamatch, trg_seq): +def _TransferBFactors(messages, aln, model): + """ Simple heuristic to transfer plDDT from AFDB model + + Assumes *model* to be monomer. In case of an aligned residue, bfactor + (i.e. plDDT) gets simply transferred. Gaps are treated with a heuristic. + This operates on the full stretch of remodelled amino acids and not solely + on the gaps indicated in the alignment. We first derive a minimum plDDT + which is 0.5*min(n_stem_plddt, c_stem_plddt). The plDDT values of the + processed amino acids then linearly decreases from the stem towards that + minimum with a slope of 0.25 (i.e. reach the minimum value when they're 4 + residues away). + + :param messages: List of log messages that you derived during logging + of :func:`modelling.BuildFromRawModel`. + :type messages: :class:`list` of :class:`str` + :param aln: Alignment + :type aln: :class:`ost.seq.AlignmentHandle` + :param model: Model + :type model: :class:`ost.mol.EntityHandle` + """ + assert(model.chain_count == 1) + bfactors = [0.0] * len(aln) + for col_idx, col in enumerate(aln): + if col[0] != '-' and col[1] != '-': + r = col.GetResidue(1) + bfactors[col_idx] = np.mean([a.GetBFactor() for a in r.atoms]) + + # regular expression that finds stuff like: filling A.PRO59-(ESRQG)-A.ILE65 + # and directly makes stem residue numbers (59 and 65) available as groups + cname = model.chains[0].GetName() + pat = f"filling {cname}\.[A-Z]+([0-9]*)-\([A-Z]+\)-{cname}\.[A-Z]+([0-9]+)" + for m in messages: + if m.startswith("Resolved"): + match = re.search(pat, m) + assert(match is not None) + groups = match.groups() + assert(len(groups) == 2) + n_stem = int(groups[0]) - 1 # rnum to idx + c_stem = int(groups[1]) - 1 # rnum to idx + # we have no guarantee that these stems were aligned from the + # very beginning. Lets move towards the termini and find the first + # non-zero bfactors + while n_stem > 0: + if bfactors[n_stem] != 0.0: + break + n_stem -= 1 + while c_stem < len(bfactors): + if bfactors[c_stem] != 0.0: + break + c_stem += 1 + n_stem_bfac = bfactors[n_stem] + c_stem_bfac = bfactors[c_stem] + min_bfac = 0.5*(min(n_stem_bfac, c_stem_bfac)) + for idx in range(n_stem+1, c_stem): + n_stem_d = idx - n_stem + c_stem_d = c_stem - idx + if n_stem_d < c_stem_d: + # closer to n stem + n_stem_d = min(4, n_stem_d) + weight = 0.25 * n_stem_d + bfactors[idx] = weight * min_bfac + (1-weight)*n_stem_bfac + else: + # closer to c stem (or same d...) + c_stem_d = min(4, c_stem_d) + weight = 0.25*c_stem_d + bfactors[idx] = weight * min_bfac + (1-weight)*c_stem_bfac + + for r in model.residues: + rnum = r.GetNumber().GetNum() + bfac = bfactors[rnum-1] + for a in r.atoms: + a.SetBFactor(bfac) + +def AFDBModel(fs_server, pentamatch, trg_seq, transfer_bfactors=False): """ Build model with AFDB as template library :param fs_server: Structure database - The AFDB @@ -574,6 +660,18 @@ def AFDBModel(fs_server, pentamatch, trg_seq): :type pentamatch: :class:`PentaMatch` :param trg_seq: Target sequence :type trg_seq: :class:`ost.seq.SequenceHandle`/:class:`str` + :param transfer_bfactors: Simple heuristic to transfer bfactors (plDDT) to + model. In case of an aligned residue, bfactor + (i.e. plDDT) gets simply transferred. + Gaps are treated with a heuristic. This operates + on the full stretch of remodelled amino acids and + not solely on the gap indicated by the alignment. + We first derive a minimum plDDT which is + 0.5*min(n_stem_plddt, c_stem_plddt). The plDDT + values of the processed amino acids then linearly + decreases from the stem towards that minimum with + a slope of 0.25 (i.e. reach the minimum value when + they're 4 residues away). :returns: :class:`tuple` with 4 elements. 1: The model 2: The model score based on plDDT 3: Pairwise alignment (first seq: *trg_seq*, second seq: tpl seq) 4: Template name (formatted as @@ -586,7 +684,23 @@ def AFDBModel(fs_server, pentamatch, trg_seq): score = tpl_list[0][0] aln = tpl_list[0][1] mhandle = BuildRawModel(aln) - model = BuildFromRawModel(mhandle) + if transfer_bfactors: + # setup custom logger to fish out logging messages + log_sink = _AFDBLogSink() + ost.PushLogSink(log_sink) + ost.PushVerbosityLevel(3) + model = BuildFromRawModel(mhandle) + _TransferBFactors(log_sink.messages, aln, model) + ost.PopLogSink() + ost.PopVerbosityLevel() + # let the world know in original log sink + orig_log_sink = ost.GetCurrentLogSink() + if orig_log_sink: + for m,s in zip(log_sink.messages, log_sink.severities): + orig_log_sink.LogMessage(m, s) + else: + model = BuildFromRawModel(mhandle) + name = aln.GetSequence(1).GetAttachedView().GetName() return (model, score, name, aln) return (None, None, None, None)