Something went wrong on our end
-
Studer Gabriel authoredStuder Gabriel authored
_closegaps.py 70.91 KiB
# Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and
# Biozentrum - University of Basel
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''High-level functionality for modelling module to close gaps. Added in the
__init__.py file. To be used directly by passing a ModellingHandle instance
as argument.
'''
# internal
from promod3 import loop, core, scoring
from ._modelling import *
from ._ring_punches import *
from ._reconstruct_sidechains import *
# external
import ost
import sys
###############################################################################
# loop candidate selection setup
def _GetBestLC(mhandle, loop_candidates, start_resnum, chain_idx,
db_scores, max_num_all_atom=0, length_dep_weights=False):
# returns (min_idx, min_score)
# dummy check
if len(loop_candidates) == 0:
raise RuntimeError("Need at least 1 candidate to choose from!")
# setup weights
with_db = (not db_scores.IsEmpty())
with_aa = (max_num_all_atom > 0)
weights = ScoringWeights.GetWeights(with_db, with_aa,
length_dep_weights,
len(loop_candidates[0]))
# setup all scores object (we collect needed scores there)
all_scores = db_scores.Copy() # copy to avoid changing db_scores
# add backbone scores
loop_candidates.CalculateBackboneScores(all_scores, mhandle,
start_resnum, chain_idx)
# add all atom scores?
if with_aa:
# get best ones based on previous scores
non_aa_weights = ScoringWeights.GetWeights(with_db, False,
length_dep_weights,
len(loop_candidates[0]))
non_aa_scores = all_scores.LinearCombine(non_aa_weights)
arg_sorted_scores = sorted([(v,i) for i,v in enumerate(non_aa_scores)])
min_indices = [i for v,i in arg_sorted_scores[:max_num_all_atom]]
# extract relevant loop candidates and scores
aa_loop_candidates = loop_candidates.Extract(min_indices)
all_scores = all_scores.Extract(min_indices)
# add all atom scores
aa_loop_candidates.CalculateAllAtomScores(all_scores, mhandle,
start_resnum, chain_idx)
# get / return best
scores = all_scores.LinearCombine(weights)
min_score = min(scores)
min_idx = scores.index(min_score)
# translate to original indexing for all atom
if with_aa:
min_idx = min_indices[min_idx]
return (min_idx, min_score)
###############################################################################
# helper functions
def _GetGapExtender(mhandle, actual_gap, use_scoring_extender,
use_full_extender, max_length=-1):
# DO NOT USE full ext. with max_len = -1 as this can use LOTS of memory
if use_full_extender and max_length < 0:
raise RuntimeError("Cannot use neg. max_length with full extender!")
# return appropriate gap extender
actual_chain_idx = actual_gap.GetChainIndex()
if use_scoring_extender:
# get extender_penalties (note: changes as gaps filled)
extender_penalties = [0.0] * len(mhandle.seqres[actual_chain_idx])
for r in mhandle.model.chains[actual_chain_idx].residues:
if r.GetSecStructure().IsHelical() or \
r.GetSecStructure().IsExtended():
num = r.GetNumber().GetNum()
extender_penalties[num-1] = 1.0
# setup scoring extender
if use_full_extender:
return ScoringGapExtender(actual_gap, 0.8,
extender_penalties,
mhandle.seqres[actual_chain_idx],
max_length)
else:
raise RuntimeError("Cannot use ScoringGapExtender w/o max_length.")
else:
if use_full_extender:
return FullGapExtender(actual_gap,
mhandle.seqres[actual_chain_idx],
max_length)
else:
return GapExtender(actual_gap,
mhandle.seqres[actual_chain_idx])
def _ResolveLogInfo(gap_orig, optimal_gap, n_candidates, with_db, with_aa):
# helper for consistent logging...
scor_str = "BB"
if with_db:
scor_str += "_DB"
if with_aa:
scor_str += "_AA"
ost.LogInfo("Resolved %s by filling %s (%d candidates, %s)" % \
(str(gap_orig), str(optimal_gap), n_candidates, scor_str))
def _CloseLoopFrame(mhandle, gap_orig, actual_candidates, actual_extended_gaps,
actual_db_scores=[], max_num_all_atom=0,
length_dep_weights=False):
'''Rank candidates with "frame-approach".
All found candidates are extended prior to scoring so they match in size.
'''
# get chain for gap
actual_chain_idx = actual_extended_gaps[0].GetChainIndex()
actual_chain = mhandle.model.chains[actual_chain_idx]
# get min_res_num, max_after_resnum
min_before_resnum = sys.maxsize
max_after_resnum = -sys.maxsize-1
for g in actual_extended_gaps:
min_before_resnum = min(min_before_resnum,
g.before.GetNumber().GetNum())
max_after_resnum = max(max_after_resnum,
g.after.GetNumber().GetNum())
# all loop candidates will be scored along the full max. extension ever
# reached in the search before, so we build an overall frame, where we
# insert the loops
frame_seq = mhandle.seqres[actual_chain_idx]\
[min_before_resnum-1:max_after_resnum]
frame_backbone_list = loop.BackboneList(frame_seq)
actual_res_num = ost.mol.ResNum(min_before_resnum)
for i in range(len(frame_seq)):
actual_res = actual_chain.FindResidue(actual_res_num)
if actual_res.IsValid():
frame_backbone_list.Set(i, actual_res, frame_seq[i])
actual_res_num += 1
# prepare loop candidates for scoring
final_loop_candidates = LoopCandidates(frame_seq)
back_mapper = list()
for i, loop_candidates in enumerate(actual_candidates):
start_index = actual_extended_gaps[i].before.GetNumber().GetNum()\
- min_before_resnum
for j, loop_candidate in enumerate(loop_candidates):
actual_frame_backbone_list = frame_backbone_list.Copy()
actual_frame_backbone_list.ReplaceFragment(
loop_candidate, start_index, False)
final_loop_candidates.Add(actual_frame_backbone_list)
back_mapper.append((i, j))
# db scores requested
db_scores = ScoreContainer()
for actual_scores in actual_db_scores:
db_scores.Extend(actual_scores)
if len(final_loop_candidates) > 0:
# get best
min_idx, min_score = _GetBestLC(mhandle, final_loop_candidates,
min_before_resnum, actual_chain_idx,
db_scores, max_num_all_atom,
length_dep_weights)
ost.LogVerbose("Gap %s - %d candidates, best (min) score %g" %
(str(gap_orig), len(final_loop_candidates), min_score))
# resolve loop
idx_a = back_mapper[min_idx][0]
idx_b = back_mapper[min_idx][1]
_ResolveLogInfo(gap_orig, actual_extended_gaps[idx_a],
len(final_loop_candidates), bool(actual_db_scores),
max_num_all_atom > 0)
# update model and clear gaps
bb_list = actual_candidates[idx_a][idx_b]
actual_gap = actual_extended_gaps[idx_a]
# will return -1 if last gap removed, else next gap idx
return InsertLoopClearGaps(mhandle, bb_list, actual_gap)
else:
ost.LogInfo("Failed at loop insertion (%s)" % str(gap_orig))
return -2
def _CloseLoopBare(mhandle, gap_orig, actual_candidates, actual_extended_gaps,
actual_db_scores=[], penalize_length=True,
max_num_all_atom=0, length_dep_weights=False):
'''Rank candidates directly.
All candidates are scored directly and optionally penalized for gap length.
'''
# get chain for gap
actual_chain_idx = actual_extended_gaps[0].GetChainIndex()
# score loops as they are
min_score = float("inf")
optimal_gap = None
optimal_candidate = None
n_candidates = 0
for i, loop_candidates in enumerate(actual_candidates):
if len(loop_candidates) == 0: continue
n_candidates += len(loop_candidates)
# get current best
before_resnum = actual_extended_gaps[i].before.GetNumber().GetNum()
db_scores = ScoreContainer()
if actual_db_scores:
db_scores = actual_db_scores[i]
best_idx, score = _GetBestLC(mhandle, loop_candidates, before_resnum,
actual_chain_idx, db_scores,
max_num_all_atom, length_dep_weights)
# compare with others
if penalize_length:
# penalized by gap length
score = score * actual_extended_gaps[i].length
if score < min_score:
# keep best one
min_score = score
optimal_gap = actual_extended_gaps[i]
optimal_candidate = loop_candidates[best_idx]
ost.LogVerbose("Gap %s - %d candidates, best (min) score %g" %
(str(gap_orig), n_candidates, min_score))
# finally resolve loop
if optimal_candidate is not None:
# report
_ResolveLogInfo(gap_orig, optimal_gap, n_candidates,
bool(actual_db_scores), max_num_all_atom > 0)
# update model and clear gaps
# will return -1 if last gap removed, else next gap idx
return InsertLoopClearGaps(mhandle, optimal_candidate, optimal_gap)
else:
ost.LogInfo("Failed at loop insertion (%s)" % str(gap_orig))
return -2
def _CloseLoop(mhandle, gap_orig, actual_candidates,
actual_extended_gaps, variant=0, actual_db_scores=[],
max_num_all_atom=0, length_dep_weights=False):
'''Choose best scoring loop candidate and close loop in mhandle.
:param gap_orig: Gap we actually wanted to close
:param actual_candidates: List of LoopCandidates
:param actual_extended_gaps: List of gaps (same size as actual_candidates)
:param variant: 0 = _CloseLoopFrame()
1 = _CloseLoopBare(penalize_length=False)
2 = _CloseLoopBare(penalize_length=True)
:param actual_db_scores: List of DB scores (same size as actual_candidates)
:param max_num_all_atom: Num. of all atom LC to consider
:return: gap-index as returned by ClearGaps or -2 if fail.
'''
prof = core.StaticRuntimeProfiler.StartScoped('closegaps::_CloseLoop')
# check consistency
N = len(actual_extended_gaps)
if len(actual_candidates) != N:
raise RuntimeError("Inconsistent list-lengths in _CloseLoop " \
"(%d, %d)" % (len(actual_candidates), N))
# check for empty candidate list
if N == 0:
ost.LogInfo("Failed at loop insertion (%s)" % str(gap_orig))
return -2
# choose variant
if variant == 0:
return _CloseLoopFrame(mhandle, gap_orig, actual_candidates,
actual_extended_gaps, actual_db_scores,
max_num_all_atom, length_dep_weights)
elif variant in [1,2]:
return _CloseLoopBare(mhandle, gap_orig, actual_candidates,
actual_extended_gaps, actual_db_scores,
variant == 2, max_num_all_atom,
length_dep_weights)
else:
raise RuntimeError("Unknown variant %d" % variant);
def _InRange(gap, chain_idx, resnum_range):
'''Check if gap is in range to be processed.'''
# It is possible to specify exact ranges that should be resolved by
# the chain_idx and resnum_range parameters.
# Let's check whether we care for that particular gap in case of one
# parameter being set.
if chain_idx != None and gap.GetChainIndex() != chain_idx:
return False
elif resnum_range != None:
if gap.IsNTerminal():
c_stem_num = gap.after.GetNumber().GetNum()
return c_stem_num > resnum_range[0]
if gap.IsCTerminal():
n_stem_num = gap.before.GetNumber().GetNum()
return n_stem_num < resnum_range[1]
n_stem_num = gap.before.GetNumber().GetNum()
c_stem_num = gap.after.GetNumber().GetNum()
if n_stem_num <= resnum_range[0] and c_stem_num >= resnum_range[1]:
# full overlap => current gap is fully enclosing range
return True
elif n_stem_num >= resnum_range[0] and n_stem_num < resnum_range[1]:
# partial overlap => n-stem is within range
return True
elif c_stem_num > resnum_range[0] and c_stem_num <= resnum_range[1]:
# partial overlap => c-stem is within range
return True
else:
return False
else:
return True
def _GetMCWeights():
# get weights for Monte Carlo sampling (subset of BB only scores for super
# fast sampling)
bb_weights = ScoringWeights.GetWeights()
return_weights = dict()
return_weights["reduced"] = bb_weights["reduced"]
return_weights["cb_packing"] = bb_weights["cb_packing"]
return_weights["clash"] = bb_weights["clash"]
return return_weights
###############################################################################
def CloseSmallDeletions(mhandle, max_extension=9, clash_thresh=1.0,
e_thresh=200, use_scoring_extender=True,
use_full_extender=True, chain_idx=None,
resnum_range=None, ff_lookup=None):
'''Close small deletions by relaxing neighbouring residues.
Small deletions in the template from the target-template alignment have a
good chance to be bridged just by relaxing neighbours around a tiny gap.
Before diving into the more demanding tasks in modeling, those may be closed
already in the raw-model. After closure some checks are done to see if the
solution is stereochemically sensible.
Closed gaps are removed from :attr:`mhandle.gaps`.
.. literalinclude:: ../../../tests/doc/scripts/modelling_close_small_deletions.py
:param mhandle: Modelling handle on which to apply change.
:type mhandle: :class:`ModellingHandle`
:param max_extension: Maximal number of gap extension steps to perform
(see :class:`GapExtender`)
:type max_extension: :class:`int`
:param clash_thresh: Threshold for the backbone clash score. Acceptance
means being lower than this.
:type clash_thresh: :class:`float`
:param e_thresh: Potential energy should be lower than this.
:type e_thresh: :class:`float`
:param use_scoring_extender: True = use :class:`ScoringGapExtender` instead
of :class:`GapExtender`.
The gap is penalized according as
0.8*length + sum(helices) + sum(sheets).
For the scondary-structure-penalty to work,
the model-template must have the appropriate
information before :func:`BuildRawModel` is
called (e.g. with
:meth:`ost.mol.alg.AssignSecStruct`).
:type use_scoring_extender: :class:`bool`
:param use_full_extender: True = use :class:`FullGapExtender` instead of
of :class:`GapExtender`. Also works in combination
with `use_scoring_extender`. This allows the gap
extender to skip neighboring gaps and to correctly
handle gaps close to termini.
:type use_full_extender: :class:`bool`
:param chain_idx: If not None, only gaps from chain with given index get
processed
:type chain_idx: :class:`int`
:param resnum_range: If not None, only gaps within this resnum range get
processed.
:type resnum_range: :class:`tuple` containing two :class:`int`
:param ff_lookup: Forcefield to parametrize
:class:`promod3.loop.BackboneList` in
:class:`promod3.modelling.BackboneRelaxer`.
If set to None, the one returned by
:func:`promod3.loop.ForcefieldLookup.GetDefault`
gets used.
:type ff_lookup: :class:`promod3.loop.ForcefieldLookup`
'''
prof_name = 'closegaps::CloseSmallDeletions'
prof = core.StaticRuntimeProfiler.StartScoped(prof_name)
if len(mhandle.gaps) > 0:
ost.LogInfo("Trying to close small deletions (no. of gap(s): %d)." \
% len(mhandle.gaps))
else:
return
# check/setup scoring
if not IsBackboneScoringSetUp(mhandle):
SetupDefaultBackboneScoring(mhandle)
# we're only calculating clash scores here, so we copy the default
# env and only attach a clash scorer.
clash_scorer_env = mhandle.backbone_scorer_env.Copy()
clash_scorer = scoring.ClashScorer()
clash_scorer.AttachEnvironment(clash_scorer_env)
if ff_lookup == None:
ff_lookup = loop.ForcefieldLookup.GetDefault()
# iterating mhandle.gaps. The number of gaps may change during the process,
# hence we run by 'while', comparing to the updated list of gaps. If a gap
# gets closed, it is deleted from mhandle.gaps, otherwise, current_gap_index
# as a counter is increased.
current_gap_index = 0
while current_gap_index < len(mhandle.gaps):
# in the end this determines if a gap was closed or not
success = False
# work on a copy of the gap, if not closed in the end, no harm done
current_gap = mhandle.gaps[current_gap_index].Copy()
# A deletion is a gap of size 0 in the template, this means that to
# transform the template sequence into the target sequence, aa's vanish,
# so the target sequence has gap characters, the template not.
# If we are not looking at a deletion, do nothing.
if len(current_gap.seq) == 0 and not current_gap.IsTerminal()\
and _InRange(current_gap, chain_idx, resnum_range):
current_chain = current_gap.GetChain()
current_chain_index = current_gap.GetChainIndex()
# Try to close gap by relaxation: by checking how far we may extend
# the gap, we get the backbone to be stretched. If no more extension
# is possible, break out. On first successful relaxation for an
# extension, we successfully stop.
extender = _GetGapExtender(mhandle, current_gap,
use_scoring_extender,
use_full_extender,
max_extension)
for _ in range(max_extension):
if not extender.Extend():
break
# gather residues for backbone relaxation, check that we get a
# bunch of actual residues, otherwise jump to next extension
res_list = list()
n_stem_resnum = current_gap.before.GetNumber()
c_stem_resnum = current_gap.after.GetNumber()
idx = 0
found_residues = True
while n_stem_resnum + idx <= c_stem_resnum:
res = current_chain.FindResidue(n_stem_resnum+idx)
if not res.IsValid():
found_residues = False
break
res_list.append(res)
idx += 1
if not found_residues:
continue
# backbone relaxation, for now we allow 300 steps or stop at
# max. force of 0.1
bb_list = loop.BackboneList(current_gap.full_seq, res_list)
bb_relaxer = BackboneRelaxer(bb_list, ff_lookup)
potential_e = bb_relaxer.Run(bb_list, 300, 0.1)
# check for clashes
# its a bit problematic since we score loops that are shifting
# around... so we perform a stash operation for each scoring step
clash_scorer_env.Stash(n_stem_resnum.GetNum(),
len(bb_list), current_chain_index)
clash_scorer_env.SetEnvironment(bb_list, n_stem_resnum.GetNum(),
current_chain_index)
score = clash_scorer.CalculateScore(n_stem_resnum.GetNum(),
len(bb_list),
current_chain_index)
clash_scorer_env.Pop()
# if there is no clash and potential energy is low enough we
# just solved a gap, delete it and update the scorer for the
# changed model
if score < clash_thresh and \
potential_e < e_thresh and \
bb_list.TransOmegaTorsions():
ost.LogInfo("Closed: %s by relaxing %s" % \
(mhandle.gaps[current_gap_index], current_gap))
InsertLoopClearGaps(mhandle, bb_list, current_gap)
clash_scorer_env.SetEnvironment(bb_list,
n_stem_resnum.GetNum(),
current_chain_index)
success = True
break
# On closed gap, it is removed so the no. of gaps goes down by itself.
# In case of no success, counter needs to be increased to jump to the
# next gap.
if not success:
current_gap_index += 1
def MergeGapsByDistance(mhandle, distance, chain_idx = None,
resnum_range = None):
'''Merge 2 neighbouring gaps by deleting residues in-between.
Check if two neighbouring gaps are at max. *distance* residues apart from
each other. Then delete the residues and store a new gap spanning the whole
stretch of original gaps and the deleted region. Original gaps will be
removed. Stem residues count to the gap, so **A-A-A** has a distance of 0.
IMPORTANT: we assume here that *mhandle* stores gaps sequentially.
Non-sequential gaps are ignored!
.. literalinclude:: ../../../tests/doc/scripts/modelling_merge_gaps_by_distance.py
:param mhandle: Modelling handle on which to apply change.
:type mhandle: :class:`ModellingHandle`
:param distance: The max. no. of residues between two gaps up to which
merge happens.
:type distance: :class:`int`
:param chain_idx: If not None, only gaps from chain with given index get
processed
:type chain_idx: :class:`int`
:param resnum_range: If not None, two gaps only get merged if they're
both in this resnum range.
:type resnum_range: :class:`tuple` containing two :class:`int`
'''
prof_name = 'closegaps::MergeGapsByDistance'
prof = core.StaticRuntimeProfiler.StartScoped(prof_name)
if len(mhandle.gaps) > 1:
ost.LogInfo("Trying to merge %d gap(s) with distance %d." % \
(len(mhandle.gaps), distance))
else:
return
# indicate if we merged gaps and should check for more
try_again = True
# The number of gaps changes on merge, so we cannot just iterate them.
# If we merged gaps, we do not know if this was the last one so try_again
# is set to True. If no more gaps were merged, we stop by leaving try_again
# as False.
while try_again:
try_again = False
# iterate all but the last gap, since we are always looking ahead
for i in range(len(mhandle.gaps) - 1):
current_gap = mhandle.gaps[i].Copy()
next_gap = mhandle.gaps[i+1].Copy()
# check that we are on the same chain
if current_gap.GetChain() != next_gap.GetChain():
continue
# check for range (if given)
if not (_InRange(current_gap, chain_idx, resnum_range) and\
_InRange(next_gap, chain_idx, resnum_range)):
continue
# no merging of gaps at the end AND the start :)
if current_gap.IsNTerminal() and next_gap.IsCTerminal():
continue
# get the distance between the gaps
dist = next_gap.before.GetNumber().GetNum() \
- current_gap.after.GetNumber().GetNum()
# NOTE: -1 can currently only happen when fixing ring punches
# in _pipeline.BuildSidechains
if dist < -1:
ost.LogInfo("Non-sequential gaps found. Ignoring.")
continue
if dist <= distance:
# gaps are close enough, combine! combine!
MergeGaps(mhandle, i)
ost.LogInfo("Merged gap %s and %s into %s" % \
(current_gap, next_gap, mhandle.gaps[i]))
try_again = True
break
def FillLoopsByDatabase(mhandle, fragment_db, structure_db,
torsion_sampler=None, max_loops_to_search=40,
min_loops_required=4, max_res_extension=-1,
extended_search=True, use_scoring_extender=True,
use_full_extender=True, score_variant=0,
ring_punch_detection=1, chain_idx=None,
resnum_range=None, max_num_all_atom=0,
clash_thresh=-1, length_dep_weights=False):
'''Try to fill up loops from a structural database.
Usually this will extend the gaps a bit to match candidates from the
database. Do not expect a gap being filled in between its actual stem
residues.
This function cannot fill gaps at C- or N-terminal.
.. literalinclude:: ../../../tests/doc/scripts/modelling_fill_loops_by_database.py
:param mhandle: Modelling handle on which to apply change.
:type mhandle: :class:`ModellingHandle`
:param fragment_db: A fragment database coupled to the *structure_db*.
:type fragment_db: :class:`~promod3.loop.FragDB`
:param structure_db: Backbone/profile data.
:type structure_db: :class:`~promod3.loop.StructureDB`
:param torsion_sampler: A sampler for torsion angles. A default one is
loaded if None.
:type torsion_sampler: :class:`~promod3.loop.TorsionSampler`
:param max_loops_to_search: Define how many candidates are 'enough' to be
evaluated per loop. The actual found candidates
may be more (if we found 'enough') or less (if
not enough candidates exist) of this number.
:type max_loops_to_search: :class:`int`
:param min_loops_required: Define how many candidates we require to close
the loop. If we did not find at least this number
of candidates for a gap, we skip it without
closing. Can be set to ``max_loops_to_search``
(or equivalently to -1) to enforce that we only
close gaps for which we found enough candidates.
:type min_loops_required: :class:`int`
:param max_res_extension: Only allow this number of residues to be added to
the gaps when extending. If set to **-1**, any
number of residues can be added (as long as the
`fragment_db` allows it).
:type max_res_extension: :class:`int`
:param extended_search: True = more loop candidates are considered.
The candidate search is done less precisely (see
:meth:`~LoopCandidates.FillFromDatabase`).
The candidates are still scored and evaluated the
same though (only more of them considered).
:type extended_search: :class:`bool`
:param use_scoring_extender: True = use :class:`ScoringGapExtender` instead
of :class:`GapExtender`.
See :func:`CloseSmallDeletions`.
:type use_scoring_extender: :class:`bool`
:param use_full_extender: True = use :class:`FullGapExtender` instead of
:class:`GapExtender`.
See :func:`CloseSmallDeletions`.
:type use_full_extender: :class:`bool`
:param score_variant: How to score loop candidates. Options:
- **0**: put frame of backbone residues enclosing all
candidates and score frame. This will also "score"
non-modelled residues!
- **1**: score candidates directly
- **2**: like **1** but penalize length of candidate
:type score_variant: :class:`int`
:param ring_punch_detection: How to deal with ring punchings. Options:
- **0**: not at all (fastest)
- **1**: check for punchings with existing rings
- **2**: check incl. sidechain for loop cand.
:type ring_punch_detection: :class:`int`
:param chain_idx: If not None, only gaps from chain with given index get
processed
:type chain_idx: :class:`int`
:param resnum_range: If not None, only gaps within this resnum range get
processed
:type resnum_range: :class:`tuple` containing two :class:`int`
:param max_num_all_atom: If > 0, we prefilter loop candidates based on
non-all-atom-scores and apply all atom scoring to
the best *max_num_all_atom* candidates. If desired,
*5* is a good value here (larger values give only
numerical improvement). With *5*, this will be
approx. 2x slower than without and will give a
slight improvement in loop selection.
:type max_num_all_atom: :class:`int`
:param clash_thresh: If > 0, we only keep loop candidates which have a
backbone clash score lower than this.
:type clash_thresh: :class:`float`
:param length_dep_weights: :class:`ScoringWeights` provides different sets
of weights that have been trained on different
loop subsets. If this flag is true, the length
dependent weights are used to select the final
loops.
:type length_dep_weights: :class:`bool`
'''
prof_name = 'closegaps::FillLoopsByDatabase'
prof = core.StaticRuntimeProfiler.StartScoped(prof_name)
# load stuff if needed
if torsion_sampler is None:
torsion_sampler = loop.LoadTorsionSamplerCoil()
n_non_terminal_gaps = 0
for gap in mhandle.gaps:
if not gap.IsTerminal():
n_non_terminal_gaps += 1
if n_non_terminal_gaps > 0:
ost.LogInfo("Trying to fill %d gap(s) by database."%n_non_terminal_gaps)
else:
return
# check/setup scoring
if not IsBackboneScoringSetUp(mhandle):
SetupDefaultBackboneScoring(mhandle)
if max_num_all_atom > 0 and not IsAllAtomScoringSetUp(mhandle):
SetupDefaultAllAtomScoring(mhandle)
# some score variants cannot deal with multiple insertions in one "gap"
disallow_ins_merge = (score_variant == 0)
# do we want DB features?
add_db_features = False
if len(mhandle.profiles) > 0 and \
structure_db.HasData(loop.StructureDBDataType.AAFrequenciesStruct) and \
structure_db.HasData(loop.StructureDBDataType.AAFrequencies):
add_db_features = True
elif len(mhandle.profiles) > 0:
ost.LogWarning("Cannot make use of profiles attached to mhandle in " +\
"FillLoopsByDatabase as the provided StructureDB " +\
"does not contain profiles. This might lead to " +\
"suboptimal modelling results.")
# check min_loops_required
if min_loops_required < 0:
min_loops_required = max_loops_to_search
# get biggest loop (w/o stems) stored in db
max_db_loop_len = fragment_db.MaxFragLength() - 2
# point to the current gap
gap_idx = 0
# Iterate all gaps. Since the number of gaps may change, always compare to
# an updated list. gap_idx is only increased when necessary, e.g. current
# gap could not be removed.
while gap_idx < len(mhandle.gaps) and gap_idx >= 0:
# keep copy of original gap
gap_orig = mhandle.gaps[gap_idx].Copy()
if disallow_ins_merge:
gap_ins = CountEnclosedInsertions(mhandle, gap_orig)
# ignore terminal gaps and out-of-range (if range given)
if gap_orig.IsTerminal() \
or not _InRange(gap_orig, chain_idx, resnum_range):
gap_idx += 1
continue
##################################
# find loop candidates
##################################
# list of LoopCandidates to fill gap
actual_candidates = list()
# list of StructuralGap corresponding to actual_candidates
actual_extended_gaps = list()
# list of dicts with DB specific scores
# -> empty if not add_db_features
actual_db_scores = list()
# number of loops found (stop if >= max_loops_to_search)
found_loops = 0
# maximal length for this gap
if max_res_extension >= 0:
max_gap_length = min(gap_orig.length + max_res_extension,
max_db_loop_len)
else:
max_gap_length = max_db_loop_len
# currently extended gap and gap-extender
actual_gap = gap_orig.Copy()
actual_extender = _GetGapExtender(mhandle, actual_gap,
use_scoring_extender,
use_full_extender,
max_gap_length)
# iteratively extend actual_gap until we have enough loops
first_iteration = True
while actual_gap.length <= max_gap_length:
# check if we have enough
if found_loops >= max_loops_to_search:
break
# extend the gap
if not first_iteration:
if not actual_extender.Extend():
break
first_iteration = False
# ensure both stems are valid
n_stem = actual_gap.before
c_stem = actual_gap.after
if not n_stem.IsValid() or not c_stem.IsValid():
break
# skip if we try to merge insertions when disallowed
if disallow_ins_merge and \
CountEnclosedInsertions(mhandle, actual_gap) != gap_ins:
continue
# get candidates for the current loop
candidates = LoopCandidates.FillFromDatabase(
n_stem, c_stem, actual_gap.full_seq,
fragment_db, structure_db, extended_search)
# skip gaps with no loop candidates
if len(candidates) == 0:
continue
# check for stem rmsd before ApplyCCD
if add_db_features:
db_scores = ScoreContainer()
candidates.CalculateStemRMSDs(db_scores, n_stem, c_stem)
# try to close loops
try:
#pylint: disable=broad-except
orig_indices = candidates.ApplyCCD(n_stem, c_stem,
torsion_sampler)
except Exception:
# CCD should work even if no residues exist before and/or after
# the stems. If this is not desired, you should skip those gaps.
# If anything else fails, the proposed gap is skipped.
ost.LogWarning("ApplyCCD failure for " + str(actual_gap))
continue
# remove clashing ones if desired
if clash_thresh > 0:
clash_score_container = ScoreContainer()
candidates.CalculateBackboneScores(clash_score_container,
mhandle, ["clash"],
n_stem.GetNumber().GetNum(),
actual_gap.GetChainIndex())
clash_scores = clash_score_container.Get("clash")
to_keep = []
for i, bb_list in enumerate(candidates):
if clash_scores[i] < clash_thresh:
to_keep.append(i)
candidates = candidates.Extract(to_keep)
orig_indices = [orig_indices[i] for i in to_keep]
assert(len(orig_indices) == len(candidates))
# check for ring punchings
if ring_punch_detection == 1 and len(candidates) > 0:
FilterCandidates(candidates, mhandle.model,
actual_gap, orig_indices)
elif ring_punch_detection == 2 and len(candidates) > 0:
FilterCandidatesWithSC(candidates, mhandle.model,
actual_gap, orig_indices)
# skip if no loop was successfully closed
if len(candidates) == 0:
continue
# deal with DB features
if add_db_features:
# get subset of stem rmsd scores
assert(len(orig_indices) == len(candidates))
db_scores = db_scores.Extract(orig_indices)
# add profile scores
prof = mhandle.profiles[actual_gap.GetChainIndex()]
start_pos = n_stem.GetNumber().GetNum() - 1
candidates.CalculateSequenceProfileScores(db_scores,
structure_db,
prof, start_pos)
candidates.CalculateStructureProfileScores(db_scores,
structure_db,
prof, start_pos)
# update list
actual_db_scores.append(db_scores)
# update candidate lists
actual_candidates.append(candidates)
actual_extended_gaps.append(actual_gap.Copy())
found_loops += len(candidates)
# skip if we didn't find enough
if found_loops < min_loops_required:
ost.LogInfo("Failed at loop insertion (%s), only %d candidates" % \
(str(gap_orig), found_loops))
gap_idx += 1
continue
##################################
# close loop
##################################
new_idx = _CloseLoop(mhandle, gap_orig, actual_candidates,
actual_extended_gaps, score_variant,
actual_db_scores, max_num_all_atom,
length_dep_weights)
if new_idx == -2:
# try next one if we failed
gap_idx += 1
else:
# all good: fix sidechains if we're in ring-punch-mode and continue
if ring_punch_detection == 2:
ReconstructSidechains(mhandle.model, keep_sidechains=True)
gap_idx = new_idx
def FillLoopsByMonteCarlo(mhandle, torsion_sampler=None, max_loops_to_search=6,
max_extension=30, mc_num_loops=2, mc_steps=5000,
use_scoring_extender=True, use_full_extender=True,
score_variant=0, ring_punch_detection=1,
fragger_handles=None, chain_idx=None,
resnum_range=None, length_dep_weights=False):
'''Try to fill up loops with Monte Carlo sampling.
This is meant as a "last-resort" approach when it is not possible to fill
the loops from the database with :func:`FillLoopsByDatabase`.
This will extend the gaps (up to *max_extension* times) a bit to allow for
more loop candidates to be found.
The loops are modelled by either sampling the dihedral angles or (if
*fragger_handles* is given) :class:`~promod3.loop.Fragger` lists. The latter
is only used if the gap length is >= the length of fragments stored.
This function cannot fill gaps at C- or N-terminal.
.. literalinclude:: ../../../tests/doc/scripts/modelling_fill_loops_by_monte_carlo.py
:param mhandle: Modelling handle on which to apply change.
:type mhandle: :class:`ModellingHandle`
:param torsion_sampler: A sampler for torsion angles. A default one is
loaded if None.
:type torsion_sampler: :class:`~promod3.loop.TorsionSampler`
:param max_loops_to_search: Define how many candidates are 'enough' to be
evaluated per loop.
:type max_loops_to_search: :class:`int`
:param max_extension: Maximal number of gap extension steps to perform
(see :class:`GapExtender`)
:type max_extension: :class:`int`
:param mc_num_loops: Number of loop candidates to consider for each extended gap
(see :meth:`~LoopCandidates.FillFromMonteCarloSampler`)
:type mc_num_loops: :class:`int`
:param mc_steps: Number of MC steps to perform for each loop candidate
(see :meth:`~LoopCandidates.FillFromMonteCarloSampler`)
:type mc_steps: :class:`int`
:param use_scoring_extender: True = use :class:`ScoringGapExtender` instead
of :class:`GapExtender`.
See :func:`CloseSmallDeletions`.
:type use_scoring_extender: :class:`bool`
:param use_full_extender: True = use :class:`FullGapExtender` instead of
:class:`GapExtender`.
See :func:`CloseSmallDeletions`.
:type use_full_extender: :class:`bool`
:param score_variant: How to score loop candidates (AllAtom not supported).
See :func:`FillLoopsByDatabase`.
:type score_variant: :class:`int`
:param ring_punch_detection: How to deal with ring punchings.
See :func:`FillLoopsByDatabase`.
:type ring_punch_detection: :class:`int`
:param fragger_handles: Either None (no fragger sampling used) or one
fragger handle for each chain in *mhandle*.
:type fragger_handles: :class:`list` of :class:`~promod3.modelling.FraggerHandle`
:param chain_idx: If not None, only gaps from chain with given index get
processed
:type chain_idx: :class:`int`
:param resnum_range: If not None, only gaps within this resnum range get
processed
:type resnum_range: :class:`tuple` containing two :class:`int`
:param length_dep_weights: :class:`ScoringWeights` provides different sets
of weights that have been trained on different
loop subsets. If this flag is true, the length
dependent weights are used to select the final
loops.
:type length_dep_weights: :class:`bool`
'''
prof_name = 'closegaps::FillLoopsByMonteCarlo'
prof = core.StaticRuntimeProfiler.StartScoped(prof_name)
# load stuff if needed
if torsion_sampler is None:
torsion_sampler = loop.LoadTorsionSamplerCoil()
n_non_terminal_gaps = 0
for gap in mhandle.gaps:
if not gap.IsTerminal():
n_non_terminal_gaps += 1
if n_non_terminal_gaps > 0:
ost.LogInfo("Trying to fill %d gap(s) by Monte Carlo." \
% len(mhandle.gaps))
else:
return
# check/setup scoring
if not IsBackboneScoringSetUp(mhandle):
SetupDefaultBackboneScoring(mhandle)
# point to the current gap
gap_idx = 0
# Iterate all gaps. Since the number of gaps may change, always compare to
# an updated list. gap_idx is only increased when necessary, e.g. current
# gap could not be removed.
while gap_idx < len(mhandle.gaps) and gap_idx >= 0:
# keep copy of original gap
gap_orig = mhandle.gaps[gap_idx].Copy()
if score_variant == 0:
gap_ins = CountEnclosedInsertions(mhandle, gap_orig)
# ignore terminal gaps and out-of-range (if range given)
if gap_orig.IsTerminal() \
or not _InRange(gap_orig, chain_idx, resnum_range):
gap_idx += 1
continue
##################################
# find loop candidates
##################################
# list of LoopCandidates to fill gap
actual_candidates = list()
# list of StructuralGap corresponding to actual_candidates
actual_extended_gaps = list()
# number of loops found (stop if >= max_loops_to_search)
found_loops = 0
# currently extended gap and gap-extender
actual_gap = gap_orig.Copy()
# each extension can make it at most 1 longer...
max_loop_len = gap_orig.length + max_extension
actual_extender = _GetGapExtender(mhandle, actual_gap,
use_scoring_extender,
use_full_extender,
max_loop_len)
actual_chain_idx = actual_gap.GetChainIndex()
# iteratively extend actual_gap
ext_step = 0
first_iteration = True
while found_loops < max_loops_to_search and ext_step < max_extension:
# extend the gap
if not first_iteration:
if not actual_extender.Extend():
break
first_iteration = False
# make sure, that the loop seq has at least length 3
if (len(actual_gap.full_seq) < 3):
continue
ext_step += 1
# ensure both stems are valid
if not actual_gap.before.IsValid() or \
not actual_gap.after.IsValid():
break
# skip if we try to merge insertions with score_variant=0
if score_variant == 0 and \
CountEnclosedInsertions(mhandle, actual_gap) != gap_ins:
continue
# setup sampler, closer, scorer and cooler for MC
if fragger_handles is not None:
fragger_handle = fragger_handles[actual_chain_idx]
try:
# choose sampler
if fragger_handles is None or \
actual_gap.length < fragger_handle.fragment_length:
mc_sampler = PhiPsiSampler(actual_gap.full_seq,
torsion_sampler)
else:
start_pos = actual_gap.before.GetNumber().GetNum() - 1
end_pos = actual_gap.after.GetNumber().GetNum() - 1
fragger_list = fragger_handle.GetList(start_pos, end_pos)
mc_sampler = FragmentSampler(actual_gap.full_seq,
fragger_list,
init_fragments=5)
mc_closer = DirtyCCDCloser(actual_gap.before,
actual_gap.after)
weights = _GetMCWeights()
mc_scorer = LinearScorer(mhandle.backbone_scorer,
mhandle.backbone_scorer_env,
actual_gap.before.GetNumber().GetNum(),
len(actual_gap.full_seq),
actual_chain_idx, weights)
start_temperature = 100
cooling_factor = 0.9
# the number of 109 is roughly the number of times we have to apply
# a factor of 0.9 to 100 until it reaches a value of 0.001
cooling_interval = int(round(mc_steps/109.0))
mc_cooler = ExponentialCooler(cooling_interval,
start_temperature,
cooling_factor)
except:
# something went terribly wrong...
ost.LogError('Failed to set up MC components')
raise
# try to get candidates for the current loop
try:
ost.LogVerbose("Firing MC for " + str(actual_gap))
candidates = LoopCandidates.FillFromMonteCarloSampler(
actual_gap.full_seq, mc_num_loops, mc_steps, mc_sampler,
mc_closer, mc_scorer, mc_cooler)
except RuntimeError:
# monte carlo cannot be initialized when the stems are too far
# apart => we need further loop extension
ost.LogVerbose("Failed in setting up " + str(actual_gap) +
" stems might be too far away...")
continue
# check for ring punchings
if ring_punch_detection == 1 and len(candidates) > 0:
FilterCandidates(candidates, mhandle.model, actual_gap)
elif ring_punch_detection == 2 and len(candidates) > 0:
FilterCandidatesWithSC(candidates, mhandle.model, actual_gap)
# skip if nothing found
if len(candidates) == 0:
continue
# update candidate lists
actual_candidates.append(candidates)
actual_extended_gaps.append(actual_gap.Copy())
found_loops += len(candidates)
##################################
# close loop
##################################
new_idx = _CloseLoop(mhandle, gap_orig, actual_candidates,
actual_extended_gaps, score_variant,
length_dep_weights=length_dep_weights)
if new_idx == -2:
# try next one if we failed
gap_idx += 1
else:
# all good: fix sidechains if we're in ring-punch-mode and continue
if ring_punch_detection == 2:
ReconstructSidechains(mhandle.model, keep_sidechains=True)
gap_idx = new_idx
def CloseGaps(mhandle, merge_distance=4, fragment_db=None, structure_db=None,
torsion_sampler=None, fragger_handles=None, chain_idx=None,
resnum_range=None, length_dep_weights=False):
'''
Tries to close all gaps in a model, except termini. It will go through
following steps:
- Try to close small deletions by relaxing them
(see :func:`CloseSmallDeletions`)
- Iteratively merge gaps up to a distance **merge_distance**
(see :func:`MergeGapsByDistance`) and try to fill them with a database
approach (see :func:`FillLoopsByDatabase`)
- Try to fill remaining gaps using a Monte Carlo approach
(see :func:`FillLoopsByMonteCarlo`)
- Large deletions get closed using a last resort approach
(see :func:`CloseLargeDeletions`)
:param mhandle: Modelling handle on which to apply change.
:type mhandle: :class:`ModellingHandle`
:param merge_distance: Max. merge distance when performing the database
approach
:type merge_distance: :class:`int`
:param fragment_db: Database for searching fragments in database
approach, must be consistent with provided
**structure_db**. A default is loaded if None.
:type fragment_db: :class:`~promod3.loop.FragDB`
:param structure_db: Structure db from which the **fragment_db** gets
it's structural information. A default is loaded
if None.
:type structure_db: :class:`~promod3.loop.StructureDB`
:param torsion_sampler: Used as parameter for :func:`FillLoopsByDatabase`
and :func:`FillLoopsByMonteCarlo` A default one is
loaded if None.
:type torsion_sampler: :class:`promod3.loop.TorsionSampler`
:param fragger_handles: A list of :class:`promod3.modelling.FraggerHandle`
objects for each chain in **mhandle**.
If provided, fragments will be used for
sampling when the :func:`FillLoopsByMonteCarlo`
gets executed.
:type fragger_handles: :class:`list`
:param chain_idx: If not None, only gaps from chain with given index get
processed
:type chain_idx: :class:`int`
:param resnum_range: If not None, only gaps within this resnum range get
processed.
:type resnum_range: :class:`tuple` containing two :class:`int`
:param length_dep_weights: :class:`ScoringWeights` provides different sets
of weights that have been trained on different
loop subsets. If this flag is true, the length
dependent weights are used to close loops with
database / Monte Carlo.
:type length_dep_weights: :class:`bool`
'''
# load stuff if needed
if fragment_db is None:
fragment_db = loop.LoadFragDB()
if structure_db is None:
structure_db = loop.LoadStructureDB()
if torsion_sampler is None:
torsion_sampler = loop.LoadTorsionSamplerCoil()
# try to close small deletions by relaxing them
CloseSmallDeletions(mhandle, chain_idx=chain_idx,
resnum_range=resnum_range)
# iteratively merge gaps of distance i and fill loops by database
for distance in range(merge_distance):
MergeGapsByDistance(mhandle, distance, chain_idx=chain_idx,
resnum_range=resnum_range)
FillLoopsByDatabase(mhandle, fragment_db, structure_db,
torsion_sampler=torsion_sampler,
min_loops_required=-1, max_res_extension=6,
chain_idx=chain_idx, resnum_range=resnum_range,
clash_thresh=10,
length_dep_weights=length_dep_weights)
# if above fails, try DB-fill with less restrictions
FillLoopsByDatabase(mhandle, fragment_db, structure_db,
torsion_sampler=torsion_sampler, min_loops_required=-1,
chain_idx=chain_idx, resnum_range=resnum_range,
clash_thresh=10,
length_dep_weights=length_dep_weights)
FillLoopsByDatabase(mhandle, fragment_db, structure_db,
torsion_sampler=torsion_sampler, chain_idx=chain_idx,
resnum_range=resnum_range,
length_dep_weights=length_dep_weights)
# close remaining gaps by Monte Carlo
FillLoopsByMonteCarlo(mhandle, torsion_sampler=torsion_sampler,
fragger_handles=fragger_handles,
chain_idx=chain_idx,
resnum_range=resnum_range,
length_dep_weights=length_dep_weights)
# last resort approach to close large deletions
CloseLargeDeletions(mhandle, structure_db, chain_idx=chain_idx,
resnum_range=resnum_range)
# NOTE:
# In the function above, we call :func:`FillLoopsByDatabase` multiple
# times. First, we try to close "easy" gaps which require few extensions
# (we wish to limit the damage we do on the template) and for which we have
# plenty of loop candidates. If some gaps cannot be closed like this, we try
# less restrictive options. This approach is helpful if neighboring gaps are
# close together and the one closer to the C-terminus is easier to close.
# Several variants were evaluated on 1752 target-template-pairs and this one
# worked best.
def ModelTermini(mhandle, torsion_sampler, fragger_handles=None,
mc_num_loops=20, mc_steps=5000):
'''Try to model termini with Monte Carlo sampling.
Use with care! This is an experimental feature which will increase coverage
but we do not assume that the resulting termini are of high quality!
The termini are modelled by either sampling the dihedral angles or (if
*fragger_handles* is given) :class:`~promod3.loop.Fragger` lists. The latter
is only used if the gap length is >= the length of fragments stored.
.. literalinclude:: ../../../tests/doc/scripts/modelling_model_termini.py
:param mhandle: Modelling handle on which to apply change.
:type mhandle: :class:`ModellingHandle`
:param torsion_sampler: A sampler for torsion angles.
:type torsion_sampler: :class:`~promod3.loop.TorsionSampler`
:param fragger_handles: Either None (no fragger sampling used) or one
fragger handle for each chain in *mhandle*.
:type fragger_handles: :class:`list` of :class:`~promod3.modelling.FraggerHandle`
:param mc_num_loops: Number of loop candidates to consider for each terminal gap
(see :meth:`~LoopCandidates.FillFromMonteCarloSampler`)
:type mc_num_loops: :class:`int`
:param mc_steps: Number of MC steps to perform for each loop candidate
(see :meth:`~LoopCandidates.FillFromMonteCarloSampler`)
:type mc_steps: :class:`int`
'''
prof_name = 'closegaps::ModelTermini'
prof = core.StaticRuntimeProfiler.StartScoped(prof_name)
# get terminal gaps (copies as we'll clear them as we go)
terminal_gaps = [g.Copy() for g in mhandle.gaps if g.IsTerminal()]
if len(terminal_gaps) > 0:
ost.LogInfo("Trying to model %d terminal gap(s)." % len(terminal_gaps))
else:
return
# check/setup scoring
if not IsBackboneScoringSetUp(mhandle):
SetupDefaultBackboneScoring(mhandle)
# model them
for actual_gap in terminal_gaps:
# extract info
if actual_gap.IsNTerminal():
start_resnum = 1
else:
start_resnum = actual_gap.before.GetNumber().GetNum()
actual_chain = actual_gap.GetChain()
actual_chain_idx = actual_gap.GetChainIndex()
if fragger_handles is not None:
fragger_handle = fragger_handles[actual_chain_idx]
# choose sampler
if fragger_handles is None or \
actual_gap.length < fragger_handle.fragment_length:
mc_sampler = PhiPsiSampler(actual_gap.full_seq,
torsion_sampler)
else:
end_pos = start_resnum-1 + actual_gap.length
fragger_list = fragger_handle.GetList(start_resnum-1, end_pos)
mc_sampler = FragmentSampler(actual_gap.full_seq, fragger_list,
init_fragments=5)
# choose closer
if actual_gap.IsNTerminal():
mc_closer = NTerminalCloser(actual_gap.after)
else:
mc_closer = CTerminalCloser(actual_gap.before)
# setup scorer
weights = _GetMCWeights()
mc_scorer = LinearScorer(mhandle.backbone_scorer,
mhandle.backbone_scorer_env,
start_resnum, len(actual_gap.full_seq),
actual_chain_idx, weights)
# setup cooler
start_temperature = 100
cooling_factor = 0.9
# the number of 109 is roughly the number of times we have to apply
# a factor of 0.9 to 100 until it reaches a value of 0.001
cooling_interval = int(round(mc_steps/109.0))
mc_cooler = ExponentialCooler(cooling_interval, start_temperature,
cooling_factor)
# try to get loop candidates
ost.LogVerbose("Firing MC for " + str(actual_gap))
candidates = LoopCandidates.FillFromMonteCarloSampler(
actual_gap.full_seq, mc_num_loops, mc_steps, mc_sampler,
mc_closer, mc_scorer, mc_cooler)
if len(candidates) > 0:
# score candidates
bb_scores = ScoreContainer()
candidates.CalculateBackboneScores(bb_scores, mhandle,
start_resnum, actual_chain_idx)
scores = bb_scores.LinearCombine(ScoringWeights.GetWeights())
min_score = min(scores)
min_idx = scores.index(min_score)
# report
ost.LogInfo("Resolved terminal gap %s (%d candidates)" % \
(str(actual_gap), len(candidates)))
# update model and clear gap
InsertLoopClearGaps(mhandle, candidates[min_idx], actual_gap)
else:
ost.LogInfo("Failed to model terminal gap (%s)" % str(actual_gap))
def CloseLargeDeletions(mhandle, structure_db, linker_length=8,
num_fragments=500, use_scoring_extender=True,
use_full_extender=True, chain_idx=None,
resnum_range=None):
'''Try to close large deletions.
This is meant as a "last-resort" approach. In some cases you cannot
close very large deletions simply because the two parts separated
by a deletion are too far apart. The idea is to sample a linker region
and always move the whole chain towards the n-terminus.
:param mhandle: Modelling handle on which to apply change.
:type mhandle: :class:`ModellingHandle`
:param structure_db: The database from which to extract fragments for
the linker region.
:type structure_db: :class:`~promod3.loop.StructureDB`
:param linker_length: Desired length (in residues w/o stems) for the
linker. This may be shorter if extender cannot
extend further.
:type linker_length: :class:`int`
:param num_fragments: Number of fragments to sample the linker.
:type num_fragments: :class:`int`
:param use_scoring_extender: True = use :class:`ScoringGapExtender` instead
of :class:`GapExtender`.
See :func:`CloseSmallDeletions`.
:type use_scoring_extender: :class:`bool`
:param use_full_extender: True = use :class:`FullGapExtender` instead of
:class:`GapExtender`.
See :func:`CloseSmallDeletions`.
:type use_full_extender: :class:`bool`
:param chain_idx: If not None, only gaps from chain with given index get
processed
:type chain_idx: :class:`int`
:param resnum_range: If not None, only gaps within this resnum range get
processed
:type resnum_range: :class:`tuple` containing two :class:`int`
'''
prof_name = 'closegaps::CloseLargeDeletions'
prof = core.StaticRuntimeProfiler.StartScoped(prof_name)
n_non_terminal_gaps = 0
for gap in mhandle.gaps:
if not gap.IsTerminal():
n_non_terminal_gaps += 1
if n_non_terminal_gaps > 0:
ost.LogInfo("Trying to resolve large deletions (%d gap(s) "
"left) by sampling a linker region." % n_non_terminal_gaps)
else:
return
# check/setup scoring
if not IsBackboneScoringSetUp(mhandle):
SetupDefaultBackboneScoring(mhandle)
# point to the current gap
gap_idx = 0
# Iterate all gaps. Since the number of gaps may change, always compare to
# an updated list. gap_idx is only increased when necessary, e.g. current
# gap could not be removed.
while gap_idx < len(mhandle.gaps) and gap_idx >= 0:
# keep copy of original gap
gap_orig = mhandle.gaps[gap_idx].Copy()
# check whether we are in the desired range
if not _InRange(gap_orig, chain_idx, resnum_range):
gap_idx += 1
continue
# get chain for gap
actual_chain_idx = gap_orig.GetChainIndex()
actual_chain = mhandle.model.chains[actual_chain_idx]
# terminal gaps are not deletions...
if gap_orig.IsTerminal():
gap_idx += 1
continue
# check if too long
if gap_orig.length > linker_length:
ost.LogInfo("Failed to close large deletion (%s), gap too long."\
% str(gap_orig))
gap_idx += 1
continue
# the function only works, if there are no gaps (del. AND insert.)
# towards the n-ter in the same chain, except terminal gaps.
clean_nter = True
for i in range(gap_idx):
if actual_chain_idx == mhandle.gaps[i].GetChainIndex():
if not mhandle.gaps[i].IsTerminal():
clean_nter = False
break
if not clean_nter:
ost.LogInfo("Failed to close large deletion (%s), more gaps found."\
% str(gap_orig))
gap_idx += 1
continue
# extend gap to desired length
actual_gap = gap_orig.Copy()
actual_extender = _GetGapExtender(mhandle, actual_gap,
use_scoring_extender,
use_full_extender,
linker_length)
while actual_gap.length < linker_length:
# extend the gap
if not actual_extender.Extend():
break
# FAIL (Fragger needs at least 3 residues)
if len(actual_gap.full_seq) < 3:
ost.LogInfo("Failed to close large deletion (%s), linker too short."\
% str(gap_orig))
gap_idx += 1
continue
# extract gap info
n_stem_res = actual_gap.before
c_stem_res = actual_gap.after
n_stem_res_num = n_stem_res.GetNumber().GetNum()
c_stem_res_num = c_stem_res.GetNumber().GetNum()
# let's find fragments!
fragger = loop.Fragger(actual_gap.full_seq)
seqsim_matrix = ost.seq.alg.BLOSUM62
fragger.AddSeqSimParameters(1.0, seqsim_matrix)
fragger.Fill(structure_db, 0.2, num_fragments)
# We generate two backbonelists based on residues beginning from the
# n-terminus:
# - bb_list: covers the full thing being sampled (incl. stuff in gap)
# - initial_n_stem: n-stem residue before the fragment insertion
#
# After having found the ideal fragment, we cannot simply insert it into
# the model, since all sidechain information would be lost.
# We therefore need the initial_n_stem to store the initial N stem
# positions. We can then calculate a transformation in the end and
# apply it manually in the end for the according atom positions.
# only put valid residues in bb_seq
first_num = actual_chain.residues[0].GetNumber().GetNum()
bb_seq = mhandle.seqres[actual_chain_idx][first_num-1:c_stem_res_num]
bb_list = loop.BackboneList(bb_seq)
actual_res_num = ost.mol.ResNum(first_num)
for i in range(len(bb_seq)):
actual_res = actual_chain.FindResidue(actual_res_num)
if actual_res.IsValid():
bb_list.Set(i, actual_res, bb_seq[i])
actual_res_num += 1
# define region in fragger and region before, including the n_stem of
# the fragment
frag_start_idx = n_stem_res_num - first_num
initial_n_stem = bb_list.Extract(frag_start_idx, frag_start_idx+1)
# all fragments get now sampled and scored
# the idea is to sample the fragments by moving the full part towards
# the n-terminus
closer = NTerminalCloser(c_stem_res)
best_score = float("inf")
best_idx = 0
scorer_weights = {"clash": 1, "reduced": 1}
for i in range(len(fragger)):
bb_list.ReplaceFragment(fragger[i], frag_start_idx, True)
closer.Close(bb_list, bb_list)
# a simple score gets calculated and best chosen
mhandle.backbone_scorer_env.SetEnvironment(bb_list, first_num,
actual_chain_idx)
score = mhandle.backbone_scorer.CalculateLinearCombination(\
scorer_weights, first_num, len(bb_list), actual_chain_idx)
if score < best_score:
best_score = score
best_idx = i
# set best fragment into bb_list and
fragment = fragger[best_idx]
bb_list.ReplaceFragment(fragment, frag_start_idx, True)
closer.Close(bb_list, bb_list)
# reextract fragment, since the whole thing has undergone a
# transformation
fragment = bb_list.Extract(frag_start_idx, frag_start_idx +
len(fragment))
# We finally calculate the transformation from the initial positions
# of all residues towards the n-terminus and apply it manually.
# this is done to not loose all the sidechain information
t = initial_n_stem.GetTransform(0, bb_list, frag_start_idx)
ed = mhandle.model.EditXCS(ost.mol.BUFFERED_EDIT)
for r in actual_chain.residues[:frag_start_idx]:
for a in r.atoms:
new_pos = t * ost.geom.Vec4(a.GetPos())
ed.SetAtomPos(a, ost.geom.Vec3(new_pos))
ed.UpdateICS()
# replace fragment part
fragment.InsertInto(actual_chain, n_stem_res_num)
# update score env. (manual to keep all atom sidechains)
mhandle.backbone_scorer_env.SetEnvironment(bb_list, first_num,
actual_chain_idx)
# will return -1 if last gap removed
gap_idx = ClearGaps(mhandle, actual_gap)
ost.LogInfo("Resolved %s by sampling %s as linker" % \
(str(gap_orig), str(actual_gap)))
# reset all atom env. if it's set (changes are too drastic so we set all)
if IsAllAtomScoringSetUp(mhandle):
mhandle.all_atom_sidechain_env.SetInitialEnvironment(mhandle.model)
# these methods will be exported into module
__all__ = ('CloseSmallDeletions', 'MergeGapsByDistance', 'FillLoopsByDatabase',
'FillLoopsByMonteCarlo', 'CloseGaps', 'ModelTermini',
'CloseLargeDeletions')