# Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and
#                          Biozentrum - University of Basel
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#   http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


'''High-level functionality of the modelling module to close gaps. Exported
via the __init__.py file. To be used directly by passing a ModellingHandle
instance as argument.
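
A minimal usage sketch (assuming a raw model built with :func:`BuildRawModel`;
the input file paths are placeholders):

.. code-block:: python

  from ost import io
  from promod3 import modelling

  tpl = io.LoadPDB('template.pdb')         # hypothetical template structure
  aln = io.LoadAlignment('aln.fasta')      # target-template alignment
  aln.AttachView(1, tpl.CreateFullView())
  mhandle = modelling.BuildRawModel(aln)
  modelling.CloseGaps(mhandle)             # close all non-terminal gaps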
'''

# internal
from promod3 import loop, core, scoring
from ._modelling import *
from ._ring_punches import *
from ._reconstruct_sidechains import *
# external
import ost
import sys

###############################################################################
# loop candidate selection setup
def _GetBestLC(mhandle, loop_candidates, start_resnum, chain_idx,
               db_scores, max_num_all_atom=0, length_dep_weights=False):
    # returns (min_idx, min_score)
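    # Each candidate collects a set of per-key scores; these are combined
    # linearly (total = sum of weight * score over the selected keys) and the
    # candidate with the minimal combined score is chosen.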

    # dummy check
    if len(loop_candidates) == 0:
        raise RuntimeError("Need at least 1 candidate to choose from!")
    
    # setup weights
    with_db = (not db_scores.IsEmpty())
    with_aa = (max_num_all_atom > 0)
    weights = ScoringWeights.GetWeights(with_db, with_aa, 
                                        length_dep_weights, 
                                        len(loop_candidates[0]))

    # setup all scores object (we collect needed scores there)
    all_scores = db_scores.Copy()  # copy to avoid changing db_scores

    # add backbone scores
    loop_candidates.CalculateBackboneScores(all_scores, mhandle,
                                            start_resnum, chain_idx)

    # add all atom scores?
    if with_aa:
        # get best ones based on previous scores
        non_aa_weights = ScoringWeights.GetWeights(with_db, False, 
                                                   length_dep_weights,
                                                   len(loop_candidates[0]))

        non_aa_scores = all_scores.LinearCombine(non_aa_weights)
        arg_sorted_scores = sorted([(v,i) for i,v in enumerate(non_aa_scores)])
        min_indices = [i for v,i in arg_sorted_scores[:max_num_all_atom]]
        # extract relevant loop candidates and scores
        aa_loop_candidates = loop_candidates.Extract(min_indices)
        all_scores = all_scores.Extract(min_indices)
        # add all atom scores
        aa_loop_candidates.CalculateAllAtomScores(all_scores, mhandle,
                                                  start_resnum, chain_idx)

    # get / return best
    scores = all_scores.LinearCombine(weights)
    min_score = min(scores)
    min_idx = scores.index(min_score)
    # translate to original indexing for all atom
    if with_aa:
        min_idx = min_indices[min_idx]
    return (min_idx, min_score)

###############################################################################
# helper functions
def _GetGapExtender(mhandle, actual_gap, use_scoring_extender,
                    use_full_extender, max_length=-1):
    # DO NOT USE full ext. with max_len = -1 as this can use LOTS of memory
    if use_full_extender and max_length < 0:
        raise RuntimeError("Cannot use neg. max_length with full extender!")
    # return appropriate gap extender
    actual_chain_idx = actual_gap.GetChainIndex()
    if use_scoring_extender:
        # get extender_penalties (note: changes as gaps filled)
        extender_penalties = [0.0] * len(mhandle.seqres[actual_chain_idx])
        for r in mhandle.model.chains[actual_chain_idx].residues:
            if r.GetSecStructure().IsHelical() or \
               r.GetSecStructure().IsExtended():
                num = r.GetNumber().GetNum()
                extender_penalties[num-1] = 1.0
        # setup scoring extender
        if use_full_extender:
            return ScoringGapExtender(actual_gap, 0.8,
                                      extender_penalties,
                                      mhandle.seqres[actual_chain_idx],
                                      max_length)
        else:
            raise RuntimeError("Cannot use ScoringGapExtender w/o max_length.")
    else:
        if use_full_extender:
            return FullGapExtender(actual_gap,
                                   mhandle.seqres[actual_chain_idx],
                                   max_length)
        else:
            return GapExtender(actual_gap,
                               mhandle.seqres[actual_chain_idx])

def _ResolveLogInfo(gap_orig, optimal_gap, n_candidates, with_db, with_aa):
    # helper for consistent logging...
    scor_str = "BB"
    if with_db:
        scor_str += "_DB"
    if with_aa:
        scor_str += "_AA"
    ost.LogInfo("Resolved %s by filling %s (%d candidates, %s)" % \
                (str(gap_orig), str(optimal_gap), n_candidates, scor_str))

def _CloseLoopFrame(mhandle, gap_orig, actual_candidates, actual_extended_gaps,
                    actual_db_scores=[], max_num_all_atom=0, 
                    length_dep_weights=False):
    '''Rank candidates with "frame-approach".
    All found candidates are extended prior to scoring so they match in size.
    '''
    # get chain for gap
    actual_chain_idx = actual_extended_gaps[0].GetChainIndex()
    actual_chain = mhandle.model.chains[actual_chain_idx]
    # get min_res_num, max_after_resnum
    min_before_resnum = sys.maxsize
    max_after_resnum = -sys.maxsize-1
    for g in actual_extended_gaps:
        min_before_resnum = min(min_before_resnum,
                                g.before.GetNumber().GetNum())
        max_after_resnum = max(max_after_resnum,
                               g.after.GetNumber().GetNum())
    
    # All loop candidates are scored along the maximal extension reached in
    # the preceding search, so we build an overall frame into which the loops
    # are inserted.
    frame_seq = mhandle.seqres[actual_chain_idx]\
                [min_before_resnum-1:max_after_resnum]
    frame_backbone_list = loop.BackboneList(frame_seq)
    actual_res_num = ost.mol.ResNum(min_before_resnum)
    for i in range(len(frame_seq)):
        actual_res = actual_chain.FindResidue(actual_res_num)
        if actual_res.IsValid():
            frame_backbone_list.Set(i, actual_res, frame_seq[i])
        actual_res_num += 1

    # prepare loop candidates for scoring
    final_loop_candidates = LoopCandidates(frame_seq)
    back_mapper = list()
    for i, loop_candidates in enumerate(actual_candidates):
        start_index = actual_extended_gaps[i].before.GetNumber().GetNum()\
                      - min_before_resnum
        for j, loop_candidate in enumerate(loop_candidates):
            actual_frame_backbone_list = frame_backbone_list.Copy()
            actual_frame_backbone_list.ReplaceFragment(
                loop_candidate, start_index, False)
            final_loop_candidates.Add(actual_frame_backbone_list)
            back_mapper.append((i, j))

    # db scores requested
    db_scores = ScoreContainer()
    for actual_scores in actual_db_scores:
        db_scores.Extend(actual_scores)
    
    if len(final_loop_candidates) > 0:
        # get best
        min_idx, min_score = _GetBestLC(mhandle, final_loop_candidates,
                                        min_before_resnum, actual_chain_idx,
                                        db_scores, max_num_all_atom, 
                                        length_dep_weights)
        ost.LogVerbose("Gap %s - %d candidates, best (min) score %g" %
                       (str(gap_orig), len(final_loop_candidates), min_score))
        # resolve loop
        idx_a = back_mapper[min_idx][0]
        idx_b = back_mapper[min_idx][1]
        _ResolveLogInfo(gap_orig, actual_extended_gaps[idx_a],
                        len(final_loop_candidates), bool(actual_db_scores),
                        max_num_all_atom > 0)
        # update model and clear gaps
        bb_list = actual_candidates[idx_a][idx_b]
        actual_gap = actual_extended_gaps[idx_a]
        # will return -1 if last gap removed, else next gap idx
        return InsertLoopClearGaps(mhandle, bb_list, actual_gap)
    else:
        ost.LogInfo("Failed at loop insertion (%s)" % str(gap_orig))
        return -2


def _CloseLoopBare(mhandle, gap_orig, actual_candidates, actual_extended_gaps,
                   actual_db_scores=[], penalize_length=True,
                   max_num_all_atom=0, length_dep_weights=False):
    '''Rank candidates directly.
    All candidates are scored directly and optionally penalized for gap length.
    '''
    # get chain for gap
    actual_chain_idx = actual_extended_gaps[0].GetChainIndex()
    
    # score loops as they are
    min_score = float("inf")
    optimal_gap = None
    optimal_candidate = None
    n_candidates = 0
    for i, loop_candidates in enumerate(actual_candidates):
        if len(loop_candidates) == 0: continue
        n_candidates += len(loop_candidates)
        # get current best
        before_resnum = actual_extended_gaps[i].before.GetNumber().GetNum()
        db_scores = ScoreContainer()
        if actual_db_scores:
            db_scores = actual_db_scores[i]
        best_idx, score = _GetBestLC(mhandle, loop_candidates, before_resnum,
                                     actual_chain_idx, db_scores,
                                     max_num_all_atom, length_dep_weights)
        # compare with others
        if penalize_length:
            # penalized by gap length
            score = score * actual_extended_gaps[i].length
        if score < min_score:
            # keep best one
            min_score = score
            optimal_gap = actual_extended_gaps[i]
            optimal_candidate = loop_candidates[best_idx]

    ost.LogVerbose("Gap %s - %d candidates, best (min) score %g" %
                   (str(gap_orig), n_candidates, min_score))

    # finally resolve loop
    if optimal_candidate is not None:
        # report
        _ResolveLogInfo(gap_orig, optimal_gap, n_candidates,
                        bool(actual_db_scores), max_num_all_atom > 0)
        # update model and clear gaps
        # will return -1 if last gap removed, else next gap idx
        return InsertLoopClearGaps(mhandle, optimal_candidate, optimal_gap)
    else:
        ost.LogInfo("Failed at loop insertion (%s)" % str(gap_orig))
        return -2

def _CloseLoop(mhandle, gap_orig, actual_candidates,
               actual_extended_gaps, variant=0, actual_db_scores=[],
               max_num_all_atom=0, length_dep_weights=False):
    '''Choose best scoring loop candidate and close loop in mhandle.
    :param gap_orig: Gap we actually wanted to close
    :param actual_candidates: List of LoopCandidates
    :param actual_extended_gaps: List of gaps (same size as actual_candidates)
    :param variant: 0 = _CloseLoopFrame()
                    1 = _CloseLoopBare(penalize_length=False)
                    2 = _CloseLoopBare(penalize_length=True)
    :param actual_db_scores: List of DB scores (same size as actual_candidates)
    :param max_num_all_atom: Num. of all atom LC to consider
    :return: gap-index as returned by ClearGaps or -2 if fail.
    '''
    prof = core.StaticRuntimeProfiler.StartScoped('closegaps::_CloseLoop')
    # check consistency
    N = len(actual_extended_gaps)
    if len(actual_candidates) != N:
        raise RuntimeError("Inconsistent list-lengths in _CloseLoop " \
                           "(%d, %d)" % (len(actual_candidates), N))
    # check for empty candidate list
    if N == 0:
        ost.LogInfo("Failed at loop insertion (%s)" % str(gap_orig))
        return -2
    # choose variant
    if variant == 0:
        return _CloseLoopFrame(mhandle, gap_orig, actual_candidates,
                               actual_extended_gaps, actual_db_scores,
                               max_num_all_atom, length_dep_weights)
    elif variant in [1,2]:
        return _CloseLoopBare(mhandle, gap_orig, actual_candidates,
                              actual_extended_gaps, actual_db_scores,
                              variant == 2, max_num_all_atom,
                              length_dep_weights)
    else:
        raise RuntimeError("Unknown variant %d" % variant)

def _InRange(gap, chain_idx, resnum_range):
    '''Check if gap is in range to be processed.'''
    # It is possible to specify exact ranges that should be resolved by
    # the chain_idx and resnum_range parameters.
    # Let's check whether we care for that particular gap in case of one
    # parameter being set.
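    # Worked example (hypothetical numbers): with resnum_range=(20, 40), a gap
    # with stems at resnums 25 and 45 is processed (its n-stem lies within the
    # range), while a gap with stems at 45 and 50 is skipped (no overlap).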
    if chain_idx is not None and gap.GetChainIndex() != chain_idx:
        return False
    elif resnum_range is not None:
        if gap.IsNTerminal():
            c_stem_num = gap.after.GetNumber().GetNum()
            return c_stem_num > resnum_range[0]
        if gap.IsCTerminal():
            n_stem_num = gap.before.GetNumber().GetNum()
            return n_stem_num < resnum_range[1]
        n_stem_num = gap.before.GetNumber().GetNum()
        c_stem_num = gap.after.GetNumber().GetNum()
        if n_stem_num <= resnum_range[0] and c_stem_num >= resnum_range[1]:
            # full overlap => current gap is fully enclosing range
            return True
        elif n_stem_num >= resnum_range[0] and n_stem_num < resnum_range[1]:
            # partial overlap => n-stem is within range
            return True
        elif c_stem_num > resnum_range[0] and c_stem_num <= resnum_range[1]:
            # partial overlap => c-stem is within range
            return True
        else:
            return False
    else:
        return True

def _GetMCWeights():
    # get weights for Monte Carlo sampling (subset of BB only scores for super 
    #                                       fast sampling)
    bb_weights = ScoringWeights.GetWeights()
    return_weights = dict()
    return_weights["reduced"] = bb_weights["reduced"]
    return_weights["cb_packing"] = bb_weights["cb_packing"]
    return_weights["clash"] = bb_weights["clash"]
    return return_weights

###############################################################################

def CloseSmallDeletions(mhandle, max_extension=9, clash_thresh=1.0,
                        e_thresh=200, use_scoring_extender=True,
                        use_full_extender=True, chain_idx=None,
                        resnum_range=None, ff_lookup=None):
    '''Close small deletions by relaxing neighbouring residues.

    Small deletions in the template from the target-template alignment have a
    good chance of being bridged just by relaxing the neighbours around a tiny
    gap. Before diving into the more demanding modelling tasks, those may
    already be closed in the raw model. After closure, some checks are done to
    see if the solution is stereochemically sensible.

    Closed gaps are removed from :attr:`mhandle.gaps`.

    .. literalinclude:: ../../../tests/doc/scripts/modelling_close_small_deletions.py

    :param mhandle: Modelling handle on which to apply change.
    :type mhandle:  :class:`ModellingHandle`

    :param max_extension: Maximal number of gap extension steps to perform
                          (see :class:`GapExtender`)
    :type max_extension: :class:`int`

    :param clash_thresh: Threshold for the backbone clash score. Acceptance
                         means being lower than this.
    :type clash_thresh: :class:`float`

    :param e_thresh: Potential energy should be lower than this.
    :type e_thresh: :class:`float`

    :param use_scoring_extender: True = use :class:`ScoringGapExtender` instead
                                 of :class:`GapExtender`.
                                 The gap is penalized as
                                 0.8*length + sum(helices) + sum(sheets).
                                 For the secondary-structure-penalty to work,
                                 the model-template must have the appropriate
                                 information before :func:`BuildRawModel` is
                                 called (e.g. with
                                 :meth:`ost.mol.alg.AssignSecStruct`).
    :type use_scoring_extender:  :class:`bool`

    :param use_full_extender: True = use :class:`FullGapExtender` instead
                              of :class:`GapExtender`. Also works in combination
                              with `use_scoring_extender`. This allows the gap
                              extender to skip neighboring gaps and to correctly
                              handle gaps close to termini.
    :type use_full_extender:  :class:`bool`

    :param chain_idx: If not None, only gaps from chain with given index get
                      processed
    :type chain_idx:  :class:`int`

    :param resnum_range: If not None, only gaps within this resnum range get
                         processed.
    :type resnum_range: :class:`tuple` containing two :class:`int`

    :param ff_lookup: Forcefield to parametrize 
                      :class:`promod3.loop.BackboneList` in 
                      :class:`promod3.modelling.BackboneRelaxer`.
                      If set to None, the one returned by
                      :func:`promod3.loop.ForcefieldLookup.GetDefault` 
                      gets used.
    :type ff_lookup:  :class:`promod3.loop.ForcefieldLookup`
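
    A minimal usage sketch (assuming *mhandle* is a :class:`ModellingHandle`
    built with :func:`BuildRawModel`):

    .. code-block:: python

      from promod3 import modelling

      # relax tiny deletions everywhere in the raw model
      modelling.CloseSmallDeletions(mhandle)
      # or restrict the procedure to the first chain only
      modelling.CloseSmallDeletions(mhandle, chain_idx=0)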
    '''

    prof_name = 'closegaps::CloseSmallDeletions'
    prof = core.StaticRuntimeProfiler.StartScoped(prof_name)

    if len(mhandle.gaps) > 0:
        ost.LogInfo("Trying to close small deletions (no. of gap(s): %d)." \
                    % len(mhandle.gaps))
    else:
        return

    # check/setup scoring
    if not IsBackboneScoringSetUp(mhandle):
        SetupDefaultBackboneScoring(mhandle)

    # we're only calculating clash scores here, so we copy the default
    # env and only attach a clash scorer. 
    clash_scorer_env = mhandle.backbone_scorer_env.Copy()
    clash_scorer = scoring.ClashScorer()
    clash_scorer.AttachEnvironment(clash_scorer_env)

    if ff_lookup is None:
        ff_lookup = loop.ForcefieldLookup.GetDefault()

    # Iterate mhandle.gaps. The number of gaps may change during the process,
    # hence we loop with 'while', comparing against the updated list of gaps.
    # If a gap gets closed, it is deleted from mhandle.gaps; otherwise the
    # counter current_gap_index is increased.
    current_gap_index = 0
    while current_gap_index < len(mhandle.gaps):
        # in the end this determines if a gap was closed or not
        success = False
        # work on a copy of the gap, if not closed in the end, no harm done
        current_gap = mhandle.gaps[current_gap_index].Copy()

        # A deletion is a gap with no sequence to model: to transform the
        # template sequence into the target sequence, residues vanish, i.e. the
        # target row of the alignment has gap characters where the template
        # does not. If we are not looking at a deletion, do nothing.
        if len(current_gap.seq) == 0 and not current_gap.IsTerminal()\
           and _InRange(current_gap, chain_idx, resnum_range):
            
            current_chain = current_gap.GetChain()
            current_chain_index = current_gap.GetChainIndex()

            # Try to close the gap by relaxation: each extension step gives us
            # a longer stretch of backbone to relax. If no more extension is
            # possible, break out. We stop at the first extension for which the
            # relaxation succeeds.
            extender = _GetGapExtender(mhandle, current_gap,
                                       use_scoring_extender,
                                       use_full_extender,
                                       max_extension)
            for _ in range(max_extension):
                if not extender.Extend():
                    break
                # gather residues for backbone relaxation, check that we get a
                # bunch of actual residues, otherwise jump to next extension
                res_list = list()
                n_stem_resnum = current_gap.before.GetNumber()
                c_stem_resnum = current_gap.after.GetNumber()
                idx = 0
                found_residues = True
                while n_stem_resnum + idx <= c_stem_resnum:
                    res = current_chain.FindResidue(n_stem_resnum+idx)
                    if not res.IsValid():
                        found_residues = False
                        break
                    res_list.append(res)
                    idx += 1
                if not found_residues:
                    continue
                # backbone relaxation, for now we allow 300 steps or stop at
                # max. force of 0.1
                bb_list = loop.BackboneList(current_gap.full_seq, res_list)
                bb_relaxer = BackboneRelaxer(bb_list, ff_lookup)
                potential_e = bb_relaxer.Run(bb_list, 300, 0.1)
                # check for clashes

                # It's a bit problematic since we score loops that are shifting
                # around, so we perform a stash operation for each scoring step.

                clash_scorer_env.Stash(n_stem_resnum.GetNum(),
                                       len(bb_list), current_chain_index)
                clash_scorer_env.SetEnvironment(bb_list, n_stem_resnum.GetNum(), 
                                                current_chain_index)
                score = clash_scorer.CalculateScore(n_stem_resnum.GetNum(),
                                                    len(bb_list),
                                                    current_chain_index)
                clash_scorer_env.Pop()

                # if there is no clash and potential energy is low enough we
                # just solved a gap, delete it and update the scorer for the
                # changed model
                if score < clash_thresh and \
                   potential_e < e_thresh and \
                   bb_list.TransOmegaTorsions():
                    ost.LogInfo("Closed: %s by relaxing %s" % \
                                (mhandle.gaps[current_gap_index], current_gap))
                    InsertLoopClearGaps(mhandle, bb_list, current_gap)
                    clash_scorer_env.SetEnvironment(bb_list, 
                                                    n_stem_resnum.GetNum(), 
                                                    current_chain_index)
                    success = True
                    break


        # A closed gap is removed, so the number of gaps goes down by itself.
        # In case of no success, the counter needs to be increased to jump to
        # the next gap.
        if not success:
            current_gap_index += 1

def MergeGapsByDistance(mhandle, distance, chain_idx = None, 
                        resnum_range = None):
    '''Merge two neighbouring gaps by deleting the residues in between.

    Check if two neighbouring gaps are at most *distance* residues apart from
    each other. Then delete those residues and store a new gap spanning the
    whole stretch of the original gaps and the deleted region. The original
    gaps will be removed. Stem residues count towards the gap, so **A-A-A**
    has a distance of 0.

    IMPORTANT: we assume here that *mhandle* stores gaps sequentially.
    Non-sequential gaps are ignored!

    .. literalinclude:: ../../../tests/doc/scripts/modelling_merge_gaps_by_distance.py

    :param mhandle: Modelling handle on which to apply change.
    :type mhandle:  :class:`ModellingHandle`
    :param distance: The max. no. of residues between two gaps up to which
                     merge happens.
    :type distance: :class:`int`

    :param chain_idx: If not None, only gaps from chain with given index get
                      processed
    :type chain_idx:  :class:`int`

    :param resnum_range: If not None, two gaps only get merged if they're
                         both in this resnum range.
    :type resnum_range: :class:`tuple` containing two :class:`int`
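
    A minimal sketch (assuming *mhandle* already contains gaps, e.g. right
    after :func:`BuildRawModel`):

    .. code-block:: python

      from promod3 import modelling

      # merge gaps separated by at most 2 residues
      modelling.MergeGapsByDistance(mhandle, 2)

    In the full pipeline (:func:`CloseGaps`), this step is interleaved with
    :func:`FillLoopsByDatabase` for increasing merge distances.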
    '''

    prof_name = 'closegaps::MergeGapsByDistance'
    prof = core.StaticRuntimeProfiler.StartScoped(prof_name)

    if len(mhandle.gaps) > 1:
        ost.LogInfo("Trying to merge %d gap(s) with distance %d." % \
                       (len(mhandle.gaps), distance))
    else:
        return

    # indicate if we merged gaps and should check for more
    try_again = True

    # The number of gaps changes on merge, so we cannot just iterate them.
    # If we merged gaps, we do not know if this was the last one so try_again
    # is set to True. If no more gaps were merged, we stop by leaving try_again
    # as False.
    while try_again:
        try_again = False
        # iterate all but the last gap, since we are always looking ahead
        for i in range(len(mhandle.gaps) - 1):
            current_gap = mhandle.gaps[i].Copy()
            next_gap = mhandle.gaps[i+1].Copy()
            # check that we are on the same chain
            if current_gap.GetChain() != next_gap.GetChain():
                continue
            # check for range (if given)
            if not (_InRange(current_gap, chain_idx, resnum_range) and\
                    _InRange(next_gap, chain_idx, resnum_range)):
                continue
            # no merging of gaps at the end AND the start :)
            if current_gap.IsNTerminal() and next_gap.IsCTerminal():
                continue
            # get the distance between the gaps
            dist = next_gap.before.GetNumber().GetNum() \
                   - current_gap.after.GetNumber().GetNum()
            # NOTE: -1 can currently only happen when fixing ring punches
            #       in _pipeline.BuildSidechains
            if dist < -1:
                ost.LogInfo("Non-sequential gaps found. Ignoring.")
                continue
            if dist <= distance:
                # gaps are close enough, combine! combine!
                MergeGaps(mhandle, i)
                ost.LogInfo("Merged gap %s and %s into %s" % \
                               (current_gap, next_gap, mhandle.gaps[i]))
                try_again = True
                break

def FillLoopsByDatabase(mhandle, fragment_db, structure_db,
                        torsion_sampler=None, max_loops_to_search=40,
                        min_loops_required=4, max_res_extension=-1,
                        extended_search=True, use_scoring_extender=True,
                        use_full_extender=True, score_variant=0,
                        ring_punch_detection=1, chain_idx=None,
                        resnum_range=None, max_num_all_atom=0,
                        clash_thresh=-1, length_dep_weights=False):
    '''Try to fill up loops from a structural database.

    Usually this will extend the gaps a bit to match candidates from the
    database. Do not expect a gap to be filled exactly in between its original
    stem residues.
    This function cannot fill gaps at the C- or N-terminus.

    .. literalinclude:: ../../../tests/doc/scripts/modelling_fill_loops_by_database.py

    :param mhandle: Modelling handle on which to apply change.
    :type mhandle:  :class:`ModellingHandle`

    :param fragment_db: A fragment database coupled to the *structure_db*.
    :type fragment_db: :class:`~promod3.loop.FragDB`

    :param structure_db: Backbone/profile data.
    :type structure_db: :class:`~promod3.loop.StructureDB`

    :param torsion_sampler: A sampler for torsion angles. A default one is
                            loaded if None.
    :type torsion_sampler: :class:`~promod3.loop.TorsionSampler`

    :param max_loops_to_search: Define how many candidates are 'enough' to be
                                evaluated per loop. The number of candidates
                                actually found may be larger (we only stop
                                extending once we have 'enough') or smaller
                                (if not enough candidates exist).
    :type max_loops_to_search: :class:`int`

    :param min_loops_required: Define how many candidates we require to close
                               the loop. If we did not find at least this number
                               of candidates for a gap, we skip it without
                               closing. Can be set to ``max_loops_to_search``
                               (or equivalently to -1) to enforce that we only
                               close gaps for which we found enough candidates.
    :type min_loops_required: :class:`int`

    :param max_res_extension: Only allow this number of residues to be added to
                              the gaps when extending. If set to **-1**, any
                              number of residues can be added (as long as the
                              `fragment_db` allows it).
    :type max_res_extension: :class:`int`

    :param extended_search: True = more loop candidates are considered.
                            The candidate search is done less precisely (see
                            :meth:`~LoopCandidates.FillFromDatabase`).
                            The candidates are still scored and evaluated the
                            same though (only more of them considered).
    :type extended_search:  :class:`bool`

    :param use_scoring_extender: True = use :class:`ScoringGapExtender` instead
                                 of :class:`GapExtender`.
                                 See :func:`CloseSmallDeletions`.
    :type use_scoring_extender:  :class:`bool`

    :param use_full_extender: True = use :class:`FullGapExtender` instead of
                              :class:`GapExtender`.
                              See :func:`CloseSmallDeletions`.
    :type use_full_extender:  :class:`bool`


    :param score_variant: How to score loop candidates. Options:

                          - **0**: build a frame of backbone residues enclosing
                            all candidates and score the frame. This will also
                            "score" non-modelled residues!
                          - **1**: score candidates directly
                          - **2**: like **1** but penalize length of candidate
                          
    :type score_variant:  :class:`int`

    :param ring_punch_detection: How to deal with ring punchings. Options:

                                 - **0**: not at all (fastest)
                                 - **1**: check for punchings with existing rings
                                 - **2**: check incl. sidechain for loop cand.

    :type ring_punch_detection:  :class:`int`

    :param chain_idx: If not None, only gaps from chain with given index get
                      processed
    :type chain_idx:  :class:`int`

    :param resnum_range: If not None, only gaps within this resnum range get
                         processed
    :type resnum_range: :class:`tuple` containing two :class:`int`

    :param max_num_all_atom: If > 0, we prefilter loop candidates based on
                             non-all-atom-scores and apply all atom scoring to
                             the best *max_num_all_atom* candidates. If desired,
                             *5* is a good value here (larger values give only
                             numerical improvement). With *5*, this will be
                             approx. 2x slower than without and will give a
                             slight improvement in loop selection.
    :type max_num_all_atom:  :class:`int`

    :param clash_thresh: If > 0, we only keep loop candidates which have a
                         backbone clash score lower than this.
    :type clash_thresh:  :class:`float`

    :param length_dep_weights: :class:`ScoringWeights` provides different sets
                               of weights that have been trained on different
                               loop subsets. If this flag is true, the length
                               dependent weights are used to select the final 
                               loops.
    :type length_dep_weights: :class:`bool`
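
    A minimal sketch (assuming *mhandle* is a :class:`ModellingHandle` with
    non-terminal gaps; the default databases are used):

    .. code-block:: python

      from promod3 import loop, modelling

      frag_db = loop.LoadFragDB()
      struct_db = loop.LoadStructureDB()
      modelling.FillLoopsByDatabase(mhandle, frag_db, struct_db)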
    '''

    prof_name = 'closegaps::FillLoopsByDatabase'
    prof = core.StaticRuntimeProfiler.StartScoped(prof_name)

    # load stuff if needed
    if torsion_sampler is None:
        torsion_sampler = loop.LoadTorsionSamplerCoil()

    n_non_terminal_gaps = 0
    for gap in mhandle.gaps:
        if not gap.IsTerminal():
            n_non_terminal_gaps += 1

    if n_non_terminal_gaps > 0:
        ost.LogInfo("Trying to fill %d gap(s) by database."%n_non_terminal_gaps)
    else:
        return

    # check/setup scoring
    if not IsBackboneScoringSetUp(mhandle):
        SetupDefaultBackboneScoring(mhandle)
    if max_num_all_atom > 0 and not IsAllAtomScoringSetUp(mhandle):
        SetupDefaultAllAtomScoring(mhandle)

    # some score variants cannot deal with multiple insertions in one "gap"
    disallow_ins_merge = (score_variant == 0)
    # do we want DB features?
    add_db_features = False
    if len(mhandle.profiles) > 0 and \
       structure_db.HasData(loop.StructureDBDataType.AAFrequenciesStruct) and \
       structure_db.HasData(loop.StructureDBDataType.AAFrequencies):
        add_db_features = True
    elif len(mhandle.profiles) > 0:
        ost.LogWarning("Cannot make use of profiles attached to mhandle in " +\
                       "FillLoopsByDatabase as the provided StructureDB " +\
                       "does not contain profiles. This might lead to " +\
                       "suboptimal modelling results.")

    # check min_loops_required
    if min_loops_required < 0:
        min_loops_required = max_loops_to_search
    # get biggest loop (w/o stems) stored in db
    max_db_loop_len = fragment_db.MaxFragLength() - 2

    # point to the current gap
    gap_idx = 0

    # Iterate all gaps. Since the number of gaps may change, always compare to
    # an updated list. gap_idx is only increased when necessary, e.g. current
    # gap could not be removed.
    while gap_idx < len(mhandle.gaps) and gap_idx >= 0:

        # keep copy of original gap
        gap_orig = mhandle.gaps[gap_idx].Copy()
        if disallow_ins_merge:
            gap_ins = CountEnclosedInsertions(mhandle, gap_orig)

        # ignore terminal gaps and out-of-range (if range given)
        if gap_orig.IsTerminal() \
           or not _InRange(gap_orig, chain_idx, resnum_range):
            gap_idx += 1
            continue

        ##################################
        # find loop candidates
        ##################################
        # list of LoopCandidates to fill gap
        actual_candidates = list()
        # list of StructuralGap corresponding to actual_candidates
        actual_extended_gaps = list()
        # list of dicts with DB specific scores
        # -> empty if not add_db_features
        actual_db_scores = list()
        # number of loops found (stop if >= max_loops_to_search)
        found_loops = 0
        # maximal length for this gap
        if max_res_extension >= 0:
            max_gap_length = min(gap_orig.length + max_res_extension,
                                 max_db_loop_len)
        else:
            max_gap_length = max_db_loop_len
        # currently extended gap and gap-extender
        actual_gap = gap_orig.Copy()
        actual_extender = _GetGapExtender(mhandle, actual_gap,
                                          use_scoring_extender,
                                          use_full_extender,
                                          max_gap_length)
        # iteratively extend actual_gap until we have enough loops
        first_iteration = True
        while actual_gap.length <= max_gap_length:
            # check if we have enough
            if found_loops >= max_loops_to_search:
                break

            # extend the gap
            if not first_iteration:
                if not actual_extender.Extend():
                    break
            first_iteration = False

            # ensure both stems are valid
            n_stem = actual_gap.before
            c_stem = actual_gap.after
            if not n_stem.IsValid() or not c_stem.IsValid():
                break
            # skip if we try to merge insertions when disallowed
            if disallow_ins_merge and \
               CountEnclosedInsertions(mhandle, actual_gap) != gap_ins:
                continue

            # get candidates for the current loop
            candidates = LoopCandidates.FillFromDatabase(
                n_stem, c_stem, actual_gap.full_seq,
                fragment_db, structure_db, extended_search)

            # skip gaps with no loop candidates
            if len(candidates) == 0:
                continue
            # check for stem rmsd before ApplyCCD
            if add_db_features:
                db_scores = ScoreContainer()
                candidates.CalculateStemRMSDs(db_scores, n_stem, c_stem)
            # try to close loops
            try:
                #pylint: disable=broad-except
                orig_indices = candidates.ApplyCCD(n_stem, c_stem,
                                                   torsion_sampler)
                
            except Exception:
                # CCD should work even if no residues exist before and/or after
                # the stems. If this is not desired, you should skip those gaps.
                # If anything else fails, the proposed gap is skipped.
                ost.LogWarning("ApplyCCD failure for " + str(actual_gap))
                continue

            # remove clashing ones if desired
            if clash_thresh > 0:
                clash_score_container = ScoreContainer()
                candidates.CalculateBackboneScores(clash_score_container,
                                                   mhandle, ["clash"],
                                                   n_stem.GetNumber().GetNum(),
                                                   actual_gap.GetChainIndex())
                clash_scores = clash_score_container.Get("clash")
                to_keep = []
                for i, bb_list in enumerate(candidates):
                    if clash_scores[i] < clash_thresh:
                        to_keep.append(i)
                candidates = candidates.Extract(to_keep)
                orig_indices = [orig_indices[i] for i in to_keep]
                assert(len(orig_indices) == len(candidates))

            # check for ring punchings
            if ring_punch_detection == 1 and len(candidates) > 0:
                FilterCandidates(candidates, mhandle.model,
                                 actual_gap, orig_indices)
            elif ring_punch_detection == 2 and len(candidates) > 0:
                FilterCandidatesWithSC(candidates, mhandle.model,
                                       actual_gap, orig_indices)
            
            # skip if no loop was successfully closed
            if len(candidates) == 0:
                continue

            # deal with DB features
            if add_db_features:
                # get subset of stem rmsd scores
                assert(len(orig_indices) == len(candidates))
                db_scores = db_scores.Extract(orig_indices)
                # add profile scores
                prof = mhandle.profiles[actual_gap.GetChainIndex()]
                start_pos = n_stem.GetNumber().GetNum() - 1
                candidates.CalculateSequenceProfileScores(db_scores,
                                                          structure_db,
                                                          prof, start_pos)
                candidates.CalculateStructureProfileScores(db_scores,
                                                           structure_db,
                                                           prof, start_pos)
                # update list
                actual_db_scores.append(db_scores)

            # update candidate lists
            actual_candidates.append(candidates)
            actual_extended_gaps.append(actual_gap.Copy())
            found_loops += len(candidates)

        # skip if we didn't find enough
        if found_loops < min_loops_required:
            ost.LogInfo("Failed at loop insertion (%s), only %d candidates" % \
                        (str(gap_orig), found_loops))
            gap_idx += 1
            continue

        ##################################
        # close loop
        ##################################
        new_idx = _CloseLoop(mhandle, gap_orig, actual_candidates,
                             actual_extended_gaps, score_variant,
                             actual_db_scores, max_num_all_atom,
                             length_dep_weights)
        if new_idx == -2:
            # try next one if we failed
            gap_idx += 1
        else:
            # all good: fix sidechains if we're in ring-punch-mode and continue
            if ring_punch_detection == 2:
                ReconstructSidechains(mhandle.model, keep_sidechains=True)
            gap_idx = new_idx


def FillLoopsByMonteCarlo(mhandle, torsion_sampler=None, max_loops_to_search=6,
                          max_extension=30, mc_num_loops=2, mc_steps=5000,
                          use_scoring_extender=True, use_full_extender=True,
                          score_variant=0, ring_punch_detection=1,
                          fragger_handles=None, chain_idx=None,
                          resnum_range=None, length_dep_weights=False):
    '''Try to fill up loops with Monte Carlo sampling.

    This is meant as a "last-resort" approach when it is not possible to fill
    the loops from the database with :func:`FillLoopsByDatabase`.
    This will extend the gaps (up to *max_extension* times) a bit to allow for 
    more loop candidates to be found.
    
    The loops are modelled by either sampling the dihedral angles or (if
    *fragger_handles* is given) :class:`~promod3.loop.Fragger` lists. The latter
    is only used if the gap length is >= the length of fragments stored.

    This function cannot fill gaps at the C- or N-terminus.

    .. literalinclude:: ../../../tests/doc/scripts/modelling_fill_loops_by_monte_carlo.py
    
    :param mhandle: Modelling handle on which to apply change.
    :type mhandle:  :class:`ModellingHandle`

    :param torsion_sampler: A sampler for torsion angles. A default one is 
                            loaded if None.
    :type torsion_sampler: :class:`~promod3.loop.TorsionSampler`

    :param max_loops_to_search: Define how many candidates are 'enough' to be
                                evaluated per loop.
    :type max_loops_to_search:  :class:`int`

    :param max_extension: Maximal number of gap extension steps to perform
                          (see :class:`GapExtender`)
    :type max_extension:  :class:`int`

    :param mc_num_loops: Number of loop candidates to consider for each extended gap
                         (see :meth:`~LoopCandidates.FillFromMonteCarloSampler`)
    :type mc_num_loops:  :class:`int`

    :param mc_steps: Number of MC steps to perform for each loop candidate
                     (see :meth:`~LoopCandidates.FillFromMonteCarloSampler`)
    :type mc_steps:  :class:`int`

    :param use_scoring_extender: True = use :class:`ScoringGapExtender` instead
                                 of :class:`GapExtender`.
                                 See :func:`CloseSmallDeletions`.
    :type use_scoring_extender:  :class:`bool`

    :param use_full_extender: True = use :class:`FullGapExtender` instead of
                              :class:`GapExtender`.
                              See :func:`CloseSmallDeletions`.
    :type use_full_extender:  :class:`bool`

    :param score_variant: How to score loop candidates (AllAtom not supported).
                          See :func:`FillLoopsByDatabase`.
    :type score_variant:  :class:`int`

    :param ring_punch_detection: How to deal with ring punchings.
                                 See :func:`FillLoopsByDatabase`.
    :type ring_punch_detection:  :class:`int`

    :param fragger_handles: Either None (no fragger sampling used) or one
                            fragger handle for each chain in *mhandle*.
    :type fragger_handles:  :class:`list` of :class:`~promod3.modelling.FraggerHandle`

    :param chain_idx: If not None, only gaps from chain with given index get
                      processed
    :type chain_idx:  :class:`int`

    :param resnum_range: If not None, only gaps within this resnum range get
                         processed
    :type resnum_range: :class:`tuple` containing two :class:`int`

    :param length_dep_weights: :class:`ScoringWeights` provides different sets
                               of weights that have been trained on different
                               loop subsets. If this flag is true, the length
                               dependent weights are used to select the final 
                               loops.
    :type length_dep_weights: :class:`bool`
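
    A minimal sketch (assuming *mhandle* still contains non-terminal gaps,
    e.g. after the database approach):

    .. code-block:: python

      from promod3 import modelling

      # dihedral-based sampling with the default coil torsion sampler
      modelling.FillLoopsByMonteCarlo(mhandle)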
    '''

    prof_name = 'closegaps::FillLoopsByMonteCarlo'
    prof = core.StaticRuntimeProfiler.StartScoped(prof_name)

    # load stuff if needed
    if torsion_sampler is None:
        torsion_sampler = loop.LoadTorsionSamplerCoil()

    n_non_terminal_gaps = 0
    for gap in mhandle.gaps:
        if not gap.IsTerminal():
            n_non_terminal_gaps += 1

    if n_non_terminal_gaps > 0:
        ost.LogInfo("Trying to fill %d gap(s) by Monte Carlo." \
                    % n_non_terminal_gaps)
    else:
        return

    # check/setup scoring
    if not IsBackboneScoringSetUp(mhandle):
        SetupDefaultBackboneScoring(mhandle)

    # point to the current gap
    gap_idx = 0

    # Iterate all gaps. Since the number of gaps may change, always compare to
    # an updated list. gap_idx is only increased when necessary, e.g. current
    # gap could not be removed.
    while gap_idx < len(mhandle.gaps) and gap_idx >= 0:

        # keep copy of original gap
        gap_orig = mhandle.gaps[gap_idx].Copy()
        if score_variant == 0:
            gap_ins = CountEnclosedInsertions(mhandle, gap_orig)

        # ignore terminal gaps and out-of-range (if range given)
        if gap_orig.IsTerminal() \
           or not _InRange(gap_orig, chain_idx, resnum_range):
            gap_idx += 1
            continue

        ##################################
        # find loop candidates
        ##################################
        # list of LoopCandidates to fill gap
        actual_candidates = list()
        # list of StructuralGap corresponding to actual_candidates
        actual_extended_gaps = list()
        # number of loops found (stop if >= max_loops_to_search)
        found_loops = 0
        # currently extended gap and gap-extender
        actual_gap = gap_orig.Copy()
        # each extension can make it at most 1 longer...
        max_loop_len = gap_orig.length + max_extension
        actual_extender = _GetGapExtender(mhandle, actual_gap,
                                          use_scoring_extender,
                                          use_full_extender,
                                          max_loop_len)
        actual_chain_idx = actual_gap.GetChainIndex()
        # iteratively extend actual_gap
        ext_step = 0

        first_iteration = True
        while found_loops < max_loops_to_search and ext_step < max_extension:

            # extend the gap
            if not first_iteration:
                if not actual_extender.Extend():
                    break
            first_iteration = False

            # make sure that the loop sequence has at least length 3
            if len(actual_gap.full_seq) < 3:
                continue
            ext_step += 1
            
            # ensure both stems are valid
            if not actual_gap.before.IsValid() or \
               not actual_gap.after.IsValid():
                break
            # skip if we try to merge insertions with score_variant=0
            if score_variant == 0 and \
               CountEnclosedInsertions(mhandle, actual_gap) != gap_ins:
                continue

            # setup sampler, closer, scorer and cooler for MC
            if fragger_handles is not None:
                fragger_handle = fragger_handles[actual_chain_idx]
            try:
                # choose sampler
                if fragger_handles is None or \
                   actual_gap.length < fragger_handle.fragment_length:
                    mc_sampler = PhiPsiSampler(actual_gap.full_seq,
                                               torsion_sampler)
                else:
                    start_pos = actual_gap.before.GetNumber().GetNum() - 1
                    end_pos = actual_gap.after.GetNumber().GetNum() - 1
                    fragger_list = fragger_handle.GetList(start_pos, end_pos)
                    mc_sampler = FragmentSampler(actual_gap.full_seq,
                                                 fragger_list,
                                                 init_fragments=5)

                mc_closer = DirtyCCDCloser(actual_gap.before,
                                           actual_gap.after)

                weights = _GetMCWeights()
                
                mc_scorer = LinearScorer(mhandle.backbone_scorer,
                                         mhandle.backbone_scorer_env,
                                         actual_gap.before.GetNumber().GetNum(),
                                         len(actual_gap.full_seq),
                                         actual_chain_idx, weights)

                start_temperature = 100
                cooling_factor = 0.9
                # 109 is roughly the number of times we have to apply a factor
                # of 0.9 to 100 until it reaches 0.001 (100 * 0.9**109 ~ 0.001)
                cooling_interval = int(round(mc_steps/109.0))
                mc_cooler = ExponentialCooler(cooling_interval,
                                              start_temperature,
                                              cooling_factor)
            except:
                # something went terribly wrong...
                ost.LogError('Failed to set up MC components')
                raise

            # try to get candidates for the current loop
            try:
                ost.LogVerbose("Firing MC for " + str(actual_gap))
                candidates = LoopCandidates.FillFromMonteCarloSampler(
                    actual_gap.full_seq, mc_num_loops, mc_steps, mc_sampler,
                    mc_closer, mc_scorer, mc_cooler)
            except RuntimeError:
                # Monte Carlo cannot be initialized when the stems are too far
                # apart => we need further loop extension
                ost.LogVerbose("Failed in setting up " + str(actual_gap) +
                               ", stems might be too far away...")
                continue
            # check for ring punchings
            if ring_punch_detection == 1 and len(candidates) > 0:
                FilterCandidates(candidates, mhandle.model, actual_gap)
            elif ring_punch_detection == 2 and len(candidates) > 0:
                FilterCandidatesWithSC(candidates, mhandle.model, actual_gap)
            # skip if nothing found
            if len(candidates) == 0:
                continue

            # update candidate lists
            actual_candidates.append(candidates)
            actual_extended_gaps.append(actual_gap.Copy())
            found_loops += len(candidates)

        ##################################
        # close loop
        ##################################
        new_idx = _CloseLoop(mhandle, gap_orig, actual_candidates,
                             actual_extended_gaps, score_variant,
                             length_dep_weights=length_dep_weights)
        if new_idx == -2:
            # try next one if we failed
            gap_idx += 1
        else:
            # all good: fix sidechains if we're in ring-punch-mode and continue
            if ring_punch_detection == 2:
                ReconstructSidechains(mhandle.model, keep_sidechains=True)
            gap_idx = new_idx


def CloseGaps(mhandle, merge_distance=4, fragment_db=None, structure_db=None,
              torsion_sampler=None, fragger_handles=None, chain_idx=None,
              resnum_range=None, length_dep_weights=False):
    '''
    Tries to close all gaps in a model, except termini. It will go through the
    following steps:

    - Try to close small deletions by relaxing them 
      (see :func:`CloseSmallDeletions`)
    - Iteratively merge gaps up to a distance **merge_distance**
      (see :func:`MergeGapsByDistance`) and try to fill them with a database 
      approach (see :func:`FillLoopsByDatabase`)
    - Try to fill remaining gaps using a Monte Carlo approach
      (see :func:`FillLoopsByMonteCarlo`)
    - Large deletions get closed using a last resort approach
      (see :func:`CloseLargeDeletions`)

    :param mhandle: Modelling handle on which to apply change.
    :type mhandle:  :class:`ModellingHandle`

    :param merge_distance:  Max. merge distance when performing the database 
                            approach
    :type merge_distance:   :class:`int`
    :param fragment_db:     Database for searching fragments in database 
                            approach, must be consistent with provided
                            **structure_db**. A default is loaded if None.
    :type fragment_db:      :class:`~promod3.loop.FragDB`
    :param structure_db:    Structure db from which the **fragment_db** gets
                            its structural information. A default is loaded
                            if None.
    :type structure_db:     :class:`~promod3.loop.StructureDB`
    :param torsion_sampler: Used as parameter for :func:`FillLoopsByDatabase`
                            and :func:`FillLoopsByMonteCarlo`. A default one is
                            loaded if None.
    :type torsion_sampler: :class:`promod3.loop.TorsionSampler`
    :param fragger_handles: A list of :class:`promod3.modelling.FraggerHandle`
                            objects for each chain in **mhandle**. 
                            If provided, fragments will be used for
                            sampling when the :func:`FillLoopsByMonteCarlo`
                            gets executed.
    :type fragger_handles: :class:`list`
    :param chain_idx: If not None, only gaps from chain with given index get
                      processed
    :type chain_idx:  :class:`int`

    :param resnum_range: If not None, only gaps within this resnum range get
                         processed.
    :type resnum_range: :class:`tuple` containing two :class:`int`  

    :param length_dep_weights: :class:`ScoringWeights` provides different sets
                               of weights that have been trained on different
                               loop subsets. If this flag is true, the length
                               dependent weights are used to close loops with
                               database / Monte Carlo.
    :type length_dep_weights: :class:`bool`
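
    A minimal sketch (assuming *mhandle* was built with :func:`BuildRawModel`;
    databases and samplers are loaded with their defaults, and the residue
    number range is a placeholder):

    .. code-block:: python

      from promod3 import modelling

      # run the full gap closing pipeline on all chains
      modelling.CloseGaps(mhandle)
      # or only close gaps in chain 0 within residue numbers 10 to 100
      modelling.CloseGaps(mhandle, chain_idx=0, resnum_range=(10, 100))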
    '''

    # load stuff if needed
    if fragment_db is None:
        fragment_db = loop.LoadFragDB()
    if structure_db is None:
        structure_db = loop.LoadStructureDB()
    if torsion_sampler is None:
        torsion_sampler = loop.LoadTorsionSamplerCoil()

    # try to close small deletions by relaxing them
    CloseSmallDeletions(mhandle, chain_idx=chain_idx, 
                        resnum_range=resnum_range)

    # iteratively merge gaps of distance i and fill loops by database
    for distance in range(merge_distance):
        MergeGapsByDistance(mhandle, distance, chain_idx=chain_idx,
                            resnum_range=resnum_range)
        FillLoopsByDatabase(mhandle, fragment_db, structure_db,
                            torsion_sampler=torsion_sampler, 
                            min_loops_required=-1, max_res_extension=6, 
                            chain_idx=chain_idx, resnum_range=resnum_range, 
                            clash_thresh=10, 
                            length_dep_weights=length_dep_weights)
        
    # if above fails, try DB-fill with less restrictions
    FillLoopsByDatabase(mhandle, fragment_db, structure_db,
                        torsion_sampler=torsion_sampler, min_loops_required=-1,
                        chain_idx=chain_idx, resnum_range=resnum_range,
                        clash_thresh=10, 
                        length_dep_weights=length_dep_weights)
    FillLoopsByDatabase(mhandle, fragment_db, structure_db,
                        torsion_sampler=torsion_sampler, chain_idx=chain_idx,
                        resnum_range=resnum_range,
                        length_dep_weights=length_dep_weights)

    # close remaining gaps by Monte Carlo
    FillLoopsByMonteCarlo(mhandle, torsion_sampler=torsion_sampler, 
                          fragger_handles=fragger_handles, 
                          chain_idx=chain_idx,
                          resnum_range=resnum_range,
                          length_dep_weights=length_dep_weights)

    # last resort approach to close large deletions
    CloseLargeDeletions(mhandle, structure_db, chain_idx=chain_idx, 
                        resnum_range=resnum_range)

    # NOTE:
    # In the function above, we call :func:`FillLoopsByDatabase` multiple
    # times. First, we try to close "easy" gaps which require few extensions
    # (we wish to limit the damage we do on the template) and for which we have
    # plenty of loop candidates. If some gaps cannot be closed like this, we try
    # less restrictive options. This approach is helpful if neighboring gaps are
    # close together and the one closer to the C-terminus is easier to close.
    # Several variants were evaluated on 1752 target-template-pairs and this one
    # worked best.

def ModelTermini(mhandle, torsion_sampler, fragger_handles=None,
                 mc_num_loops=20, mc_steps=5000):
    '''Try to model termini with Monte Carlo sampling.

    Use with care! This is an experimental feature which will increase coverage,
    but there is no guarantee that the resulting termini are of high quality!

    The termini are modelled by sampling either dihedral angles or (if
    *fragger_handles* is given) :class:`~promod3.loop.Fragger` lists. The latter
    are only used if the gap length is >= the length of the stored fragments.

    .. literalinclude:: ../../../tests/doc/scripts/modelling_model_termini.py

    :param mhandle: Modelling handle on which to apply change.
    :type mhandle:  :class:`ModellingHandle`

    :param torsion_sampler: A sampler for torsion angles.
    :type torsion_sampler: :class:`~promod3.loop.TorsionSampler`

    :param fragger_handles: Either None (no fragger sampling used) or one
                            fragger handle for each chain in *mhandle*.
    :type fragger_handles:  :class:`list` of :class:`~promod3.modelling.FraggerHandle`

    :param mc_num_loops: Number of loop candidates to consider for each terminal gap
                         (see :meth:`~LoopCandidates.FillFromMonteCarloSampler`)
    :type mc_num_loops:  :class:`int`

    :param mc_steps: Number of MC steps to perform for each loop candidate
                     (see :meth:`~LoopCandidates.FillFromMonteCarloSampler`)
    :type mc_steps:  :class:`int`
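
    A minimal call sketch (in addition to the example script included above; it
    assumes *mhandle* has already been set up and still contains terminal gaps):

    .. code-block:: python

        from promod3 import loop, modelling

        torsion_sampler = loop.LoadTorsionSamplerCoil()
        modelling.ModelTermini(mhandle, torsion_sampler)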
    '''

    prof_name = 'closegaps::ModelTermini'
    prof = core.StaticRuntimeProfiler.StartScoped(prof_name)

    # get terminal gaps (copies as we'll clear them as we go)
    terminal_gaps = [g.Copy() for g in mhandle.gaps if g.IsTerminal()]
    if len(terminal_gaps) > 0:
        ost.LogInfo("Trying to model %d terminal gap(s)." % len(terminal_gaps))
    else:
        return

    # check/setup scoring
    if not IsBackboneScoringSetUp(mhandle):
        SetupDefaultBackboneScoring(mhandle)

    # model them
    for actual_gap in terminal_gaps:
        
        # extract info
        if actual_gap.IsNTerminal():
            start_resnum = 1
        else:
            start_resnum = actual_gap.before.GetNumber().GetNum()
        actual_chain = actual_gap.GetChain()
        actual_chain_idx = actual_gap.GetChainIndex()
        if fragger_handles is not None:
            fragger_handle = fragger_handles[actual_chain_idx]

        # choose sampler
        if fragger_handles is None or \
           actual_gap.length < fragger_handle.fragment_length:
            mc_sampler = PhiPsiSampler(actual_gap.full_seq,
                                       torsion_sampler)
        else:
            end_pos = start_resnum-1 + actual_gap.length
            fragger_list = fragger_handle.GetList(start_resnum-1, end_pos)
            mc_sampler = FragmentSampler(actual_gap.full_seq, fragger_list,
                                         init_fragments=5)

        # choose closer
        if actual_gap.IsNTerminal():
            mc_closer = NTerminalCloser(actual_gap.after)
        else:
            mc_closer = CTerminalCloser(actual_gap.before)

        # setup scorer
        weights = _GetMCWeights()
        mc_scorer = LinearScorer(mhandle.backbone_scorer, 
                                 mhandle.backbone_scorer_env,
                                 start_resnum, len(actual_gap.full_seq), 
                                 actual_chain_idx, weights)

        # setup cooler
        start_temperature = 100
        cooling_factor = 0.9
        # 109 is roughly the number of times we have to multiply 100 by a
        # factor of 0.9 until it reaches a value of about 0.001
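        # (100 * 0.9^n = 0.001  =>  n = log(1e-5) / log(0.9) ~ 109.3)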
        cooling_interval = int(round(mc_steps/109.0))
        mc_cooler = ExponentialCooler(cooling_interval, start_temperature,
                                      cooling_factor)

        # try to get loop candidates
        ost.LogVerbose("Firing MC for " + str(actual_gap))
        candidates = LoopCandidates.FillFromMonteCarloSampler(
                actual_gap.full_seq, mc_num_loops, mc_steps, mc_sampler,
                mc_closer, mc_scorer, mc_cooler)

        if len(candidates) > 0:
            # score candidates
            bb_scores = ScoreContainer()
            candidates.CalculateBackboneScores(bb_scores, mhandle,
                                               start_resnum, actual_chain_idx)
            scores = bb_scores.LinearCombine(ScoringWeights.GetWeights())
            min_score = min(scores)
            min_idx = scores.index(min_score)
            # report
            ost.LogInfo("Resolved terminal gap %s (%d candidates)" % \
                        (str(actual_gap), len(candidates)))
            # update model and clear gap
            InsertLoopClearGaps(mhandle, candidates[min_idx], actual_gap)
        else:
            ost.LogInfo("Failed to model terminal gap (%s)" % str(actual_gap))
            

def CloseLargeDeletions(mhandle, structure_db, linker_length=8,
                        num_fragments=500, use_scoring_extender=True,
                        use_full_extender=True, chain_idx=None,
                        resnum_range=None):

    '''Try to close large deletions.

    This is meant as a "last-resort" approach. In some cases you cannot
    close very large deletions simply because the two parts separated
    by the deletion are too far apart. The idea is to sample a linker region
    and to move the whole n-terminal part of the chain accordingly.
    
    :param mhandle: Modelling handle on which to apply change.
    :type mhandle:  :class:`ModellingHandle`

    :param structure_db: The database from which to extract fragments for
                         the linker region.
    :type structure_db: :class:`~promod3.loop.StructureDB`

    :param linker_length: Desired length (in residues, stems excluded) for the
                          linker. The linker may end up shorter if the gap
                          extender cannot extend any further.
    :type linker_length:  :class:`int`

    :param num_fragments: Number of fragments used to sample the linker.
    :type num_fragments:  :class:`int`

    :param use_scoring_extender: True = use :class:`ScoringGapExtender` instead
                                 of :class:`GapExtender`.
                                 See :func:`CloseSmallDeletions`.
    :type use_scoring_extender:  :class:`bool`

    :param use_full_extender: True = use :class:`FullGapExtender` instead of
                              :class:`GapExtender`.
                              See :func:`CloseSmallDeletions`.
    :type use_full_extender:  :class:`bool`

    :param chain_idx: If not None, only gaps from chain with given index get
                      processed.
    :type chain_idx:  :class:`int`

    :param resnum_range: If not None, only gaps within this resnum range get
                         processed.
    :type resnum_range: :class:`tuple` containing two :class:`int`
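
    A minimal call sketch (it assumes *mhandle* still contains non-terminal
    gaps that could not be closed by the other gap handling functions):

    .. code-block:: python

        from promod3 import loop, modelling

        structure_db = loop.LoadStructureDB()
        modelling.CloseLargeDeletions(mhandle, structure_db)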
    '''

    prof_name = 'closegaps::CloseLargeDeletions'
    prof = core.StaticRuntimeProfiler.StartScoped(prof_name)

    n_non_terminal_gaps = 0
    for gap in mhandle.gaps:
        if not gap.IsTerminal():
            n_non_terminal_gaps += 1

    if n_non_terminal_gaps > 0:
        ost.LogInfo("Trying to resolve large deletions (%d gap(s) "
                    "left) by sampling a linker region." % n_non_terminal_gaps)
    else:
        return

    # check/setup scoring
    if not IsBackboneScoringSetUp(mhandle):
        SetupDefaultBackboneScoring(mhandle)

    # point to the current gap
    gap_idx = 0

    # Iterate all gaps. Since the number of gaps may change, always compare to
    # an updated list. gap_idx is only increased when necessary, e.g. when the
    # current gap could not be removed.
    while gap_idx < len(mhandle.gaps) and gap_idx >= 0:

        # keep copy of original gap
        gap_orig = mhandle.gaps[gap_idx].Copy()

        # check whether we are in the desired range
        if not _InRange(gap_orig, chain_idx, resnum_range):
            gap_idx += 1
            continue

        # get chain for gap
        actual_chain_idx = gap_orig.GetChainIndex()
        actual_chain = mhandle.model.chains[actual_chain_idx]
        
        # terminal gaps are not deletions...
        if gap_orig.IsTerminal():
            gap_idx += 1
            continue

        # check if too long
        if gap_orig.length > linker_length:
            ost.LogInfo("Failed to close large deletion (%s), gap too long."\
                        % str(gap_orig))
            gap_idx += 1
            continue

        # the function only works if there are no other gaps (deletions AND
        # insertions) towards the n-terminus in the same chain, except
        # terminal gaps.
        clean_nter = True
        for i in range(gap_idx):
            if actual_chain_idx == mhandle.gaps[i].GetChainIndex():
                if not mhandle.gaps[i].IsTerminal():
                    clean_nter = False
                    break
        if not clean_nter:
            ost.LogInfo("Failed to close large deletion (%s), more gaps found."\
                        % str(gap_orig))
            gap_idx += 1
            continue

        # extend gap to desired length
        actual_gap = gap_orig.Copy()
        actual_extender = _GetGapExtender(mhandle, actual_gap,
                                          use_scoring_extender,
                                          use_full_extender,
                                          linker_length)
        while actual_gap.length < linker_length:
            # extend the gap
            if not actual_extender.Extend():
                break

        # FAIL (Fragger needs at least 3 residues)
        if len(actual_gap.full_seq) < 3:
            ost.LogInfo("Failed to close large deletion (%s), linker too short."\
                        % str(gap_orig))
            gap_idx += 1
            continue
        
        # extract gap info
        n_stem_res = actual_gap.before
        c_stem_res = actual_gap.after
        n_stem_res_num = n_stem_res.GetNumber().GetNum()
        c_stem_res_num = c_stem_res.GetNumber().GetNum()

        # let's find fragments!
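        # (candidate fragments for the linker sequence are taken from
        #  structure_db and scored by BLOSUM62 sequence similarity with
        #  weight 1.0; num_fragments of them end up in the fragger)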
        fragger = loop.Fragger(actual_gap.full_seq)
        seqsim_matrix = ost.seq.alg.BLOSUM62
        fragger.AddSeqSimParameters(1.0, seqsim_matrix)
        fragger.Fill(structure_db, 0.2, num_fragments)

        # We generate two backbone lists based on residues starting from the
        # n-terminus:
        # - bb_list: covers the full stretch being sampled (incl. gap residues)
        # - initial_n_stem: the n-stem residue before the fragment insertion
        #
        # After having found the ideal fragment, we cannot simply insert it into
        # the model, since all sidechain information would be lost.
        # We therefore use initial_n_stem to store the initial n-stem positions,
        # calculate a transformation at the end and apply it manually to the
        # affected atom positions.

        # only set positions in bb_list for residues present in the model
        first_num = actual_chain.residues[0].GetNumber().GetNum()
        bb_seq = mhandle.seqres[actual_chain_idx][first_num-1:c_stem_res_num]
        bb_list = loop.BackboneList(bb_seq)
        actual_res_num = ost.mol.ResNum(first_num)
        for i in range(len(bb_seq)):
            actual_res = actual_chain.FindResidue(actual_res_num)
            if actual_res.IsValid():
                bb_list.Set(i, actual_res, bb_seq[i])
            actual_res_num += 1

        # define where the fragger region starts within bb_list (the n-stem of
        # the fragment) and keep a copy of the initial n-stem residue
        frag_start_idx = n_stem_res_num - first_num
        initial_n_stem = bb_list.Extract(frag_start_idx, frag_start_idx+1)

        # all fragments now get sampled and scored
        # the idea is to place each fragment by moving the full n-terminal part
        # of the chain onto it, while the c-stem stays fixed
        closer = NTerminalCloser(c_stem_res)
        best_score = float("inf")
        best_idx = 0
        scorer_weights = {"clash": 1, "reduced": 1}
        for i in range(len(fragger)):

            bb_list.ReplaceFragment(fragger[i], frag_start_idx, True)
            closer.Close(bb_list, bb_list)

            # a simple score gets calculated and the best fragment is kept
            mhandle.backbone_scorer_env.SetEnvironment(bb_list, first_num, 
                                                       actual_chain_idx)
            score = mhandle.backbone_scorer.CalculateLinearCombination(\
                        scorer_weights, first_num, len(bb_list), actual_chain_idx)

            if score < best_score:
                best_score = score
                best_idx = i

        # set best fragment into bb_list and close towards the fixed c-stem
        fragment = fragger[best_idx]
        bb_list.ReplaceFragment(fragment, frag_start_idx, True)
        closer.Close(bb_list, bb_list)
        # reextract fragment, since the whole thing has undergone a 
        # transformation
        fragment = bb_list.Extract(frag_start_idx, frag_start_idx + 
                                                   len(fragment))

        # We finally calculate the transformation from the initial n-stem
        # positions and apply it manually to all residues towards the
        # n-terminus. This is done to not lose the sidechain information.
        t = initial_n_stem.GetTransform(0, bb_list, frag_start_idx)
        ed = mhandle.model.EditXCS(ost.mol.BUFFERED_EDIT)
        for r in actual_chain.residues[:frag_start_idx]:
            for a in r.atoms:
                new_pos = t * ost.geom.Vec4(a.GetPos())
                ed.SetAtomPos(a, ost.geom.Vec3(new_pos))
        ed.UpdateICS()

        # replace fragment part
        fragment.InsertInto(actual_chain, n_stem_res_num)

        # update score env. (manual to keep all atom sidechains)
        mhandle.backbone_scorer_env.SetEnvironment(bb_list, first_num,
                                                   actual_chain_idx)
        # will return -1 if last gap removed
        gap_idx = ClearGaps(mhandle, actual_gap)

        ost.LogInfo("Resolved %s by sampling %s as linker" % \
                    (str(gap_orig), str(actual_gap)))
    
    # reset the all atom env. if it's set (the changes are too drastic, so we
    # reset the full environment)
    if IsAllAtomScoringSetUp(mhandle):
        mhandle.all_atom_sidechain_env.SetInitialEnvironment(mhandle.model)

# these methods will be exported into module
__all__ = ('CloseSmallDeletions', 'MergeGapsByDistance', 'FillLoopsByDatabase',
           'FillLoopsByMonteCarlo', 'CloseGaps', 'ModelTermini', 
           'CloseLargeDeletions')