diff --git a/modelling/pymod/_closegaps.py b/modelling/pymod/_closegaps.py index 327ef97d1464f96324dfb1fb9fc66dda6bcedc45..e405119fc5681acafdc54819e2e93a46012d4308 100644 --- a/modelling/pymod/_closegaps.py +++ b/modelling/pymod/_closegaps.py @@ -7,15 +7,9 @@ as argument. from promod3 import loop, sidechain, core, scoring from _modelling import * from _ring_punches import * -from _denovo import * # external import ost import sys -import os -try: - import hashlib -except ImportError: - import md5 as hashlib ############################################################################### # loop candidate selection setup @@ -317,77 +311,6 @@ def _InRange(gap, chain_idx, resnum_range): else: return True - -def _GenerateTerminiTrajectories(mhandle, gap, num_trajectories, - mc_steps, fragger_handle): - - termini_type = None - if gap.IsNTerminal(): - termini_type = "nter" - if gap.IsCTerminal(): - termini_type = "cter" - if termini_type == None: - raise RuntimeError("Provided gap must be n- or c-terminal!") - - chain_idx = gap.GetChainIndex() - chain_sequence = mhandle.seqres[chain_idx].GetString() - - idx_range = [None, None] - sampling_sequence = None - mc_closer = None - mc_sampler = None - - if termini_type == "nter": - terminal_residue = gap.after - mc_closer = NTerminalCloser(terminal_residue) - idx_range[0] = 0 - idx_range[1] = terminal_residue.GetNumber().GetNum() - 1 - sampling_sequence = chain_sequence[:idx_range[1]+1] - - if termini_type == "cter": - terminal_residue = gap.before - mc_closer = CTerminalCloser(terminal_residue) - idx_range[0] = terminal_residue.GetNumber().GetNum() - 1 - idx_range[1] = len(chain_sequence) - 1 - sampling_sequence = chain_sequence[idx_range[0]:] - - fragger_list = fragger_handle.GetList(idx_range[0], idx_range[1]) - if len(fragger_list) == 0: - # seems to be too short for fragments - # let's do a phi/psi sampler - torsion_sampler = loop.LoadTorsionSamplerCoil() - mc_sampler = PhiPsiSampler(sampling_sequence, - torsion_sampler) - else: - mc_sampler = FragmentSampler(sampling_sequence, - fragger_list, - init_fragments = 5) - - scoring_weights = ScoringWeights.GetWeights() - - mc_scorer = LinearScorer(mhandle.backbone_scorer, - idx_range[0] + 1, chain_idx, - scoring_weights) - - # the number of 109 is roughly the number of times we have to apply - # a factor of 0.9 to 100 until it reaches a value of 0.001 - start_temperature = 100 - cooling_factor = 0.9 - cooling_interval = mc_steps/109 - - mc_cooler = ExponentialCooler(start_temperature, - cooling_interval, - cooling_factor) - - candidates = LoopCandidates.FillFromMonteCarloSampler(sampling_sequence, - num_trajectories, - mc_steps, - mc_sampler, - mc_closer, - mc_scorer, - mc_cooler) - return candidates - ############################################################################### def CloseSmallDeletions(mhandle, max_extension=9, clash_thresh=1.0, @@ -413,9 +336,8 @@ def CloseSmallDeletions(mhandle, max_extension=9, clash_thresh=1.0, (see :class:`GapExtender`) :type max_extension: :class:`int` - :param clash_thresh: Threshold for the clash score, acceptance means being - lower than this. - + :param clash_thresh: Threshold for the backbone clash score. Acceptance + means being lower than this. :type clash_thresh: :class:`float` :param e_thresh: Potential energy should be lower than this. @@ -620,7 +542,8 @@ def FillLoopsByDatabase(mhandle, fragment_db, structure_db, extended_search=True, use_scoring_extender=True, use_full_extender=True, score_variant=0, ring_punch_detection=1, chain_idx=None, - resnum_range=None, max_num_all_atom=0): + resnum_range=None, max_num_all_atom=0, + clash_thresh=-1): '''Try to fill up loops from a structural database. Usually this will extend the gaps a bit to match candidates from the @@ -714,6 +637,10 @@ def FillLoopsByDatabase(mhandle, fragment_db, structure_db, approx. 2x slower than without and will give a slight improvement in loop selection. :type max_num_all_atom: :class:`int` + + :param clash_thresh: If > 0, we only keep loop candidates which have a + backbone clash score lower than this. + :type clash_thresh: :class:`float` ''' prof_name = 'closegaps::FillLoopsByDatabase' @@ -826,6 +753,20 @@ def FillLoopsByDatabase(mhandle, fragment_db, structure_db, ost.LogWarning("ApplyCCD failure for " + str(actual_gap)) continue + # remove clashing ones if desired + if clash_thresh > 0: + scorer = mhandle.backbone_scorer.Get("clash") + to_keep = [] + for i, bb_list in enumerate(candidates): + score = scorer.CalculateScore(bb_list, + n_stem.GetNumber().GetNum(), + actual_gap.GetChainIndex()) + if score < clash_thresh: + to_keep.append(i) + candidates = candidates.Extract(to_keep) + orig_indices = [orig_indices[i] for i in to_keep] + assert(len(orig_indices) == len(candidates)) + # check for ring punchings if ring_punch_detection == 1 and len(candidates) > 0: FilterCandidates(candidates, mhandle.model, @@ -1112,67 +1053,133 @@ def FillLoopsByMonteCarlo(mhandle, torsion_sampler, max_loops_to_search=6, gap_idx = new_idx -def ModelTermini(mhandle, structure_db = None, mc_num_loops = 20, - mc_steps = 10000, fragment_cache_dir = None, - scratch_dir = None): - '''Try to model termini with Monte Carlo sampling. - Use with care! This is an experimental feature which will increase coverage - but we do not assume that the resulting termini are of high quality! As an - optional feature, the function provides a fragment caching functionality. If - you call this function several times for a chain with the same SEQRES, you - might want to provide a **fragment_cache_dir** and a **scratch_dir**. - The idea is, that **fragment_cache_dir** is a central fragment cache based - on SEQRES md5 hashes to search for fragments of a certain chain. - It copies over the cache file to **scratch_dir**, - does all required magic and then copies the fragments back to - **fragment_cache_dir** so it can be used in later function calls. +def CloseGaps(mhandle, merge_distance=4, fragment_db=None, structure_db=None, + torsion_sampler=None, chain_idx=None, resnum_range=None): + ''' + Tries to close all gaps in a model, except termini. It will go through + following steps: + + - Try to close small deletions by relaxing them + (see :func:`CloseSmallDeletions`) + - Iteratively merge gaps up to a distance **merge_distance** + (see :func:`MergeGapsByDistance`) and try to fill them with a database + approach (see :func:`FillLoopsByDatabase`) + - Try to fill remaining gaps using a Monte Carlo approach + (see :func:`FillLoopsByMonteCarlo`) + - Large deletions get closed using a last resort approach + (see :func:`CloseLargeDeletions`) :param mhandle: Modelling handle on which to apply change. :type mhandle: :class:`ModellingHandle` - :param structure_db: Database for fragment extraction, - default db will be loaded if not provided - :type structure_db: :class:`~promod3.loop.StructureDB` + :param merge_distance: Max. merge distance when performing the database + approach + :type merge_distance: :class:`int` + :param fragment_db: Database for searching fragments in database + approach, must be consistent with provided + **structure_db**. A default is loaded if None. + :type fragment_db: :class:`~promod3.loop.FragDB` + :param structure_db: Structure db from which the **fragment_db** gets + it's structural information. A default is loaded + if None. + :type structure_db: :class:`~promod3.loop.StructureDB` + :param torsion_sampler: Used as parameter for :func:`FillLoopsByDatabase` + and :func:`FillLoopsByMonteCarlo` A default one is + loaded if None. + :param chain_idx: If not None, only gaps from chain with given index get + processed + :type chain_idx: :class:`int` - :param mc_num_loops: Number of Monte Carlo trajectories that are generated - :param mc_num_loops: :class:`int` + :param resnum_range: If not None, only gaps within this resnum range get + processed. + :type resnum_range: :class:`tuple` containing two :class:`int` - :param mc_steps: Number of steps per Monte Carlo trajectory - :type mc_steps: :class:`int` + ''' - :param fragment_cache_dir: Central fragment cache directory - :type fragment_cache_dir: :class:`str` + # load stuff if needed + if fragment_db is None: + fragment_db = loop.LoadFragDB() + if structure_db is None: + structure_db = loop.LoadStructureDB() + if torsion_sampler is None: + torsion_sampler = loop.LoadTorsionSamplerCoil() - :param scratch_dir: Local scratch directory - :type scratch_dir: :class:`str` - ''' + # try to close small deletions by relaxing them + CloseSmallDeletions(mhandle, chain_idx=chain_idx, + resnum_range=resnum_range) - prof_name = 'closegaps::ModelTermini' - prof = core.StaticRuntimeProfiler.StartScoped(prof_name) + # iteratively merge gaps of distance i and fill loops by database + for distance in range(merge_distance): + MergeGapsByDistance(mhandle, distance, chain_idx=chain_idx, + resnum_range=resnum_range) + FillLoopsByDatabase(mhandle, fragment_db, structure_db, + torsion_sampler, min_loops_required=-1, + max_res_extension=6, chain_idx=chain_idx, + resnum_range=resnum_range, clash_thresh=10) + + # if above fails, try DB-fill with less restrictions + FillLoopsByDatabase(mhandle, fragment_db, structure_db, + torsion_sampler, min_loops_required=-1, + chain_idx=chain_idx, resnum_range=resnum_range, + clash_thresh=10) + FillLoopsByDatabase(mhandle, fragment_db, structure_db, + torsion_sampler, chain_idx=chain_idx, + resnum_range=resnum_range) - # check/setup scoring - if not IsBackboneScoringSetUp(mhandle): - SetupDefaultBackboneScoring(mhandle) + # close remaining gaps by Monte Carlo + FillLoopsByMonteCarlo(mhandle, torsion_sampler, chain_idx=chain_idx, + resnum_range=resnum_range) - if structure_db == None: - structure_db = loop.LoadStructureDB() + # last resort approach to close large deletions + CloseLargeDeletions(mhandle, structure_db, chain_idx=chain_idx, + resnum_range=resnum_range) + + # NOTE: + # In the function above, we call :func:`FillLoopsByDatabase` multiple + # times. First, we try to close "easy" gaps which require few extensions + # (we wish to limit the damage we do on the template) and for which we have + # plenty of loop candidates. If some gaps cannot be closed like this, we try + # less restrictive options. This approach is helpful if neighboring gaps are + # close together and the one closer to the C-terminus is easier to close. + # Several variants were evaluated on 1752 target-template-pairs and this one + # worked best. + +def ModelTermini(mhandle, torsion_sampler, fragger_handles=None, + mc_num_loops=20, mc_steps=5000): + '''Try to model termini with Monte Carlo sampling. + + Use with care! This is an experimental feature which will increase coverage + but we do not assume that the resulting termini are of high quality! - fragment_cache = None - if fragment_cache_dir != None: - if not os.path.exists(fragment_cache_dir): - err = "You provided a fragment cache path, that doesn't exist!" - raise RuntimeError(err) + The termini are modelled by either sampling the dihedral angles or (if + *fragger_handles* is given) :class:`~promod3.loop.Fragger` lists. The latter + is only used if the gap length is >= the length of fragments stored. + + Terminal gaps of length 1 are ignored by this function! - # if there is a valid fragment_cache dir, we also need a valid - # scratch_dir - if not os.path.exists(scratch_dir): - err = "If a fragment_cache_dir is provided you also must provide " - err += "a valid scratch_dir. " - err += scratch_dir - err += " seems to be invalid..." - raise RuntimeError(err) + .. literalinclude:: ../../../tests/doc/scripts/modelling_model_termini.py - fragment_cache = core.FileCache(fragment_cache_dir) + :param mhandle: Modelling handle on which to apply change. + :type mhandle: :class:`ModellingHandle` + + :param torsion_sampler: A sampler for torsion angles. + :type torsion_sampler: :class:`~promod3.loop.TorsionSampler` + + :param fragger_handles: Either None (no fragger sampling used) or one + fragger handle for each chain in *mhandle*. + :type fragger_handles: :class:`list` of :class:`~promod3.loop.FraggerHandle` + + :param mc_num_loops: Number of loop candidates to consider for each terminal gap + (see :meth:`~LoopCandidates.FillFromMonteCarloSampler`) + :type mc_num_loops: :class:`int` + + :param mc_steps: Number of MC steps to perform for each loop candidate + (see :meth:`~LoopCandidates.FillFromMonteCarloSampler`) + :type mc_steps: :class:`int` + ''' + + prof_name = 'closegaps::ModelTermini' + prof = core.StaticRuntimeProfiler.StartScoped(prof_name) # get terminal gaps (copies as we'll clear them as we go) terminal_gaps = [g.Copy() for g in mhandle.gaps if g.IsTerminal() and g.length > 1] @@ -1188,61 +1195,53 @@ def ModelTermini(mhandle, structure_db = None, mc_num_loops = 20, # model them for actual_gap in terminal_gaps: - start_resnum = None + # extract info if actual_gap.IsNTerminal(): start_resnum = 1 - elif actual_gap.IsCTerminal(): + else: start_resnum = actual_gap.before.GetNumber().GetNum() + actual_chain = actual_gap.GetChain() + actual_chain_idx = actual_gap.GetChainIndex() + if fragger_handles is not None: + fragger_handle = fragger_handles[actual_chain_idx] + + # choose sampler + if fragger_handles is None or \ + actual_gap.length < fragger_handle.fragment_length: + mc_sampler = PhiPsiSampler(actual_gap.full_seq, + torsion_sampler) else: - err = "Can only model N-terminal or C-terminal gaps in " - err += "ModelTermini function!" - raise RuntimeError(err) + end_pos = start_resnum-1 + actual_gap.length + fragger_list = fragger_handle.GetList(start_resnum-1, end_pos) + mc_sampler = FragmentSampler(actual_gap.full_seq, fragger_list, + init_fragments=5) + + # choose closer + if actual_gap.IsNTerminal(): + mc_closer = NTerminalCloser(actual_gap.after) + else: + mc_closer = CTerminalCloser(actual_gap.before) + + # setup scorer + weights = ScoringWeights.GetWeights() + mc_scorer = LinearScorer(mhandle.backbone_scorer, start_resnum, + actual_chain_idx, weights) + + # setup cooler + start_temperature = 100 + cooling_factor = 0.9 + # the number of 109 is roughly the number of times we have to apply + # a factor of 0.9 to 100 until it reaches a value of 0.001 + cooling_interval = int(round(mc_steps/109.0)) + mc_cooler = ExponentialCooler(cooling_interval, start_temperature, + cooling_factor) + + # try to get loop candidates + ost.LogVerbose("Firing MC for " + str(actual_gap)) + candidates = LoopCandidates.FillFromMonteCarloSampler( + actual_gap.full_seq, mc_num_loops, mc_steps, mc_sampler, + mc_closer, mc_scorer, mc_cooler) - # generate a FraggerHandle for that particular chain - actual_chain_idx = actual_gap.GetChainIndex() - actual_chain = mhandle.model.chains[actual_chain_idx] - actual_sequence = mhandle.seqres[actual_chain_idx].GetString() - - actual_profile = None - actual_psipred_pred = None - if len(mhandle.profiles) > 0: - actual_profile = mhandle.profiles[actual_chain_idx] - if len(mhandle.psipred_predictions): - actual_psipred_pred = mhandle.psipred_predictions[actual_chain_idx] - - fragger_handle = loop.FraggerHandle(actual_sequence, - profile = actual_profile, - psipred_pred = actual_psipred_pred, - structure_db = structure_db) - - # if there is a fragment_cache, we try to copy the file to our scratch_dir - # and load the fragments from there - if fragment_cache: - # the caching is based on md5 hashes - md5_hash = hashlib.md5(actual_sequence).hexdigest() - cache_filename = md5_hash + ".frag" - if(fragment_cache.CopyFromCache(cache_filename, scratch_dir)): - # yeah, we can load cached stuff - fragger_handle.LoadCached(os.path.join(scratch_dir, cache_filename)) - - # let's get the loop candidates - candidates = _GenerateTerminiTrajectories(mhandle, actual_gap, - mc_num_loops, mc_steps, - fragger_handle) - - # Once it's done, we copy the fragments back to cache - # in case of multiple jobs accessing the same data in parallel, this leads - # to data loss. We just don't care at this point (it does not lead to - # crashes, but it might be that we loose fragments that already have - # been calculated => we have to recalculate them in the next run) - if fragment_cache: - # the caching is based on md5 hashes - md5_hash = hashlib.md5(actual_sequence).hexdigest() - cache_filename = md5_hash + ".frag" - fragger_handle.SaveCached(os.path.join(scratch_dir, cache_filename)) - fragment_cache.CopyToCache(cache_filename, scratch_dir) - - # select the best candidate if len(candidates) > 0: # score candidates bb_scores = ScoreContainer() @@ -1474,107 +1473,6 @@ def CloseLargeDeletions(mhandle, structure_db, linker_length=8, if IsAllAtomScoringSetUp(mhandle): mhandle.all_atom_sidechain_env.SetInitialEnvironment(mhandle.model) -def CloseGaps(mhandle, merge_distance=4, fragment_db=None, structure_db=None, - torsion_sampler=None, chain_idx = None, resnum_range = None, - fragment_cache_dir = None, model_termini = False, - scratch_dir = os.getcwd()): - ''' - Tries to close all gaps in a model, except termini. It will go through - following steps: - - - Try to close small deletions by relaxing them - (see :func:`CloseSmallDeletions`) - - Iteratively merge gaps up to a distance **merge_distance** - (see :func:`MergeGapsByDistance`) and try to fill them with a database - approach (see :func:`FillLoopsByDatabase`) - - Try to fill remaining gaps using a Monte Carlo approach - (see :func:`FillLoopsByMonteCarlo`) - - Large deletions get closed using a last resort approach - (see :func:`CloseLargeDeletions`) - - :param mhandle: Modelling handle on which to apply change. - :type mhandle: :class:`ModellingHandle` - - :param merge_distance: Max. merge distance when performing the database - approach - :type merge_distance: :class:`int` - :param fragment_db: Database for searching fragments in database - approach, must be consistent with provided - **structure_db**. A default is loaded if None. - :type fragment_db: :class:`~promod3.loop.FragDB` - :param structure_db: Structure db from which the **fragment_db** gets - it's structural information. A default is loaded - if None. - :type structure_db: :class:`~promod3.loop.StructureDB` - :param torsion_sampler: Used as parameter for :func:`FillLoopsByDatabase` - and :func:`FillLoopsByMonteCarlo` A default one is - loaded if None. - :param chain_idx: If not None, only gaps from chain with given index get - processed - :type chain_idx: :class:`int` - - :param resnum_range: If not None, only gaps within this resnum range get - processed. - :type resnum_range: :class:`tuple` containing two :class:`int` - - ''' - - - # check/setup scoring - if not IsBackboneScoringSetUp(mhandle): - SetupDefaultBackboneScoring(mhandle) - - if fragment_db is None: - fragment_db = loop.LoadFragDB() - if structure_db is None: - structure_db = loop.LoadStructureDB() - if torsion_sampler is None: - torsion_sampler = loop.LoadTorsionSamplerCoil() - - #try to close small deletions by relaxing them - CloseSmallDeletions(mhandle, chain_idx = chain_idx, - resnum_range = resnum_range) - - # iteratively merge gaps of distance i and fill loops by database - for distance in range(merge_distance): - MergeGapsByDistance(mhandle, distance, chain_idx = chain_idx, - resnum_range = resnum_range) - FillLoopsByDatabase(mhandle, fragment_db, structure_db, - torsion_sampler, min_loops_required=-1, - max_res_extension=6, chain_idx = chain_idx, - resnum_range = resnum_range) - - # if above fails, try DB-fill with less restrictions - FillLoopsByDatabase(mhandle, fragment_db, structure_db, - torsion_sampler, min_loops_required=-1, - chain_idx = chain_idx, resnum_range = resnum_range) - FillLoopsByDatabase(mhandle, fragment_db, structure_db, - torsion_sampler, chain_idx = chain_idx, - resnum_range = resnum_range) - - # close remaining gaps by Monte Carlo - FillLoopsByMonteCarlo(mhandle, torsion_sampler, chain_idx = chain_idx, - resnum_range = resnum_range) - - # last resort approach to close large deletions - CloseLargeDeletions(mhandle, structure_db, chain_idx = chain_idx, - resnum_range = resnum_range) - - # model termini if required - if model_termini: - ModelTermini(mhandle, structure_db = structure_db, - fragment_cache_dir = fragment_cache_dir, - scratch_dir = scratch_dir) - - #In the function above, we call :func:`FillLoopsByDatabase` multiple - #times. First, we try to close "easy" gaps which require few extensions - #(we wish to limit the damage we do on the template) and for which we have - #plenty of loop candidates. If some gaps cannot be closed like this, we try - #less restrictive options. This approach is helpful if neighboring gaps are - #close together and the one closer to the C-terminus is easier to close. - #Several variants were evaluated on 1752 target-template-pairs and this one - #worked best. - # these methods will be exported into module __all__ = ('CloseSmallDeletions', 'MergeGapsByDistance', 'FillLoopsByDatabase', 'FillLoopsByMonteCarlo', 'CloseGaps', 'ModelTermini', diff --git a/modelling/pymod/_pipeline.py b/modelling/pymod/_pipeline.py index 7c215a3220cd7bacf23af87b16e0c031b8c5e32b..ccfae92766986d40b3d36192f18c18190221f2f6 100644 --- a/modelling/pymod/_pipeline.py +++ b/modelling/pymod/_pipeline.py @@ -404,10 +404,8 @@ def CheckFinalModel(mhandle): ost.LogInfo("Stereo-chemical problem in sidechain " + \ "of residue " + str(res)) -def BuildFromRawModel(mhandle, alternative_mhandles = None, - use_amber_ff=False, extra_force_fields=list(), - fragment_cache_dir = None, model_termini = False, - scratch_dir = os.getcwd()): +def BuildFromRawModel(mhandle, alternative_mhandles=None, + use_amber_ff=False, extra_force_fields=list()): '''Build a model starting with a raw model (see :func:`BuildRawModel`). This function implements a recommended pipeline to generate complete models @@ -470,22 +468,18 @@ def BuildFromRawModel(mhandle, alternative_mhandles = None, while current_num_residues != new_num_residues: current_num_residues = mhandle.model.GetResidueCount() ModelExtensions(mhandle, alternative_mhandles, - extension_strategy = "overlapping") + extension_strategy="overlapping") ModelExtensions(mhandle, alternative_mhandles, - extension_strategy = "non-overlapping") - new_num_residues = mhandle.model.GetResidueCount() + extension_strategy="non-overlapping") + new_num_residues = mhandle.model.GetResidueCount() # remove terminal gaps - if not model_termini: - RemoveTerminalGaps(mhandle) + RemoveTerminalGaps(mhandle) # close gaps - CloseGaps(mhandle, merge_distance = merge_distance, - fragment_db = fragment_db, structure_db = structure_db, - torsion_sampler = torsion_sampler, - fragment_cache_dir = fragment_cache_dir, - model_termini = model_termini, - scratch_dir = scratch_dir) + CloseGaps(mhandle, merge_distance=merge_distance, + fragment_db=fragment_db, structure_db=structure_db, + torsion_sampler=torsion_sampler) # build sidechains BuildSidechains(mhandle, merge_distance, fragment_db,