Source code for promod3.modelling._fragger_handle

'''Python functionality to generate fraggers.'''

from promod3.loop import *
from ost.conop import OneLetterCodeToResidueName
from ost import seq

def _GetSizeIdx(frag_size):
    if frag_size <= 5:
        return 0
    if frag_size <= 7:
        return 1
    if frag_size <= 9:
        return 2
    if frag_size <= 11:
        return 3
    return 4

def _GetWeights(weight_group, frag_size):

    size_idx = _GetSizeIdx(frag_size)

    if weight_group == 1:
        # group one, it's only sequence similarity...
        return [1.0]

    elif weight_group == 2:
        # group two: [sequence_profile_weight, structure_profile_weight]
        if size_idx == 0:
            return [-1.0, -1.404]
        elif size_idx == 1:
            return [-1.0, -1.112]
        elif size_idx == 2:
            return [-1.0, -2.424]
        elif size_idx == 3:
            return [-1.0, -5.131]
        elif size_idx == 4:
            return [-1.0, -6.703] 

    elif weight_group == 3:
        # group_three: [ss_agreement_weight, torsion_weight, seqsim_weight] 
        if size_idx == 0:
            return [1.0, 71.467, 2.276]
        elif size_idx == 1:
            return [1.0, 26.616, 4.086]
        elif size_idx == 2:
            return [1.0, 13.802, 4.601]
        elif size_idx == 3:
            return [1.0, 11.376, 5.601]
        elif size_idx == 4:
            return [1.0, 5.784, 4.618]

    elif weight_group == 4:
        # group_four: [ss_agreement_weight, torsion_weight, 
        #              sequence_profile_weight, structure_profile_weight]
        if size_idx == 0:
            return [1.0, 17.219, -1.930, -3.328]
        elif size_idx == 1:
            return [1.0, 19.561, -2.411, -6.060]
        elif size_idx == 2:
            return [1.0, 27.523, -3.819, -8.318]
        elif size_idx == 3:
            return [1.0, 17.009, -1.805, -7.055] 
        elif size_idx == 4:
            return [1.0, 6.526, -0.798, -11.290] 
        

def _SetupFragger_one(frag_sequence, subst_mat):
    frag_size = len(frag_sequence)
    weights = _GetWeights(1, frag_size)
    fragger = Fragger(frag_sequence)
    fragger.AddSeqSimParameters(weights[0], subst_mat)
    return fragger


def _SetupFragger_two(frag_sequence, frag_profile):
    frag_size = len(frag_sequence)
    weights = _GetWeights(2, frag_size)
    fragger = Fragger(frag_sequence)
    fragger.AddSequenceProfileParameters(weights[0], frag_profile)
    fragger.AddStructureProfileParameters(weights[1], frag_profile)
    return fragger


def _SetupFragger_three(frag_sequence, frag_psipred_pred,
                        frag_t_samplers,
                        aa_before, aa_after,
                        subst_matrix):
    frag_size = len(frag_sequence)
    weights = _GetWeights(3, frag_size)
    fragger = Fragger(frag_sequence)
    fragger.AddSSAgreeParameters(weights[0], frag_psipred_pred)
    fragger.AddTorsionProbabilityParameters(weights[1], frag_t_samplers,
                                            aa_before, aa_after)
    fragger.AddSeqSimParameters(weights[2], subst_matrix)
    return fragger


def _SetupFragger_four(frag_sequence, frag_profile, 
                       frag_psipred_pred,
                       frag_t_samplers,
                       aa_before, aa_after):
    frag_size = len(frag_sequence)
    weights = _GetWeights(4, frag_size)
    fragger = Fragger(frag_sequence)
    fragger.AddSSAgreeParameters(weights[0], frag_psipred_pred)
    fragger.AddTorsionProbabilityParameters(weights[1], frag_t_samplers,
                                            aa_before, aa_after)
    fragger.AddSequenceProfileParameters(weights[2], frag_profile)
    fragger.AddStructureProfileParameters(weights[3], frag_profile)
    return fragger
                       

[docs]class FraggerHandle: '''Handler for :class:`~promod3.loop.Fragger` objects linked to a specific chain. Tries to get the most accurate fragments given your input. You can only provide a SEQRES, the returned fragments are then searched by using sequence similarity as the only target value. You can massively increase the accuracy of the found fragments by providing a secondary structure prediction and / or sequence profile. Following features influence the fragment search given your input: * **sequence**: * Sequence Similarity with BLOSUM62 * **sequence**, **psipred_pred**: * Sequence Similarity with BLOSUM62 * Secondary Structure Agreement * Secondary Structure Dependent Torsion Probabilities * **sequence**, **profile**: * Sequence Profile Score * Structure Profile Score * **sequence**, **psipred_pred**, **profile**: * Secondary Structure Agreement * Secondary Structure Dependent Torsion Probabilities * Sequence Profile Score * Structure Profile Score The FraggerHandle internally uses the :class:`promod3.loop.FraggerMap` for caching. You can therefore request fragments for a certain position several times and the search is performed only once. This also allows to save the FraggerHandle to disk. When loading the FraggerHandle again, you need to provide all parameters again. These parameters must be exactly the same than the ones you used when initially constructing the FraggerHandle, especially the structure database. Weird things are happening otherwise. :param sequence: SEQRES for this chain :type sequence: :class:`str`/:class:`ost.seq.SequenceHandle` :param profile: Sequence profile for this chain. :type profile: :class:`ost.seq.ProfileHandle` :param psipred_pred: Psipred prediction for this chain. :type psipred_pred: :class:`promod3.loop.PsipredPrediction` :param fragment_length: Length (num. residues) of fragments to be extracted. :type fragment_length: :class:`int` :param fragments_per_position: Number of fragments to be extracted at each position. :type fragments_per_position: :class:`int` :param rmsd_thresh: To guarantee structural diversity, no pair of fragments at a given position will have RMSD below `rmsd_thresh`. :type rmsd_thresh: :class:`float` :param structure_db: Source of structural data :type structure_db: :class:`promod3.loop.StructureDB` :param torsion_sampler_coil: Torsion sampler for coil residues. :type torsion_sampler_coil: :class:`promod3.loop.TorsionSampler` :param torsion_sampler_helix: Torsion sampler for helical residues. :type torsion_sampler_helix: :class:`promod3.loop.TorsionSampler` :param torsion_sampler_extended: Torsion sampler for extended residues. :type torsion_sampler_extended: :class:`promod3.loop.TorsionSampler` ''' def __init__(self, sequence, profile = None, psipred_pred = None, fragment_length = 9, fragments_per_position = 100, rmsd_thresh = 0.0, structure_db = None, torsion_sampler_coil = None, torsion_sampler_helix = None, torsion_sampler_extended = None): # check if profile != None: # can either be a SequenceHandle or simple string... try: if sequence.GetString() != profile.sequence: raise ValueError("Sequence must be consistent with profile!") except: if sequence != profile.sequence: raise ValueError("Sequence must be consistent with profile!") if psipred_pred != None: if len(sequence) != len(psipred_pred): raise ValueError("Sequence must be consistent with PsipredPred!") # keep all objects self.sequence = sequence self.profile = profile self.psipred_pred = psipred_pred self.fragment_length = fragment_length self.fragments_per_position = fragments_per_position self.rmsd_thresh = rmsd_thresh self.subst_mat = seq.alg.BLOSUM62 if structure_db is None: self.structure_db = LoadStructureDB() else: self.structure_db = structure_db # the torsion samplers are only required if we have a psipred prediction self.samplers = None self.torsion_sampler_coil = None self.torsion_sampler_helix = None self.torsion_sampler_extended = None if psipred_pred != None: if torsion_sampler_coil == None: self.torsion_sampler_coil = LoadTorsionSamplerCoil() else: self.torsion_sampler_coil = torsion_sampler_coil if torsion_sampler_helix == None: self.torsion_sampler_helix = LoadTorsionSamplerHelical() else: self.torsion_sampler_helix = torsion_sampler_helix if torsion_sampler_extended == None: self.torsion_sampler_extended = LoadTorsionSamplerExtended() else: self.torsion_sampler_extended = torsion_sampler_extended self.samplers = [self.torsion_sampler_coil] * len(psipred_pred) for i in range(len(psipred_pred)): if psipred_pred.GetPrediction(i) == 'H' \ and psipred_pred.GetConfidence(i) >= 6: self.samplers[i] = self.torsion_sampler_helix if psipred_pred.GetPrediction(i) == 'E' \ and psipred_pred.GetConfidence(i) >= 6: self.samplers[i] = self.torsion_sampler_extended # prepare map self.fragger_map = FraggerMap()
[docs] def Get(self, frag_pos): '''Get fragger for sequence at index frag_pos..frag_pos+frag_length-1. :param frag_pos: Start-index (note that sequence-indexing starts at 0) :type frag_pos: :class`int` :return: A :class:`Fragger` object. :raises: :exc:`~exceptions.ValueError` if index out-of-bounds. ''' # this is for ranges (i.e. last touched index is end_pos-1) end_pos = frag_pos + self.fragment_length # check if frag_pos < 0 or end_pos > len(self.sequence): raise ValueError("Invalid fragment position " + str(frag_pos)) # get if not self.fragger_map.Contains(frag_pos): fragger = None frag_sequence = self.sequence[frag_pos:end_pos] if self.profile != None and self.psipred_pred == None: frag_profile = self.profile.Extract(frag_pos,end_pos) fragger = _SetupFragger_two(frag_sequence, frag_profile) elif self.profile == None and self.psipred_pred != None: frag_psipred_pred = self.psipred_pred.Extract(frag_pos, end_pos) frag_t_samplers = self.samplers[frag_pos:end_pos] fragger = _SetupFragger_three(frag_sequence, frag_psipred_pred, frag_t_samplers, self._GetAABefore(frag_pos), self._GetAAAfter(end_pos - 1), self.subst_mat) elif self.profile != None and self.psipred_pred != None: frag_profile = self.profile.Extract(frag_pos,end_pos) frag_psipred_pred = self.psipred_pred.Extract(frag_pos, end_pos) frag_t_samplers = self.samplers[frag_pos:end_pos] fragger = _SetupFragger_four(frag_sequence, frag_profile, frag_psipred_pred, frag_t_samplers, self._GetAABefore(frag_pos), self._GetAAAfter(end_pos - 1)) else: fragger = _SetupFragger_one(frag_sequence, self.subst_mat) fragger.Fill(self.structure_db, self.rmsd_thresh, self.fragments_per_position) # keep cached self.fragger_map[frag_pos] = fragger # get it back return self.fragger_map[frag_pos]
[docs] def GetList(self, pos_start=0, pos_end=-1): '''Get List of fraggers covering sequence indices pos_start..pos_end. This will return an empty list if range is smaller than fragment_length. :param pos_start: Start-index (note that sequence-indexing starts at 0) :type pos_start: :class`int` :param pos_end: End-index or -1 if it should go to the sequence-end. :type pos_end: :class`int` :return: A :class:`list` of :class:`Fragger` objects. :raises: :exc:`~exceptions.ValueError` if indices out-of-bounds. ''' if pos_end == -1: pos_end = len(self.sequence) - 1 fragger_list = list() for frag_pos in range(pos_start, pos_end - self.fragment_length + 2): fragger_list.append(self.Get(frag_pos)) return fragger_list
[docs] def SaveCached(self, filename): '''Save cached fraggers.''' self.fragger_map.Save(filename)
[docs] def LoadCached(self, filename): '''Load fragger objects stored with :meth:`SaveCached`. Note that here we require that the same structure db is set as was used when `filename` was saved.''' self.fragger_map = FraggerMap.Load(filename, self.structure_db)
def _GetAABefore(self, pos): if pos > 0: olc = self.sequence[pos-1] else: olc = 'A' aa_before = OneLetterCodeToResidueName(olc) return aa_before def _GetAAAfter(self, pos): if pos < len(self.sequence) - 1: olc = self.sequence[pos + 1] else: olc = 'A' aa_after = OneLetterCodeToResidueName(olc) return aa_after # these methods will be exported into module
__all__ = ('FraggerHandle',)