Skip to content
Snippets Groups Projects
Select Git revision
  • fbb5102e2bf8b9392bc287f101ae12df6ba7577e
  • master default protected
  • develop protected
  • cmake_boost_refactor
  • ubuntu_ci
  • mmtf
  • non-orthogonal-maps
  • no_boost_filesystem
  • data_viewer
  • 2.11.1
  • 2.11.0
  • 2.10.0
  • 2.9.3
  • 2.9.2
  • 2.9.1
  • 2.9.0
  • 2.8.0
  • 2.7.0
  • 2.6.1
  • 2.6.0
  • 2.6.0-rc4
  • 2.6.0-rc3
  • 2.6.0-rc2
  • 2.6.0-rc
  • 2.5.0
  • 2.5.0-rc2
  • 2.5.0-rc
  • 2.4.0
  • 2.4.0-rc2
29 results

export_sec_structure.cc

Blame
  • _fragger_handle.py 14.34 KiB
    # Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and
    #                          Biozentrum - University of Basel
    # 
    # Licensed under the Apache License, Version 2.0 (the "License");
    # you may not use this file except in compliance with the License.
    # You may obtain a copy of the License at
    # 
    #   http://www.apache.org/licenses/LICENSE-2.0
    # 
    # Unless required by applicable law or agreed to in writing, software
    # distributed under the License is distributed on an "AS IS" BASIS,
    # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    # See the License for the specific language governing permissions and
    # limitations under the License.
    
    
    '''Python functionality to generate fraggers.'''
    
    from promod3.loop import *
    from ost.conop import OneLetterCodeToResidueName
    from ost import seq
    
    def _GetSizeIdx(frag_size):
        if frag_size <= 5:
            return 0
        if frag_size <= 7:
            return 1
        if frag_size <= 9:
            return 2
        if frag_size <= 11:
            return 3
        return 4
    
    def _GetWeights(weight_group, frag_size):
    
        size_idx = _GetSizeIdx(frag_size)
    
        if weight_group == 1:
            # group one, it's only sequence similarity...
            return [1.0]
    
        elif weight_group == 2:
            # group two: [sequence_profile_weight, structure_profile_weight]
            if size_idx == 0:
                return [-1.0, -1.404]
            elif size_idx == 1:
                return [-1.0, -1.112]
            elif size_idx == 2:
                return [-1.0, -2.424]
            elif size_idx == 3:
                return [-1.0, -5.131]
            elif size_idx == 4:
                return [-1.0, -6.703] 
    
        elif weight_group == 3:
            # group_three: [ss_agreement_weight, torsion_weight, seqsim_weight] 
            if size_idx == 0:
                return [1.0, 71.467, 2.276]
            elif size_idx == 1:
                return [1.0, 26.616, 4.086]
            elif size_idx == 2:
                return [1.0, 13.802, 4.601]
            elif size_idx == 3:
                return [1.0, 11.376, 5.601]
            elif size_idx == 4:
                return [1.0, 5.784, 4.618]
    
        elif weight_group == 4:
            # group_four: [ss_agreement_weight, torsion_weight, 
            #              sequence_profile_weight, structure_profile_weight]
            if size_idx == 0:
                return [1.0, 17.219, -1.930, -3.328]
            elif size_idx == 1:
                return [1.0, 19.561, -2.411, -6.060]
            elif size_idx == 2:
                return [1.0, 27.523, -3.819, -8.318]
            elif size_idx == 3:
                return [1.0, 17.009, -1.805, -7.055] 
            elif size_idx == 4:
                return [1.0, 6.526, -0.798, -11.290] 
            
    
    def _SetupFragger_one(frag_sequence, subst_mat):
        frag_size = len(frag_sequence)
        weights = _GetWeights(1, frag_size)
        fragger = Fragger(frag_sequence)
        fragger.AddSeqSimParameters(weights[0], subst_mat)
        return fragger
    
    
    def _SetupFragger_two(frag_sequence, frag_profile):
        frag_size = len(frag_sequence)
        weights = _GetWeights(2, frag_size)
        fragger = Fragger(frag_sequence)
        fragger.AddSequenceProfileParameters(weights[0], frag_profile)
        fragger.AddStructureProfileParameters(weights[1], frag_profile)
        return fragger
    
    
    def _SetupFragger_three(frag_sequence, frag_psipred_pred,
                            frag_t_samplers,
                            aa_before, aa_after,
                            subst_matrix):
        frag_size = len(frag_sequence)
        weights = _GetWeights(3, frag_size)
        fragger = Fragger(frag_sequence)
        fragger.AddSSAgreeParameters(weights[0], frag_psipred_pred)
        fragger.AddTorsionProbabilityParameters(weights[1], frag_t_samplers,
                                                aa_before, aa_after)
        fragger.AddSeqSimParameters(weights[2], subst_matrix)
        return fragger
    
    
    def _SetupFragger_four(frag_sequence, frag_profile, 
                           frag_psipred_pred,
                           frag_t_samplers,
                           aa_before, aa_after):
        frag_size = len(frag_sequence)
        weights = _GetWeights(4, frag_size)
        fragger = Fragger(frag_sequence)
        fragger.AddSSAgreeParameters(weights[0], frag_psipred_pred)
        fragger.AddTorsionProbabilityParameters(weights[1], frag_t_samplers,
                                                aa_before, aa_after)
        fragger.AddSequenceProfileParameters(weights[2], frag_profile)
        fragger.AddStructureProfileParameters(weights[3], frag_profile)
        return fragger
                           
    
    class FraggerHandle:
        '''Handler for :class:`~promod3.loop.Fragger` objects linked to a 
        specific chain.
    
        Tries to get the most accurate fragments given your input.
        You can only provide a SEQRES, the returned fragments are
        then searched by using sequence similarity as the only target value.
        You can massively increase the accuracy of the found fragments by
        providing a secondary structure prediction and / or sequence profile.
    
        Following features influence the fragment search given your input:
    
        * **sequence**: 
    
          * Sequence Similarity with BLOSUM62
    
        * **sequence**, **psipred_pred**:
    
          * Sequence Similarity with BLOSUM62
          * Secondary Structure Agreement
          * Secondary Structure Dependent Torsion Probabilities
    
        * **sequence**, **profile**:
    
          * Sequence Profile Score
          * Structure Profile Score
    
        * **sequence**, **psipred_pred**, **profile**:
    
          * Secondary Structure Agreement
          * Secondary Structure Dependent Torsion Probabilities
          * Sequence Profile Score
          * Structure Profile Score
    
        The FraggerHandle internally uses the :class:`promod3.loop.FraggerMap` for caching. You
        can therefore request fragments for a certain position several times and the
        search is performed only once. This also allows to save the FraggerHandle to
        disk. When loading the FraggerHandle again, you need to provide all parameters
        again. These parameters must be exactly the same than the ones you used when
        initially constructing the FraggerHandle, especially the structure database.
        Weird things are happening otherwise.
    
        :param sequence: SEQRES for this chain
        :type sequence: :class:`str`/:class:`ost.seq.SequenceHandle`
        :param profile: Sequence profile for this chain.
        :type profile:  :class:`ost.seq.ProfileHandle`
        :param psipred_pred: Psipred prediction for this chain.
        :type psipred_pred:  :class:`promod3.loop.PsipredPrediction`
        :param fragment_length: Length (num. residues) of fragments to be extracted.
        :type fragment_length:  :class:`int`
        :param fragments_per_position: Number of fragments to be extracted at each
                                       position.
        :type fragments_per_position:  :class:`int`
        :param rmsd_thresh: To guarantee structural diversity, no pair of fragments
                            at a given position will have RMSD below `rmsd_thresh`.
        :type rmsd_thresh:  :class:`float`
        :param structure_db: Source of structural data
        :type structure_db:  :class:`promod3.loop.StructureDB`
        :param torsion_sampler_coil: Torsion sampler for coil residues.
        :type torsion_sampler_coil:  :class:`promod3.loop.TorsionSampler`
        :param torsion_sampler_helix: Torsion sampler for helical residues.
        :type torsion_sampler_helix:  :class:`promod3.loop.TorsionSampler`
        :param torsion_sampler_extended: Torsion sampler for extended residues.
        :type torsion_sampler_extended:  :class:`promod3.loop.TorsionSampler`
        '''
        def __init__(self,
                     sequence, 
                     profile = None, 
                     psipred_pred = None,
                     fragment_length = 9,
                     fragments_per_position = 100,
                     rmsd_thresh = 0.0, 
                     structure_db = None,
                     torsion_sampler_coil = None,
                     torsion_sampler_helix = None,
                     torsion_sampler_extended = None):
            # check
            if profile != None:
                # can either be a SequenceHandle or simple string...
                try:
                    if sequence.GetString() != profile.sequence:
                        raise ValueError("Sequence must be consistent with profile!")
                except:
                    if sequence != profile.sequence:
                        raise ValueError("Sequence must be consistent with profile!")
    
            if psipred_pred != None:
                if len(sequence) != len(psipred_pred):
                    raise ValueError("Sequence must be consistent with PsipredPred!")
            
            # keep all objects
            self.sequence = sequence
            self.profile = profile
            self.psipred_pred = psipred_pred
            self.fragment_length = fragment_length
            self.fragments_per_position = fragments_per_position
            self.rmsd_thresh = rmsd_thresh
            self.subst_mat = seq.alg.BLOSUM62
    
            if structure_db is None:
                self.structure_db = LoadStructureDB()
            else:
                self.structure_db = structure_db
    
            # the torsion samplers are only required if we have a psipred prediction
            self.samplers = None
            self.torsion_sampler_coil = None
            self.torsion_sampler_helix = None
            self.torsion_sampler_extended = None
            if psipred_pred != None:
    
                if torsion_sampler_coil == None:
                    self.torsion_sampler_coil = LoadTorsionSamplerCoil()
                else:
                    self.torsion_sampler_coil = torsion_sampler_coil
    
                if torsion_sampler_helix == None:
                    self.torsion_sampler_helix = LoadTorsionSamplerHelical()
                else:
                    self.torsion_sampler_helix = torsion_sampler_helix
    
                if torsion_sampler_extended == None:
                    self.torsion_sampler_extended = LoadTorsionSamplerExtended()
                else:
                    self.torsion_sampler_extended = torsion_sampler_extended
    
                self.samplers = [self.torsion_sampler_coil] * len(psipred_pred)
                for i in range(len(psipred_pred)):
                    if psipred_pred.GetPrediction(i) == 'H' \
                       and psipred_pred.GetConfidence(i) >= 6:
                        self.samplers[i] = self.torsion_sampler_helix  
                    if psipred_pred.GetPrediction(i) == 'E' \
                       and psipred_pred.GetConfidence(i) >= 6:
                        self.samplers[i] = self.torsion_sampler_extended
            # prepare map
            self.fragger_map = FraggerMap()
    
        def Get(self, frag_pos):
            '''Get fragger for sequence at index frag_pos..frag_pos+frag_length-1.
    
            :param frag_pos: Start-index (note that sequence-indexing starts at 0)
            :type frag_pos:  :class:`int`
            :return: A :class:`Fragger` object.
            :raises: :exc:`~exceptions.ValueError` if index out-of-bounds.
            '''
            # this is for ranges (i.e. last touched index is end_pos-1)
            end_pos = frag_pos + self.fragment_length
            # check
            if frag_pos < 0 or end_pos > len(self.sequence):
                raise ValueError("Invalid fragment position " + str(frag_pos))
            # get
            if not self.fragger_map.Contains(frag_pos):
    
                fragger = None
                frag_sequence = self.sequence[frag_pos:end_pos]
    
                if self.profile != None and self.psipred_pred == None:
                    frag_profile = self.profile.Extract(frag_pos,end_pos)
                    fragger = _SetupFragger_two(frag_sequence, frag_profile)
    
                elif self.profile == None and self.psipred_pred != None:
                    frag_psipred_pred = self.psipred_pred.Extract(frag_pos, end_pos)
                    frag_t_samplers = self.samplers[frag_pos:end_pos]
                    fragger = _SetupFragger_three(frag_sequence, frag_psipred_pred,
                                                  frag_t_samplers,
                                                  self._GetAABefore(frag_pos),
                                                  self._GetAAAfter(end_pos - 1),
                                                  self.subst_mat)
    
                elif self.profile != None and self.psipred_pred != None:
                    frag_profile = self.profile.Extract(frag_pos,end_pos)
                    frag_psipred_pred = self.psipred_pred.Extract(frag_pos, end_pos)
                    frag_t_samplers = self.samplers[frag_pos:end_pos]
                    fragger = _SetupFragger_four(frag_sequence, frag_profile, 
                                                 frag_psipred_pred,
                                                 frag_t_samplers,
                                                 self._GetAABefore(frag_pos),
                                                 self._GetAAAfter(end_pos - 1))
    
                else:
                    fragger = _SetupFragger_one(frag_sequence, self.subst_mat)
    
                fragger.Fill(self.structure_db, self.rmsd_thresh,
                             self.fragments_per_position)
    
                # keep cached
                self.fragger_map[frag_pos] = fragger
    
            # get it back
            return self.fragger_map[frag_pos]
    
        def GetList(self, pos_start=0, pos_end=-1):
            '''Get List of fraggers covering sequence indices pos_start..pos_end.
    
            This will return an empty list if range is smaller than fragment_length.
    
            :param pos_start: Start-index (note that sequence-indexing starts at 0)
            :type pos_start:  :class:`int`
            :param pos_end: End-index or -1 if it should go to the sequence-end.
            :type pos_end:  :class:`int`
            :return: A :class:`list` of :class:`Fragger` objects.
            :raises: :exc:`~exceptions.ValueError` if indices out-of-bounds.
            '''
            if pos_end == -1:
                pos_end = len(self.sequence) - 1
            fragger_list = list()
            for frag_pos in range(pos_start, pos_end - self.fragment_length + 2):
                fragger_list.append(self.Get(frag_pos))
            return fragger_list
    
        def SaveCached(self, filename):
            '''Save cached fraggers.'''
            self.fragger_map.Save(filename)
    
        def LoadCached(self, filename):
            '''Load fragger objects stored with :meth:`SaveCached`.
            Note that here we require that the same structure db is set as was
            used when `filename` was saved.'''
            self.fragger_map = FraggerMap.Load(filename, self.structure_db)
    
        def _GetAABefore(self, pos):
            if pos > 0:
                olc = self.sequence[pos-1]
            else:
                olc = 'A'
            aa_before = OneLetterCodeToResidueName(olc)
            return aa_before
    
        def _GetAAAfter(self, pos):
            if pos < len(self.sequence) - 1:
                olc = self.sequence[pos + 1]
            else:
                olc = 'A'
            aa_after = OneLetterCodeToResidueName(olc)
            return aa_after
    
    
    # these methods will be exported into module
    __all__ = ('FraggerHandle',)