Select Git revision
export_sec_structure.cc
_fragger_handle.py 14.34 KiB
# Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and
# Biozentrum - University of Basel
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''Python functionality to generate fraggers.'''
from promod3.loop import *
from ost.conop import OneLetterCodeToResidueName
from ost import seq
def _GetSizeIdx(frag_size):
if frag_size <= 5:
return 0
if frag_size <= 7:
return 1
if frag_size <= 9:
return 2
if frag_size <= 11:
return 3
return 4
def _GetWeights(weight_group, frag_size):
size_idx = _GetSizeIdx(frag_size)
if weight_group == 1:
# group one, it's only sequence similarity...
return [1.0]
elif weight_group == 2:
# group two: [sequence_profile_weight, structure_profile_weight]
if size_idx == 0:
return [-1.0, -1.404]
elif size_idx == 1:
return [-1.0, -1.112]
elif size_idx == 2:
return [-1.0, -2.424]
elif size_idx == 3:
return [-1.0, -5.131]
elif size_idx == 4:
return [-1.0, -6.703]
elif weight_group == 3:
# group_three: [ss_agreement_weight, torsion_weight, seqsim_weight]
if size_idx == 0:
return [1.0, 71.467, 2.276]
elif size_idx == 1:
return [1.0, 26.616, 4.086]
elif size_idx == 2:
return [1.0, 13.802, 4.601]
elif size_idx == 3:
return [1.0, 11.376, 5.601]
elif size_idx == 4:
return [1.0, 5.784, 4.618]
elif weight_group == 4:
# group_four: [ss_agreement_weight, torsion_weight,
# sequence_profile_weight, structure_profile_weight]
if size_idx == 0:
return [1.0, 17.219, -1.930, -3.328]
elif size_idx == 1:
return [1.0, 19.561, -2.411, -6.060]
elif size_idx == 2:
return [1.0, 27.523, -3.819, -8.318]
elif size_idx == 3:
return [1.0, 17.009, -1.805, -7.055]
elif size_idx == 4:
return [1.0, 6.526, -0.798, -11.290]
def _SetupFragger_one(frag_sequence, subst_mat):
frag_size = len(frag_sequence)
weights = _GetWeights(1, frag_size)
fragger = Fragger(frag_sequence)
fragger.AddSeqSimParameters(weights[0], subst_mat)
return fragger
def _SetupFragger_two(frag_sequence, frag_profile):
frag_size = len(frag_sequence)
weights = _GetWeights(2, frag_size)
fragger = Fragger(frag_sequence)
fragger.AddSequenceProfileParameters(weights[0], frag_profile)
fragger.AddStructureProfileParameters(weights[1], frag_profile)
return fragger
def _SetupFragger_three(frag_sequence, frag_psipred_pred,
frag_t_samplers,
aa_before, aa_after,
subst_matrix):
frag_size = len(frag_sequence)
weights = _GetWeights(3, frag_size)
fragger = Fragger(frag_sequence)
fragger.AddSSAgreeParameters(weights[0], frag_psipred_pred)
fragger.AddTorsionProbabilityParameters(weights[1], frag_t_samplers,
aa_before, aa_after)
fragger.AddSeqSimParameters(weights[2], subst_matrix)
return fragger
def _SetupFragger_four(frag_sequence, frag_profile,
frag_psipred_pred,
frag_t_samplers,
aa_before, aa_after):
frag_size = len(frag_sequence)
weights = _GetWeights(4, frag_size)
fragger = Fragger(frag_sequence)
fragger.AddSSAgreeParameters(weights[0], frag_psipred_pred)
fragger.AddTorsionProbabilityParameters(weights[1], frag_t_samplers,
aa_before, aa_after)
fragger.AddSequenceProfileParameters(weights[2], frag_profile)
fragger.AddStructureProfileParameters(weights[3], frag_profile)
return fragger
class FraggerHandle:
'''Handler for :class:`~promod3.loop.Fragger` objects linked to a
specific chain.
Tries to get the most accurate fragments given your input.
You can only provide a SEQRES, the returned fragments are
then searched by using sequence similarity as the only target value.
You can massively increase the accuracy of the found fragments by
providing a secondary structure prediction and / or sequence profile.
Following features influence the fragment search given your input:
* **sequence**:
* Sequence Similarity with BLOSUM62
* **sequence**, **psipred_pred**:
* Sequence Similarity with BLOSUM62
* Secondary Structure Agreement
* Secondary Structure Dependent Torsion Probabilities
* **sequence**, **profile**:
* Sequence Profile Score
* Structure Profile Score
* **sequence**, **psipred_pred**, **profile**:
* Secondary Structure Agreement
* Secondary Structure Dependent Torsion Probabilities
* Sequence Profile Score
* Structure Profile Score
The FraggerHandle internally uses the :class:`promod3.loop.FraggerMap` for caching. You
can therefore request fragments for a certain position several times and the
search is performed only once. This also allows to save the FraggerHandle to
disk. When loading the FraggerHandle again, you need to provide all parameters
again. These parameters must be exactly the same than the ones you used when
initially constructing the FraggerHandle, especially the structure database.
Weird things are happening otherwise.
:param sequence: SEQRES for this chain
:type sequence: :class:`str`/:class:`ost.seq.SequenceHandle`
:param profile: Sequence profile for this chain.
:type profile: :class:`ost.seq.ProfileHandle`
:param psipred_pred: Psipred prediction for this chain.
:type psipred_pred: :class:`promod3.loop.PsipredPrediction`
:param fragment_length: Length (num. residues) of fragments to be extracted.
:type fragment_length: :class:`int`
:param fragments_per_position: Number of fragments to be extracted at each
position.
:type fragments_per_position: :class:`int`
:param rmsd_thresh: To guarantee structural diversity, no pair of fragments
at a given position will have RMSD below `rmsd_thresh`.
:type rmsd_thresh: :class:`float`
:param structure_db: Source of structural data
:type structure_db: :class:`promod3.loop.StructureDB`
:param torsion_sampler_coil: Torsion sampler for coil residues.
:type torsion_sampler_coil: :class:`promod3.loop.TorsionSampler`
:param torsion_sampler_helix: Torsion sampler for helical residues.
:type torsion_sampler_helix: :class:`promod3.loop.TorsionSampler`
:param torsion_sampler_extended: Torsion sampler for extended residues.
:type torsion_sampler_extended: :class:`promod3.loop.TorsionSampler`
'''
def __init__(self,
sequence,
profile = None,
psipred_pred = None,
fragment_length = 9,
fragments_per_position = 100,
rmsd_thresh = 0.0,
structure_db = None,
torsion_sampler_coil = None,
torsion_sampler_helix = None,
torsion_sampler_extended = None):
# check
if profile != None:
# can either be a SequenceHandle or simple string...
try:
if sequence.GetString() != profile.sequence:
raise ValueError("Sequence must be consistent with profile!")
except:
if sequence != profile.sequence:
raise ValueError("Sequence must be consistent with profile!")
if psipred_pred != None:
if len(sequence) != len(psipred_pred):
raise ValueError("Sequence must be consistent with PsipredPred!")
# keep all objects
self.sequence = sequence
self.profile = profile
self.psipred_pred = psipred_pred
self.fragment_length = fragment_length
self.fragments_per_position = fragments_per_position
self.rmsd_thresh = rmsd_thresh
self.subst_mat = seq.alg.BLOSUM62
if structure_db is None:
self.structure_db = LoadStructureDB()
else:
self.structure_db = structure_db
# the torsion samplers are only required if we have a psipred prediction
self.samplers = None
self.torsion_sampler_coil = None
self.torsion_sampler_helix = None
self.torsion_sampler_extended = None
if psipred_pred != None:
if torsion_sampler_coil == None:
self.torsion_sampler_coil = LoadTorsionSamplerCoil()
else:
self.torsion_sampler_coil = torsion_sampler_coil
if torsion_sampler_helix == None:
self.torsion_sampler_helix = LoadTorsionSamplerHelical()
else:
self.torsion_sampler_helix = torsion_sampler_helix
if torsion_sampler_extended == None:
self.torsion_sampler_extended = LoadTorsionSamplerExtended()
else:
self.torsion_sampler_extended = torsion_sampler_extended
self.samplers = [self.torsion_sampler_coil] * len(psipred_pred)
for i in range(len(psipred_pred)):
if psipred_pred.GetPrediction(i) == 'H' \
and psipred_pred.GetConfidence(i) >= 6:
self.samplers[i] = self.torsion_sampler_helix
if psipred_pred.GetPrediction(i) == 'E' \
and psipred_pred.GetConfidence(i) >= 6:
self.samplers[i] = self.torsion_sampler_extended
# prepare map
self.fragger_map = FraggerMap()
def Get(self, frag_pos):
'''Get fragger for sequence at index frag_pos..frag_pos+frag_length-1.
:param frag_pos: Start-index (note that sequence-indexing starts at 0)
:type frag_pos: :class:`int`
:return: A :class:`Fragger` object.
:raises: :exc:`~exceptions.ValueError` if index out-of-bounds.
'''
# this is for ranges (i.e. last touched index is end_pos-1)
end_pos = frag_pos + self.fragment_length
# check
if frag_pos < 0 or end_pos > len(self.sequence):
raise ValueError("Invalid fragment position " + str(frag_pos))
# get
if not self.fragger_map.Contains(frag_pos):
fragger = None
frag_sequence = self.sequence[frag_pos:end_pos]
if self.profile != None and self.psipred_pred == None:
frag_profile = self.profile.Extract(frag_pos,end_pos)
fragger = _SetupFragger_two(frag_sequence, frag_profile)
elif self.profile == None and self.psipred_pred != None:
frag_psipred_pred = self.psipred_pred.Extract(frag_pos, end_pos)
frag_t_samplers = self.samplers[frag_pos:end_pos]
fragger = _SetupFragger_three(frag_sequence, frag_psipred_pred,
frag_t_samplers,
self._GetAABefore(frag_pos),
self._GetAAAfter(end_pos - 1),
self.subst_mat)
elif self.profile != None and self.psipred_pred != None:
frag_profile = self.profile.Extract(frag_pos,end_pos)
frag_psipred_pred = self.psipred_pred.Extract(frag_pos, end_pos)
frag_t_samplers = self.samplers[frag_pos:end_pos]
fragger = _SetupFragger_four(frag_sequence, frag_profile,
frag_psipred_pred,
frag_t_samplers,
self._GetAABefore(frag_pos),
self._GetAAAfter(end_pos - 1))
else:
fragger = _SetupFragger_one(frag_sequence, self.subst_mat)
fragger.Fill(self.structure_db, self.rmsd_thresh,
self.fragments_per_position)
# keep cached
self.fragger_map[frag_pos] = fragger
# get it back
return self.fragger_map[frag_pos]
def GetList(self, pos_start=0, pos_end=-1):
'''Get List of fraggers covering sequence indices pos_start..pos_end.
This will return an empty list if range is smaller than fragment_length.
:param pos_start: Start-index (note that sequence-indexing starts at 0)
:type pos_start: :class:`int`
:param pos_end: End-index or -1 if it should go to the sequence-end.
:type pos_end: :class:`int`
:return: A :class:`list` of :class:`Fragger` objects.
:raises: :exc:`~exceptions.ValueError` if indices out-of-bounds.
'''
if pos_end == -1:
pos_end = len(self.sequence) - 1
fragger_list = list()
for frag_pos in range(pos_start, pos_end - self.fragment_length + 2):
fragger_list.append(self.Get(frag_pos))
return fragger_list
def SaveCached(self, filename):
'''Save cached fraggers.'''
self.fragger_map.Save(filename)
def LoadCached(self, filename):
'''Load fragger objects stored with :meth:`SaveCached`.
Note that here we require that the same structure db is set as was
used when `filename` was saved.'''
self.fragger_map = FraggerMap.Load(filename, self.structure_db)
def _GetAABefore(self, pos):
if pos > 0:
olc = self.sequence[pos-1]
else:
olc = 'A'
aa_before = OneLetterCodeToResidueName(olc)
return aa_before
def _GetAAAfter(self, pos):
if pos < len(self.sequence) - 1:
olc = self.sequence[pos + 1]
else:
olc = 'A'
aa_after = OneLetterCodeToResidueName(olc)
return aa_after
# these methods will be exported into module
__all__ = ('FraggerHandle',)