Skip to content
Snippets Groups Projects
Commit ec28c353 authored by Studer Gabriel's avatar Studer Gabriel
Browse files

cleanup of sidechain reconstruction script

parent 7fb178ee
No related branches found
No related tags found
No related merge requests found
...@@ -33,7 +33,7 @@ The simplest usage of the module is provided by the :func:`Reconstruct` function ...@@ -33,7 +33,7 @@ The simplest usage of the module is provided by the :func:`Reconstruct` function
.. method:: Reconstruct(prot, keep_sidechains=False, build_disulfids, \ .. method:: Reconstruct(prot, keep_sidechains=False, build_disulfids, \
rotamer_model="frm", consider_hbonds=True, \ rotamer_model="frm", consider_hbonds=True, \
rotamer_library=None, add_polar_hydrogens=False) rotamer_library=None)
The function takes a structure and reconstructs its sidechains given the input The function takes a structure and reconstructs its sidechains given the input
parameters. parameters.
...@@ -61,10 +61,6 @@ The simplest usage of the module is provided by the :func:`Reconstruct` function ...@@ -61,10 +61,6 @@ The simplest usage of the module is provided by the :func:`Reconstruct` function
:param rotamer_library: A rotamer library to extract the :param rotamer_library: A rotamer library to extract the
rotamers from. rotamers from.
:param add_polar_hydrogens: All polar hydrogens will be constructed if
**consider_hbonds** is True.
If this flag is activated, they'll be added to the
structure.
:type prot: :class:`ost.mol.EntityHandle` :type prot: :class:`ost.mol.EntityHandle`
:type keep_sidechains: :class:`bool` :type keep_sidechains: :class:`bool`
...@@ -72,7 +68,6 @@ The simplest usage of the module is provided by the :func:`Reconstruct` function ...@@ -72,7 +68,6 @@ The simplest usage of the module is provided by the :func:`Reconstruct` function
:type rotamer_model: :class:`str` :type rotamer_model: :class:`str`
:type consider_hbonds: :class:`bool` :type consider_hbonds: :class:`bool`
:type rotamer_library: :class:`BBDepRotamerLib` / :class:`RotamerLib` :type rotamer_library: :class:`BBDepRotamerLib` / :class:`RotamerLib`
:type add_polar_hydrogens: :class:`bool`
Sidechain Module Functionality Sidechain Module Functionality
......
from . import _sidechain as sidechain from . import _sidechain as sidechain
from ost import geom from ost import geom
from ost import mol
def Reconstruct(ent, keep_sidechains = False, build_disulfids = True, def Reconstruct(ent, keep_sidechains = False, build_disulfids = True,
rotamer_model = "frm", consider_hbonds = True, rotamer_model = "frm", consider_hbonds = True,
rotamer_library = None, add_polar_hydrogens=False): rotamer_library = None):
name_id_mapper = dict()
name_id_mapper["ARG"] = sidechain.ARG
name_id_mapper["ASN"] = sidechain.ASN
name_id_mapper["ASP"] = sidechain.ASP
name_id_mapper["GLN"] = sidechain.GLN
name_id_mapper["GLU"] = sidechain.GLU
name_id_mapper["LYS"] = sidechain.LYS
name_id_mapper["SER"] = sidechain.SER
name_id_mapper["CYS"] = sidechain.CYS
name_id_mapper["MET"] = sidechain.MET
name_id_mapper["TRP"] = sidechain.TRP
name_id_mapper["TYR"] = sidechain.TYR
name_id_mapper["THR"] = sidechain.THR
name_id_mapper["VAL"] = sidechain.VAL
name_id_mapper["ILE"] = sidechain.ILE
name_id_mapper["LEU"] = sidechain.LEU
name_id_mapper["PRO"] = sidechain.PRO
name_id_mapper["HSD"] = sidechain.HSD
name_id_mapper["HSE"] = sidechain.HSE
name_id_mapper["HIS"] = sidechain.HIS
name_id_mapper["PHE"] = sidechain.PHE
name_id_mapper["ALA"] = sidechain.ALA
name_id_mapper["GLY"] = sidechain.GLY
name_id_mapper["MSE"] = sidechain.MET
if rotamer_model.lower() == "frm": if rotamer_model.lower() == "frm":
use_frm = True use_frm = True
...@@ -50,22 +26,52 @@ def Reconstruct(ent, keep_sidechains = False, build_disulfids = True, ...@@ -50,22 +26,52 @@ def Reconstruct(ent, keep_sidechains = False, build_disulfids = True,
prot = ent.Select("peptide=true") prot = ent.Select("peptide=true")
incomplete_sidechains = list() incomplete_sidechains = list()
#extract rotamer ids
rotamer_ids = [sidechain.ALA] * len(prot.residues)
for i,r in enumerate(prot.residues):
rot_id = sidechain.TLCToRotID(r.GetName())
if rot_id == sidechain.XXX:
continue # no idea what it is, so we stick with ALA
# => don't model sidechain
rotamer_ids[i] = rot_id
#extract dihedral angles #extract dihedral angles
phi_angles = [0.0] * len(prot.residues) phi_angles = [0.0] * len(prot.residues)
psi_angles = [0.0] * len(prot.residues) psi_angles = [0.0] * len(prot.residues)
for i,r in enumerate(prot.residues): for i,r in enumerate(prot.residues):
phi = -1.0472 phi = -1.0472
psi = -0.7854 psi = -0.7854
tor = r.GetPhiTorsion() tor = r.GetPhiTorsion()
if tor.IsValid(): if tor.IsValid():
phi = tor.GetAngle() phi = tor.GetAngle()
elif i > 0:
r_prev = prot.residues[i-1]
if mol.InSequence(r_prev.handle,r.handle):
c_prev = r_prev.FindAtom("C")
n = r.FindAtom("N")
ca = r.FindAtom("CA")
c = r.FindAtom("C")
if c_prev.IsValid() and n.IsValid() and ca.IsValid() and c.IsValid():
phi = geom.DihedralAngle(c_prev.GetPos(),n.GetPos(),
ca.GetPos(),c.GetPos())
tor = r.GetPsiTorsion() tor = r.GetPsiTorsion()
if tor.IsValid(): if tor.IsValid():
psi = tor.GetAngle() psi = tor.GetAngle()
elif i < (len(prot.residues) - 1):
r_next = prot.residues[i+1]
if mol.InSequence(r.handle,r_next.handle):
n = r.FindAtom("N")
ca = r.FindAtom("CA")
c = r.FindAtom("C")
n_next = r_next.FindAtom("N")
if n.IsValid() and ca.IsValid() and c.IsValid() and n_next.IsValid():
psi = geom.DihedralAngle(n.GetPos(), ca.GetPos(),
c.GetPos(), n_next.GetPos())
phi_angles[i] = phi phi_angles[i] = phi
psi_angles[i] = psi psi_angles[i] = psi
...@@ -83,11 +89,8 @@ def Reconstruct(ent, keep_sidechains = False, build_disulfids = True, ...@@ -83,11 +89,8 @@ def Reconstruct(ent, keep_sidechains = False, build_disulfids = True,
#build up backbone frame #build up backbone frame
for i,r in enumerate(prot.residues): for i,r in enumerate(prot.residues):
try: try:
rot_id = name_id_mapper[r.GetName()] frame_residue = sidechain.ConstructBackboneFrameResidue(r.handle,
except: rotamer_ids[i],
rot_id = sidechain.ALA
try:
frame_residue = sidechain.ConstructBackboneFrameResidue(r.GetHandle(),rot_id,
i,rotamer_settings, i,rotamer_settings,
phi_angles[i], phi_angles[i],
r.HasProp("n_ter"), r.HasProp("n_ter"),
...@@ -96,209 +99,194 @@ def Reconstruct(ent, keep_sidechains = False, build_disulfids = True, ...@@ -96,209 +99,194 @@ def Reconstruct(ent, keep_sidechains = False, build_disulfids = True,
except: except:
continue continue
frame = None
if build_disulfids:
cystein_indices = list()
if keep_sidechains: if keep_sidechains:
#let's add all complete sidechains except the cysteins to the frame
for i,r in enumerate(prot.residues): for i,r in enumerate(prot.residues):
try:
rot_id = name_id_mapper[r.GetName()] if rotamer_ids[i] == sidechain.CYS:
except: cystein_indices.append(i)
continue
if rot_id == sidechain.CYS:
continue #cysteins will be handled seperately
if rot_id == sidechain.ALA or rot_id == sidechain.GLY:
continue continue
if rotamer_ids[i] == sidechain.ALA or rotamer_ids[i] == sidechain.GLY:
continue #no sidechain to model
try: try:
frame_residue = sidechain.ConstructSidechainFrameResidue(r.GetHandle(),rot_id, frame_residue = sidechain.ConstructSidechainFrameResidue(r.handle,
rotamer_ids[i],
i,rotamer_settings) i,rotamer_settings)
frame_residues.append(frame_residue) frame_residues.append(frame_residue)
except: except:
incomplete_sidechains.append(i) incomplete_sidechains.append(i)
else: else:
#let's add everything except cysteins to the incomplete sidechains
for i,r in enumerate(prot.residues): for i,r in enumerate(prot.residues):
try:
rot_id = name_id_mapper[r.GetName()]
except:
continue
if rot_id not in [sidechain.GLY, sidechain.ALA]:
incomplete_sidechains.append(i)
#look for cysteins and evaluate potential disulfid bonds if rotamer_ids[i] == sidechain.CYS:
final_disulfid_indices = list() cystein_indices.append(i)
if build_disulfids:
cystein_info = list()
for i,r in enumerate(prot.residues):
try:
rot_id = name_id_mapper[r.GetName()]
except:
continue continue
if rot_id == sidechain.CYS: if rotamer_ids[i] == sidechain.ALA or rotamer_ids[i] == sidechain.GLY:
ca = r.FindAtom("CA") continue #no sidechain to model
cb = r.FindAtom("CB")
if ca.IsValid() and cb.IsValid():
cystein_info.append((ca.GetPos(),cb.GetPos(),i))
frame = sidechain.Frame(frame_residues) incomplete_sidechains.append(i)
for i in range(len(cystein_info)): #let's generate the frame without any cystein sidechains
#this is required for the disulfid score evaluation
frame = sidechain.Frame(frame_residues)
if cystein_info[i][2] in final_disulfid_indices: #some info we have to keep track of when evaluating disulfid bonds
continue cystein_rotamers = list()
disulfid_indices = list()
cys_ca_positions = list()
cys_cb_positions = list()
complete_i = keep_sidechains and prot.residues[cystein_info[i][2]].FindAtom("SG").IsValid() for i in cystein_indices:
i_ca_pos = cystein_info[i][0]
i_cb_pos = cystein_info[i][1]
i_index = cystein_info[i][2]
for j in range(i+1,len(cystein_info)): rot_grop = None
r = prot.residues[i]
ca = r.FindAtom("CA")
cb = r.FindAtom("CB")
if cystein_info[j][2] in final_disulfid_indices: if not (ca.IsValid() and cb.IsValid()):
continue continue
complete_j = keep_sidechains and prot.residues[cystein_info[j][2]].FindAtom("SG").IsValid() cys_ca_positions.append(ca.GetPos())
cys_cb_positions.append(cb.GetPos())
if complete_i and complete_j:
continue #they're already there
if geom.Distance(i_ca_pos,cystein_info[j][0]) < 8:
j_ca_pos = cystein_info[j][0]
j_cb_pos = cystein_info[j][1]
j_index = cystein_info[j][2]
if use_frm: if use_frm:
if bbdep: if bbdep:
rot_group_one = sidechain.ConstructFRMRotamerGroup(prot.residues[cystein_info[i][2]].GetHandle(), rot_group = sidechain.ConstructFRMRotamerGroup(r.handle,
sidechain.CYD,cystein_info[i][2], sidechain.CYD,i,
rotamer_library,rotamer_settings, rotamer_library,
phi_angles[i_index],psi_angles[i_index]) rotamer_settings,
phi_angles[i],
rot_group_two = sidechain.ConstructFRMRotamerGroup(prot.residues[cystein_info[j][2]].GetHandle(), psi_angles[i])
sidechain.CYD,cystein_info[j][2],
rotamer_library,rotamer_settings,
phi_angles[j_index],psi_angles[j_index])
else: else:
rot_group_one = sidechain.ConstructFRMRotamerGroup(prot.residues[cystein_info[i][2]].GetHandle(), rot_group = sidechain.ConstructFRMRotamerGroup(r.handle,
sidechain.CYD,cystein_info[i][2], sidechain.CYD,i,
rotamer_library,rotamer_settings) rotamer_library,
rotamer_settings)
rot_group_two = sidechain.ConstructFRMRotamerGroup(prot.residues[cystein_info[j][2]].GetHandle(),
sidechain.CYD,cystein_info[j][2],
rotamer_library,rotamer_settings)
else: else:
if bbdep: if bbdep:
rot_group_one = sidechain.ConstructRRMRotamerGroup(prot.residues[cystein_info[i][2]].GetHandle(), rot_group = sidechain.ConstructRRMRotamerGroup(r.handle,
sidechain.CYD,cystein_info[i][2], sidechain.CYD,i,
rotamer_library,rotamer_settings, rotamer_library,
phi_angles[i_index],psi_angles[i_index]) rotamer_settings,
phi_angles[i],
rot_group_two = sidechain.ConstructRRMRotamerGroup(prot.residues[cystein_info[j][2]].GetHandle(), psi_angles[i])
sidechain.CYD,cystein_info[j][2],
rotamer_library,rotamer_settings,
phi_angles[j_index],psi_angles[j_index])
else: else:
rot_group_one = sidechain.ConstructRRMRotamerGroup(prot.residues[cystein_info[i][2]].GetHandle(), rot_group = sidechain.ConstructRRMRotamerGroup(r.handle,
sidechain.CYD,cystein_info[i][2], sidechain.CYD,i,
rotamer_library,rotamer_settings) rotamer_library,
rotamer_settings)
rot_group_two = sidechain.ConstructRRMRotamerGroup(prot.residues[cystein_info[j][2]].GetHandle(), frame.AddFrameEnergy(rot_group)
sidechain.CYD,cystein_info[j][2], cystein_rotamers.append(rot_group)
rotamer_library,rotamer_settings)
for i in range(len(cystein_rotamers)):
for j in range(i+1, len(cystein_rotamers)):
if geom.Distance(cys_ca_positions[i], cys_ca_positions[j]) > 8.0:
continue #they're too far for a disulfid bond
frame.AddFrameEnergy(rot_group_one) if cystein_indices[i] in disulfid_indices or cystein_indices[j] in disulfid_indices:
frame.AddFrameEnergy(rot_group_two) continue #one of them already participates in a disulfid bond!
#that one might be bether but right now is first come
#first served principle
min_score = float("inf")
min_index_k = -1 min_index_k = -1
min_index_l = -1 min_index_l = -1
min_score = 10000000
for k in range(len(rot_group_one)): for k in range(len(cystein_rotamers[i])):
for l in range(len(rot_group_two)): for l in range(len(cystein_rotamers[j])):
score = sidechain.DisulfidScore(rot_group_one[k],rot_group_two[l], score = sidechain.DisulfidScore(cystein_rotamers[i][k],
i_ca_pos,i_cb_pos,j_ca_pos,j_cb_pos) cystein_rotamers[j][l],
cys_ca_positions[i],
cys_cb_positions[i],
cys_ca_positions[j],
cys_cb_positions[j])
if score < min_score: if score < min_score:
min_index_k = k min_index_k = k
min_index_l = l min_index_l = l
min_score = score min_score = score
if min_score < 45: if min_score < 45.0:
final_disulfid_indices.append(i_index) disulfid_indices.append(cystein_indices[i])
final_disulfid_indices.append(j_index) disulfid_indices.append(cystein_indices[j])
#create new frame residue...
particle_list = list() particle_list = list()
particle_list.append(rot_group_one[min_index_k][0]) particle_list.append(cystein_rotamers[i][min_index_k][0])
particle_list.append(rot_group_two[min_index_l][0]) new_frame_residue_i = sidechain.FrameResidue(particle_list,cystein_indices[i])
new_frame_residue = sidechain.FrameResidue(particle_list,100000) particle_list = list()
frame_residues.append(new_frame_residue) particle_list.append(cystein_rotamers[j][min_index_l][0])
new_frame_residue_j = sidechain.FrameResidue(particle_list,cystein_indices[j])
frame_residues.append(new_frame_residue_i)
frame_residues.append(new_frame_residue_j)
#set the position in the proteins residues #set the position in the proteins residues
rot_group_one[min_index_k].ApplyOnResidue(prot.residues[i_index].GetHandle()) cystein_rotamers[i][min_index_k].ApplyOnResidue(prot.residues[cystein_indices[i]].handle,
rot_group_two[min_index_l].ApplyOnResidue(prot.residues[j_index].GetHandle()) consider_hydrogens=False)
cystein_rotamers[j][min_index_l].ApplyOnResidue(prot.residues[cystein_indices[j]].handle,
#we still have to add the cysteins, that are complete, even though consider_hydrogens=False)
#they're not participating in a disulfid bond
#All cysteins participating in a disulfid bond have been applied to the
#structure and added to the frame.
#All remaining ones have to be handled according the given flags.
for idx in cystein_indices:
if idx in disulfid_indices:
continue #it's all fine
if keep_sidechains: if keep_sidechains:
for i,r in enumerate(prot.residues):
try: try:
rot_id = name_id_mapper[r.GetName()] frame_residue = sidechain.ConstructSidechainFrameResidue(prot.residues[idx].handle,
except: rotamer_ids[idx],
continue idx,rotamer_settings)
if rot_id != sidechain.CYS:
continue #all other sidechains are already handled
if i in final_disulfid_indices:
continue #already added to the frame residues
try:
frame_residue = sidechain.ConstructSidechainFrameResidue(r.GetHandle(),rot_id,
i,rotamer_settings)
frame_residues.append(frame_residue) frame_residues.append(frame_residue)
except: except:
incomplete_sidechains.append(i) incomplete_sidechains.append(idx)
else: else:
#The disulfid cysteins must be removed from incomplete sidechains incomplete_sidechains.append(idx)
for i in final_disulfid_indices:
try: #we finally have to rebuild the frame if any disulfid bonds have been built
incomplete_sidechains.remove(i) if len(disulfid_indices) > 0:
except: frame = sidechain.Frame(frame_residues)
pass #it's not there anyway, so we don't have to remove it
elif keep_sidechains: else:
if keep_sidechains:
for i,r in enumerate(prot.residues): for i,r in enumerate(prot.residues):
if rotamer_ids[i] == sidechain.ALA or rotamer_ids[i] == sidechain.GLY:
continue #no sidechain to model
try: try:
rot_id = name_id_mapper[r.GetName()] frame_residue = sidechain.ConstructSidechainFrameResidue(r.handle,
except: rotamer_ids[i],
continue
if rot_id != CYS:
continue #all other sidechains are already handled
try:
frame_residue = sidechain.ConstructSidechainFrameResidue(r.GetHandle(),rot_id,
i,rotamer_settings) i,rotamer_settings)
frame_residues.append(frame_residue) frame_residues.append(frame_residue)
except: except:
incomplete_sidechains.append(i) incomplete_sidechains.append(i)
else:
for i,r in enumerate(prot.residues):
if rotamer_ids[i] not in [sidechain.GLY, sidechain.ALA]:
incomplete_sidechains.append(i)
#finally build complete frame
frame = sidechain.Frame(frame_residues) frame = sidechain.Frame(frame_residues)
#build rotamers
#build rotamers for incomplete sidechains
for i in incomplete_sidechains: for i in incomplete_sidechains:
r = prot.residues[i] r = prot.residues[i]
rot_id = rotamer_ids[i]
try:
rot_id = name_id_mapper[r.GetName()]
except:
continue
if(rot_id == sidechain.ALA or rot_id == sidechain.GLY): if(rot_id == sidechain.ALA or rot_id == sidechain.GLY):
continue continue
if rot_id == sidechain.CYS: if rot_id == sidechain.CYS:
if i in final_disulfid_indices:
continue
rot_id = sidechain.CYH rot_id = sidechain.CYH
if rot_id == sidechain.PRO: if rot_id == sidechain.PRO:
...@@ -312,19 +300,19 @@ def Reconstruct(ent, keep_sidechains = False, build_disulfids = True, ...@@ -312,19 +300,19 @@ def Reconstruct(ent, keep_sidechains = False, build_disulfids = True,
try: try:
if use_frm: if use_frm:
if bbdep: if bbdep:
rot_group = sidechain.ConstructFRMRotamerGroup(r.GetHandle(),rot_id,i, rot_group = sidechain.ConstructFRMRotamerGroup(r.handle,rot_id,i,
rotamer_library,rotamer_settings, rotamer_library,rotamer_settings,
phi_angles[i],psi_angles[i]) phi_angles[i],psi_angles[i])
else: else:
rot_group = sidechain.ConstructFRMRotamerGroup(r.GetHandle(),rot_id,i, rot_group = sidechain.ConstructFRMRotamerGroup(r.handle,rot_id,i,
rotamer_library,rotamer_settings) rotamer_library,rotamer_settings)
else: else:
if bbdep: if bbdep:
rot_group = sidechain.ConstructRRMRotamerGroup(r.GetHandle(),rot_id,i, rot_group = sidechain.ConstructRRMRotamerGroup(r.handle,rot_id,i,
rotamer_library,rotamer_settings, rotamer_library,rotamer_settings,
phi_angles[i],psi_angles[i]) phi_angles[i],psi_angles[i])
else: else:
rot_group = sidechain.ConstructRRMRotamerGroup(r.GetHandle(),rot_id,i, rot_group = sidechain.ConstructRRMRotamerGroup(r.handle,rot_id,i,
rotamer_library,rotamer_settings) rotamer_library,rotamer_settings)
except: except:
...@@ -347,18 +335,10 @@ def Reconstruct(ent, keep_sidechains = False, build_disulfids = True, ...@@ -347,18 +335,10 @@ def Reconstruct(ent, keep_sidechains = False, build_disulfids = True,
for i,rot_group,sol in zip(residues_with_rotamer_group,rotamer_groups,solution): for i,rot_group,sol in zip(residues_with_rotamer_group,rotamer_groups,solution):
try: try:
rot_group[sol].ApplyOnResidue(prot.residues[i].GetHandle(),consider_hydrogens=add_polar_hydrogens) rot_group[sol].ApplyOnResidue(prot.residues[i].handle,consider_hydrogens=False)
except: except:
print "there is a backbone atom missing... ",prot.residues[i].GetQualifiedName() print "there is a backbone atom missing... ",prot.residues[i].GetQualifiedName()
#if we want to add polar hydrogens, we also have to consider the backbone...
if add_polar_hydrogens:
for r in frame_residues:
i=r.GetResidueIndex()
if i==100000:continue#These are cysteins that were already handled
rh=prot.residues[i].GetHandle()
r.ApplyOnResidue(rh,True)
# these methods will be exported into module # these methods will be exported into module
__all__ = ('Reconstruct',) __all__ = ('Reconstruct',)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment