diff --git a/sidechain/doc/index.rst b/sidechain/doc/index.rst index 7024172270d20bf4cec1ccdabded5110ac8226d7..1c11cb815a948b0886af2b9300556c60302fb71e 100644 --- a/sidechain/doc/index.rst +++ b/sidechain/doc/index.rst @@ -33,7 +33,7 @@ The simplest usage of the module is provided by the :func:`Reconstruct` function .. method:: Reconstruct(prot, keep_sidechains=False, build_disulfids, \ rotamer_model="frm", consider_hbonds=True, \ - rotamer_library=None, add_polar_hydrogens=False) + rotamer_library=None) The function takes a structure and reconstructs its sidechains given the input parameters. @@ -61,10 +61,6 @@ The simplest usage of the module is provided by the :func:`Reconstruct` function :param rotamer_library: A rotamer library to extract the rotamers from. - :param add_polar_hydrogens: All polar hydrogens will be constructed if - **consider_hbonds** is True. - If this flag is activated, they'll be added to the - structure. :type prot: :class:`ost.mol.EntityHandle` :type keep_sidechains: :class:`bool` @@ -72,7 +68,6 @@ The simplest usage of the module is provided by the :func:`Reconstruct` function :type rotamer_model: :class:`str` :type consider_hbonds: :class:`bool` :type rotamer_library: :class:`BBDepRotamerLib` / :class:`RotamerLib` - :type add_polar_hydrogens: :class:`bool` Sidechain Module Functionality diff --git a/sidechain/pymod/_reconstruct_sidechains.py b/sidechain/pymod/_reconstruct_sidechains.py index fabd97e5c0cf37109d824fe7158d62b192b843ad..2fc82977addb81ed388ae3f15076bb286cab26e5 100644 --- a/sidechain/pymod/_reconstruct_sidechains.py +++ b/sidechain/pymod/_reconstruct_sidechains.py @@ -1,34 +1,10 @@ from . import _sidechain as sidechain from ost import geom +from ost import mol def Reconstruct(ent, keep_sidechains = False, build_disulfids = True, rotamer_model = "frm", consider_hbonds = True, - rotamer_library = None, add_polar_hydrogens=False): - - name_id_mapper = dict() - name_id_mapper["ARG"] = sidechain.ARG - name_id_mapper["ASN"] = sidechain.ASN - name_id_mapper["ASP"] = sidechain.ASP - name_id_mapper["GLN"] = sidechain.GLN - name_id_mapper["GLU"] = sidechain.GLU - name_id_mapper["LYS"] = sidechain.LYS - name_id_mapper["SER"] = sidechain.SER - name_id_mapper["CYS"] = sidechain.CYS - name_id_mapper["MET"] = sidechain.MET - name_id_mapper["TRP"] = sidechain.TRP - name_id_mapper["TYR"] = sidechain.TYR - name_id_mapper["THR"] = sidechain.THR - name_id_mapper["VAL"] = sidechain.VAL - name_id_mapper["ILE"] = sidechain.ILE - name_id_mapper["LEU"] = sidechain.LEU - name_id_mapper["PRO"] = sidechain.PRO - name_id_mapper["HSD"] = sidechain.HSD - name_id_mapper["HSE"] = sidechain.HSE - name_id_mapper["HIS"] = sidechain.HIS - name_id_mapper["PHE"] = sidechain.PHE - name_id_mapper["ALA"] = sidechain.ALA - name_id_mapper["GLY"] = sidechain.GLY - name_id_mapper["MSE"] = sidechain.MET + rotamer_library = None): if rotamer_model.lower() == "frm": use_frm = True @@ -50,23 +26,53 @@ def Reconstruct(ent, keep_sidechains = False, build_disulfids = True, prot = ent.Select("peptide=true") incomplete_sidechains = list() + #extract rotamer ids + rotamer_ids = [sidechain.ALA] * len(prot.residues) + for i,r in enumerate(prot.residues): + rot_id = sidechain.TLCToRotID(r.GetName()) + if rot_id == sidechain.XXX: + continue # no idea what it is, so we stick with ALA + # => don't model sidechain + rotamer_ids[i] = rot_id #extract dihedral angles phi_angles = [0.0] * len(prot.residues) psi_angles = [0.0] * len(prot.residues) for i,r in enumerate(prot.residues): + phi = -1.0472 psi = -0.7854 tor = r.GetPhiTorsion() if tor.IsValid(): phi = tor.GetAngle() + elif i > 0: + r_prev = prot.residues[i-1] + if mol.InSequence(r_prev.handle,r.handle): + c_prev = r_prev.FindAtom("C") + n = r.FindAtom("N") + ca = r.FindAtom("CA") + c = r.FindAtom("C") + if c_prev.IsValid() and n.IsValid() and ca.IsValid() and c.IsValid(): + phi = geom.DihedralAngle(c_prev.GetPos(),n.GetPos(), + ca.GetPos(),c.GetPos()) tor = r.GetPsiTorsion() if tor.IsValid(): psi = tor.GetAngle() - + elif i < (len(prot.residues) - 1): + r_next = prot.residues[i+1] + if mol.InSequence(r.handle,r_next.handle): + n = r.FindAtom("N") + ca = r.FindAtom("CA") + c = r.FindAtom("C") + n_next = r_next.FindAtom("N") + if n.IsValid() and ca.IsValid() and c.IsValid() and n_next.IsValid(): + psi = geom.DihedralAngle(n.GetPos(), ca.GetPos(), + c.GetPos(), n_next.GetPos()) + + phi_angles[i] = phi psi_angles[i] = psi @@ -83,11 +89,8 @@ def Reconstruct(ent, keep_sidechains = False, build_disulfids = True, #build up backbone frame for i,r in enumerate(prot.residues): try: - rot_id = name_id_mapper[r.GetName()] - except: - rot_id = sidechain.ALA - try: - frame_residue = sidechain.ConstructBackboneFrameResidue(r.GetHandle(),rot_id, + frame_residue = sidechain.ConstructBackboneFrameResidue(r.handle, + rotamer_ids[i], i,rotamer_settings, phi_angles[i], r.HasProp("n_ter"), @@ -96,209 +99,194 @@ def Reconstruct(ent, keep_sidechains = False, build_disulfids = True, except: continue - if keep_sidechains: - for i,r in enumerate(prot.residues): - try: - rot_id = name_id_mapper[r.GetName()] - except: - continue - if rot_id == sidechain.CYS: - continue #cysteins will be handled seperately - if rot_id == sidechain.ALA or rot_id == sidechain.GLY: - continue - try: - frame_residue = sidechain.ConstructSidechainFrameResidue(r.GetHandle(),rot_id, - i,rotamer_settings) - frame_residues.append(frame_residue) - except: - incomplete_sidechains.append(i) - else: - for i,r in enumerate(prot.residues): - try: - rot_id = name_id_mapper[r.GetName()] - except: - continue - if rot_id not in [sidechain.GLY, sidechain.ALA]: - incomplete_sidechains.append(i) + frame = None - #look for cysteins and evaluate potential disulfid bonds - final_disulfid_indices = list() if build_disulfids: - cystein_info = list() - for i,r in enumerate(prot.residues): - try: - rot_id = name_id_mapper[r.GetName()] - except: - continue - if rot_id == sidechain.CYS: - ca = r.FindAtom("CA") - cb = r.FindAtom("CB") - if ca.IsValid() and cb.IsValid(): - cystein_info.append((ca.GetPos(),cb.GetPos(),i)) + cystein_indices = list() + + if keep_sidechains: + #let's add all complete sidechains except the cysteins to the frame + for i,r in enumerate(prot.residues): + + if rotamer_ids[i] == sidechain.CYS: + cystein_indices.append(i) + continue + + if rotamer_ids[i] == sidechain.ALA or rotamer_ids[i] == sidechain.GLY: + continue #no sidechain to model + + try: + frame_residue = sidechain.ConstructSidechainFrameResidue(r.handle, + rotamer_ids[i], + i,rotamer_settings) + frame_residues.append(frame_residue) + except: + incomplete_sidechains.append(i) + else: + #let's add everything except cysteins to the incomplete sidechains + for i,r in enumerate(prot.residues): + + if rotamer_ids[i] == sidechain.CYS: + cystein_indices.append(i) + continue + + if rotamer_ids[i] == sidechain.ALA or rotamer_ids[i] == sidechain.GLY: + continue #no sidechain to model + + incomplete_sidechains.append(i) + #let's generate the frame without any cystein sidechains + #this is required for the disulfid score evaluation frame = sidechain.Frame(frame_residues) - for i in range(len(cystein_info)): + #some info we have to keep track of when evaluating disulfid bonds + cystein_rotamers = list() + disulfid_indices = list() + cys_ca_positions = list() + cys_cb_positions = list() + + for i in cystein_indices: + + rot_grop = None + r = prot.residues[i] + ca = r.FindAtom("CA") + cb = r.FindAtom("CB") - if cystein_info[i][2] in final_disulfid_indices: + if not (ca.IsValid() and cb.IsValid()): continue - complete_i = keep_sidechains and prot.residues[cystein_info[i][2]].FindAtom("SG").IsValid() - i_ca_pos = cystein_info[i][0] - i_cb_pos = cystein_info[i][1] - i_index = cystein_info[i][2] + cys_ca_positions.append(ca.GetPos()) + cys_cb_positions.append(cb.GetPos()) + + if use_frm: + if bbdep: + rot_group = sidechain.ConstructFRMRotamerGroup(r.handle, + sidechain.CYD,i, + rotamer_library, + rotamer_settings, + phi_angles[i], + psi_angles[i]) + else: + rot_group = sidechain.ConstructFRMRotamerGroup(r.handle, + sidechain.CYD,i, + rotamer_library, + rotamer_settings) + else: + if bbdep: + rot_group = sidechain.ConstructRRMRotamerGroup(r.handle, + sidechain.CYD,i, + rotamer_library, + rotamer_settings, + phi_angles[i], + psi_angles[i]) + else: + rot_group = sidechain.ConstructRRMRotamerGroup(r.handle, + sidechain.CYD,i, + rotamer_library, + rotamer_settings) + + frame.AddFrameEnergy(rot_group) + cystein_rotamers.append(rot_group) + + for i in range(len(cystein_rotamers)): + for j in range(i+1, len(cystein_rotamers)): + + if geom.Distance(cys_ca_positions[i], cys_ca_positions[j]) > 8.0: + continue #they're too far for a disulfid bond + + if cystein_indices[i] in disulfid_indices or cystein_indices[j] in disulfid_indices: + continue #one of them already participates in a disulfid bond! + #that one might be bether but right now is first come + #first served principle + + min_score = float("inf") + min_index_k = -1 + min_index_l = -1 + + for k in range(len(cystein_rotamers[i])): + for l in range(len(cystein_rotamers[j])): + score = sidechain.DisulfidScore(cystein_rotamers[i][k], + cystein_rotamers[j][l], + cys_ca_positions[i], + cys_cb_positions[i], + cys_ca_positions[j], + cys_cb_positions[j]) + if score < min_score: + min_index_k = k + min_index_l = l + min_score = score + + if min_score < 45.0: + disulfid_indices.append(cystein_indices[i]) + disulfid_indices.append(cystein_indices[j]) + particle_list = list() + particle_list.append(cystein_rotamers[i][min_index_k][0]) + new_frame_residue_i = sidechain.FrameResidue(particle_list,cystein_indices[i]) + particle_list = list() + particle_list.append(cystein_rotamers[j][min_index_l][0]) + new_frame_residue_j = sidechain.FrameResidue(particle_list,cystein_indices[j]) + frame_residues.append(new_frame_residue_i) + frame_residues.append(new_frame_residue_j) + #set the position in the proteins residues + cystein_rotamers[i][min_index_k].ApplyOnResidue(prot.residues[cystein_indices[i]].handle, + consider_hydrogens=False) + cystein_rotamers[j][min_index_l].ApplyOnResidue(prot.residues[cystein_indices[j]].handle, + consider_hydrogens=False) + + #All cysteins participating in a disulfid bond have been applied to the + #structure and added to the frame. + #All remaining ones have to be handled according the given flags. + for idx in cystein_indices: + if idx in disulfid_indices: + continue #it's all fine + if keep_sidechains: + try: + frame_residue = sidechain.ConstructSidechainFrameResidue(prot.residues[idx].handle, + rotamer_ids[idx], + idx,rotamer_settings) + frame_residues.append(frame_residue) + except: + incomplete_sidechains.append(idx) + else: + incomplete_sidechains.append(idx) + + #we finally have to rebuild the frame if any disulfid bonds have been built + if len(disulfid_indices) > 0: + frame = sidechain.Frame(frame_residues) - for j in range(i+1,len(cystein_info)): - if cystein_info[j][2] in final_disulfid_indices: - continue - - complete_j = keep_sidechains and prot.residues[cystein_info[j][2]].FindAtom("SG").IsValid() - - if complete_i and complete_j: - continue #they're already there - - if geom.Distance(i_ca_pos,cystein_info[j][0]) < 8: - j_ca_pos = cystein_info[j][0] - j_cb_pos = cystein_info[j][1] - j_index = cystein_info[j][2] - - - if use_frm: - if bbdep: - rot_group_one = sidechain.ConstructFRMRotamerGroup(prot.residues[cystein_info[i][2]].GetHandle(), - sidechain.CYD,cystein_info[i][2], - rotamer_library,rotamer_settings, - phi_angles[i_index],psi_angles[i_index]) - - rot_group_two = sidechain.ConstructFRMRotamerGroup(prot.residues[cystein_info[j][2]].GetHandle(), - sidechain.CYD,cystein_info[j][2], - rotamer_library,rotamer_settings, - phi_angles[j_index],psi_angles[j_index]) - else: - rot_group_one = sidechain.ConstructFRMRotamerGroup(prot.residues[cystein_info[i][2]].GetHandle(), - sidechain.CYD,cystein_info[i][2], - rotamer_library,rotamer_settings) - - rot_group_two = sidechain.ConstructFRMRotamerGroup(prot.residues[cystein_info[j][2]].GetHandle(), - sidechain.CYD,cystein_info[j][2], - rotamer_library,rotamer_settings) - - - else: - if bbdep: - rot_group_one = sidechain.ConstructRRMRotamerGroup(prot.residues[cystein_info[i][2]].GetHandle(), - sidechain.CYD,cystein_info[i][2], - rotamer_library,rotamer_settings, - phi_angles[i_index],psi_angles[i_index]) - - rot_group_two = sidechain.ConstructRRMRotamerGroup(prot.residues[cystein_info[j][2]].GetHandle(), - sidechain.CYD,cystein_info[j][2], - rotamer_library,rotamer_settings, - phi_angles[j_index],psi_angles[j_index]) - else: - rot_group_one = sidechain.ConstructRRMRotamerGroup(prot.residues[cystein_info[i][2]].GetHandle(), - sidechain.CYD,cystein_info[i][2], - rotamer_library,rotamer_settings) - - rot_group_two = sidechain.ConstructRRMRotamerGroup(prot.residues[cystein_info[j][2]].GetHandle(), - sidechain.CYD,cystein_info[j][2], - rotamer_library,rotamer_settings) - - - frame.AddFrameEnergy(rot_group_one) - frame.AddFrameEnergy(rot_group_two) - - min_index_k = -1 - min_index_l = -1 - min_score = 10000000 - - for k in range(len(rot_group_one)): - for l in range(len(rot_group_two)): - score = sidechain.DisulfidScore(rot_group_one[k],rot_group_two[l], - i_ca_pos,i_cb_pos,j_ca_pos,j_cb_pos) - if score < min_score: - min_index_k = k - min_index_l = l - min_score = score - - if min_score < 45: - final_disulfid_indices.append(i_index) - final_disulfid_indices.append(j_index) - #create new frame residue... - particle_list = list() - particle_list.append(rot_group_one[min_index_k][0]) - particle_list.append(rot_group_two[min_index_l][0]) - new_frame_residue = sidechain.FrameResidue(particle_list,100000) - frame_residues.append(new_frame_residue) - #set the position in the proteins residues - rot_group_one[min_index_k].ApplyOnResidue(prot.residues[i_index].GetHandle()) - rot_group_two[min_index_l].ApplyOnResidue(prot.residues[j_index].GetHandle()) - - #we still have to add the cysteins, that are complete, even though - #they're not participating in a disulfid bond + else: if keep_sidechains: for i,r in enumerate(prot.residues): + if rotamer_ids[i] == sidechain.ALA or rotamer_ids[i] == sidechain.GLY: + continue #no sidechain to model try: - rot_id = name_id_mapper[r.GetName()] - except: - continue - if rot_id != sidechain.CYS: - continue #all other sidechains are already handled - if i in final_disulfid_indices: - continue #already added to the frame residues - try: - frame_residue = sidechain.ConstructSidechainFrameResidue(r.GetHandle(),rot_id, + frame_residue = sidechain.ConstructSidechainFrameResidue(r.handle, + rotamer_ids[i], i,rotamer_settings) frame_residues.append(frame_residue) except: incomplete_sidechains.append(i) else: - #The disulfid cysteins must be removed from incomplete sidechains - for i in final_disulfid_indices: - try: - incomplete_sidechains.remove(i) - except: - pass #it's not there anyway, so we don't have to remove it - - elif keep_sidechains: - for i,r in enumerate(prot.residues): - try: - rot_id = name_id_mapper[r.GetName()] - except: - continue - if rot_id != CYS: - continue #all other sidechains are already handled - try: - frame_residue = sidechain.ConstructSidechainFrameResidue(r.GetHandle(),rot_id, - i,rotamer_settings) - frame_residues.append(frame_residue) - except: - incomplete_sidechains.append(i) - + for i,r in enumerate(prot.residues): + if rotamer_ids[i] not in [sidechain.GLY, sidechain.ALA]: + incomplete_sidechains.append(i) - #finally build complete frame - frame = sidechain.Frame(frame_residues) + frame = sidechain.Frame(frame_residues) - #build rotamers + #build rotamers for incomplete sidechains for i in incomplete_sidechains: r = prot.residues[i] - - try: - rot_id = name_id_mapper[r.GetName()] - except: - continue + rot_id = rotamer_ids[i] if(rot_id == sidechain.ALA or rot_id == sidechain.GLY): continue if rot_id == sidechain.CYS: - if i in final_disulfid_indices: - continue rot_id = sidechain.CYH if rot_id == sidechain.PRO: @@ -312,19 +300,19 @@ def Reconstruct(ent, keep_sidechains = False, build_disulfids = True, try: if use_frm: if bbdep: - rot_group = sidechain.ConstructFRMRotamerGroup(r.GetHandle(),rot_id,i, + rot_group = sidechain.ConstructFRMRotamerGroup(r.handle,rot_id,i, rotamer_library,rotamer_settings, phi_angles[i],psi_angles[i]) else: - rot_group = sidechain.ConstructFRMRotamerGroup(r.GetHandle(),rot_id,i, + rot_group = sidechain.ConstructFRMRotamerGroup(r.handle,rot_id,i, rotamer_library,rotamer_settings) else: if bbdep: - rot_group = sidechain.ConstructRRMRotamerGroup(r.GetHandle(),rot_id,i, + rot_group = sidechain.ConstructRRMRotamerGroup(r.handle,rot_id,i, rotamer_library,rotamer_settings, phi_angles[i],psi_angles[i]) else: - rot_group = sidechain.ConstructRRMRotamerGroup(r.GetHandle(),rot_id,i, + rot_group = sidechain.ConstructRRMRotamerGroup(r.handle,rot_id,i, rotamer_library,rotamer_settings) except: @@ -347,18 +335,10 @@ def Reconstruct(ent, keep_sidechains = False, build_disulfids = True, for i,rot_group,sol in zip(residues_with_rotamer_group,rotamer_groups,solution): try: - rot_group[sol].ApplyOnResidue(prot.residues[i].GetHandle(),consider_hydrogens=add_polar_hydrogens) + rot_group[sol].ApplyOnResidue(prot.residues[i].handle,consider_hydrogens=False) except: print "there is a backbone atom missing... ",prot.residues[i].GetQualifiedName() - #if we want to add polar hydrogens, we also have to consider the backbone... - if add_polar_hydrogens: - for r in frame_residues: - i=r.GetResidueIndex() - if i==100000:continue#These are cysteins that were already handled - rh=prot.residues[i].GetHandle() - r.ApplyOnResidue(rh,True) - # these methods will be exported into module __all__ = ('Reconstruct',)