from ost import conop, mol def Cleanup(entity, strip_water=True, canonicalize=True, remove_ligands=True): """ This function returns a cleaned-up (simplified) version of the protein structure. Different parameters affect the behaviour of the function. :param strip_water: Whether to remove water from the structure :param canonicalize: Whether to strip off modifications of amino acids and map them back to their parent standard amino acid, e.g. selenium methionine to methionine.For more complex amino acids, where the relation between the modified and the standard parent amino acid is not known, sidechain atoms are removed. D-peptide-linking residues are completely removed as well. :param remove_ligands: Whether to remove ligands from the structure :return: a cleaned version of the entity """ #setup lib = conop.GetDefaultLib() if not lib: raise RuntimeError("Cleanup requires a compound library.") clean_entity = entity.Copy() ed = clean_entity.EditXCS() #remove water residues if strip_water: _StripWater(clean_entity, ed) #replace modified residues before removing ligands to avoid removing MSE and others if canonicalize: _CanonicalizeResidues(clean_entity, ed, lib) #remove all hetatoms that are not water if remove_ligands: _RemoveLigands(clean_entity, ed) return clean_entity def _StripWater(clean_entity, ed) : """ This function removes water residues from the structure """ for res in clean_entity.residues: if res.IsValid(): if res.chem_class == mol.WATER: ed.DeleteResidue(res.handle) ed.UpdateICS() return def _RemoveLigands(clean_entity, ed) : """ This function removes ligands from the structure """ for res in clean_entity.residues: if res.IsValid(): #WHEN mmCIF WILL BE USED, CHANGE IsPeptideLinking() TO IsProtein() if not res.IsPeptideLinking() and res.atoms[0].is_hetatom and res.chem_class != mol.WATER: ed.DeleteResidue(res.handle) ed.UpdateICS() return def _CanonicalizeResidues(clean_entity, ed, compound_lib) : """ This function strips off modifications of amino acids and maps them back to their parent standard amino acid, e.g. selenium methionine to methionine.For more complex amino acids, where the relation between the modified and the standard parent amino acid is not known, sidechain atoms are removed. D-peptide-linking residues are completely removed as well. """ for res in clean_entity.residues: if res.IsValid() and res.IsPeptideLinking() : parent_olc = res.one_letter_code if parent_olc == "X" : _DeleteSidechain(res, ed) for atom in res.atoms: atom.is_hetatom = False else: parent_tlc = conop.OneLetterCodeToResidueName(parent_olc) parent_res = compound_lib.FindCompound(parent_tlc) if not parent_res: _DeleteSidechain(res, ed) for atom in res.atoms: atom.is_hetatom = False print "Removing sidechain of %s, beacuse it has not been found in the compound library"% parent_tlc else: #collect atom's names modif_atom_names = set([atom.name for atom in res.atoms if atom.element != "H" and atom.element != "D" ]) #if the res is the first or last take all the atoms from the parent res if res.FindAtom("OXT").IsValid() : parent_atom_names = set([atom.name for atom in parent_res.atom_specs if atom.element != "H" and atom.element != "D" ]) else: parent_atom_names = set([atom.name for atom in parent_res.atom_specs if atom.element != "H" and atom.element != "D" and not atom.is_leaving ]) additional_parent_atoms = parent_atom_names - modif_atom_names additional_modif_atoms = modif_atom_names - parent_atom_names #WHEN mmCIF WILL BE USED, CHANGE IsPeptideLinking() TO IsProtein(), TO EXCLUDE LIGANDS FROM CANONICALISATION if res.atoms[0].is_hetatom : old_name = res.name ed.RenameResidue(res, parent_tlc) if additional_parent_atoms: if additional_modif_atoms: #replacement _Replacement(res, ed, old_name) else: #deletion _Deletion(res, ed) elif additional_modif_atoms: #addition _Addition(res, ed, additional_modif_atoms) else: #unchanged, later check stereochemistry or H atoms _Unchanged(res, ed) #the res is a peptide but not a ligand (is a protein res) else: if additional_parent_atoms:# if the sidechain is incomplete _DeleteSidechain(res, ed) ed.UpdateICS() return def _Replacement(res, ed, old_name) : #TEMP ONLY MSE if old_name == "MSE" : for atom in res.atoms: atom.is_hetatom = False sel = res.FindAtom("SE") if sel.IsValid() : ed.InsertAtom( res, "SD", sel.pos, "S", sel.occupancy, sel.b_factor ) #S radius=~1;SE=~1.2 ed.DeleteAtom( sel ) else: _DeleteSidechain(res, ed) else: _DeleteSidechain(res, ed) return def _Deletion(res, ed) : _DeleteSidechain(res, ed) for atom in res.atoms : atom.is_hetatom = False return def _Addition(res, ed, additional_modif_atoms) : for add_atom_name in additional_modif_atoms: add_atom = res.FindAtom( add_atom_name ) if add_atom.IsValid() : ed.DeleteAtom( add_atom ) for atom in res.atoms: atom.is_hetatom = False return def _Unchanged(res, ed) : if res.chem_class == mol.D_PEPTIDE_LINKING: ed.DeleteResidue(res) else: _DeleteSidechain(res, ed) for atom in res.atoms : atom.is_hetatom = False return def _DeleteSidechain(res, ed) : for atom in res.atoms: if not atom.name in ['CA','CB','C','N','O']: ed.DeleteAtom(atom) return #visible functions __all__ = [Cleanup]