diff --git a/modelling/doc/index.rst b/modelling/doc/index.rst index 77095e72f5247b5cace3a087d4b2301e721c429f..32bdc52d2c2f5ad917b8673ebe9a56923694bfad 100644 --- a/modelling/doc/index.rst +++ b/modelling/doc/index.rst @@ -77,9 +77,6 @@ Modelling Pipeline The returned :class:`ModellingHandle` stores the obtained raw model as well as information about insertions and deletions in the gaps list. - Note, that this class is used in SWISS-MODEL and hence, legacy code must be - preserved. - :param aln: Single alignment handle for raw model with single chain or list of alignment handles for raw model with multiple chains. :type aln: :class:`~ost.seq.AlignmentHandle` / :class:`~ost.seq.AlignmentList` diff --git a/modelling/src/model.hh b/modelling/src/model.hh index 736d5462f81502a066144c753ee1710ca386b13d..318427146d8b9b95315e7351d9999972e013a1db 100644 --- a/modelling/src/model.hh +++ b/modelling/src/model.hh @@ -85,9 +85,9 @@ bool CopyModified(ost::mol::ResidueView src_res, bool& has_cbeta); -/// \brief build raw model from alignment. - -/// the second sequence must have a view attached +/// \brief Build raw model from alignment. +/// +/// The second sequence of aln must have a view attached. /// /// Basic protein core modeling algorithm that copies backbone coordinates based /// on the sequence alignment. For matching residues, the sidechain coordinates @@ -101,6 +101,12 @@ bool CopyModified(ost::mol::ResidueView src_res, /// deletions in the gaps list. /// /// Hydrogen an deuterium atoms are not copied into the model +/// +/// Notes: +/// - this is used in SWISS-MODEL and hence, legacy code must be preserved. +/// - this assumes that a default OST compounds library is set (with +/// Conopology::Instance().SetDefaultLib) as is done by default for +/// Python imports (see core __init__.py) ModellingHandle BuildRawModel(const ost::seq::AlignmentHandle& aln, bool include_ligands=false, @@ -108,23 +114,10 @@ BuildRawModel(const ost::seq::AlignmentHandle& aln, "ijklmnopqrstuvwxyz", bool spdbv_style=false); -/// \brief build raw model from alignment list. -/// Every item in the list represents one chain and the -/// second sequence must have a view attached -/// -/// Basic protein core modeling algorithm that copies backbone coordinates based -/// on the sequence alignment. For matching residues, the sidechain coordinates -/// are also copied. Gaps are ignored. -/// -/// Residue numbers are set such that missing residue in gaps are honored and -/// subsequent loop modeling can insert new residues withouth having to -/// renumber. +/// \brief Build raw model from alignment list. /// -/// The returned ModellingHandle stores information about insertions and -/// deletions in the gaps list. -/// -/// Hydrogen an deuterium atoms are not copied into the model - +/// Every item in the list represents one target chain and the +/// second sequence must have a view attached. See above. ModellingHandle BuildRawModel(const ost::seq::AlignmentList& aln, bool include_ligands=false, diff --git a/sidechain/doc/frame.rst b/sidechain/doc/frame.rst index fcf4d6c0fb22b6efd3a2051f00f9e430d723a47d..60f6cf6499a2a8c08727a0f27da1eba3c84ce88d 100644 --- a/sidechain/doc/frame.rst +++ b/sidechain/doc/frame.rst @@ -173,8 +173,8 @@ and sidechain frame residues. Constructs a :class:`FrameResidue` from a :class:`ost.mol.ResidueHandle`. This can be useful to mark a region occupied by a ligand. Note, that - there won't be any parametrization of hbonds in this function. All - Atoms of the residue will be represented as carbons. + there won't be any parametrization of hbonds in this function. All atoms + of the residue will be represented as carbons and hydrogens are skipped. :param residue: Residue from which all atoms will be taken to @@ -185,3 +185,34 @@ and sidechain frame residues. :type residue_index: :class:`int` :returns: :class:`FrameResidue` + +.. method:: ConstructFrameResidueHeuristic(residue, residue_index, settings, comp_lib) + + Constructs a :class:`FrameResidue` from a :class:`ost.mol.ResidueHandle` using + a heuristic treatment of the atoms based on the passed compounds library. + This is meant to be used as an alternative to :func:`ConstructFrameResidue`, + which will be called by this function if the residue is not known by the given + compounds library. + Only non-hydrogen atoms are considered and by default added as uncharged + carbons. Special treatment is used for atoms known by the compounds library + in the following cases: + + - carbons, nitrogens, oxygens and sulfur particles use an appropriate type + as in the :class:`SidechainParticle` enumeration + - carbonyls are added as charged oxygen particles as hbond acceptors + (if hbonds get considered according to `settings`) + + :param residue: Residue from which all atoms will be taken to + construct a :class:`FrameResidue`. + :param residue_index: Index this :class:`FrameResidue` belongs to. + :param settings: Settings to control parametrization of the single + particles. + :param comp_lib: OST compound library to use (e.g. as returned by + :meth:`ost.conop.GetDefaultLib`). + + :type residue: :class:`ost.mol.ResidueHandle` + :type residue_index: :class:`int` + :type settings: :class:`RotamerSettings` + :type comp_lib: :class:`RotamerSettings` + + :returns: :class:`ost.conop.CompoundLib` diff --git a/sidechain/pymod/_reconstruct_sidechains.py b/sidechain/pymod/_reconstruct_sidechains.py index 7f8cbabee890e148d44b8b6ddd414af8baeb2f38..d5a75d1be87018f44d5a35352efea6e47503a02a 100644 --- a/sidechain/pymod/_reconstruct_sidechains.py +++ b/sidechain/pymod/_reconstruct_sidechains.py @@ -1,6 +1,5 @@ from . import _sidechain as sidechain -from ost import geom -from ost import mol +from ost import geom, mol, conop ############################################################################### # helper functions @@ -117,8 +116,13 @@ def _AddLigandFrameResidues(frame_residues, ent_lig, rotamer_settings, offset): if not is_done: # try to add frame residues (skip exceptions) try: - # TODO: allow non-carbon treatment - fr = sidechain.ConstructFrameResidue(res.handle, res_idx) + # NOTES: + # - ConstructFrameResidueHeuristic has fall back if res unknown + # - it only deals with few possible ligand cases and has not + # been tested extensively! + comp_lib = conop.GetDefaultLib() + fr = sidechain.ConstructFrameResidueHeuristic(\ + res.handle, res_idx, rotamer_settings, comp_lib) frame_residues.append(fr) except: continue diff --git a/sidechain/pymod/export_frame.cc b/sidechain/pymod/export_frame.cc index 5b32c6c1cdc978937603441bf04ce800cba37469..845591dc3e1f4fdc6082d3d4dd6e4d41ac0576ac 100644 --- a/sidechain/pymod/export_frame.cc +++ b/sidechain/pymod/export_frame.cc @@ -111,25 +111,25 @@ void export_Frame() .def("__init__",make_constructor(&WrapFrameResidueInit)) .def("__len__", &FrameResidue::size) .def("__getitem__", &WrapGetItem, (arg( "index" ))) - .def("ApplyOnResidue",&FrameResidue::ApplyOnResidue) - .def("GetResidueIndex",&FrameResidue::GetResidueIndex) + .def("ApplyOnResidue", &FrameResidue::ApplyOnResidue) + .def("GetResidueIndex", &FrameResidue::GetResidueIndex) ; register_ptr_to_python<FrameResiduePtr>(); - def("ConstructBackboneFrameResidue",&ConstructFrameResidue_one,(arg("n_pos"),arg("ca_pos"),arg("c_pos"), - arg("o_pos"),arg("cb_pos"),arg("id"),arg("residue_index"), - arg("settings"),arg("phi"),arg("n_ter")=false,arg("c_ter")=false)); + def("ConstructBackboneFrameResidue", &ConstructFrameResidue_one, + (arg("n_pos"), arg("ca_pos"), arg("c_pos"), arg("o_pos"), arg("cb_pos"), + arg("id"), arg("residue_index"), arg("settings"), arg("phi"), + arg("n_ter")=false, arg("c_ter")=false)); - //def("ConstructBackboneFrameResidue",&ConstructFrameResidue_two,(arg("residue"),arg("id"),arg("residue_index"), - // arg("settings"),arg("phi"),arg("n_ter")=false, arg("hydrogen_phi")=M_PI,arg("n_ter")=false, - // arg("c_ter")=false)); + def("ConstructBackboneFrameResidue", &ConstructFrameResidue_two); - def("ConstructBackboneFrameResidue",&ConstructFrameResidue_two); + def("ConstructSidechainFrameResidue", &ConstructFrameResidue_three, + (arg("residue"), arg("id"), arg("residue_index"), arg("settings"))); + def("ConstructFrameResidue", &ConstructFrameResidue_four, + (arg("residue"), arg("residue_index"))); - def("ConstructSidechainFrameResidue",&ConstructFrameResidue_three,(arg("residue"),arg("id"),arg("residue_index"), - arg("settings"))); - - def("ConstructFrameResidue",&ConstructFrameResidue_four,(arg("residue"),arg("residue_index"))); + def("ConstructFrameResidueHeuristic", &ConstructFrameResidueHeuristic, + (arg("residue"), arg("residue_index"), arg("settings"), arg("comp_lib"))); } diff --git a/sidechain/src/frame_constructor.cc b/sidechain/src/frame_constructor.cc index 9c6235f4198a2ada2afac1ce8064806853782a44..755c781f29fe0a18b41965bc9f5f57a0eed41674 100644 --- a/sidechain/src/frame_constructor.cc +++ b/sidechain/src/frame_constructor.cc @@ -1,8 +1,127 @@ #include <promod3/sidechain/frame_constructor.hh> +#include <ost/conop/conop.hh> + +using namespace ost::conop; namespace promod3 { namespace sidechain{ - + +/////////////////////////////////////////////////////////////////////////////// +// HELPERS in anon ns +namespace { + +// is it a hydrogen? +inline bool _IsHydrogen(const String& element) { + return element == "H" || element == "D"; +} + +// forward declaration +struct _AtomInfo; + +// container of useful bond infos for each bond of an atom (extracted from compound) +struct _BondInfo { + // link to other guy + const _AtomInfo* other; + // order of bond + int order; +}; + +// container of useful atom infos for each atom (extracted from compound) +struct _AtomInfo { + // link to atom spec + const AtomSpec* p_atom_spec; + // how many hydrogens attached to this + int num_hydrogens; + // list of non-hydrogen bonds + std::vector<_BondInfo> non_hydrogen_bonds; +}; + +// get atom infos vector (same size and indexing as comp->GetAtomSpecs) +std::vector<_AtomInfo> _GetAtomInfos(CompoundPtr comp) { + const AtomSpecList& atom_specs = comp->GetAtomSpecs(); // vector + const BondSpecList& bond_specs = comp->GetBondSpecs(); // vector + + // extract info for all non-hydrogens + std::vector<_AtomInfo> atom_infos(atom_specs.size()); + for (uint i_a = 0; i_a < atom_specs.size(); ++i_a) { + atom_infos[i_a].p_atom_spec = &atom_specs[i_a]; + atom_infos[i_a].num_hydrogens = 0; + // we only look at bonds for non-hydrogens + if (!_IsHydrogen(atom_specs[i_a].element)) { + // parse bonds + for (uint i_b = 0; i_b < bond_specs.size(); ++i_b) { + // are we involved in it? if yes, set other_idx + int other_idx = -1; + if (bond_specs[i_b].atom_one == int(i_a)) { + other_idx = bond_specs[i_b].atom_two; + } else if (bond_specs[i_b].atom_two == int(i_a)) { + other_idx = bond_specs[i_b].atom_one; + } + if (other_idx >= 0) { + // is the other one hydrogen? + if (_IsHydrogen(atom_specs[other_idx].element)) { + ++atom_infos[i_a].num_hydrogens; + } else { + // get out the bond + _BondInfo bond; + bond.other = &atom_infos[other_idx]; + bond.order = bond_specs[i_b].order; + atom_infos[i_a].non_hydrogen_bonds.push_back(bond); + } + } + } + } + } + return atom_infos; +} + +// is it a carboxyl? (assumes atom_info is an oxygen!) +// if yes, returned vector contains two atom specs of C and X +typedef std::vector<const AtomSpec*> _AtomSpecCPList; +_AtomSpecCPList _GetCarboxylAtoms(const _AtomInfo& atom_info) { + _AtomSpecCPList carbonyl_atoms; + // requirements: O connected (order 2) to C with 2 other connections (X, Y) + const _AtomInfo* C_info = NULL; + const _AtomInfo* X_info = NULL; + if (atom_info.non_hydrogen_bonds.size() == 1 + and atom_info.non_hydrogen_bonds[0].order == 2 + and atom_info.non_hydrogen_bonds[0].other->p_atom_spec->element == "C") { + C_info = atom_info.non_hydrogen_bonds[0].other; + // C is ok, X chosen as non-hydrogen + const std::vector<_BondInfo>& C_bonds = C_info->non_hydrogen_bonds; + if (C_bonds.size() + C_info->num_hydrogens == 3) { + for (uint i = 0; i < C_bonds.size(); ++i) { + const _AtomInfo* other = C_bonds[i].other; + if (other != &atom_info) { + X_info = other; + break; + } + } + } + } + // fill carbonyl_atoms + if (C_info != NULL and X_info != NULL) { + carbonyl_atoms.push_back(C_info->p_atom_spec); + carbonyl_atoms.push_back(X_info->p_atom_spec); + } + return carbonyl_atoms; +} + +// add lone pairs for carbonyl to particle p +void _AddLonePairsCarbonyl(const geom::Vec3& x_pos, const geom::Vec3& c_pos, + const geom::Vec3& o_pos, Particle& p) { + geom::Vec3 lone_pair_center_one, lone_pair_center_two; + promod3::core::ConstructAtomPos(x_pos, c_pos, o_pos, 1.00, 2.0944, + M_PI, lone_pair_center_one); + promod3::core::ConstructAtomPos(x_pos, c_pos, o_pos, 1.00, 2.0944, + 0.0, lone_pair_center_two); + p.AddLonePair(lone_pair_center_one - o_pos); + p.AddLonePair(lone_pair_center_two - o_pos); +} + +} // anon ns +/////////////////////////////////////////////////////////////////////////////// + FrameResiduePtr ConstructSidechainFrameARG(const ost::mol::ResidueHandle& res, uint residue_index, RotamerSettingsPtr settings){ @@ -1713,14 +1832,22 @@ FrameResiduePtr ConstructSidechainFrameResidue(const ost::mol::ResidueHandle& re FrameResiduePtr ConstructFrameResidue(const ost::mol::ResidueHandle& res, uint residue_index){ - + // get atoms and count non-hydrogens (= num. particles to add) ost::mol::AtomHandleList atom_list = res.GetAtomList(); - uint size = atom_list.size(); + uint size = 0; + for (uint i = 0; i < atom_list.size(); ++i) { + if (!_IsHydrogen(atom_list[i].GetElement())) ++size; + } + // generate particles Particle* particles = new Particle[size]; - - for(uint i = 0; i < size; ++i){ - particles[i] = Particle(CParticle,atom_list[i].GetPos(),0.0,atom_list[i].GetName()); + uint p_idx = 0; + for (uint i = 0; i < atom_list.size(); ++i) { + if (!_IsHydrogen(atom_list[i].GetElement())) { + particles[p_idx] = Particle(CParticle, atom_list[i].GetPos(), 0.0, + atom_list[i].GetName()); + ++p_idx; + } } return boost::make_shared<FrameResidue>(particles, size, residue_index); @@ -1755,4 +1882,82 @@ FrameResiduePtr ConstructFrameResidue(FRMRotamerPtr rot, uint index){ return boost::make_shared<FrameResidue>(particles,num_particles,index); } +// NOTE: this only deals with few possible ligand cases and has not been +// tested extensively! +FrameResiduePtr ConstructFrameResidueHeuristic( + const ost::mol::ResidueHandle& res, uint residue_index, + RotamerSettingsPtr settings, CompoundLibPtr comp_lib) { + + // get compound and atom infos + CompoundPtr comp = comp_lib->FindCompound(res.GetName(), Compound::PDB); + // fallback if unknown + if (!comp) return ConstructFrameResidue(res, residue_index); + std::vector<_AtomInfo> atom_infos = _GetAtomInfos(comp); + + // get atoms and count non-hydrogens (= num. particles to add) + ost::mol::AtomHandleList atom_list = res.GetAtomList(); + uint size = 0; + for (uint i = 0; i < atom_list.size(); ++i) { + if (!_IsHydrogen(atom_list[i].GetElement())) ++size; + } + + // generate particles + Particle* particles = new Particle[size]; + uint p_idx = 0; + for (uint i = 0; i < atom_list.size(); ++i) { + // aliases for readability + const geom::Vec3& atom_pos = atom_list[i].GetPos(); + const String& atom_name = atom_list[i].GetName(); + const String& atom_elem = atom_list[i].GetElement(); + // skip hydrogens + if (!_IsHydrogen(atom_elem)) { + // look for it + int idx = comp->GetAtomSpecIndex(atom_name); + if (idx >= 0) { + // create particle according to atom's element + if (atom_elem == "C") { + SidechainParticle p_id = CParticle; + if (atom_infos[idx].num_hydrogens == 1) p_id = CH1Particle; + else if (atom_infos[idx].num_hydrogens == 2) p_id = CH2Particle; + else if (atom_infos[idx].num_hydrogens == 3) p_id = CH3Particle; + particles[p_idx] = Particle(p_id, atom_pos, 0.0, atom_name); + } else if (atom_elem == "N") { + particles[p_idx] = Particle(NParticle, atom_pos, 0.0, atom_name); + } else if (atom_elem == "O") { + // carbonyl? + _AtomSpecCPList carbonyl_atoms = _GetCarboxylAtoms(atom_infos[idx]); + if (settings->consider_hbonds and carbonyl_atoms.size() == 2) { + // charged particle needed for hydrogen bonds + particles[p_idx] = Particle(OParticle, atom_pos, -0.6, atom_name); + // try to get lone pair directions (try name and alt. name) + ost::mol::AtomHandle c = res.FindAtom(carbonyl_atoms[0]->name); + if (!c.IsValid()) c = res.FindAtom(carbonyl_atoms[0]->alt_name); + ost::mol::AtomHandle x = res.FindAtom(carbonyl_atoms[1]->name); + if (!x.IsValid()) x = res.FindAtom(carbonyl_atoms[1]->alt_name); + if (c.IsValid() && x.IsValid()) { + _AddLonePairsCarbonyl(x.GetPos(), c.GetPos(), atom_pos, + particles[p_idx]); + } + } else { + particles[p_idx] = Particle(OParticle, atom_pos, 0.0, atom_name); + } + } else if (atom_elem == "S") { + particles[p_idx] = Particle(SParticle, atom_pos, 0.0, atom_name); + } else { + particles[p_idx] = Particle(CParticle, atom_pos, 0.0, atom_name); + } + } else { + // fallback -> treat as uncharged carbon + particles[p_idx] = Particle(CParticle, atom_pos, 0.0, atom_name); + } + // particle was created anyways... + ++p_idx; + } + + // fallback + } + + return boost::make_shared<FrameResidue>(particles, size, residue_index); +} + }} diff --git a/sidechain/src/frame_constructor.hh b/sidechain/src/frame_constructor.hh index fab5485710636dbf10fb974733455639ef3ea827..76b3b7a1c678c1b637cca30f8ff71297f5e48099 100644 --- a/sidechain/src/frame_constructor.hh +++ b/sidechain/src/frame_constructor.hh @@ -15,14 +15,15 @@ #include <ost/mol/residue_handle.hh> #include <ost/mol/atom_handle.hh> +#include <ost/conop/compound_lib.hh> namespace promod3{ namespace sidechain{ -FrameResiduePtr ConstructBackboneFrameResidue(const geom::Vec3& n_pos, const geom::Vec3& ca_pos, - const geom::Vec3& c_pos, const geom::Vec3& o_pos, - const geom::Vec3& cb_pos, RotamerID id, uint residue_index, - RotamerSettingsPtr settings, Real phi, - bool n_ter=false, bool c_ter=false); +FrameResiduePtr ConstructBackboneFrameResidue( + const geom::Vec3& n_pos, const geom::Vec3& ca_pos, + const geom::Vec3& c_pos, const geom::Vec3& o_pos, + const geom::Vec3& cb_pos, RotamerID id, uint residue_index, + RotamerSettingsPtr settings, Real phi, bool n_ter=false, bool c_ter=false); FrameResiduePtr ConstructBackboneFrameResidue(const ost::mol::ResidueHandle& res, RotamerID id, uint residue_index, RotamerSettingsPtr settings, @@ -39,6 +40,10 @@ FrameResiduePtr ConstructFrameResidue(RRMRotamerPtr rot, uint index); FrameResiduePtr ConstructFrameResidue(FRMRotamerPtr rot, uint index); +FrameResiduePtr ConstructFrameResidueHeuristic( + const ost::mol::ResidueHandle& res, uint residue_index, + RotamerSettingsPtr settings, ost::conop::CompoundLibPtr comp_lib); + }}//ns #endif