diff --git a/doc/buildsystem.rst b/doc/buildsystem.rst index 24f881cb2a0dd30d9a1ea022fa9aabf368d1f353..b286d4b2b8879d437a03d85978a2acd2a4412fad 100644 --- a/doc/buildsystem.rst +++ b/doc/buildsystem.rst @@ -6,13 +6,13 @@ Building |project| -------------------------------------------------------------------------------- Dependencies -------------------------------------------------------------------------------- -|project| is build on top of |ost_l|_ (|ost_s|), requiring at least version -1.5. |ost_s| must be configured and compiled with ``ENABLE_MM=1`` to use |openmm|_. -To create the build system, |cmake|_ is required in version -2.8.7 or higher. |python|_ works well from version 2.7. For |ost_s| and the -|C++| bit of |project|, |boost|_ is required in version 1.47.0 (the same as -used for |ost_s|). Also |eigen3|_ and |lapack|_ are needed. To build -documentation, |sphinx|_ 1.2b1 is required. +|project| is build on top of |ost_l|_ (|ost_s|), requiring at least version 1.5. +|ost_s| must be configured and compiled with ``ENABLE_MM=1`` to use |openmm|_. +To create the build system, |cmake|_ is required in version 2.8.7 or higher. +|python|_ works well from version 2.7. For |ost_s| and the |C++| bit of +|project|, |boost|_ is required in version 1.47.0 (the same as used for +|ost_s|). Also |eigen3|_ and |lapack|_ are needed. To build documentation, +|sphinx|_ 1.2b1 is required. The currently (Nov. 2015) preferred versions are: diff --git a/doc/gettingstarted.rst b/doc/gettingstarted.rst index f13a9e95c1d9e5349037e8f3a84c894f8e2108ab..489002a8f6a3bb03c67692a740cf0de9c259ba65 100644 --- a/doc/gettingstarted.rst +++ b/doc/gettingstarted.rst @@ -38,3 +38,11 @@ the desired target sequence. The modelling steps then are: - Reconstruct sidechains (see :mod:`~promod3.sidechain` module) - Minimize energy of final model using molecular mechanics (using :mod:`ost.mol.mm` from |ost_s|) + +Since a good amount of time is spent in OpenMM routines to minimize energy, we +try to use the fast and multi-threaded "CPU" platform of OpenMM (should be +included and working on any hardware supported by OpenMM). If the platform is +not available, we use the slower "Reference" platform. For the "CPU" platform, +multiple CPU threads can be used by setting the env. variable +``PM3_OPENMM_CPU_THREADS`` to the number of desired threads. If the variable is +not set, 1 thread will be used by default. diff --git a/doc/tests/CMakeLists.txt b/doc/tests/CMakeLists.txt index db07dd8855d875f1dbcb2342ed262735b2d5720c..4436b469efc1e3c8895baaeea7a50f1f8a99316d 100644 --- a/doc/tests/CMakeLists.txt +++ b/doc/tests/CMakeLists.txt @@ -33,6 +33,7 @@ set (DOC_TEST_SCRIPTS scripts/loop_fragger.py scripts/loop_torsion_sampler.py scripts/loop_all_atom.py + scripts/loop_mm_sys_creation.py scripts/scoring_main.py diff --git a/doc/tests/scripts/loop_mm_sys_creation.py b/doc/tests/scripts/loop_mm_sys_creation.py new file mode 100644 index 0000000000000000000000000000000000000000..a3f29d39973a814c4b52b708f6dd15d8701da9b9 --- /dev/null +++ b/doc/tests/scripts/loop_mm_sys_creation.py @@ -0,0 +1,42 @@ +from ost import io, geom +from promod3 import loop + +# setup system creator +ff_lookup = loop.ForcefieldLookup.GetDefault() +mm_sys = loop.MmSystemCreator(ff_lookup) + +# load example (has res. numbering starting at 1) +ent = io.LoadPDB("data/1CRN.pdb") +res_list = ent.residues +num_residues = len(res_list) + +# get all atom positions for full protein +all_atoms = loop.AllAtomPositions(res_list) +# get res. indices for loop followed by rest +# -> here we add everything to rest but in practice this could be +# just a subset of residues relevant to the loop of interest +loop_start = 10 +loop_length = 6 +res_indices = list(range(loop_start, loop_start + loop_length)) \ + + list(range(0, loop_start)) \ + + list(range(loop_start + loop_length, num_residues)) +# define which of the res_indices is terminal +is_n_ter = [False] * num_residues +is_c_ter = [False] * num_residues +is_n_ter[res_indices.index(0)] = True +is_c_ter[res_indices.index(num_residues-1)] = True +# get disulfid bridges +disulfid_bridges = mm_sys.GetDisulfidBridges(all_atoms, res_indices) +# setup MM system +mm_sys.SetupSystem(all_atoms, res_indices, loop_length, + is_n_ter, is_c_ter, disulfid_bridges) + +# run simulation +sim = mm_sys.GetSimulation() +print "Potential energy before: %g" % sim.GetPotentialEnergy() +sim.ApplySD(0.01, 100) +print "Potential energy after: %g" % sim.GetPotentialEnergy() + +# extract new loop positions and store it +mm_sys.ExtractLoopPositions(all_atoms, res_indices) +io.SavePDB(all_atoms.ToEntity(), "mm_sys_output.pdb") diff --git a/doc/tests/test_doctests.py b/doc/tests/test_doctests.py index e06f3f4875b43897d32053eebb4972c6a5d548a6..9b6809d734b87dc9cfbcd669bf713525a513395d 100644 --- a/doc/tests/test_doctests.py +++ b/doc/tests/test_doctests.py @@ -271,7 +271,7 @@ class DocTests(unittest.TestCase): # clean up os.remove('torsion_plot.png') - def testAllAtom(self): + def testLoopAllAtom(self): # run it self.checkPMRun('loop_all_atom.py', [], 0) # check that result exists and is readable @@ -280,6 +280,14 @@ class DocTests(unittest.TestCase): # clean up os.remove('all_atom_pos.pdb') os.remove('all_atom_env.pdb') + + def testLoopMmSysCreation(self): + # just check that it doesn't crash + self.checkPMRun('loop_mm_sys_creation.py', [], 0) + # check that result exists and is readable + io.LoadPDB('mm_sys_output.pdb') + # clean up + os.remove('mm_sys_output.pdb') ################################################################ diff --git a/loop/doc/CMakeLists.txt b/loop/doc/CMakeLists.txt index a6f2ae30e67f8ae7dfbfbc86b4f9aecbe844c1ad..ed3468188ae57a131ca78d05537e6da825e44d86 100644 --- a/loop/doc/CMakeLists.txt +++ b/loop/doc/CMakeLists.txt @@ -4,6 +4,7 @@ set(LOOP_RST torsion_sampler.rst structure_db.rst all_atom.rst + mm_system_creation.rst load_loop_objects.rst ) diff --git a/loop/doc/index.rst b/loop/doc/index.rst index 7120d93106bb3e44a81f71bce43c8ccce46ee99a..f60393092beaea6d8315036c82ee7dfb02b267d7 100644 --- a/loop/doc/index.rst +++ b/loop/doc/index.rst @@ -21,4 +21,5 @@ Contents: torsion_sampler structure_db all_atom + mm_system_creation load_loop_objects diff --git a/loop/doc/mm_system_creation.rst b/loop/doc/mm_system_creation.rst new file mode 100644 index 0000000000000000000000000000000000000000..b081926a558a20d297445c573b69327c030a8db2 --- /dev/null +++ b/loop/doc/mm_system_creation.rst @@ -0,0 +1,184 @@ +Generate :mod:`ost.mol.mm` systems +================================================================================ + +.. currentmodule:: promod3.loop + +To simplify the creation of :mod:`ost.mol.mm` / OpenMM simulations for loops in +proteins, we define a system creator for loops (:class:`MmSystemCreator`) and a +specialized forcefield lookup for amino acids (:class:`ForcefieldLookup`). + +The example below showcases the creation and use of an MM system: + +.. literalinclude:: ../../../tests/doc/scripts/loop_mm_sys_creation.py + + +Create MM systems for loops +-------------------------------------------------------------------------------- + +.. class:: MmSystemCreator(ff_lookup, fix_surrounding_hydrogens=True, \ + kill_electrostatics=False, nonbonded_cutoff=8, \ + inaccurate_pot_energy=False) + + Setup a system creator for a specific forcefield. The constructor only stores + the settings. Most setup work is done by :meth:`SetupSystem`. + + The idea is to have a set of movable loop residues and a set of fixed + surrounding residues which interact with the loop. + + :param ff_lookup: Forcefield to use with this system creator. + :type ff_lookup: :class:`ForcefieldLookup` + :param fix_surrounding_hydrogens: If False, the hydrogens of the surrounding + residues can move to improve H-bond building + (turned off by default as it only has a minor + impact on the result and a big one on + performance). + :type fix_surrounding_hydrogens: :class:`bool` + :param kill_electrostatics: If True, all charges are removed from the system. + This is good for H-bond building, but may be bad + if residues in the surrounding are missing (gaps). + :type kill_electrostatics: :class:`bool` + :param nonbonded_cutoff: Defines cutoff to set for non bonded interactions. + Recommended values: 5 if kill_electrostatics = True, + 8 otherwise. Negative value means no cutoff. + :type nonbonded_cutoff: :class:`float` + :param inaccurate_pot_energy: If True, we do not set correct non-bonded + interactions between fixed atoms. This leads + to inaccurate pot. energies but it is faster and + has no effect on simulation runs (e.g. ApplySD) + otherwise. + :type inaccurate_pot_energy: :class:`bool` + + .. method:: GetDisulfidBridges(all_pos, res_indices) + + :return: Pairs of indices (i,j), where res_indices[i] and res_indices[j] are + assumed to have a disulfid bridge (CYS-SG pairs within 2.5 A). + :rtype: :class:`list` of :class:`tuple` with two :class:`int` + + :param all_pos: Provides positions for each *res_indices[i]*. + :type all_pos: :class:`AllAtomPositions` + :param res_indices: Residue indices into *all_pos*. + :type res_indices: :class:`list` of :class:`int` + + .. method:: SetupSystem(all_pos, res_indices, loop_length, is_n_ter, \ + is_c_ter, disulfid_bridges) + + Setup a new system for the loop / surrounding defined by *all_pos* and + *res_indices*. Positions are taken from *all_pos[res_indices[i]]* and each + of them must have all heavy atoms defined. Residue *all_pos[i+1]* is assumed + to be peptide bound to *all_pos[i]* unless they are labelled as terminal. + + Residues *res_indices[i]* for *i* < *loop_length* are loop, rest is + surrounding. Residues *res_indices[0]* / *res_indices[loop_length-1]* are + assumed to be N- / C-stems of loop. The stems of the loop are kept fixed (N, + CA, CB for N-stem / CA, CB, C, O for C-stem) unless they are terminal + residues. + + If a system was setup previously, it is overwritten completely. + + If possible, this uses the "CPU" platform in OpenMM using the env. variable + ``PM3_OPENMM_CPU_THREADS`` to define the number of desired threads (1 thread + is used if variable is undefined). + + :param all_pos: Provides positions for each *res_indices[i]*. + :type all_pos: :class:`AllAtomPositions` + :param res_indices: Residue indices into *all_pos*. + :type res_indices: :class:`list` of :class:`int` + :param loop_length: Length of loop. + :type loop_length: :class:`int` + :param is_n_ter: For each *res_indices[i]*, *is_n_ter[i]* defines whether + that residue is N-terminal. + :type is_n_ter: :class:`list` :class:`bool` + :param is_c_ter: For each *res_indices[i]*, *is_c_ter[i]* defines whether + that residue is C-terminal. + :type is_c_ter: :class:`list` of :class:`bool` + :param disulfid_bridges: Pairs of indices (i,j), where res_indices[i] and + res_indices[j] form a disulfid bridge (see + :meth:`GetDisulfidBridges`) + :type disulfid_bridges: :class:`list` of :class:`tuple` with two + :class:`int` + + :raises: :exc:`~exceptions.RuntimeError` if *loop_length* is longer than + *res_indices*, *res_indices* / *is_n_ter* / *is_c_ter* have + inconsistent lengths or if any *all_pos[res_indices[i]]* is invalid + or has unset heavy atoms. + + .. method:: UpdatePositions(all_pos, res_indices) + + Updates the positions in the system. Even though :meth:`SetupSystem` is + already very fast, this can speed up resetting a simulation. The data must + be consistent with the data used in the last :meth:`SetupSystem` call. + + :param all_pos: Provides positions for each *res_indices[i]*. + :type all_pos: :class:`AllAtomPositions` + :param res_indices: Residue indices into *all_pos*. + :type res_indices: :class:`list` of :class:`int` + + :raises: :exc:`~exceptions.RuntimeError` if data is inconsistent with last + :meth:`SetupSystem` call (same number of residues, same AA). + + .. method:: ExtractLoopPositions(loop_pos) + ExtractLoopPositions(out_pos, res_indices) + + Extracts loop positions from the current simulation. If the simulation was + run (via :meth:`GetSimulation`), we internally have new positions for the + residues corresponding to *all_pos[res_indices[i]]* passed in + :meth:`SetupSystem`. This function then extracts these positions back into a + compatible output storage. + + :param loop_pos: Storage for loop positions. Residues *loop_pos[i]* and + *all_pos[res_indices[i]]* (of :meth:`SetupSystem`) must + have matching amino acid types for *i* < *loop_length*. + :type loop_pos: :class:`AllAtomPositions` + :param out_pos: Storage for loop positions. Residues + *out_pos[res_indices[i]]* and *all_pos[res_indices[i]]* (of + :meth:`SetupSystem`) must have matching amino acid types for + *i* < *loop_length*. + :type out_pos: :class:`AllAtomPositions` + :param res_indices: Residue indices into *out_pos*. + :type res_indices: :class:`list` of :class:`int` + + :raises: :exc:`~exceptions.RuntimeError` if data is inconsistent with last + :meth:`SetupSystem` call (big enough and matching AA). + + .. method:: GetSimulation + + :return: Simulation object setup by :meth:`SetupSystem`. Use this to run + MM simulations. + :rtype: :class:`ost.mol.mm.Simulation` + + .. method:: GetForcefieldAminoAcids + + :return: Forcefield-specific amino acid type for each residue of last + :meth:`SetupSystem` call. + :rtype: :class:`list` of :class:`ForcefieldAminoAcid` + + .. method:: GetIndexing + + The atoms of residue *i* are stored from *idx[i]* to *idx[i+1]-1*, where + *idx* is the list returned by this function. The atoms themselves are + ordered according to the indexing defined by :class:`ForcefieldLookup`. + + :return: Indexing to positions vector used by the simulation object. + The last item of the list contains the number of atoms in the + system. + :rtype: :class:`list` of :class:`int` + + +Forcefield lookup for amino acids +-------------------------------------------------------------------------------- + +.. class:: ForcefieldLookup + +.. class:: ForcefieldAminoAcid + + Enumerates the amino acid types for forcefields. The first 20 values + correspond to the 20 values of :class:`ost.conop.AminoAcid`. Additionally, + there are values for disulfid bridges (*FF_CYS2*), d-protonated histidine + (*FF_HISD*, default for *ost.conop.HIS* is *FF_HISE*) and *FF_XXX* for unknown + types. The full list of values is: + + *FF_ALA*, *FF_ARG*, *FF_ASN*, *FF_ASP*, *FF_GLN*, *FF_GLU*, *FF_LYS*, + *FF_SER*, *FF_CYS*, *FF_MET*, *FF_TRP*, *FF_TYR*, *FF_THR*, *FF_VAL*, + *FF_ILE*, *FF_LEU*, *FF_GLY*, *FF_PRO* *FF_HISE*, *FF_PHE*, *FF_CYS2*, + *FF_HISD*, *FF_XXX* + diff --git a/loop/pymod/export_mm_system_creator.cc b/loop/pymod/export_mm_system_creator.cc index 5879064d549fb9d9a1be3ab335f0de802dd14fa5..073ff4ab7b697a2a7d01ac43bebd2ef852d6a871 100644 --- a/loop/pymod/export_mm_system_creator.cc +++ b/loop/pymod/export_mm_system_creator.cc @@ -116,7 +116,6 @@ void export_MmSystemCreator() { .def("ExtractLoopPositions", WrapExtractLoopPositions, (arg("loop_pos"))) .def("ExtractLoopPositions", WrapExtractLoopPositionsIndexed, (arg("out_pos"), arg("res_indices"))) - .def("GetTopology", &MmSystemCreator::GetTopology) .def("GetSimulation", &MmSystemCreator::GetSimulation) .def("GetForcefieldAminoAcids", WrapGetFfAa) .def("GetIndexing", WrapGetIndexing) diff --git a/loop/src/mm_system_creator.cc b/loop/src/mm_system_creator.cc index 7a2b82ea1ff5aeed35ec6ac3c12471e153911328..9bd46a1b9d42e693b13d08eaff81b89bccf48854 100644 --- a/loop/src/mm_system_creator.cc +++ b/loop/src/mm_system_creator.cc @@ -362,7 +362,7 @@ void MmSystemCreator::ExtractLoopPositions(AllAtomPositions& loop_pos) { } } -void MmSystemCreator::ExtractLoopPositions(AllAtomPositions& all_pos, +void MmSystemCreator::ExtractLoopPositions(AllAtomPositions& out_pos, const std::vector<uint>& res_indices) { promod3::core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped( @@ -375,11 +375,11 @@ void MmSystemCreator::ExtractLoopPositions(AllAtomPositions& all_pos, } for (uint i_res = 0; i_res < loop_length_; ++i_res) { const uint res_idx = res_indices[i_res]; - if (res_idx > all_pos.GetNumResidues()) { + if (res_idx > out_pos.GetNumResidues()) { throw promod3::Error("Invalid residue index observed in " "MmSystemCreator::ExtractLoopPositions!"); } - if (all_pos.GetAA(res_idx) != ff_lookup_->GetAA(ff_aa_[i_res])) { + if (out_pos.GetAA(res_idx) != ff_lookup_->GetAA(ff_aa_[i_res])) { throw promod3::Error("Inconsistent amino acid types observed in " "MmSystemCreator::ExtractLoopPositions!"); } @@ -397,7 +397,7 @@ void MmSystemCreator::ExtractLoopPositions(AllAtomPositions& all_pos, // get heavy atoms for (uint i = 0; i < aa_lookup_.GetNumAtoms(aa); ++i) { const uint idx = ff_lookup_->GetHeavyIndex(ff_aa, i); - all_pos.SetPos(res_idx, i, positions_[first_idx + idx]); + out_pos.SetPos(res_idx, i, positions_[first_idx + idx]); } } } diff --git a/loop/src/mm_system_creator.hh b/loop/src/mm_system_creator.hh index b07b0aca9a4b0fb2cf235057b391841386619c30..fb72237f0f7bed3bf00e37cfb181509900af72eb 100644 --- a/loop/src/mm_system_creator.hh +++ b/loop/src/mm_system_creator.hh @@ -11,8 +11,6 @@ namespace promod3 { namespace loop { -// TODO: move doc to rst file - class MmSystemCreator; typedef boost::shared_ptr<MmSystemCreator> MmSystemCreatorPtr; @@ -34,37 +32,17 @@ public: typedef std::vector< std::pair<uint,uint> > DisulfidBridgeVector; // setup creator - // -> by default only the loop can move, if !fix_surrounding_hydrogens also - // surrounding hydrogens can move - // -> kill_electrostatics removes all charges - // -> nonbonded_cutoff < 0 means no cutoff, otherwise recommended values are - // 5 if kill_electrostatics = true and 8 otherwise - // -> inaccurate_pot_energy = true doesn't set exclusions between fixed atoms - // this leads to inaccurate pot. energies but it is faster and has no - // effect on simulation runs (e.g. ApplySD) otherwise MmSystemCreator(ForcefieldLookupPtr ff_lookup, bool fix_surrounding_hydrogens = true, bool kill_electrostatics = false, Real nonbonded_cutoff = 8, bool inaccurate_pot_energy = false); // helper to find disulfid bridges for CYS in all_pos[res_indices[i]] - // -> all CYS-SG pairs within 2.5 A are considered as bridged - // -> output pair (i,j): res_indices[i] & res_indices[j] are bridged DisulfidBridgeVector GetDisulfidBridges(const AllAtomPositions& all_pos, const std::vector<uint>& res_indices) const; // setup system (overwrites old one completely!) - // -> each all_pos[res_indices[i]] must have all heavy atoms defined - // -> res_indices[i] for i < loop_length are loop, rest is surrounding - // -> res_indices[0]/res_indices[loop_length-1] are N/C-stems of loop - // -> stems of loop are kept fixed unless they are terminal residues - // -> is_n_ter[i] / is_c_ter[i] return true if res_indices[i] is terminal - // -> disulfid_bridges: see GetDisulfidBridges - // -> all_pos[i+1] is assumed to be peptide bound to all_pos[i] unless they - // are labelled as terminal - // -> if possible, this uses the CPU platform in OpenMM using the env. var. - // PM3_OPENMM_CPU_THREADS to define threads (or 1 if not defined) void SetupSystem(const AllAtomPositions& all_pos, const std::vector<uint>& res_indices, uint loop_length, const std::vector<bool>& is_n_ter, @@ -76,10 +54,9 @@ public: const std::vector<uint>& res_indices); // extract loop heavy atom positions from simulation object into AAP - // v1: loop_pos[i] must be same AA as all_pos[res_indices[i]] of SetupSystem + // v1: loop_pos[i] used as storage for loop void ExtractLoopPositions(AllAtomPositions& loop_pos); - // v2: out_pos[res_indices[i]] must be same AA as all_pos[res_indices[i]] - // of SetupSystem + // v2: out_pos[res_indices[i]] used as storage for loop void ExtractLoopPositions(AllAtomPositions& out_pos, const std::vector<uint>& res_indices);