From adc8ab777399bd7c7117deccfe13a8f4898d5350 Mon Sep 17 00:00:00 2001
From: Gerardo Tauriello <gerardo.tauriello@unibas.ch>
Date: Fri, 4 Nov 2016 11:29:27 +0100
Subject: [PATCH] Documented MmSystemCreator class.
---
doc/buildsystem.rst | 14 +-
doc/gettingstarted.rst | 8 +
doc/tests/CMakeLists.txt | 1 +
doc/tests/scripts/loop_mm_sys_creation.py | 42 +++++
doc/tests/test_doctests.py | 10 +-
loop/doc/CMakeLists.txt | 1 +
loop/doc/index.rst | 1 +
loop/doc/mm_system_creation.rst | 184 ++++++++++++++++++++++
loop/pymod/export_mm_system_creator.cc | 1 -
loop/src/mm_system_creator.cc | 8 +-
loop/src/mm_system_creator.hh | 27 +---
11 files changed, 259 insertions(+), 38 deletions(-)
create mode 100644 doc/tests/scripts/loop_mm_sys_creation.py
create mode 100644 loop/doc/mm_system_creation.rst
diff --git a/doc/buildsystem.rst b/doc/buildsystem.rst
index 24f881cb..b286d4b2 100644
--- a/doc/buildsystem.rst
+++ b/doc/buildsystem.rst
@@ -6,13 +6,13 @@ Building |project|
--------------------------------------------------------------------------------
Dependencies
--------------------------------------------------------------------------------
-|project| is build on top of |ost_l|_ (|ost_s|), requiring at least version
-1.5. |ost_s| must be configured and compiled with ``ENABLE_MM=1`` to use |openmm|_.
-To create the build system, |cmake|_ is required in version
-2.8.7 or higher. |python|_ works well from version 2.7. For |ost_s| and the
-|C++| bit of |project|, |boost|_ is required in version 1.47.0 (the same as
-used for |ost_s|). Also |eigen3|_ and |lapack|_ are needed. To build
-documentation, |sphinx|_ 1.2b1 is required.
+|project| is build on top of |ost_l|_ (|ost_s|), requiring at least version 1.5.
+|ost_s| must be configured and compiled with ``ENABLE_MM=1`` to use |openmm|_.
+To create the build system, |cmake|_ is required in version 2.8.7 or higher.
+|python|_ works well from version 2.7. For |ost_s| and the |C++| bit of
+|project|, |boost|_ is required in version 1.47.0 (the same as used for
+|ost_s|). Also |eigen3|_ and |lapack|_ are needed. To build documentation,
+|sphinx|_ 1.2b1 is required.
The currently (Nov. 2015) preferred versions are:
diff --git a/doc/gettingstarted.rst b/doc/gettingstarted.rst
index f13a9e95..489002a8 100644
--- a/doc/gettingstarted.rst
+++ b/doc/gettingstarted.rst
@@ -38,3 +38,11 @@ the desired target sequence. The modelling steps then are:
- Reconstruct sidechains (see :mod:`~promod3.sidechain` module)
- Minimize energy of final model using molecular mechanics
(using :mod:`ost.mol.mm` from |ost_s|)
+
+Since a good amount of time is spent in OpenMM routines to minimize energy, we
+try to use the fast and multi-threaded "CPU" platform of OpenMM (should be
+included and working on any hardware supported by OpenMM). If the platform is
+not available, we use the slower "Reference" platform. For the "CPU" platform,
+multiple CPU threads can be used by setting the env. variable
+``PM3_OPENMM_CPU_THREADS`` to the number of desired threads. If the variable is
+not set, 1 thread will be used by default.
diff --git a/doc/tests/CMakeLists.txt b/doc/tests/CMakeLists.txt
index db07dd88..4436b469 100644
--- a/doc/tests/CMakeLists.txt
+++ b/doc/tests/CMakeLists.txt
@@ -33,6 +33,7 @@ set (DOC_TEST_SCRIPTS
scripts/loop_fragger.py
scripts/loop_torsion_sampler.py
scripts/loop_all_atom.py
+ scripts/loop_mm_sys_creation.py
scripts/scoring_main.py
diff --git a/doc/tests/scripts/loop_mm_sys_creation.py b/doc/tests/scripts/loop_mm_sys_creation.py
new file mode 100644
index 00000000..a3f29d39
--- /dev/null
+++ b/doc/tests/scripts/loop_mm_sys_creation.py
@@ -0,0 +1,42 @@
+from ost import io, geom
+from promod3 import loop
+
+# setup system creator
+ff_lookup = loop.ForcefieldLookup.GetDefault()
+mm_sys = loop.MmSystemCreator(ff_lookup)
+
+# load example (has res. numbering starting at 1)
+ent = io.LoadPDB("data/1CRN.pdb")
+res_list = ent.residues
+num_residues = len(res_list)
+
+# get all atom positions for full protein
+all_atoms = loop.AllAtomPositions(res_list)
+# get res. indices for loop followed by rest
+# -> here we add everything to rest but in practice this could be
+# just a subset of residues relevant to the loop of interest
+loop_start = 10
+loop_length = 6
+res_indices = list(range(loop_start, loop_start + loop_length)) \
+ + list(range(0, loop_start)) \
+ + list(range(loop_start + loop_length, num_residues))
+# define which of the res_indices is terminal
+is_n_ter = [False] * num_residues
+is_c_ter = [False] * num_residues
+is_n_ter[res_indices.index(0)] = True
+is_c_ter[res_indices.index(num_residues-1)] = True
+# get disulfid bridges
+disulfid_bridges = mm_sys.GetDisulfidBridges(all_atoms, res_indices)
+# setup MM system
+mm_sys.SetupSystem(all_atoms, res_indices, loop_length,
+ is_n_ter, is_c_ter, disulfid_bridges)
+
+# run simulation
+sim = mm_sys.GetSimulation()
+print "Potential energy before: %g" % sim.GetPotentialEnergy()
+sim.ApplySD(0.01, 100)
+print "Potential energy after: %g" % sim.GetPotentialEnergy()
+
+# extract new loop positions and store it
+mm_sys.ExtractLoopPositions(all_atoms, res_indices)
+io.SavePDB(all_atoms.ToEntity(), "mm_sys_output.pdb")
diff --git a/doc/tests/test_doctests.py b/doc/tests/test_doctests.py
index e06f3f48..9b6809d7 100644
--- a/doc/tests/test_doctests.py
+++ b/doc/tests/test_doctests.py
@@ -271,7 +271,7 @@ class DocTests(unittest.TestCase):
# clean up
os.remove('torsion_plot.png')
- def testAllAtom(self):
+ def testLoopAllAtom(self):
# run it
self.checkPMRun('loop_all_atom.py', [], 0)
# check that result exists and is readable
@@ -280,6 +280,14 @@ class DocTests(unittest.TestCase):
# clean up
os.remove('all_atom_pos.pdb')
os.remove('all_atom_env.pdb')
+
+ def testLoopMmSysCreation(self):
+ # just check that it doesn't crash
+ self.checkPMRun('loop_mm_sys_creation.py', [], 0)
+ # check that result exists and is readable
+ io.LoadPDB('mm_sys_output.pdb')
+ # clean up
+ os.remove('mm_sys_output.pdb')
################################################################
diff --git a/loop/doc/CMakeLists.txt b/loop/doc/CMakeLists.txt
index a6f2ae30..ed346818 100644
--- a/loop/doc/CMakeLists.txt
+++ b/loop/doc/CMakeLists.txt
@@ -4,6 +4,7 @@ set(LOOP_RST
torsion_sampler.rst
structure_db.rst
all_atom.rst
+ mm_system_creation.rst
load_loop_objects.rst
)
diff --git a/loop/doc/index.rst b/loop/doc/index.rst
index 7120d931..f6039309 100644
--- a/loop/doc/index.rst
+++ b/loop/doc/index.rst
@@ -21,4 +21,5 @@ Contents:
torsion_sampler
structure_db
all_atom
+ mm_system_creation
load_loop_objects
diff --git a/loop/doc/mm_system_creation.rst b/loop/doc/mm_system_creation.rst
new file mode 100644
index 00000000..b081926a
--- /dev/null
+++ b/loop/doc/mm_system_creation.rst
@@ -0,0 +1,184 @@
+Generate :mod:`ost.mol.mm` systems
+================================================================================
+
+.. currentmodule:: promod3.loop
+
+To simplify the creation of :mod:`ost.mol.mm` / OpenMM simulations for loops in
+proteins, we define a system creator for loops (:class:`MmSystemCreator`) and a
+specialized forcefield lookup for amino acids (:class:`ForcefieldLookup`).
+
+The example below showcases the creation and use of an MM system:
+
+.. literalinclude:: ../../../tests/doc/scripts/loop_mm_sys_creation.py
+
+
+Create MM systems for loops
+--------------------------------------------------------------------------------
+
+.. class:: MmSystemCreator(ff_lookup, fix_surrounding_hydrogens=True, \
+ kill_electrostatics=False, nonbonded_cutoff=8, \
+ inaccurate_pot_energy=False)
+
+ Setup a system creator for a specific forcefield. The constructor only stores
+ the settings. Most setup work is done by :meth:`SetupSystem`.
+
+ The idea is to have a set of movable loop residues and a set of fixed
+ surrounding residues which interact with the loop.
+
+ :param ff_lookup: Forcefield to use with this system creator.
+ :type ff_lookup: :class:`ForcefieldLookup`
+ :param fix_surrounding_hydrogens: If False, the hydrogens of the surrounding
+ residues can move to improve H-bond building
+ (turned off by default as it only has a minor
+ impact on the result and a big one on
+ performance).
+ :type fix_surrounding_hydrogens: :class:`bool`
+ :param kill_electrostatics: If True, all charges are removed from the system.
+ This is good for H-bond building, but may be bad
+ if residues in the surrounding are missing (gaps).
+ :type kill_electrostatics: :class:`bool`
+ :param nonbonded_cutoff: Defines cutoff to set for non bonded interactions.
+ Recommended values: 5 if kill_electrostatics = True,
+ 8 otherwise. Negative value means no cutoff.
+ :type nonbonded_cutoff: :class:`float`
+ :param inaccurate_pot_energy: If True, we do not set correct non-bonded
+ interactions between fixed atoms. This leads
+ to inaccurate pot. energies but it is faster and
+ has no effect on simulation runs (e.g. ApplySD)
+ otherwise.
+ :type inaccurate_pot_energy: :class:`bool`
+
+ .. method:: GetDisulfidBridges(all_pos, res_indices)
+
+ :return: Pairs of indices (i,j), where res_indices[i] and res_indices[j] are
+ assumed to have a disulfid bridge (CYS-SG pairs within 2.5 A).
+ :rtype: :class:`list` of :class:`tuple` with two :class:`int`
+
+ :param all_pos: Provides positions for each *res_indices[i]*.
+ :type all_pos: :class:`AllAtomPositions`
+ :param res_indices: Residue indices into *all_pos*.
+ :type res_indices: :class:`list` of :class:`int`
+
+ .. method:: SetupSystem(all_pos, res_indices, loop_length, is_n_ter, \
+ is_c_ter, disulfid_bridges)
+
+ Setup a new system for the loop / surrounding defined by *all_pos* and
+ *res_indices*. Positions are taken from *all_pos[res_indices[i]]* and each
+ of them must have all heavy atoms defined. Residue *all_pos[i+1]* is assumed
+ to be peptide bound to *all_pos[i]* unless they are labelled as terminal.
+
+ Residues *res_indices[i]* for *i* < *loop_length* are loop, rest is
+ surrounding. Residues *res_indices[0]* / *res_indices[loop_length-1]* are
+ assumed to be N- / C-stems of loop. The stems of the loop are kept fixed (N,
+ CA, CB for N-stem / CA, CB, C, O for C-stem) unless they are terminal
+ residues.
+
+ If a system was setup previously, it is overwritten completely.
+
+ If possible, this uses the "CPU" platform in OpenMM using the env. variable
+ ``PM3_OPENMM_CPU_THREADS`` to define the number of desired threads (1 thread
+ is used if variable is undefined).
+
+ :param all_pos: Provides positions for each *res_indices[i]*.
+ :type all_pos: :class:`AllAtomPositions`
+ :param res_indices: Residue indices into *all_pos*.
+ :type res_indices: :class:`list` of :class:`int`
+ :param loop_length: Length of loop.
+ :type loop_length: :class:`int`
+ :param is_n_ter: For each *res_indices[i]*, *is_n_ter[i]* defines whether
+ that residue is N-terminal.
+ :type is_n_ter: :class:`list` :class:`bool`
+ :param is_c_ter: For each *res_indices[i]*, *is_c_ter[i]* defines whether
+ that residue is C-terminal.
+ :type is_c_ter: :class:`list` of :class:`bool`
+ :param disulfid_bridges: Pairs of indices (i,j), where res_indices[i] and
+ res_indices[j] form a disulfid bridge (see
+ :meth:`GetDisulfidBridges`)
+ :type disulfid_bridges: :class:`list` of :class:`tuple` with two
+ :class:`int`
+
+ :raises: :exc:`~exceptions.RuntimeError` if *loop_length* is longer than
+ *res_indices*, *res_indices* / *is_n_ter* / *is_c_ter* have
+ inconsistent lengths or if any *all_pos[res_indices[i]]* is invalid
+ or has unset heavy atoms.
+
+ .. method:: UpdatePositions(all_pos, res_indices)
+
+ Updates the positions in the system. Even though :meth:`SetupSystem` is
+ already very fast, this can speed up resetting a simulation. The data must
+ be consistent with the data used in the last :meth:`SetupSystem` call.
+
+ :param all_pos: Provides positions for each *res_indices[i]*.
+ :type all_pos: :class:`AllAtomPositions`
+ :param res_indices: Residue indices into *all_pos*.
+ :type res_indices: :class:`list` of :class:`int`
+
+ :raises: :exc:`~exceptions.RuntimeError` if data is inconsistent with last
+ :meth:`SetupSystem` call (same number of residues, same AA).
+
+ .. method:: ExtractLoopPositions(loop_pos)
+ ExtractLoopPositions(out_pos, res_indices)
+
+ Extracts loop positions from the current simulation. If the simulation was
+ run (via :meth:`GetSimulation`), we internally have new positions for the
+ residues corresponding to *all_pos[res_indices[i]]* passed in
+ :meth:`SetupSystem`. This function then extracts these positions back into a
+ compatible output storage.
+
+ :param loop_pos: Storage for loop positions. Residues *loop_pos[i]* and
+ *all_pos[res_indices[i]]* (of :meth:`SetupSystem`) must
+ have matching amino acid types for *i* < *loop_length*.
+ :type loop_pos: :class:`AllAtomPositions`
+ :param out_pos: Storage for loop positions. Residues
+ *out_pos[res_indices[i]]* and *all_pos[res_indices[i]]* (of
+ :meth:`SetupSystem`) must have matching amino acid types for
+ *i* < *loop_length*.
+ :type out_pos: :class:`AllAtomPositions`
+ :param res_indices: Residue indices into *out_pos*.
+ :type res_indices: :class:`list` of :class:`int`
+
+ :raises: :exc:`~exceptions.RuntimeError` if data is inconsistent with last
+ :meth:`SetupSystem` call (big enough and matching AA).
+
+ .. method:: GetSimulation
+
+ :return: Simulation object setup by :meth:`SetupSystem`. Use this to run
+ MM simulations.
+ :rtype: :class:`ost.mol.mm.Simulation`
+
+ .. method:: GetForcefieldAminoAcids
+
+ :return: Forcefield-specific amino acid type for each residue of last
+ :meth:`SetupSystem` call.
+ :rtype: :class:`list` of :class:`ForcefieldAminoAcid`
+
+ .. method:: GetIndexing
+
+ The atoms of residue *i* are stored from *idx[i]* to *idx[i+1]-1*, where
+ *idx* is the list returned by this function. The atoms themselves are
+ ordered according to the indexing defined by :class:`ForcefieldLookup`.
+
+ :return: Indexing to positions vector used by the simulation object.
+ The last item of the list contains the number of atoms in the
+ system.
+ :rtype: :class:`list` of :class:`int`
+
+
+Forcefield lookup for amino acids
+--------------------------------------------------------------------------------
+
+.. class:: ForcefieldLookup
+
+.. class:: ForcefieldAminoAcid
+
+ Enumerates the amino acid types for forcefields. The first 20 values
+ correspond to the 20 values of :class:`ost.conop.AminoAcid`. Additionally,
+ there are values for disulfid bridges (*FF_CYS2*), d-protonated histidine
+ (*FF_HISD*, default for *ost.conop.HIS* is *FF_HISE*) and *FF_XXX* for unknown
+ types. The full list of values is:
+
+ *FF_ALA*, *FF_ARG*, *FF_ASN*, *FF_ASP*, *FF_GLN*, *FF_GLU*, *FF_LYS*,
+ *FF_SER*, *FF_CYS*, *FF_MET*, *FF_TRP*, *FF_TYR*, *FF_THR*, *FF_VAL*,
+ *FF_ILE*, *FF_LEU*, *FF_GLY*, *FF_PRO* *FF_HISE*, *FF_PHE*, *FF_CYS2*,
+ *FF_HISD*, *FF_XXX*
+
diff --git a/loop/pymod/export_mm_system_creator.cc b/loop/pymod/export_mm_system_creator.cc
index 5879064d..073ff4ab 100644
--- a/loop/pymod/export_mm_system_creator.cc
+++ b/loop/pymod/export_mm_system_creator.cc
@@ -116,7 +116,6 @@ void export_MmSystemCreator() {
.def("ExtractLoopPositions", WrapExtractLoopPositions, (arg("loop_pos")))
.def("ExtractLoopPositions", WrapExtractLoopPositionsIndexed,
(arg("out_pos"), arg("res_indices")))
- .def("GetTopology", &MmSystemCreator::GetTopology)
.def("GetSimulation", &MmSystemCreator::GetSimulation)
.def("GetForcefieldAminoAcids", WrapGetFfAa)
.def("GetIndexing", WrapGetIndexing)
diff --git a/loop/src/mm_system_creator.cc b/loop/src/mm_system_creator.cc
index 7a2b82ea..9bd46a1b 100644
--- a/loop/src/mm_system_creator.cc
+++ b/loop/src/mm_system_creator.cc
@@ -362,7 +362,7 @@ void MmSystemCreator::ExtractLoopPositions(AllAtomPositions& loop_pos) {
}
}
-void MmSystemCreator::ExtractLoopPositions(AllAtomPositions& all_pos,
+void MmSystemCreator::ExtractLoopPositions(AllAtomPositions& out_pos,
const std::vector<uint>& res_indices)
{
promod3::core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped(
@@ -375,11 +375,11 @@ void MmSystemCreator::ExtractLoopPositions(AllAtomPositions& all_pos,
}
for (uint i_res = 0; i_res < loop_length_; ++i_res) {
const uint res_idx = res_indices[i_res];
- if (res_idx > all_pos.GetNumResidues()) {
+ if (res_idx > out_pos.GetNumResidues()) {
throw promod3::Error("Invalid residue index observed in "
"MmSystemCreator::ExtractLoopPositions!");
}
- if (all_pos.GetAA(res_idx) != ff_lookup_->GetAA(ff_aa_[i_res])) {
+ if (out_pos.GetAA(res_idx) != ff_lookup_->GetAA(ff_aa_[i_res])) {
throw promod3::Error("Inconsistent amino acid types observed in "
"MmSystemCreator::ExtractLoopPositions!");
}
@@ -397,7 +397,7 @@ void MmSystemCreator::ExtractLoopPositions(AllAtomPositions& all_pos,
// get heavy atoms
for (uint i = 0; i < aa_lookup_.GetNumAtoms(aa); ++i) {
const uint idx = ff_lookup_->GetHeavyIndex(ff_aa, i);
- all_pos.SetPos(res_idx, i, positions_[first_idx + idx]);
+ out_pos.SetPos(res_idx, i, positions_[first_idx + idx]);
}
}
}
diff --git a/loop/src/mm_system_creator.hh b/loop/src/mm_system_creator.hh
index b07b0aca..fb72237f 100644
--- a/loop/src/mm_system_creator.hh
+++ b/loop/src/mm_system_creator.hh
@@ -11,8 +11,6 @@
namespace promod3 { namespace loop {
-// TODO: move doc to rst file
-
class MmSystemCreator;
typedef boost::shared_ptr<MmSystemCreator> MmSystemCreatorPtr;
@@ -34,37 +32,17 @@ public:
typedef std::vector< std::pair<uint,uint> > DisulfidBridgeVector;
// setup creator
- // -> by default only the loop can move, if !fix_surrounding_hydrogens also
- // surrounding hydrogens can move
- // -> kill_electrostatics removes all charges
- // -> nonbonded_cutoff < 0 means no cutoff, otherwise recommended values are
- // 5 if kill_electrostatics = true and 8 otherwise
- // -> inaccurate_pot_energy = true doesn't set exclusions between fixed atoms
- // this leads to inaccurate pot. energies but it is faster and has no
- // effect on simulation runs (e.g. ApplySD) otherwise
MmSystemCreator(ForcefieldLookupPtr ff_lookup,
bool fix_surrounding_hydrogens = true,
bool kill_electrostatics = false, Real nonbonded_cutoff = 8,
bool inaccurate_pot_energy = false);
// helper to find disulfid bridges for CYS in all_pos[res_indices[i]]
- // -> all CYS-SG pairs within 2.5 A are considered as bridged
- // -> output pair (i,j): res_indices[i] & res_indices[j] are bridged
DisulfidBridgeVector
GetDisulfidBridges(const AllAtomPositions& all_pos,
const std::vector<uint>& res_indices) const;
// setup system (overwrites old one completely!)
- // -> each all_pos[res_indices[i]] must have all heavy atoms defined
- // -> res_indices[i] for i < loop_length are loop, rest is surrounding
- // -> res_indices[0]/res_indices[loop_length-1] are N/C-stems of loop
- // -> stems of loop are kept fixed unless they are terminal residues
- // -> is_n_ter[i] / is_c_ter[i] return true if res_indices[i] is terminal
- // -> disulfid_bridges: see GetDisulfidBridges
- // -> all_pos[i+1] is assumed to be peptide bound to all_pos[i] unless they
- // are labelled as terminal
- // -> if possible, this uses the CPU platform in OpenMM using the env. var.
- // PM3_OPENMM_CPU_THREADS to define threads (or 1 if not defined)
void SetupSystem(const AllAtomPositions& all_pos,
const std::vector<uint>& res_indices, uint loop_length,
const std::vector<bool>& is_n_ter,
@@ -76,10 +54,9 @@ public:
const std::vector<uint>& res_indices);
// extract loop heavy atom positions from simulation object into AAP
- // v1: loop_pos[i] must be same AA as all_pos[res_indices[i]] of SetupSystem
+ // v1: loop_pos[i] used as storage for loop
void ExtractLoopPositions(AllAtomPositions& loop_pos);
- // v2: out_pos[res_indices[i]] must be same AA as all_pos[res_indices[i]]
- // of SetupSystem
+ // v2: out_pos[res_indices[i]] used as storage for loop
void ExtractLoopPositions(AllAtomPositions& out_pos,
const std::vector<uint>& res_indices);
--
GitLab