From 659edf1b6b81cabe125223245adec3814b0783b9 Mon Sep 17 00:00:00 2001
From: Gerardo Tauriello <gerardo.tauriello@unibas.ch>
Date: Fri, 13 May 2016 17:51:49 +0200
Subject: [PATCH] SCHWED-882: included externally parametrized ligands in
 energy minimization

---
 modelling/pymod/_pipeline.py | 165 ++++++++++++++++++++++++++++++-----
 modelling/src/model.cc       |  11 +--
 2 files changed, 146 insertions(+), 30 deletions(-)

diff --git a/modelling/pymod/_pipeline.py b/modelling/pymod/_pipeline.py
index 9790e2ab..0e9c1807 100644
--- a/modelling/pymod/_pipeline.py
+++ b/modelling/pymod/_pipeline.py
@@ -11,7 +11,7 @@ from _closegaps import *
 from _ring_punches import *
 # external
 import ost
-from ost import mol
+from ost import mol, conop
 from ost.mol import mm
 import os
 
@@ -28,41 +28,139 @@ def _RemoveHydrogens(ent):
     for a in ha.atoms:
       edi.DeleteAtom(a.handle)
 
-def _SetupMmSimulation(mhandle):
-    '''Get mm simulation object for the model.'''
+def _AddHeuristicHydrogens(ent, ff):
+    '''Add hydrogens with mm.HeuristicHydrogenConstructor.'''
+    for res in ent.residues:
+        if not res.IsPeptideLinking():
+            bb = ff.GetBuildingBlock(res.name)
+            edi = ent.EditXCS()
+            h_constructor = mm.HeuristicHydrogenConstructor(bb)
+            h_constructor.ApplyOnResidue(res, edi)
+
+def _GetTopology(ent, settings, force_fields, add_heuristic_hydrogens=False):
+    '''Return topology or None if no topology could be created.
+    Note: if successful, this will update ent (adding hydrogens).
+    Set add_heuristic_hydrogens to True for ligands.
+    '''
+    # try all force fields in order
+    last_exception = None
+    for ff in force_fields:
+        settings.forcefield = ff
+        try:
+            # check if we need to add hydrogens heuristically
+            if add_heuristic_hydrogens:
+                _AddHeuristicHydrogens(ent, ff)
+            # ok now we try...
+            topo = mm.TopologyCreator.Create(ent, settings)
+        except Exception as ex:
+            # keep track of error and continue
+            last_exception = type(ex).__name__ + ": " + ex.message
+            continue
+        else:
+            # all good
+            return topo
+    # if we got here, nothing worked
+    ost.LogError("Could not create mm topology. Last exception: "\
+                 + last_exception)
+    return None
+
+def _AddLigands(ent, top, lig_ent, settings, force_fields):
+    '''Add ligands from lig_ent to topology top and update entity ent.'''
+    # connect them first
+    proc = conop.RuleBasedProcessor(conop.GetDefaultLib())
+    proc.Process(lig_ent)
+    cur_res = lig_ent.residues[0]
+    lig_num = 1
+    while cur_res.IsValid():
+        # setup connected components
+        cur_view = lig_ent.CreateEmptyView()
+        cur_view.AddResidue(cur_res, mol.INCLUDE_ATOMS)
+        cur_res = cur_res.next
+        while cur_res.IsValid() and mol.InSequence(cur_res.prev, cur_res):
+            cur_view.AddResidue(cur_res, mol.INCLUDE_ATOMS)
+            cur_res = cur_res.next
+        # try to add topology with special named chain
+        view_res_str = str([str(r) for r in cur_view.residues])
+        cur_ent = mol.CreateEntityFromView(cur_view, True)
+        edi = cur_ent.EditXCS()
+        edi.RenameChain(cur_ent.chains[0], '_' + str(lig_num))
+        lig_num += 1
+        cur_top = _GetTopology(cur_ent, settings, force_fields, True)
+        if cur_top is None:
+            ost.LogError("Failed to add ligands " + view_res_str + \
+                         " for energy minimization! Skipping...")
+        else:
+            # merge into main topology
+            cur_top.SetFudgeLJ(top.GetFudgeLJ())
+            cur_top.SetFudgeQQ(top.GetFudgeQQ())
+            top.Merge(ent, cur_top, cur_ent)
+
+def _SetupMmSimulation(model, force_fields):
+    '''Get mm simulation object for the model (incl. ligands in chain "_").
+    This tries to generate a topology for the protein and for each connected
+    component in the ligand chain separately by evaluating the force fields in
+    the same order as given. Ligands without force fields are skipped.
+    '''
     # get general settings 
     settings = mm.Settings()
     settings.integrator = mm.LangevinIntegrator(310, 1, 0.002)
     settings.init_temperature = 0
     settings.nonbonded_method = mm.NonbondedMethod.CutoffNonPeriodic
     settings.keep_ff_specific_naming = False
-    # use fast CPU platform by default
-    settings.platform = mm.Platform.CPU
-    if os.getenv('PM3_OPENMM_CPU_THREADS') is None:
-        settings.cpu_properties["CpuThreads"] = "1"
-    else:
-        settings.cpu_properties["CpuThreads"] = os.getenv('PM3_OPENMM_CPU_THREADS')
-    # prepare protein
-    model = mol.CreateEntityFromView(mhandle.model.Select("cname!='_'"), True)
+    
+    # prepare entity with protein
     _RemoveHydrogens(model)
-    # NOTE: both FF work, performance is similar, CHARMM slightly better
-    settings.forcefield = mm.LoadCHARMMForcefield()
-    #settings.forcefield = mm.LoadAMBERForcefield()
-
-    # TODO: add ligands
-    # TODO: check when platform problem appears when having multiple topologies!
+    ent = mol.CreateEntityFromView(model.Select("cname!='_'"), True)
+    top = _GetTopology(ent, settings, force_fields)
+    if top is None:
+        raise RuntimeError("Failed to setup protein for energy minimization!")
+    
+    # prepare ligands: we reprocess them to ensure connectivity
+    lig_ent = mol.CreateEntityFromView(model.Select("cname='_'"), True)
+    if lig_ent.residue_count > 0:
+        _AddLigands(ent, top, lig_ent, settings, force_fields)
 
     # finally set up the simulation
     try:
-        sim = mm.Simulation(model, settings)
+        # use fast CPU platform by default
+        settings.platform = mm.Platform.CPU
+        num_cpu_threads = os.getenv('PM3_OPENMM_CPU_THREADS')
+        if num_cpu_threads is None:
+            settings.cpu_properties["CpuThreads"] = "1"
+        else:
+            settings.cpu_properties["CpuThreads"] = num_cpu_threads
+        
+        # TODO: check when platform problem appears when having multiple 
+        #       topologies! -> should be here
+
+        sim = mm.Simulation(top, ent, settings)
     except RuntimeError as ex:
         ost.LogWarning("OpenMM failed to initialize with error: %s" % str(ex))
         # switch to "mm.Platform.Reference" as fallback
         settings.platform = mm.Platform.Reference
-        sim = mm.Simulation(model, settings)
+        sim = mm.Simulation(top, ent, settings)
         ost.LogWarning("Switched to slower reference platform of OpenMM!")
     return sim
 
+def _GetSimEntity(sim):
+    '''Get Entity from mm sim and reverse ligand chain naming.'''
+    ent = sim.GetEntity().Copy()
+    # merge ligand chains into _
+    ent_ed = ent.EditXCS()
+    chain_names = [ch.name for ch in ent.chains]
+    for chain_name in chain_names:
+        # all separate ligand chains start with _
+        if chain_name[0] == '_':
+            # add to chain _
+            if not ent.FindChain('_').IsValid():
+                ent_ed.InsertChain('_')
+            lig_ch = ent.FindChain('_')
+            cur_ch = ent.FindChain(chain_name)
+            for res in cur_ch.residues:
+                ent_ed.AppendResidue(lig_ch, res, deep=True)
+            # remove old chain
+            ent_ed.DeleteChain(cur_ch)
+    return ent
 ###############################################################################
 
 def BuildSidechains(mhandle, merge_distance=4, scorer=None, fragment_db=None,
@@ -134,7 +232,8 @@ def BuildSidechains(mhandle, merge_distance=4, scorer=None, fragment_db=None,
             
 
 def MinimizeModelEnergy(mhandle, max_iterations=12, max_iter_sd=20,
-                        max_iter_lbfgs=10):
+                        max_iter_lbfgs=10, use_amber_ff=False,
+                        extra_force_fields=list()):
     '''Minimize energy of final model using molecular mechanics.
 
     Uses :mod:`ost.mol.mm` to perform energy minimization.
@@ -164,14 +263,36 @@ def MinimizeModelEnergy(mhandle, max_iterations=12, max_iter_sd=20,
     :param max_iter_lbfgs: Max. number of iterations within LBFGS method
     :type max_iter_lbfgs:  :class:`int`
 
+    :param use_amber_ff: if True, use the AMBER force field instead of the def.
+                         CHARMM one (see :meth:`ost.mol.mm.LoadAMBERForcefield`
+                         and :meth:`ost.mol.mm.LoadCHARMMForcefield`).
+                         Both do a similarly good job without ligands (CHARMM
+                         slightly better), but you will want to be consistent
+                         with the optional force fields in `extra_force_fields`.
+    :type use_amber_ff:  :class:`bool`
+
+    :param extra_force_fields: Additional list of force fields to use if a 
+                               (ligand) residue cannot be parametrized with the
+                               default force field. The force fields are tried
+                               in the order as given and ligands without an
+                               existing parametrization are skipped.
+    :type extra_force_fields:  :class:`list` of :class:`ost.mol.Forcefield`.
+
     :return: The model including all oxygens as used in the minimizer.
     :rtype:  :class:`Entity <ost.mol.EntityHandle>`
     '''
     ost.LogInfo("Minimize energy.")
     # ignore LogInfo in stereochemical problems if output up to info done
     ignore_stereo_log = (ost.GetVerbosityLevel() == 3)
+
+    # setup force fields
+    if use_amber_ff:
+        force_fields = [mm.LoadAMBERForcefield()]
+    else:
+        force_fields = [mm.LoadCHARMMForcefield()]
+    force_fields.extend(extra_force_fields)
     # setup mm simulation
-    sim = _SetupMmSimulation(mhandle)
+    sim = _SetupMmSimulation(mhandle.model, force_fields)
     
     # settings to check for stereochemical problems
     clashing_distances = mol.alg.DefaultClashingDistances()
@@ -213,7 +334,7 @@ def MinimizeModelEnergy(mhandle, max_iterations=12, max_iter_sd=20,
     mhandle.model = mol.CreateEntityFromView(\
                             simulation_ent.Select("peptide=true and ele!=H"),
                             True)
-    # return model with oxygens
+    # return model with hydrogens
     return mol.CreateEntityFromView(simulation_ent.Select("peptide=true"), True)
 
 def CheckFinalModel(mhandle):
diff --git a/modelling/src/model.cc b/modelling/src/model.cc
index d53e4d2a..2614d87c 100644
--- a/modelling/src/model.cc
+++ b/modelling/src/model.cc
@@ -479,16 +479,11 @@ void AddLigands(const ost::seq::AlignmentList& aln, XCSEditor& edi,
           continue;
         }
 
+        // deep copy of residue into ligand chain
         mol::ResidueHandle dst_res=edi.AppendResidue(ligand_chain,
-                                                     src_ligand.GetName());
+                                                     src_ligand.GetHandle(),
+                                                     true);
         dst_res.SetIsLigand(true);
-
-        for (AtomViewList::iterator j=atoms.begin(), 
-             e2=atoms.end(); j!=e2; ++j) {
-               edi.InsertAtom(dst_res, j->GetName(), 
-                              j->GetPos(), j->GetElement(), 
-                              j->GetOccupancy(), j->GetBFactor());
-        }
       }
     }
   }
-- 
GitLab