From 7569c2cc69c50dcd539483a100b77dfc5509f1d8 Mon Sep 17 00:00:00 2001
From: Gabriel Studer <gabriel.studer@unibas.ch>
Date: Wed, 3 Apr 2024 08:45:17 +0200
Subject: [PATCH] dockq: enable parameterization for peptides as recommended by
 CAPRI

---
 modules/mol/alg/pymod/dockq.py          | 39 ++++++++++++++++---
 modules/mol/alg/tests/CMakeLists.txt    |  1 +
 modules/mol/alg/tests/test_ost_dockq.py | 51 +++++++++++++++++++++++++
 3 files changed, 85 insertions(+), 6 deletions(-)
 create mode 100644 modules/mol/alg/tests/test_ost_dockq.py

diff --git a/modules/mol/alg/pymod/dockq.py b/modules/mol/alg/pymod/dockq.py
index 2db14d1f6..e8ec3f7a5 100644
--- a/modules/mol/alg/pymod/dockq.py
+++ b/modules/mol/alg/pymod/dockq.py
@@ -154,7 +154,8 @@ def _ContactScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
 
     return (nnat, nmdl, fnat, fnonnat)
 
-def _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, dist_thresh=10.0):
+def _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, dist_thresh=10.0,
+                cb_mode=False):
 
     # backbone atoms used for superposition
     sup_atoms = ['CA','C','N','O']
@@ -173,8 +174,11 @@ def _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, dist_thresh=10.0):
 
     # iRMSD
     #######
-    int1 = ref.Select(f"cname={mol.QueryQuoteName(ref_ch1)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ref_ch2)}]")
-    int2 = ref.Select(f"cname={mol.QueryQuoteName(ref_ch2)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ref_ch1)}]")
+    i_ref = ref
+    if cb_mode:
+        i_ref = ref.Select("aname=CB or (rname=GLY and aname=CA)")
+    int1 = i_ref.Select(f"cname={mol.QueryQuoteName(ref_ch1)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ref_ch2)}]")
+    int2 = i_ref.Select(f"cname={mol.QueryQuoteName(ref_ch2)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ref_ch1)}]")
     ref_pos = geom.Vec3List()
     mdl_pos = geom.Vec3List()
     for r in int1.residues:
@@ -263,7 +267,8 @@ def _DockQ(fnat, lrmsd, irmsd, d1, d2):
     return (fnat + _ScaleRMSD(lrmsd, d1) + _ScaleRMSD(irmsd, d2))/3
 
 def DockQ(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
-          ch1_aln=None, ch2_aln=None):
+          ch1_aln=None, ch2_aln=None, contact_dist_thresh = 5.0,
+          interface_dist_thresh = 10.0, interface_cb = False):
     """ Computes DockQ for specified interface
 
     DockQ is described in: Sankar Basu and Bjoern Wallner (2016), "DockQ: A
@@ -294,6 +299,25 @@ def DockQ(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
                     The first sequence must match the sequence in *ref_ch2* and
                     the second to *mdl_ch2*.
     :type ch2_aln: :class:`ost.seq.AlignmentHandle`
+    :param contact_dist_thresh: Residues with any atom within this threshold
+                                are considered to be in contact. Affects contact
+                                based scores fnat and fnonnat. CAPRI suggests
+                                to lower the default of 5.0 to 4.0 for
+                                protein-peptide interactions.
+    :type contact_dist_thresh: :class:`float`
+    :param interface_dist_thresh: Residues with any atom within this threshold
+                                  to another chain are considered interface
+                                  residues. Affects irmsd. CAPRI suggests to
+                                  lower the default of 10.0 to 8.0 in
+                                  combination with interface_cb=True for
+                                  protein-peptide interactions.
+    :type interface_dist_thresh: :class:`float`
+    :param interface_cb: Only use CB atoms (CA for GLY) to identify interface
+                         residues for irmsd. CAPRI suggests to enable this
+                         flag in combination with lowering
+                         *interface_dist_thresh* to 8.0 for protein-peptide
+                         interactions.
+    :type interface_cb: :class:`bool`
     :returns: :class:`dict` with keys nnat, nmdl, fnat, fnonnat, irmsd, lrmsd,
               DockQ which corresponds to the equivalent values in the original
               DockQ implementation.
@@ -301,8 +325,11 @@ def DockQ(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
     _PreprocessStructures(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
                           ch1_aln = ch1_aln, ch2_aln = ch2_aln)
     nnat, nmdl, fnat, fnonnat = _ContactScores(mdl, ref, mdl_ch1, mdl_ch2,
-                                               ref_ch1, ref_ch2)
-    irmsd, lrmsd = _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2)
+                                               ref_ch1, ref_ch2,
+                                               dist_thresh = contact_dist_thresh)
+    irmsd, lrmsd = _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
+                               dist_thresh = interface_dist_thresh,
+                               cb_mode = interface_cb)
     return {"nnat": nnat,
             "nmdl": nmdl,
             "fnat": fnat,
diff --git a/modules/mol/alg/tests/CMakeLists.txt b/modules/mol/alg/tests/CMakeLists.txt
index 268d09c81..e1a1aaf82 100644
--- a/modules/mol/alg/tests/CMakeLists.txt
+++ b/modules/mol/alg/tests/CMakeLists.txt
@@ -13,6 +13,7 @@ set(OST_MOL_ALG_UNIT_TESTS
   test_stereochemistry.py
   test_contact_score.py
   test_biounit.py
+  test_ost_dockq.py
 )
 
 if (COMPOUND_LIB)
diff --git a/modules/mol/alg/tests/test_ost_dockq.py b/modules/mol/alg/tests/test_ost_dockq.py
new file mode 100644
index 000000000..32c132289
--- /dev/null
+++ b/modules/mol/alg/tests/test_ost_dockq.py
@@ -0,0 +1,51 @@
+import unittest, os, sys
+import ost
+from ost import conop
+from ost import io, mol, seq, settings
+from ost.mol.alg import dockq
+import json
+
+def _LoadFile(file_name):
+    """Helper to avoid repeating input path over and over."""
+    return io.LoadPDB(os.path.join('testfiles', file_name))
+
+class TestDockQ(unittest.TestCase):
+
+    def test_dockq(self):
+
+        mdl = _LoadFile("lddtbs_mdl.pdb").Select("peptide=True")
+        trg = _LoadFile("lddtbs_ref_1r8q.1.pdb").Select("peptide=True")
+
+        dockq_result = dockq.DockQ(mdl, trg, "A", "B", "B", "A")
+
+        # compare to results computed with DockQ from https://github.com/bjornwallner/DockQ
+        # commit: 7a0a1c49ec70e2db68cb160abbe6aaf2f844a4ec
+        self.assertEqual(dockq_result["nnat"], 87)
+        self.assertEqual(dockq_result["nmdl"], 109)
+        self.assertAlmostEqual(dockq_result["fnat"], 0.828, places=3)
+        self.assertAlmostEqual(dockq_result["fnonnat"], 0.339, places=3)
+        self.assertAlmostEqual(dockq_result["irmsd"], 2.411, places=3)
+        self.assertAlmostEqual(dockq_result["lrmsd"], 6.568, places=3)
+        self.assertAlmostEqual(dockq_result["DockQ"], 0.578, places=3)
+
+        # enable "peptide mode" by passing -capri_peptide flag to DockQ executable
+        # not that we're dealing with a peptide here, but we can still check
+        # the slightly different parameterization...
+        dockq_result = dockq.DockQ(mdl, trg, "A", "B", "B", "A",
+                                   contact_dist_thresh = 4.0,
+                                   interface_dist_thresh = 8.0,
+                                   interface_cb = True)
+
+        self.assertEqual(dockq_result["nnat"], 54)
+        self.assertEqual(dockq_result["nmdl"], 76)
+        self.assertAlmostEqual(dockq_result["fnat"], 0.815, places=3)
+        self.assertAlmostEqual(dockq_result["fnonnat"], 0.421, places=3)
+        self.assertAlmostEqual(dockq_result["irmsd"], 1.579, places=3)
+        # for whatever reason, DockQ produces a slightly different number for
+        # lrmsd than above... Let's just slightly reduce accuracy
+        self.assertAlmostEqual(dockq_result["lrmsd"], 6.569, places=2)
+        self.assertAlmostEqual(dockq_result["DockQ"], 0.638, places=3)
+
+if __name__ == "__main__":
+    from ost import testutils
+    testutils.RunTests()
-- 
GitLab