From 4ac2e408b09e7401af0d5395cfa3b2012b1cbea2 Mon Sep 17 00:00:00 2001
From: Gabriel Studer <gabriel.studer@unibas.ch>
Date: Wed, 10 Apr 2024 13:15:31 +0200
Subject: [PATCH] refactor rigid scoring - introduce GDT implementation similar
 to LGA

The Scorer object provides two mappings. The default mapping which is
QS-score based and the rigid_mapping which is RMSD based.
The rigid_mapping is now the basis for global superposition based
scores such as RMSD and GDT. And here's the thing, OpenStructure got
a brand new GDT implementation! Scanning of best possible superpositions
is a bit simpler than in LGA but the results are similar.
Benchmarking against CASP15 TS models returns GDT_TS score where 99.2%
are within 3 GDT points with the LGA results and the maximum observed
difference is 7.39. There is a tendency for slightly lower scores in
OpenStructure meaning LGA sometimes found better superpositions.

BUT: Oligo/RNA support comes for free in the Scorer object!
---
 modules/mol/alg/doc/molalg.rst        |  58 ++++
 modules/mol/alg/pymod/CMakeLists.txt  |   1 +
 modules/mol/alg/pymod/export_gdt.cc   |  49 ++++
 modules/mol/alg/pymod/scoring.py      | 251 ++++++++++++++--
 modules/mol/alg/pymod/wrap_mol_alg.cc |   2 +
 modules/mol/alg/src/CMakeLists.txt    |   2 +
 modules/mol/alg/src/gdt.cc            | 398 ++++++++++++++++++++++++++
 modules/mol/alg/src/gdt.hh            |  34 +++
 8 files changed, 768 insertions(+), 27 deletions(-)
 create mode 100644 modules/mol/alg/pymod/export_gdt.cc
 create mode 100644 modules/mol/alg/src/gdt.cc
 create mode 100644 modules/mol/alg/src/gdt.hh

diff --git a/modules/mol/alg/doc/molalg.rst b/modules/mol/alg/doc/molalg.rst
index 6cb57ea40..79efa947e 100644
--- a/modules/mol/alg/doc/molalg.rst
+++ b/modules/mol/alg/doc/molalg.rst
@@ -173,6 +173,64 @@ Local Distance Test scores (lDDT, DRMSD)
 .. currentmodule:: ost.mol.alg
 
 
+GDT - Global Distance Test
+--------------------------
+
+  Implements the GDT score, i.e. identifies the largest number of positions
+  that can be superposed within a given distance threshold. The final
+  GDT score is then the returned number divided by the total number of
+  reference positioons. The algorithm is similar to what is described for
+  the LGA tool but simpler. Therefore, the fractions reported by OpenStructure
+  tend to be systematically lower. For benchmarking we computed the full GDT_TS,
+  i.e. average GDT for distance thresholds [1, 2, 4, 8], on all CASP15 TS
+  models. 96.5% of differences to the LGA results from the predictioncenter are
+  within 2 GDT points and 99.2% are within 3 GDT points. The max difference
+  is 7.39 GDT points.
+
+  The algorithm expects two position lists of same length and applies a sliding
+  window with specified length to define a subset of position pairs as starting
+  point for iterative superposition. Each iterative superposition applies the
+  following steps:
+
+  - Compute minimal RMSD superposition on subset of position pairs
+  - Apply superposition on all model positions
+  - Compute pairwise distances of all model positions and reference positions
+  - Define new subset of position pairs: pairs within distance threshold
+  - Stop if subset doesn't change anymore
+
+  The subset in any of the iterations which is largest is stored.
+
+  This is done for each sliding window position and the largest subset ever
+  observed is reported. To avoid long runtimes for large problem sizes, the
+  sliding window is not applied on each possible position but is capped.
+  If the number of positions is larger than this threshold, the sliding
+  window is only applied on N equidistant locations.
+
+  .. function:: GDT(mdl_pos, ref_pos, window_size, max_windows, distance_thresh)
+  
+      Returns number of positions that can be superposed within
+      *distance_thresh* and the respective transformation matrix.
+
+      :param mdl_pos: Positions representing the model, typically alpha-carbon
+                      positions
+      :param ref_pos: Positions representing the reference, typically
+                      alpha-carbon positions
+      :param window_size: Size of the sliding window that is used to serve as
+                          starting point for iterative superposition.
+                          The described benchmark was done with a value of 7.
+      :param max_windows: Cap for number of starting points. The described
+                          benchmark was done with a value of 1000.
+      :param distance_thresh: Distance threshold for GDT algorithm
+      :type mdl_pos: :class:`ost.geom.Vec3List`
+      :type ref_pos: :class:`ost.geom.Vec3List`
+      :type window_size: :class:`int`
+      :type max_windows: :class:`int`
+      :type distance_thresh: :class:`float`
+      :returns: :class:`tuple` with first element being the number of
+                superposable positions (:class:`int`) and the second element the
+                transformation matrix (:class:`ost.geom.Mat4`)
+
+
 .. _steric-clashes:
 
 Steric Clashes
diff --git a/modules/mol/alg/pymod/CMakeLists.txt b/modules/mol/alg/pymod/CMakeLists.txt
index 146774722..8506d7be3 100644
--- a/modules/mol/alg/pymod/CMakeLists.txt
+++ b/modules/mol/alg/pymod/CMakeLists.txt
@@ -12,6 +12,7 @@ set(OST_MOL_ALG_PYMOD_SOURCES
   export_membrane.cc
   export_entity_to_density.cc
   export_biounit.cc
+  export_gdt.cc
 )
 
 set(OST_MOL_ALG_PYMOD_MODULES
diff --git a/modules/mol/alg/pymod/export_gdt.cc b/modules/mol/alg/pymod/export_gdt.cc
new file mode 100644
index 000000000..971b80975
--- /dev/null
+++ b/modules/mol/alg/pymod/export_gdt.cc
@@ -0,0 +1,49 @@
+//------------------------------------------------------------------------------
+// This file is part of the OpenStructure project <www.openstructure.org>
+//
+// Copyright (C) 2008-2024 by the OpenStructure authors
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation; either version 3.0 of the License, or (at your option)
+// any later version.
+// This library is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+//------------------------------------------------------------------------------
+
+
+#include <boost/python.hpp>
+
+#include <ost/mol/alg/gdt.hh>
+
+using namespace boost::python;
+
+namespace{
+
+boost::python::tuple WrapGDT(const geom::Vec3List& mdl_pos,
+                             const geom::Vec3List& ref_pos,
+                             int window_size, int max_windows,
+                             Real distance_thresh) {
+  int n_superposed;
+  geom::Mat4 transform;
+  ost::mol::alg::GDT(mdl_pos, ref_pos, window_size, max_windows,
+                     distance_thresh, n_superposed, transform);
+  return boost::python::make_tuple(n_superposed, transform);
+}
+
+} // ns
+
+void export_GDT() {
+
+    def("GDT", &WrapGDT, (arg("mdl_pos"), 
+                          arg("ref_pos"),
+                          arg("window_size"),
+                          arg("max_windows"),
+                          arg("distance_thresh")));
+}
diff --git a/modules/mol/alg/pymod/scoring.py b/modules/mol/alg/pymod/scoring.py
index 53a596f52..066af3ad3 100644
--- a/modules/mol/alg/pymod/scoring.py
+++ b/modules/mol/alg/pymod/scoring.py
@@ -14,6 +14,7 @@ from ost.mol.alg import dockq
 from ost.mol.alg.lddt import lDDTScorer
 from ost.mol.alg.qsscore import QSScorer
 from ost.mol.alg.contact_score import ContactScorer
+from ost.mol.alg import GDT
 from ost.mol.alg import Molck, MolckSettings
 from ost import bindings
 from ost.bindings import cadscore
@@ -268,6 +269,7 @@ class Scorer:
         self._target_bad_angles = None
         self._chain_mapper = None
         self._mapping = None
+        self._rigid_mapping = None
         self._model_interface_residues = None
         self._target_interface_residues = None
         self._aln = None
@@ -330,6 +332,18 @@ class Scorer:
         self._transformed_mapped_model_pos = None
         self._n_target_not_mapped = None
         self._transform = None
+
+        self._rigid_mapped_target_pos = None
+        self._rigid_mapped_model_pos = None
+        self._rigid_transformed_mapped_model_pos = None
+        self._rigid_n_target_not_mapped = None
+        self._rigid_transform = None
+
+        self._gdt_05 = None
+        self._gdt_1 = None
+        self._gdt_2 = None
+        self._gdt_4 = None
+        self._gdt_8 = None
         self._gdtts = None
         self._gdtha = None
         self._rmsd = None
@@ -526,6 +540,8 @@ class Scorer:
     def mapping(self):
         """ Full chain mapping result for :attr:`target`/:attr:`model`
 
+        Computed with :func:`ost.mol.alg.ChainMapper.GetMapping`
+
         :type: :class:`ost.mol.alg.chain_mapping.MappingResult` 
         """
         if self._mapping is None:
@@ -535,6 +551,20 @@ class Scorer:
                                          n_max_naive = self.n_max_naive)
         return self._mapping
 
+    @property
+    def rigid_mapping(self):
+        """ Full chain mapping result for :attr:`target`/:attr:`model`
+
+        Computed with :func:`ost.mol.alg.ChainMapper.GetRMSDMapping`
+
+        :type: :class:`ost.mol.alg.chain_mapping.MappingResult` 
+        """
+        if self._rigid_mapping is None:
+            LogScript("Computing rigid chain mapping")
+            self._rigid_mapping = \
+            self.chain_mapper.GetRMSDMapping(self.model)
+        return self._rigid_mapping
+
     @property
     def model_interface_residues(self):
         """ Interface residues in :attr:`~model`
@@ -1215,60 +1245,198 @@ class Scorer:
         return self._transform
 
     @property
-    def gdtts(self):
-        """ GDT with thresholds: 8.0A, 4.0A, 2.0A and 1.0A
+    def rigid_mapped_target_pos(self):
+        """ Mapped representative positions in target
+
+        Thats CA positions for peptide residues and C3' positions for
+        nucleotides. Has same length as :attr:`~rigid_mapped_model_pos` and mapping
+        is based on :attr:`~rigid_mapping`.
+
+        :type: :class:`ost.geom.Vec3List`
+        """
+        if self._rigid_mapped_target_pos is None:
+            self._extract_rigid_mapped_pos()
+        return self._rigid_mapped_target_pos
+
+    @property
+    def rigid_mapped_model_pos(self):
+        """ Mapped representative positions in model
+
+        Thats CA positions for peptide residues and C3' positions for
+        nucleotides. Has same length as :attr:`~mapped_target_pos` and mapping
+        is based on :attr:`~rigid_mapping`.
+
+        :type: :class:`ost.geom.Vec3List`
+        """
+        if self._rigid_mapped_model_pos is None:
+            self._extract_rigid_mapped_pos()
+        return self._rigid_mapped_model_pos
+
+    @property
+    def rigid_transformed_mapped_model_pos(self):
+        """ :attr:`~rigid_mapped_model_pos` with :attr:`~rigid_transform` applied
+
+        :type: :class:`ost.geom.Vec3List`
+        """
+        if self._rigid_transformed_mapped_model_pos is None:
+            self._rigid_transformed_mapped_model_pos = \
+            geom.Vec3List(self.rigid_mapped_model_pos)
+            self._rigid_transformed_mapped_model_pos.ApplyTransform(self.rigid_transform)
+        return self._rigid_transformed_mapped_model_pos
+
+    @property
+    def rigid_n_target_not_mapped(self):
+        """ Number of target residues which have no rigid mapping to model
+
+        :type: :class:`int`
+        """
+        if self._rigid_n_target_not_mapped is None:
+            self._extract_rigid_mapped_pos()
+        return self._rigid_n_target_not_mapped
+
+    @property
+    def rigid_transform(self):
+        """ Transform: :attr:`~rigid_mapped_model_pos` onto :attr:`~rigid_mapped_target_pos`
+
+        Computed using Kabsch minimal rmsd algorithm
+
+        :type: :class:`ost.geom.Mat4`
+        """
+        if self._rigid_transform is None:
+            try:
+                res = mol.alg.SuperposeSVD(self.rigid_mapped_model_pos,
+                                           self.rigid_mapped_target_pos)
+                self._rigid_transform = res.transformation
+            except:
+                self._rigid_transform = geom.Mat4()
+        return self._rigid_transform
+
+    @property
+    def gdt_05(self):
+        """ Fraction CA (C3' for nucleotides) that can be superposed within 0.5A
+
+        Uses :attr:`~rigid_mapped_model_pos` and :attr:`~rigid_mapped_target_pos`.
+        Similar iterative algorithm as LGA tool
+
+        :type: :class:`float` 
+        """
+        if self._gdt_05 is None:
+            n, m = GDT(self.rigid_mapped_model_pos, self.rigid_mapped_target_pos, 7, 1000, 0.5)
+            n_full = len(self.rigid_mapped_target_pos) + self.rigid_n_target_not_mapped
+            if n_full > 0:
+                self._gdt_05 = float(n) / n_full
+            else:
+                self._gdt_05 = 0.0
+        return self._gdt_05
+
+    @property
+    def gdt_1(self):
+        """ Fraction CA (C3' for nucleotides) that can be superposed within 1.0A
 
-        Computed on :attr:`~transformed_mapped_model_pos` and
-        :attr:`mapped_target_pos`
+        Uses :attr:`~rigid_mapped_model_pos` and :attr:`~rigid_mapped_target_pos`.
+        Similar iterative algorithm as LGA tool
+
+        :type: :class:`float` 
+        """
+        if self._gdt_1 is None:
+            n, m = GDT(self.rigid_mapped_model_pos, self.rigid_mapped_target_pos, 7, 1000, 1.0)
+            n_full = len(self.rigid_mapped_target_pos) + self.rigid_n_target_not_mapped
+            if n_full > 0:
+                self._gdt_1 = float(n) / n_full
+            else:
+                self._gdt_1 = 0.0
+        return self._gdt_1
+
+    @property
+    def gdt_2(self):
+        """ Fraction CA (C3' for nucleotides) that can be superposed within 2.0A
+
+        Uses :attr:`~rigid_mapped_model_pos` and :attr:`~rigid_mapped_target_pos`.
+        Similar iterative algorithm as LGA tool
+
+
+        :type: :class:`float` 
+        """
+        if self._gdt_2 is None:
+            n, m = GDT(self.rigid_mapped_model_pos, self.rigid_mapped_target_pos, 7, 1000, 2.0)
+            n_full = len(self.rigid_mapped_target_pos) + self.rigid_n_target_not_mapped
+            if n_full > 0:
+                self._gdt_2 = float(n) / n_full
+            else:
+                self._gdt_2 = 0.0
+        return self._gdt_2
+
+    @property
+    def gdt_4(self):
+        """ Fraction CA (C3' for nucleotides) that can be superposed within 4.0A
+
+        Uses :attr:`~rigid_mapped_model_pos` and :attr:`~rigid_mapped_target_pos`.
+        Similar iterative algorithm as LGA tool
+
+        :type: :class:`float` 
+        """
+        if self._gdt_4 is None:
+            n, m = GDT(self.rigid_mapped_model_pos, self.rigid_mapped_target_pos, 7, 1000, 4.0)
+            n_full = len(self.rigid_mapped_target_pos) + self.rigid_n_target_not_mapped
+            if n_full > 0:
+                self._gdt_4 = float(n) / n_full
+            else:
+                self._gdt_4 = 0.0
+        return self._gdt_4
+
+    @property
+    def gdt_8(self):
+        """ Fraction CA (C3' for nucleotides) that can be superposed within 8.0A
+
+        Similar iterative algorithm as LGA tool
+
+        :type: :class:`float` 
+        """
+        if self._gdt_8 is None:
+            n, m = GDT(self.rigid_mapped_model_pos, self.rigid_mapped_target_pos, 7, 1000, 8.0)
+            n_full = len(self.rigid_mapped_target_pos) + self.rigid_n_target_not_mapped
+            if n_full > 0:
+                self._gdt_8 = float(n) / n_full
+            else:
+                self._gdt_8 = 0.0
+        return self._gdt_8
+    
+
+    @property
+    def gdtts(self):
+        """ avg GDT with thresholds: 8.0A, 4.0A, 2.0A and 1.0A
 
         :type: :class:`float`
         """
         if self._gdtts is None:
             LogScript("Computing GDT-TS score")
-            n = \
-            self.mapped_target_pos.GetGDTTS(self.transformed_mapped_model_pos,
-                                            norm=False)
-            n_full = 4*len(self.mapped_target_pos) + 4*self.n_target_not_mapped
-            if n_full > 0:
-                self._gdtts = float(n) / n_full
-            else:
-                self._gdtts = 0.0
+            self._gdtts = (self.gdt_1 + self.gdt_2 + self.gdt_4 + self.gdt_8) / 4
         return self._gdtts
 
     @property
     def gdtha(self):
-        """ GDT with thresholds: 4.0A, 2.0A, 1.0A and 0.5A
-
-        Computed on :attr:`~transformed_mapped_model_pos` and
-        :attr:`mapped_target_pos`
+        """ avg GDT with thresholds: 4.0A, 2.0A, 1.0A and 0.5A
 
         :type: :class:`float`
         """
         if self._gdtha is None:
             LogScript("Computing GDT-HA score")
-            n = \
-            self.mapped_target_pos.GetGDTHA(self.transformed_mapped_model_pos,
-                                            norm=False)
-            n_full = 4*len(self.mapped_target_pos) + 4*self.n_target_not_mapped
-            if n_full > 0:
-                self._gdtha = float(n) / n_full
-            else:
-                self._gdtha = 0.0
+            self._gdtha = (self.gdt_05 + self.gdt_1 + self.gdt_2 + self.gdt_4) / 4
         return self._gdtha
 
     @property
     def rmsd(self):
         """ RMSD
 
-        Computed on :attr:`~transformed_mapped_model_pos` and
-        :attr:`mapped_target_pos`
+        Computed on :attr:`~rigid_transformed_mapped_model_pos` and
+        :attr:`rigid_mapped_target_pos`
 
         :type: :class:`float`
         """
         if self._rmsd is None:
             LogScript("Computing RMSD")
             self._rmsd = \
-            self.mapped_target_pos.GetRMSD(self.transformed_mapped_model_pos)
+            self.rigid_mapped_target_pos.GetRMSD(self.rigid_transformed_mapped_model_pos)
         return self._rmsd
 
     @property
@@ -1714,6 +1882,35 @@ class Scorer:
             if ch.GetName() not in processed_trg_chains:
                 self._n_target_not_mapped += len(ch.residues)
 
+
+    def _extract_rigid_mapped_pos(self):
+        self._rigid_mapped_target_pos = geom.Vec3List()
+        self._rigid_mapped_model_pos = geom.Vec3List()
+        self._rigid_n_target_not_mapped = 0
+        processed_trg_chains = set()
+        for trg_ch, mdl_ch in self.rigid_mapping.GetFlatMapping().items():
+            processed_trg_chains.add(trg_ch)
+            aln = self.rigid_mapping.alns[(trg_ch, mdl_ch)]
+            for col in aln:
+                if col[0] != '-' and col[1] != '-':
+                    trg_res = col.GetResidue(0)
+                    mdl_res = col.GetResidue(1)
+                    trg_at = trg_res.FindAtom("CA")
+                    mdl_at = mdl_res.FindAtom("CA")
+                    if not trg_at.IsValid():
+                        trg_at = trg_res.FindAtom("C3'")
+                    if not mdl_at.IsValid():
+                        mdl_at = mdl_res.FindAtom("C3'")
+                    self._rigid_mapped_target_pos.append(trg_at.GetPos())
+                    self._rigid_mapped_model_pos.append(mdl_at.GetPos())
+                elif col[0] != '-':
+                    self._rigid_n_target_not_mapped += 1
+        # count number of trg residues from non-mapped chains
+        for ch in self.rigid_mapping.target.chains:
+            if ch.GetName() not in processed_trg_chains:
+                self._rigid_n_target_not_mapped += len(ch.residues)
+
+
     def _compute_cad_score(self):
         if not self.resnum_alignments:
             raise RuntimeError("CAD score computations rely on residue numbers "
diff --git a/modules/mol/alg/pymod/wrap_mol_alg.cc b/modules/mol/alg/pymod/wrap_mol_alg.cc
index 295bea2bb..09c008392 100644
--- a/modules/mol/alg/pymod/wrap_mol_alg.cc
+++ b/modules/mol/alg/pymod/wrap_mol_alg.cc
@@ -51,6 +51,7 @@ void export_sec_struct_segments();
 void export_find_membrane();
 void export_entity_to_density();
 void export_biounit();
+void export_GDT();
 
 namespace {
   
@@ -323,6 +324,7 @@ BOOST_PYTHON_MODULE(_ost_mol_alg)
   export_find_membrane();
   export_entity_to_density();
   export_biounit();
+  export_GDT();
   
   def("LocalDistDiffTest", lddt_a, (arg("sequence_separation")=0,arg("local_lddt_property_string")=""));
   def("LocalDistDiffTest", lddt_c, (arg("local_lddt_property_string")=""));
diff --git a/modules/mol/alg/src/CMakeLists.txt b/modules/mol/alg/src/CMakeLists.txt
index 49f58f50e..20d016175 100644
--- a/modules/mol/alg/src/CMakeLists.txt
+++ b/modules/mol/alg/src/CMakeLists.txt
@@ -25,6 +25,7 @@ set(OST_MOL_ALG_HEADERS
   find_membrane.hh
   entity_to_density.hh
   biounit.hh
+  gdt.hh
 )
 
 set(OST_MOL_ALG_SOURCES
@@ -53,6 +54,7 @@ set(OST_MOL_ALG_SOURCES
   find_membrane.cc
   entity_to_density.cc
   biounit.cc
+  gdt.cc
 )
 
 set(MOL_ALG_DEPS ost_mol ost_seq ost_img ost_img_alg ost_seq_alg ost_conop)
diff --git a/modules/mol/alg/src/gdt.cc b/modules/mol/alg/src/gdt.cc
new file mode 100644
index 000000000..93b6e8b17
--- /dev/null
+++ b/modules/mol/alg/src/gdt.cc
@@ -0,0 +1,398 @@
+//------------------------------------------------------------------------------
+// This file is part of the OpenStructure project <www.openstructure.org>
+//
+// Copyright (C) 2008-2024 by the OpenStructure authors
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation; either version 3.0 of the License, or (at your option)
+// any later version.
+// This library is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+//------------------------------------------------------------------------------
+
+#include <ost/mol/alg/gdt.hh>
+#include <ost/message.hh>
+#include <Eigen/Dense>
+
+namespace {
+
+// get RMSD and rotation matrix using the Theobald method
+// Both position matrices are expected to have the same size
+// and to have their average position at the origin 
+void TheobaldRMSD(const Eigen::Matrix<double,Eigen::Dynamic,3>& pos_one,
+                  const Eigen::Matrix<double,Eigen::Dynamic,3>& pos_two,
+                  Real& rmsd, Eigen::Matrix<double,3,3>& rot){
+
+  if(pos_one.rows() < 3){
+    throw ost::Error("Observed superposition with < 3 positions to "
+                         "superpose!");
+  }
+
+  Eigen::Matrix<double,3,3> M = pos_one.transpose() * pos_two;
+
+  // using floats for the squared norm is fine
+  double GA = pos_one.squaredNorm();
+  double GB = pos_two.squaredNorm();
+
+  Eigen::Matrix<double,4,4> K;
+  K(0,0)  =  M(0,0) + M(1,1) + M(2,2);
+  K(0,1)  =  M(1,2) - M(2,1);
+  K(0,2)  =  M(2,0) - M(0,2);
+  K(0,3)  =  M(0,1) - M(1,0);
+
+  K(1,0)  =  K(0,1);
+  K(1,1)  =  M(0,0) - M(1,1) - M(2,2);
+  K(1,2)  =  M(0,1) + M(1,0);
+  K(1,3)  =  M(0,2) + M(2,0);
+
+  K(2,0)  =  K(0,2);
+  K(2,1)  =  K(1,2);
+  K(2,2)  = -M(0,0) + M(1,1) - M(2,2);
+  K(2,3)  =  M(1,2) + M(2,1);
+
+  K(3,0)  =  K(0,3);
+  K(3,1)  =  K(1,3);
+  K(3,2)  =  K(2,3);
+  K(3,3)  = -M(0,0) - M(1,1) + M(2,2);
+
+  double C0 = K.determinant();
+  double C1 = -8.0*M.determinant();
+  double C2 = -2.0*M.squaredNorm();
+  double lambda = 0.5 * (GA + GB);
+  double a, b, d, lambda_2;
+  for(int i = 0; i < 50; ++i){
+    lambda_2 = lambda * lambda;
+    b = (lambda_2 + C2) * lambda;
+    a = b + C1;
+    d = (a*lambda + C0) / (2.0*lambda_2*lambda + b + a);
+    lambda -= d;
+    if(std::abs(d) < 1e-6){
+      break;
+    }
+  }
+
+  double msd = (GA + GB - 2.0 * lambda) / pos_one.rows();
+  if(msd < 1e-4){
+    // The algorithm never really goes to zero... if msd is super small we just
+    // assign zero. 1e-4 corresponds to an rmsd of 0.01
+    rmsd = 0.0;
+  }
+  else{
+    rmsd = std::sqrt(msd);
+  }
+
+  K -= lambda*Eigen::Matrix<double,4,4>::Identity();
+
+  double helper[6];
+  helper[0] = K(2,2)*K(3,3) - K(3,2)*K(2,3);
+  helper[1] = K(2,1)*K(3,3) - K(3,1)*K(2,3);
+  helper[2] = K(2,1)*K(3,2) - K(3,1)*K(2,2);
+  helper[3] = K(2,0)*K(3,3) - K(3,0)*K(2,3);
+  helper[4] = K(2,0)*K(3,2) - K(3,0)*K(2,2);
+  helper[5] = K(2,0)*K(3,1) - K(3,0)*K(2,1);
+
+  double q1 =  K(1,1)*helper[0] - K(1,2)*helper[1] + K(1,3)*helper[2];
+  double q2 = -K(1,0)*helper[0] + K(1,2)*helper[3] - K(1,3)*helper[4];
+  double q3 =  K(1,0)*helper[1] - K(1,1)*helper[3] + K(1,3)*helper[5];
+  double q4 = -K(1,0)*helper[2] + K(1,1)*helper[4] - K(1,2)*helper[5]; 
+  double norm = q1*q1 + q2*q2 + q3*q3 + q4*q4;
+
+  if(norm < 1e-6){
+    q1 =  K(0,1)*helper[0] - K(0,2)*helper[1] + K(0,3)*helper[2];
+    q2 = -K(0,0)*helper[0] + K(0,2)*helper[3] - K(0,3)*helper[4];
+    q3 =  K(0,0)*helper[1] - K(0,1)*helper[3] + K(0,3)*helper[5];
+    q4 = -K(0,0)*helper[2] + K(0,1)*helper[4] - K(0,2)*helper[5];
+    norm = q1*q1 + q2*q2 +q3*q3 + q4*q4;
+
+    if (norm < 1e-6){
+      helper[0] = K(0,2)*K(1,3) - K(0,3)*K(1,2);
+      helper[1] = K(0,1)*K(1,3) - K(0,3)*K(1,1);
+      helper[2] = K(0,1)*K(1,2) - K(0,2)*K(1,1);
+      helper[3] = K(0,0)*K(1,3) - K(0,3)*K(1,0);
+      helper[4] = K(0,0)*K(1,2) - K(0,2)*K(1,0);
+      helper[5] = K(0,0)*K(1,1) - K(0,1)*K(1,0);
+
+      q1 =  K(3,1)*helper[0] - K(3,2)*helper[1] + K(3,3)*helper[2];
+      q2 = -K(3,0)*helper[0] + K(3,2)*helper[3] - K(3,3)*helper[4];
+      q3 =  K(3,0)*helper[1] - K(3,1)*helper[3] + K(3,3)*helper[5];
+      q4 = -K(3,0)*helper[2] + K(3,1)*helper[4] - K(3,2)*helper[5];
+      norm = q1*q1 + q2*q2 + q3*q3 + q4*q4;
+
+      if (norm < 1e-6){
+        q1 =  K(2,1)*helper[0] - K(2,2)*helper[1] + K(2,3)*helper[2];
+        q2 = -K(2,0)*helper[0] + K(2,2)*helper[3] - K(2,3)*helper[4];
+        q3 =  K(2,0)*helper[1] - K(2,1)*helper[3] + K(2,3)*helper[5];
+        q4 = -K(2,0)*helper[2] + K(2,1)*helper[4] - K(2,2)*helper[5];
+        norm = q1*q1 + q2*q2 + q3*q3 + q4*q4;
+        if (norm < 1e-6){
+          // this should not happen
+          rot = Eigen::Matrix<double,3,3>::Identity();
+          return;
+        }
+      }
+    }
+  }
+
+  norm = 1.0 / std::sqrt(norm);
+  q1 *= norm; q2 *= norm; q3 *= norm; q4 *= norm;
+  rot(0,0) = 1.0 - 2.0*(q3*q3 + q4*q4);
+  rot(0,1) =       2.0*(q2*q3 - q1*q4);
+  rot(0,2) =       2.0*(q2*q4 + q3*q1);
+  rot(1,0) =       2.0*(q2*q3 + q1*q4);
+  rot(1,1) = 1.0 - 2.0*(q2*q2 + q4*q4);
+  rot(1,2) =       2.0*(q3*q4 - q2*q1);
+  rot(2,0) =       2.0*(q2*q4 - q3*q1);
+  rot(2,1) =       2.0*(q3*q4 + q2*q1);
+  rot(2,2) = 1.0 - 2.0*(q2*q2 + q3*q3);
+}
+
+void Superpose(Eigen::Matrix<double,Eigen::Dynamic,3>& pos_one,
+               Eigen::Matrix<double,Eigen::Dynamic,3>& pos_two,
+               Eigen::Matrix<double,1,3>& avg_one,
+               Eigen::Matrix<double,1,3>& avg_two,
+               Real& rmsd,
+               Eigen::Matrix<double,3,3>& rotation){
+
+  if(pos_one.rows() != pos_two.rows()){
+    throw ost::Error("Cannot superpose positions of different size!");
+  }
+
+
+  avg_one = Eigen::Matrix<double,1,3>::Zero();
+  for (uint i = 0; i < pos_one.rows(); ++i) {
+    avg_one += pos_one.row(i);
+  }
+  avg_one = avg_one / pos_one.rows();
+
+  avg_two = Eigen::Matrix<double,1,3>::Zero();
+  for (uint i = 0; i < pos_two.rows(); ++i) {
+    avg_two += pos_two.row(i);
+  }
+  avg_two = avg_two / pos_two.rows();
+
+  // TheobaldRMSD only determines the rotational component of the superposition
+  // we need to shift the centers of the two point sets onto origin
+  for (uint i = 0; i < pos_one.rows(); ++i){
+    pos_one.row(i) -= avg_one;
+    pos_two.row(i) -= avg_two;
+  }
+
+  TheobaldRMSD(pos_one, pos_two, rmsd, rotation);
+}
+
+void SuperposeIterative(const Eigen::Matrix<double,Eigen::Dynamic,3>& pos_one,
+                        const Eigen::Matrix<double,Eigen::Dynamic,3>& pos_two,
+                        int max_iterations, 
+                        Real distance_thresh,
+                        std::vector<int>& indices,
+                        Eigen::Matrix<double,1,3>& avg_one,
+                        Eigen::Matrix<double,1,3>& avg_two,
+                        Eigen::Matrix<double,3,3>& rotation){
+
+  if(pos_one.rows() != pos_two.rows()){
+    throw ost::Error("Position data must be of consistent size!");
+  }
+
+  if(max_iterations <= 0){
+    throw ost::Error("max_iterations must be at least 1!");
+  }
+
+  int num_rows = pos_one.rows();
+
+  if(indices.empty()){
+    //there are no idx, so we use all positions for the initial superposition
+    indices.resize(num_rows);
+    for(int i = 0; i < num_rows; ++i){
+      indices[i] = i;
+    }
+  }
+  else{
+    //its not empty! let's quickly check whether there are at least 3 positions
+    //and whether the indices are all valid
+    if(indices.size() < 3){
+      throw ost::Error("Must have at least 3 start indices for iterative " 
+                       "Superposition!");
+    }
+    for(auto i: indices) {
+      if(i >= num_rows){
+        throw ost::Error("Invalid index in iterative Superposition!");
+      }
+    }
+  }
+
+  Real squared_dist_thresh = distance_thresh * distance_thresh;
+  std::vector<int> new_indices;
+  new_indices.reserve(num_rows);
+  std::vector<Real> squared_distances(num_rows);
+  Eigen::Matrix<double,1,3> temp_vec;
+  Real rmsd;
+
+  // keep track of the indices which give the superposition with maximum
+  // number of superposed positions. Thats not necessarily the last superposition
+  // when the algorithm converges
+  int max_n = -1;
+  std::vector<int> temp_indices = indices;
+  Eigen::Matrix<double,1,3> temp_avg_one = Eigen::Matrix<double,1,3>::Zero();
+  Eigen::Matrix<double,1,3> temp_avg_two = Eigen::Matrix<double,1,3>::Zero();
+  Eigen::Matrix<double,3,3> temp_rotation = Eigen::Matrix<double,3,3>::Identity();
+
+  for(int iteration = 0; iteration < max_iterations; ++iteration){
+
+    if(temp_indices.size() < 3) break; //the thing is not really superposable...
+
+    Eigen::Matrix<double,Eigen::Dynamic,3> temp_pos_one = 
+    Eigen::Matrix<double,Eigen::Dynamic,3>::Zero(temp_indices.size(), 3);
+    Eigen::Matrix<double,Eigen::Dynamic,3> temp_pos_two = 
+    Eigen::Matrix<double,Eigen::Dynamic,3>::Zero(temp_indices.size(), 3);
+
+    for(uint i = 0; i < temp_indices.size(); ++i){
+      temp_pos_one.row(i) = pos_one.row(temp_indices[i]);
+      temp_pos_two.row(i) = pos_two.row(temp_indices[i]);
+    }
+
+    Superpose(temp_pos_one, temp_pos_two, temp_avg_one, temp_avg_two, rmsd,
+              temp_rotation);
+
+    for(int i = 0; i < num_rows; ++i){
+      temp_vec = pos_one.row(i) - temp_avg_one;
+      temp_vec = (temp_rotation * temp_vec.transpose()).transpose() + temp_avg_two;
+      squared_distances[i] = (temp_vec - pos_two.row(i)).squaredNorm();
+    }
+
+    new_indices.clear();
+    for(int i = 0; i < num_rows; ++i){
+      if(squared_distances[i] < squared_dist_thresh){
+        new_indices.push_back(i);
+      }
+    }
+
+    if(static_cast<int>(new_indices.size()) > max_n) {
+      max_n = new_indices.size();
+      indices = new_indices;
+      avg_one = temp_avg_one;
+      avg_two = temp_avg_two;
+      rotation = temp_rotation;
+    }
+
+    if(new_indices == temp_indices) break; //nothing changes anymore
+
+    temp_indices = new_indices;
+  }
+}
+
+}
+
+namespace ost{ namespace mol{ namespace alg {
+
+
+void GDT(const geom::Vec3List& mdl_pos, const geom::Vec3List& ref_pos,
+         int window_size, int max_windows, Real distance_thresh,
+         int& n_superposed, geom::Mat4& transform) {
+
+  if(mdl_pos.size() != ref_pos.size()){
+    throw ost::Error("Position data must be of consistent size!");
+  }
+
+  int n_pos = mdl_pos.size();
+
+  if(window_size > n_pos) {
+    throw ost::Error("Window size in GDT algorithm is larger than positions");
+  }
+
+  Eigen::Matrix<double, Eigen::Dynamic, 3> eigen_mdl_pos = \
+  Eigen::Matrix<double, Eigen::Dynamic, 3>::Zero(n_pos, 3);
+  Eigen::Matrix<double, Eigen::Dynamic, 3> eigen_ref_pos = \
+  Eigen::Matrix<double, Eigen::Dynamic, 3>::Zero(n_pos, 3);
+  for(int i = 0; i < n_pos; ++i) {
+  	eigen_mdl_pos(i, 0) = mdl_pos[i][0];
+  	eigen_mdl_pos(i, 1) = mdl_pos[i][1];
+  	eigen_mdl_pos(i, 2) = mdl_pos[i][2];
+  	eigen_ref_pos(i, 0) = ref_pos[i][0];
+  	eigen_ref_pos(i, 1) = ref_pos[i][1];
+  	eigen_ref_pos(i, 2) = ref_pos[i][2];
+  }
+
+  std::vector<int> start_indices;
+  int last_window_idx = n_pos - window_size;
+  int n_windows = last_window_idx + 1;
+
+  if(n_windows <= max_windows) {
+    start_indices.resize(n_windows);
+    for(int i = 0; i < n_windows; ++i) {
+      start_indices[i] = i;
+    }
+  } else {
+    start_indices.resize(max_windows);
+    Real tmp = 0.0;
+    Real delta = static_cast<Real>(last_window_idx) / (max_windows - 1);
+    for(int i = 0; i < max_windows; ++i) {
+      start_indices[i] = std::round(tmp);
+      tmp += delta;
+    }
+  }
+  
+  size_t max_n = 0;
+  Eigen::Matrix<double,1,3> eigen_avg_mdl = Eigen::Matrix<double,1,3>::Zero();
+  Eigen::Matrix<double,1,3> eigen_avg_ref = Eigen::Matrix<double,1,3>::Zero();
+  Eigen::Matrix<double,3,3> eigen_rotation = Eigen::Matrix<double,3,3>::Identity();
+  Eigen::Matrix<double,1,3> eigen_avg_mdl_tmp = Eigen::Matrix<double,1,3>::Zero();
+  Eigen::Matrix<double,1,3> eigen_avg_ref_tmp = Eigen::Matrix<double,1,3>::Zero();
+  Eigen::Matrix<double,3,3> eigen_rotation_tmp = Eigen::Matrix<double,3,3>::Identity();
+  
+  for(int start_idx: start_indices) {
+
+    std::vector<int> indices(window_size);
+    for(int i = 0; i < window_size; ++i) {
+      indices[i] = start_idx + i;
+    }
+
+    SuperposeIterative(eigen_mdl_pos, eigen_ref_pos,
+                       10, distance_thresh, indices,
+                       eigen_avg_mdl_tmp,
+                       eigen_avg_ref_tmp,
+                       eigen_rotation_tmp);
+
+    if(indices.size() > max_n) {
+      max_n = indices.size();
+      eigen_avg_mdl = eigen_avg_mdl_tmp;
+      eigen_avg_ref = eigen_avg_ref_tmp;
+      eigen_rotation = eigen_rotation_tmp;
+    }
+  }
+
+  // construct transform
+
+  // there are three transformation to be applied to reach ref_pos from mdl_pos:
+  // 1: shift mdl_pos to center
+  // 2: apply estimated rotation
+  // 3: shift onto average of ref_pos
+  Eigen::Matrix<double,1,3> translation = eigen_rotation *
+                                          (-eigen_avg_mdl.transpose()) + 
+                                          eigen_avg_ref.transpose();
+
+
+  transform = geom::Mat4();
+  transform(0, 3) = translation(0, 0);
+  transform(1, 3) = translation(0, 1);
+  transform(2, 3) = translation(0, 2);
+  transform(0, 0) = eigen_rotation(0, 0);
+  transform(0, 1) = eigen_rotation(0, 1);
+  transform(0, 2) = eigen_rotation(0, 2);
+  transform(1, 0) = eigen_rotation(1, 0);
+  transform(1, 1) = eigen_rotation(1, 1);
+  transform(1, 2) = eigen_rotation(1, 2);
+  transform(2, 0) = eigen_rotation(2, 0);
+  transform(2, 1) = eigen_rotation(2, 1);
+  transform(2, 2) = eigen_rotation(2, 2);
+
+  n_superposed = max_n;
+}
+
+}}} // ns
diff --git a/modules/mol/alg/src/gdt.hh b/modules/mol/alg/src/gdt.hh
new file mode 100644
index 000000000..4333470a3
--- /dev/null
+++ b/modules/mol/alg/src/gdt.hh
@@ -0,0 +1,34 @@
+//------------------------------------------------------------------------------
+// This file is part of the OpenStructure project <www.openstructure.org>
+//
+// Copyright (C) 2008-2024 by the OpenStructure authors
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation; either version 3.0 of the License, or (at your option)
+// any later version.
+// This library is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+//------------------------------------------------------------------------------
+
+#ifndef OST_GDT_HH
+#define OST_GDT_HH
+
+#include <ost/base.hh>
+#include <ost/geom/geom.hh>
+
+namespace ost { namespace mol { namespace alg {
+
+void GDT(const geom::Vec3List& mdl_pos, const geom::Vec3List& ref_pos,
+         int window_size, int max_windows, Real distance_thresh,
+         int& n_superposed, geom::Mat4& transform);
+
+}}} // ns
+
+#endif
-- 
GitLab