From 4ac2e408b09e7401af0d5395cfa3b2012b1cbea2 Mon Sep 17 00:00:00 2001 From: Gabriel Studer <gabriel.studer@unibas.ch> Date: Wed, 10 Apr 2024 13:15:31 +0200 Subject: [PATCH] refactor rigid scoring - introduce GDT implementation similar to LGA The Scorer object provides two mappings. The default mapping which is QS-score based and the rigid_mapping which is RMSD based. The rigid_mapping is now the basis for global superposition based scores such as RMSD and GDT. And here's the thing, OpenStructure got a brand new GDT implementation! Scanning of best possible superpositions is a bit simpler than in LGA but the results are similar. Benchmarking against CASP15 TS models returns GDT_TS score where 99.2% are within 3 GDT points with the LGA results and the maximum observed difference is 7.39. There is a tendency for slightly lower scores in OpenStructure meaning LGA sometimes found better superpositions. BUT: Oligo/RNA support comes for free in the Scorer object! --- modules/mol/alg/doc/molalg.rst | 58 ++++ modules/mol/alg/pymod/CMakeLists.txt | 1 + modules/mol/alg/pymod/export_gdt.cc | 49 ++++ modules/mol/alg/pymod/scoring.py | 251 ++++++++++++++-- modules/mol/alg/pymod/wrap_mol_alg.cc | 2 + modules/mol/alg/src/CMakeLists.txt | 2 + modules/mol/alg/src/gdt.cc | 398 ++++++++++++++++++++++++++ modules/mol/alg/src/gdt.hh | 34 +++ 8 files changed, 768 insertions(+), 27 deletions(-) create mode 100644 modules/mol/alg/pymod/export_gdt.cc create mode 100644 modules/mol/alg/src/gdt.cc create mode 100644 modules/mol/alg/src/gdt.hh diff --git a/modules/mol/alg/doc/molalg.rst b/modules/mol/alg/doc/molalg.rst index 6cb57ea40..79efa947e 100644 --- a/modules/mol/alg/doc/molalg.rst +++ b/modules/mol/alg/doc/molalg.rst @@ -173,6 +173,64 @@ Local Distance Test scores (lDDT, DRMSD) .. currentmodule:: ost.mol.alg +GDT - Global Distance Test +-------------------------- + + Implements the GDT score, i.e. identifies the largest number of positions + that can be superposed within a given distance threshold. The final + GDT score is then the returned number divided by the total number of + reference positioons. The algorithm is similar to what is described for + the LGA tool but simpler. Therefore, the fractions reported by OpenStructure + tend to be systematically lower. For benchmarking we computed the full GDT_TS, + i.e. average GDT for distance thresholds [1, 2, 4, 8], on all CASP15 TS + models. 96.5% of differences to the LGA results from the predictioncenter are + within 2 GDT points and 99.2% are within 3 GDT points. The max difference + is 7.39 GDT points. + + The algorithm expects two position lists of same length and applies a sliding + window with specified length to define a subset of position pairs as starting + point for iterative superposition. Each iterative superposition applies the + following steps: + + - Compute minimal RMSD superposition on subset of position pairs + - Apply superposition on all model positions + - Compute pairwise distances of all model positions and reference positions + - Define new subset of position pairs: pairs within distance threshold + - Stop if subset doesn't change anymore + + The subset in any of the iterations which is largest is stored. + + This is done for each sliding window position and the largest subset ever + observed is reported. To avoid long runtimes for large problem sizes, the + sliding window is not applied on each possible position but is capped. + If the number of positions is larger than this threshold, the sliding + window is only applied on N equidistant locations. + + .. function:: GDT(mdl_pos, ref_pos, window_size, max_windows, distance_thresh) + + Returns number of positions that can be superposed within + *distance_thresh* and the respective transformation matrix. + + :param mdl_pos: Positions representing the model, typically alpha-carbon + positions + :param ref_pos: Positions representing the reference, typically + alpha-carbon positions + :param window_size: Size of the sliding window that is used to serve as + starting point for iterative superposition. + The described benchmark was done with a value of 7. + :param max_windows: Cap for number of starting points. The described + benchmark was done with a value of 1000. + :param distance_thresh: Distance threshold for GDT algorithm + :type mdl_pos: :class:`ost.geom.Vec3List` + :type ref_pos: :class:`ost.geom.Vec3List` + :type window_size: :class:`int` + :type max_windows: :class:`int` + :type distance_thresh: :class:`float` + :returns: :class:`tuple` with first element being the number of + superposable positions (:class:`int`) and the second element the + transformation matrix (:class:`ost.geom.Mat4`) + + .. _steric-clashes: Steric Clashes diff --git a/modules/mol/alg/pymod/CMakeLists.txt b/modules/mol/alg/pymod/CMakeLists.txt index 146774722..8506d7be3 100644 --- a/modules/mol/alg/pymod/CMakeLists.txt +++ b/modules/mol/alg/pymod/CMakeLists.txt @@ -12,6 +12,7 @@ set(OST_MOL_ALG_PYMOD_SOURCES export_membrane.cc export_entity_to_density.cc export_biounit.cc + export_gdt.cc ) set(OST_MOL_ALG_PYMOD_MODULES diff --git a/modules/mol/alg/pymod/export_gdt.cc b/modules/mol/alg/pymod/export_gdt.cc new file mode 100644 index 000000000..971b80975 --- /dev/null +++ b/modules/mol/alg/pymod/export_gdt.cc @@ -0,0 +1,49 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2024 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + + +#include <boost/python.hpp> + +#include <ost/mol/alg/gdt.hh> + +using namespace boost::python; + +namespace{ + +boost::python::tuple WrapGDT(const geom::Vec3List& mdl_pos, + const geom::Vec3List& ref_pos, + int window_size, int max_windows, + Real distance_thresh) { + int n_superposed; + geom::Mat4 transform; + ost::mol::alg::GDT(mdl_pos, ref_pos, window_size, max_windows, + distance_thresh, n_superposed, transform); + return boost::python::make_tuple(n_superposed, transform); +} + +} // ns + +void export_GDT() { + + def("GDT", &WrapGDT, (arg("mdl_pos"), + arg("ref_pos"), + arg("window_size"), + arg("max_windows"), + arg("distance_thresh"))); +} diff --git a/modules/mol/alg/pymod/scoring.py b/modules/mol/alg/pymod/scoring.py index 53a596f52..066af3ad3 100644 --- a/modules/mol/alg/pymod/scoring.py +++ b/modules/mol/alg/pymod/scoring.py @@ -14,6 +14,7 @@ from ost.mol.alg import dockq from ost.mol.alg.lddt import lDDTScorer from ost.mol.alg.qsscore import QSScorer from ost.mol.alg.contact_score import ContactScorer +from ost.mol.alg import GDT from ost.mol.alg import Molck, MolckSettings from ost import bindings from ost.bindings import cadscore @@ -268,6 +269,7 @@ class Scorer: self._target_bad_angles = None self._chain_mapper = None self._mapping = None + self._rigid_mapping = None self._model_interface_residues = None self._target_interface_residues = None self._aln = None @@ -330,6 +332,18 @@ class Scorer: self._transformed_mapped_model_pos = None self._n_target_not_mapped = None self._transform = None + + self._rigid_mapped_target_pos = None + self._rigid_mapped_model_pos = None + self._rigid_transformed_mapped_model_pos = None + self._rigid_n_target_not_mapped = None + self._rigid_transform = None + + self._gdt_05 = None + self._gdt_1 = None + self._gdt_2 = None + self._gdt_4 = None + self._gdt_8 = None self._gdtts = None self._gdtha = None self._rmsd = None @@ -526,6 +540,8 @@ class Scorer: def mapping(self): """ Full chain mapping result for :attr:`target`/:attr:`model` + Computed with :func:`ost.mol.alg.ChainMapper.GetMapping` + :type: :class:`ost.mol.alg.chain_mapping.MappingResult` """ if self._mapping is None: @@ -535,6 +551,20 @@ class Scorer: n_max_naive = self.n_max_naive) return self._mapping + @property + def rigid_mapping(self): + """ Full chain mapping result for :attr:`target`/:attr:`model` + + Computed with :func:`ost.mol.alg.ChainMapper.GetRMSDMapping` + + :type: :class:`ost.mol.alg.chain_mapping.MappingResult` + """ + if self._rigid_mapping is None: + LogScript("Computing rigid chain mapping") + self._rigid_mapping = \ + self.chain_mapper.GetRMSDMapping(self.model) + return self._rigid_mapping + @property def model_interface_residues(self): """ Interface residues in :attr:`~model` @@ -1215,60 +1245,198 @@ class Scorer: return self._transform @property - def gdtts(self): - """ GDT with thresholds: 8.0A, 4.0A, 2.0A and 1.0A + def rigid_mapped_target_pos(self): + """ Mapped representative positions in target + + Thats CA positions for peptide residues and C3' positions for + nucleotides. Has same length as :attr:`~rigid_mapped_model_pos` and mapping + is based on :attr:`~rigid_mapping`. + + :type: :class:`ost.geom.Vec3List` + """ + if self._rigid_mapped_target_pos is None: + self._extract_rigid_mapped_pos() + return self._rigid_mapped_target_pos + + @property + def rigid_mapped_model_pos(self): + """ Mapped representative positions in model + + Thats CA positions for peptide residues and C3' positions for + nucleotides. Has same length as :attr:`~mapped_target_pos` and mapping + is based on :attr:`~rigid_mapping`. + + :type: :class:`ost.geom.Vec3List` + """ + if self._rigid_mapped_model_pos is None: + self._extract_rigid_mapped_pos() + return self._rigid_mapped_model_pos + + @property + def rigid_transformed_mapped_model_pos(self): + """ :attr:`~rigid_mapped_model_pos` with :attr:`~rigid_transform` applied + + :type: :class:`ost.geom.Vec3List` + """ + if self._rigid_transformed_mapped_model_pos is None: + self._rigid_transformed_mapped_model_pos = \ + geom.Vec3List(self.rigid_mapped_model_pos) + self._rigid_transformed_mapped_model_pos.ApplyTransform(self.rigid_transform) + return self._rigid_transformed_mapped_model_pos + + @property + def rigid_n_target_not_mapped(self): + """ Number of target residues which have no rigid mapping to model + + :type: :class:`int` + """ + if self._rigid_n_target_not_mapped is None: + self._extract_rigid_mapped_pos() + return self._rigid_n_target_not_mapped + + @property + def rigid_transform(self): + """ Transform: :attr:`~rigid_mapped_model_pos` onto :attr:`~rigid_mapped_target_pos` + + Computed using Kabsch minimal rmsd algorithm + + :type: :class:`ost.geom.Mat4` + """ + if self._rigid_transform is None: + try: + res = mol.alg.SuperposeSVD(self.rigid_mapped_model_pos, + self.rigid_mapped_target_pos) + self._rigid_transform = res.transformation + except: + self._rigid_transform = geom.Mat4() + return self._rigid_transform + + @property + def gdt_05(self): + """ Fraction CA (C3' for nucleotides) that can be superposed within 0.5A + + Uses :attr:`~rigid_mapped_model_pos` and :attr:`~rigid_mapped_target_pos`. + Similar iterative algorithm as LGA tool + + :type: :class:`float` + """ + if self._gdt_05 is None: + n, m = GDT(self.rigid_mapped_model_pos, self.rigid_mapped_target_pos, 7, 1000, 0.5) + n_full = len(self.rigid_mapped_target_pos) + self.rigid_n_target_not_mapped + if n_full > 0: + self._gdt_05 = float(n) / n_full + else: + self._gdt_05 = 0.0 + return self._gdt_05 + + @property + def gdt_1(self): + """ Fraction CA (C3' for nucleotides) that can be superposed within 1.0A - Computed on :attr:`~transformed_mapped_model_pos` and - :attr:`mapped_target_pos` + Uses :attr:`~rigid_mapped_model_pos` and :attr:`~rigid_mapped_target_pos`. + Similar iterative algorithm as LGA tool + + :type: :class:`float` + """ + if self._gdt_1 is None: + n, m = GDT(self.rigid_mapped_model_pos, self.rigid_mapped_target_pos, 7, 1000, 1.0) + n_full = len(self.rigid_mapped_target_pos) + self.rigid_n_target_not_mapped + if n_full > 0: + self._gdt_1 = float(n) / n_full + else: + self._gdt_1 = 0.0 + return self._gdt_1 + + @property + def gdt_2(self): + """ Fraction CA (C3' for nucleotides) that can be superposed within 2.0A + + Uses :attr:`~rigid_mapped_model_pos` and :attr:`~rigid_mapped_target_pos`. + Similar iterative algorithm as LGA tool + + + :type: :class:`float` + """ + if self._gdt_2 is None: + n, m = GDT(self.rigid_mapped_model_pos, self.rigid_mapped_target_pos, 7, 1000, 2.0) + n_full = len(self.rigid_mapped_target_pos) + self.rigid_n_target_not_mapped + if n_full > 0: + self._gdt_2 = float(n) / n_full + else: + self._gdt_2 = 0.0 + return self._gdt_2 + + @property + def gdt_4(self): + """ Fraction CA (C3' for nucleotides) that can be superposed within 4.0A + + Uses :attr:`~rigid_mapped_model_pos` and :attr:`~rigid_mapped_target_pos`. + Similar iterative algorithm as LGA tool + + :type: :class:`float` + """ + if self._gdt_4 is None: + n, m = GDT(self.rigid_mapped_model_pos, self.rigid_mapped_target_pos, 7, 1000, 4.0) + n_full = len(self.rigid_mapped_target_pos) + self.rigid_n_target_not_mapped + if n_full > 0: + self._gdt_4 = float(n) / n_full + else: + self._gdt_4 = 0.0 + return self._gdt_4 + + @property + def gdt_8(self): + """ Fraction CA (C3' for nucleotides) that can be superposed within 8.0A + + Similar iterative algorithm as LGA tool + + :type: :class:`float` + """ + if self._gdt_8 is None: + n, m = GDT(self.rigid_mapped_model_pos, self.rigid_mapped_target_pos, 7, 1000, 8.0) + n_full = len(self.rigid_mapped_target_pos) + self.rigid_n_target_not_mapped + if n_full > 0: + self._gdt_8 = float(n) / n_full + else: + self._gdt_8 = 0.0 + return self._gdt_8 + + + @property + def gdtts(self): + """ avg GDT with thresholds: 8.0A, 4.0A, 2.0A and 1.0A :type: :class:`float` """ if self._gdtts is None: LogScript("Computing GDT-TS score") - n = \ - self.mapped_target_pos.GetGDTTS(self.transformed_mapped_model_pos, - norm=False) - n_full = 4*len(self.mapped_target_pos) + 4*self.n_target_not_mapped - if n_full > 0: - self._gdtts = float(n) / n_full - else: - self._gdtts = 0.0 + self._gdtts = (self.gdt_1 + self.gdt_2 + self.gdt_4 + self.gdt_8) / 4 return self._gdtts @property def gdtha(self): - """ GDT with thresholds: 4.0A, 2.0A, 1.0A and 0.5A - - Computed on :attr:`~transformed_mapped_model_pos` and - :attr:`mapped_target_pos` + """ avg GDT with thresholds: 4.0A, 2.0A, 1.0A and 0.5A :type: :class:`float` """ if self._gdtha is None: LogScript("Computing GDT-HA score") - n = \ - self.mapped_target_pos.GetGDTHA(self.transformed_mapped_model_pos, - norm=False) - n_full = 4*len(self.mapped_target_pos) + 4*self.n_target_not_mapped - if n_full > 0: - self._gdtha = float(n) / n_full - else: - self._gdtha = 0.0 + self._gdtha = (self.gdt_05 + self.gdt_1 + self.gdt_2 + self.gdt_4) / 4 return self._gdtha @property def rmsd(self): """ RMSD - Computed on :attr:`~transformed_mapped_model_pos` and - :attr:`mapped_target_pos` + Computed on :attr:`~rigid_transformed_mapped_model_pos` and + :attr:`rigid_mapped_target_pos` :type: :class:`float` """ if self._rmsd is None: LogScript("Computing RMSD") self._rmsd = \ - self.mapped_target_pos.GetRMSD(self.transformed_mapped_model_pos) + self.rigid_mapped_target_pos.GetRMSD(self.rigid_transformed_mapped_model_pos) return self._rmsd @property @@ -1714,6 +1882,35 @@ class Scorer: if ch.GetName() not in processed_trg_chains: self._n_target_not_mapped += len(ch.residues) + + def _extract_rigid_mapped_pos(self): + self._rigid_mapped_target_pos = geom.Vec3List() + self._rigid_mapped_model_pos = geom.Vec3List() + self._rigid_n_target_not_mapped = 0 + processed_trg_chains = set() + for trg_ch, mdl_ch in self.rigid_mapping.GetFlatMapping().items(): + processed_trg_chains.add(trg_ch) + aln = self.rigid_mapping.alns[(trg_ch, mdl_ch)] + for col in aln: + if col[0] != '-' and col[1] != '-': + trg_res = col.GetResidue(0) + mdl_res = col.GetResidue(1) + trg_at = trg_res.FindAtom("CA") + mdl_at = mdl_res.FindAtom("CA") + if not trg_at.IsValid(): + trg_at = trg_res.FindAtom("C3'") + if not mdl_at.IsValid(): + mdl_at = mdl_res.FindAtom("C3'") + self._rigid_mapped_target_pos.append(trg_at.GetPos()) + self._rigid_mapped_model_pos.append(mdl_at.GetPos()) + elif col[0] != '-': + self._rigid_n_target_not_mapped += 1 + # count number of trg residues from non-mapped chains + for ch in self.rigid_mapping.target.chains: + if ch.GetName() not in processed_trg_chains: + self._rigid_n_target_not_mapped += len(ch.residues) + + def _compute_cad_score(self): if not self.resnum_alignments: raise RuntimeError("CAD score computations rely on residue numbers " diff --git a/modules/mol/alg/pymod/wrap_mol_alg.cc b/modules/mol/alg/pymod/wrap_mol_alg.cc index 295bea2bb..09c008392 100644 --- a/modules/mol/alg/pymod/wrap_mol_alg.cc +++ b/modules/mol/alg/pymod/wrap_mol_alg.cc @@ -51,6 +51,7 @@ void export_sec_struct_segments(); void export_find_membrane(); void export_entity_to_density(); void export_biounit(); +void export_GDT(); namespace { @@ -323,6 +324,7 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) export_find_membrane(); export_entity_to_density(); export_biounit(); + export_GDT(); def("LocalDistDiffTest", lddt_a, (arg("sequence_separation")=0,arg("local_lddt_property_string")="")); def("LocalDistDiffTest", lddt_c, (arg("local_lddt_property_string")="")); diff --git a/modules/mol/alg/src/CMakeLists.txt b/modules/mol/alg/src/CMakeLists.txt index 49f58f50e..20d016175 100644 --- a/modules/mol/alg/src/CMakeLists.txt +++ b/modules/mol/alg/src/CMakeLists.txt @@ -25,6 +25,7 @@ set(OST_MOL_ALG_HEADERS find_membrane.hh entity_to_density.hh biounit.hh + gdt.hh ) set(OST_MOL_ALG_SOURCES @@ -53,6 +54,7 @@ set(OST_MOL_ALG_SOURCES find_membrane.cc entity_to_density.cc biounit.cc + gdt.cc ) set(MOL_ALG_DEPS ost_mol ost_seq ost_img ost_img_alg ost_seq_alg ost_conop) diff --git a/modules/mol/alg/src/gdt.cc b/modules/mol/alg/src/gdt.cc new file mode 100644 index 000000000..93b6e8b17 --- /dev/null +++ b/modules/mol/alg/src/gdt.cc @@ -0,0 +1,398 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2024 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +#include <ost/mol/alg/gdt.hh> +#include <ost/message.hh> +#include <Eigen/Dense> + +namespace { + +// get RMSD and rotation matrix using the Theobald method +// Both position matrices are expected to have the same size +// and to have their average position at the origin +void TheobaldRMSD(const Eigen::Matrix<double,Eigen::Dynamic,3>& pos_one, + const Eigen::Matrix<double,Eigen::Dynamic,3>& pos_two, + Real& rmsd, Eigen::Matrix<double,3,3>& rot){ + + if(pos_one.rows() < 3){ + throw ost::Error("Observed superposition with < 3 positions to " + "superpose!"); + } + + Eigen::Matrix<double,3,3> M = pos_one.transpose() * pos_two; + + // using floats for the squared norm is fine + double GA = pos_one.squaredNorm(); + double GB = pos_two.squaredNorm(); + + Eigen::Matrix<double,4,4> K; + K(0,0) = M(0,0) + M(1,1) + M(2,2); + K(0,1) = M(1,2) - M(2,1); + K(0,2) = M(2,0) - M(0,2); + K(0,3) = M(0,1) - M(1,0); + + K(1,0) = K(0,1); + K(1,1) = M(0,0) - M(1,1) - M(2,2); + K(1,2) = M(0,1) + M(1,0); + K(1,3) = M(0,2) + M(2,0); + + K(2,0) = K(0,2); + K(2,1) = K(1,2); + K(2,2) = -M(0,0) + M(1,1) - M(2,2); + K(2,3) = M(1,2) + M(2,1); + + K(3,0) = K(0,3); + K(3,1) = K(1,3); + K(3,2) = K(2,3); + K(3,3) = -M(0,0) - M(1,1) + M(2,2); + + double C0 = K.determinant(); + double C1 = -8.0*M.determinant(); + double C2 = -2.0*M.squaredNorm(); + double lambda = 0.5 * (GA + GB); + double a, b, d, lambda_2; + for(int i = 0; i < 50; ++i){ + lambda_2 = lambda * lambda; + b = (lambda_2 + C2) * lambda; + a = b + C1; + d = (a*lambda + C0) / (2.0*lambda_2*lambda + b + a); + lambda -= d; + if(std::abs(d) < 1e-6){ + break; + } + } + + double msd = (GA + GB - 2.0 * lambda) / pos_one.rows(); + if(msd < 1e-4){ + // The algorithm never really goes to zero... if msd is super small we just + // assign zero. 1e-4 corresponds to an rmsd of 0.01 + rmsd = 0.0; + } + else{ + rmsd = std::sqrt(msd); + } + + K -= lambda*Eigen::Matrix<double,4,4>::Identity(); + + double helper[6]; + helper[0] = K(2,2)*K(3,3) - K(3,2)*K(2,3); + helper[1] = K(2,1)*K(3,3) - K(3,1)*K(2,3); + helper[2] = K(2,1)*K(3,2) - K(3,1)*K(2,2); + helper[3] = K(2,0)*K(3,3) - K(3,0)*K(2,3); + helper[4] = K(2,0)*K(3,2) - K(3,0)*K(2,2); + helper[5] = K(2,0)*K(3,1) - K(3,0)*K(2,1); + + double q1 = K(1,1)*helper[0] - K(1,2)*helper[1] + K(1,3)*helper[2]; + double q2 = -K(1,0)*helper[0] + K(1,2)*helper[3] - K(1,3)*helper[4]; + double q3 = K(1,0)*helper[1] - K(1,1)*helper[3] + K(1,3)*helper[5]; + double q4 = -K(1,0)*helper[2] + K(1,1)*helper[4] - K(1,2)*helper[5]; + double norm = q1*q1 + q2*q2 + q3*q3 + q4*q4; + + if(norm < 1e-6){ + q1 = K(0,1)*helper[0] - K(0,2)*helper[1] + K(0,3)*helper[2]; + q2 = -K(0,0)*helper[0] + K(0,2)*helper[3] - K(0,3)*helper[4]; + q3 = K(0,0)*helper[1] - K(0,1)*helper[3] + K(0,3)*helper[5]; + q4 = -K(0,0)*helper[2] + K(0,1)*helper[4] - K(0,2)*helper[5]; + norm = q1*q1 + q2*q2 +q3*q3 + q4*q4; + + if (norm < 1e-6){ + helper[0] = K(0,2)*K(1,3) - K(0,3)*K(1,2); + helper[1] = K(0,1)*K(1,3) - K(0,3)*K(1,1); + helper[2] = K(0,1)*K(1,2) - K(0,2)*K(1,1); + helper[3] = K(0,0)*K(1,3) - K(0,3)*K(1,0); + helper[4] = K(0,0)*K(1,2) - K(0,2)*K(1,0); + helper[5] = K(0,0)*K(1,1) - K(0,1)*K(1,0); + + q1 = K(3,1)*helper[0] - K(3,2)*helper[1] + K(3,3)*helper[2]; + q2 = -K(3,0)*helper[0] + K(3,2)*helper[3] - K(3,3)*helper[4]; + q3 = K(3,0)*helper[1] - K(3,1)*helper[3] + K(3,3)*helper[5]; + q4 = -K(3,0)*helper[2] + K(3,1)*helper[4] - K(3,2)*helper[5]; + norm = q1*q1 + q2*q2 + q3*q3 + q4*q4; + + if (norm < 1e-6){ + q1 = K(2,1)*helper[0] - K(2,2)*helper[1] + K(2,3)*helper[2]; + q2 = -K(2,0)*helper[0] + K(2,2)*helper[3] - K(2,3)*helper[4]; + q3 = K(2,0)*helper[1] - K(2,1)*helper[3] + K(2,3)*helper[5]; + q4 = -K(2,0)*helper[2] + K(2,1)*helper[4] - K(2,2)*helper[5]; + norm = q1*q1 + q2*q2 + q3*q3 + q4*q4; + if (norm < 1e-6){ + // this should not happen + rot = Eigen::Matrix<double,3,3>::Identity(); + return; + } + } + } + } + + norm = 1.0 / std::sqrt(norm); + q1 *= norm; q2 *= norm; q3 *= norm; q4 *= norm; + rot(0,0) = 1.0 - 2.0*(q3*q3 + q4*q4); + rot(0,1) = 2.0*(q2*q3 - q1*q4); + rot(0,2) = 2.0*(q2*q4 + q3*q1); + rot(1,0) = 2.0*(q2*q3 + q1*q4); + rot(1,1) = 1.0 - 2.0*(q2*q2 + q4*q4); + rot(1,2) = 2.0*(q3*q4 - q2*q1); + rot(2,0) = 2.0*(q2*q4 - q3*q1); + rot(2,1) = 2.0*(q3*q4 + q2*q1); + rot(2,2) = 1.0 - 2.0*(q2*q2 + q3*q3); +} + +void Superpose(Eigen::Matrix<double,Eigen::Dynamic,3>& pos_one, + Eigen::Matrix<double,Eigen::Dynamic,3>& pos_two, + Eigen::Matrix<double,1,3>& avg_one, + Eigen::Matrix<double,1,3>& avg_two, + Real& rmsd, + Eigen::Matrix<double,3,3>& rotation){ + + if(pos_one.rows() != pos_two.rows()){ + throw ost::Error("Cannot superpose positions of different size!"); + } + + + avg_one = Eigen::Matrix<double,1,3>::Zero(); + for (uint i = 0; i < pos_one.rows(); ++i) { + avg_one += pos_one.row(i); + } + avg_one = avg_one / pos_one.rows(); + + avg_two = Eigen::Matrix<double,1,3>::Zero(); + for (uint i = 0; i < pos_two.rows(); ++i) { + avg_two += pos_two.row(i); + } + avg_two = avg_two / pos_two.rows(); + + // TheobaldRMSD only determines the rotational component of the superposition + // we need to shift the centers of the two point sets onto origin + for (uint i = 0; i < pos_one.rows(); ++i){ + pos_one.row(i) -= avg_one; + pos_two.row(i) -= avg_two; + } + + TheobaldRMSD(pos_one, pos_two, rmsd, rotation); +} + +void SuperposeIterative(const Eigen::Matrix<double,Eigen::Dynamic,3>& pos_one, + const Eigen::Matrix<double,Eigen::Dynamic,3>& pos_two, + int max_iterations, + Real distance_thresh, + std::vector<int>& indices, + Eigen::Matrix<double,1,3>& avg_one, + Eigen::Matrix<double,1,3>& avg_two, + Eigen::Matrix<double,3,3>& rotation){ + + if(pos_one.rows() != pos_two.rows()){ + throw ost::Error("Position data must be of consistent size!"); + } + + if(max_iterations <= 0){ + throw ost::Error("max_iterations must be at least 1!"); + } + + int num_rows = pos_one.rows(); + + if(indices.empty()){ + //there are no idx, so we use all positions for the initial superposition + indices.resize(num_rows); + for(int i = 0; i < num_rows; ++i){ + indices[i] = i; + } + } + else{ + //its not empty! let's quickly check whether there are at least 3 positions + //and whether the indices are all valid + if(indices.size() < 3){ + throw ost::Error("Must have at least 3 start indices for iterative " + "Superposition!"); + } + for(auto i: indices) { + if(i >= num_rows){ + throw ost::Error("Invalid index in iterative Superposition!"); + } + } + } + + Real squared_dist_thresh = distance_thresh * distance_thresh; + std::vector<int> new_indices; + new_indices.reserve(num_rows); + std::vector<Real> squared_distances(num_rows); + Eigen::Matrix<double,1,3> temp_vec; + Real rmsd; + + // keep track of the indices which give the superposition with maximum + // number of superposed positions. Thats not necessarily the last superposition + // when the algorithm converges + int max_n = -1; + std::vector<int> temp_indices = indices; + Eigen::Matrix<double,1,3> temp_avg_one = Eigen::Matrix<double,1,3>::Zero(); + Eigen::Matrix<double,1,3> temp_avg_two = Eigen::Matrix<double,1,3>::Zero(); + Eigen::Matrix<double,3,3> temp_rotation = Eigen::Matrix<double,3,3>::Identity(); + + for(int iteration = 0; iteration < max_iterations; ++iteration){ + + if(temp_indices.size() < 3) break; //the thing is not really superposable... + + Eigen::Matrix<double,Eigen::Dynamic,3> temp_pos_one = + Eigen::Matrix<double,Eigen::Dynamic,3>::Zero(temp_indices.size(), 3); + Eigen::Matrix<double,Eigen::Dynamic,3> temp_pos_two = + Eigen::Matrix<double,Eigen::Dynamic,3>::Zero(temp_indices.size(), 3); + + for(uint i = 0; i < temp_indices.size(); ++i){ + temp_pos_one.row(i) = pos_one.row(temp_indices[i]); + temp_pos_two.row(i) = pos_two.row(temp_indices[i]); + } + + Superpose(temp_pos_one, temp_pos_two, temp_avg_one, temp_avg_two, rmsd, + temp_rotation); + + for(int i = 0; i < num_rows; ++i){ + temp_vec = pos_one.row(i) - temp_avg_one; + temp_vec = (temp_rotation * temp_vec.transpose()).transpose() + temp_avg_two; + squared_distances[i] = (temp_vec - pos_two.row(i)).squaredNorm(); + } + + new_indices.clear(); + for(int i = 0; i < num_rows; ++i){ + if(squared_distances[i] < squared_dist_thresh){ + new_indices.push_back(i); + } + } + + if(static_cast<int>(new_indices.size()) > max_n) { + max_n = new_indices.size(); + indices = new_indices; + avg_one = temp_avg_one; + avg_two = temp_avg_two; + rotation = temp_rotation; + } + + if(new_indices == temp_indices) break; //nothing changes anymore + + temp_indices = new_indices; + } +} + +} + +namespace ost{ namespace mol{ namespace alg { + + +void GDT(const geom::Vec3List& mdl_pos, const geom::Vec3List& ref_pos, + int window_size, int max_windows, Real distance_thresh, + int& n_superposed, geom::Mat4& transform) { + + if(mdl_pos.size() != ref_pos.size()){ + throw ost::Error("Position data must be of consistent size!"); + } + + int n_pos = mdl_pos.size(); + + if(window_size > n_pos) { + throw ost::Error("Window size in GDT algorithm is larger than positions"); + } + + Eigen::Matrix<double, Eigen::Dynamic, 3> eigen_mdl_pos = \ + Eigen::Matrix<double, Eigen::Dynamic, 3>::Zero(n_pos, 3); + Eigen::Matrix<double, Eigen::Dynamic, 3> eigen_ref_pos = \ + Eigen::Matrix<double, Eigen::Dynamic, 3>::Zero(n_pos, 3); + for(int i = 0; i < n_pos; ++i) { + eigen_mdl_pos(i, 0) = mdl_pos[i][0]; + eigen_mdl_pos(i, 1) = mdl_pos[i][1]; + eigen_mdl_pos(i, 2) = mdl_pos[i][2]; + eigen_ref_pos(i, 0) = ref_pos[i][0]; + eigen_ref_pos(i, 1) = ref_pos[i][1]; + eigen_ref_pos(i, 2) = ref_pos[i][2]; + } + + std::vector<int> start_indices; + int last_window_idx = n_pos - window_size; + int n_windows = last_window_idx + 1; + + if(n_windows <= max_windows) { + start_indices.resize(n_windows); + for(int i = 0; i < n_windows; ++i) { + start_indices[i] = i; + } + } else { + start_indices.resize(max_windows); + Real tmp = 0.0; + Real delta = static_cast<Real>(last_window_idx) / (max_windows - 1); + for(int i = 0; i < max_windows; ++i) { + start_indices[i] = std::round(tmp); + tmp += delta; + } + } + + size_t max_n = 0; + Eigen::Matrix<double,1,3> eigen_avg_mdl = Eigen::Matrix<double,1,3>::Zero(); + Eigen::Matrix<double,1,3> eigen_avg_ref = Eigen::Matrix<double,1,3>::Zero(); + Eigen::Matrix<double,3,3> eigen_rotation = Eigen::Matrix<double,3,3>::Identity(); + Eigen::Matrix<double,1,3> eigen_avg_mdl_tmp = Eigen::Matrix<double,1,3>::Zero(); + Eigen::Matrix<double,1,3> eigen_avg_ref_tmp = Eigen::Matrix<double,1,3>::Zero(); + Eigen::Matrix<double,3,3> eigen_rotation_tmp = Eigen::Matrix<double,3,3>::Identity(); + + for(int start_idx: start_indices) { + + std::vector<int> indices(window_size); + for(int i = 0; i < window_size; ++i) { + indices[i] = start_idx + i; + } + + SuperposeIterative(eigen_mdl_pos, eigen_ref_pos, + 10, distance_thresh, indices, + eigen_avg_mdl_tmp, + eigen_avg_ref_tmp, + eigen_rotation_tmp); + + if(indices.size() > max_n) { + max_n = indices.size(); + eigen_avg_mdl = eigen_avg_mdl_tmp; + eigen_avg_ref = eigen_avg_ref_tmp; + eigen_rotation = eigen_rotation_tmp; + } + } + + // construct transform + + // there are three transformation to be applied to reach ref_pos from mdl_pos: + // 1: shift mdl_pos to center + // 2: apply estimated rotation + // 3: shift onto average of ref_pos + Eigen::Matrix<double,1,3> translation = eigen_rotation * + (-eigen_avg_mdl.transpose()) + + eigen_avg_ref.transpose(); + + + transform = geom::Mat4(); + transform(0, 3) = translation(0, 0); + transform(1, 3) = translation(0, 1); + transform(2, 3) = translation(0, 2); + transform(0, 0) = eigen_rotation(0, 0); + transform(0, 1) = eigen_rotation(0, 1); + transform(0, 2) = eigen_rotation(0, 2); + transform(1, 0) = eigen_rotation(1, 0); + transform(1, 1) = eigen_rotation(1, 1); + transform(1, 2) = eigen_rotation(1, 2); + transform(2, 0) = eigen_rotation(2, 0); + transform(2, 1) = eigen_rotation(2, 1); + transform(2, 2) = eigen_rotation(2, 2); + + n_superposed = max_n; +} + +}}} // ns diff --git a/modules/mol/alg/src/gdt.hh b/modules/mol/alg/src/gdt.hh new file mode 100644 index 000000000..4333470a3 --- /dev/null +++ b/modules/mol/alg/src/gdt.hh @@ -0,0 +1,34 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2024 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +#ifndef OST_GDT_HH +#define OST_GDT_HH + +#include <ost/base.hh> +#include <ost/geom/geom.hh> + +namespace ost { namespace mol { namespace alg { + +void GDT(const geom::Vec3List& mdl_pos, const geom::Vec3List& ref_pos, + int window_size, int max_windows, Real distance_thresh, + int& n_superposed, geom::Mat4& transform); + +}}} // ns + +#endif -- GitLab