Skip to content
Snippets Groups Projects
  • Studer Gabriel's avatar
    4ac2e408
    refactor rigid scoring - introduce GDT implementation similar to LGA · 4ac2e408
    Studer Gabriel authored
    The Scorer object provides two mappings. The default mapping which is
    QS-score based and the rigid_mapping which is RMSD based.
    The rigid_mapping is now the basis for global superposition based
    scores such as RMSD and GDT. And here's the thing, OpenStructure got
    a brand new GDT implementation! Scanning of best possible superpositions
    is a bit simpler than in LGA but the results are similar.
    Benchmarking against CASP15 TS models returns GDT_TS score where 99.2%
    are within 3 GDT points with the LGA results and the maximum observed
    difference is 7.39. There is a tendency for slightly lower scores in
    OpenStructure meaning LGA sometimes found better superpositions.
    
    BUT: Oligo/RNA support comes for free in the Scorer object!
    4ac2e408
    History
    refactor rigid scoring - introduce GDT implementation similar to LGA
    Studer Gabriel authored
    The Scorer object provides two mappings. The default mapping which is
    QS-score based and the rigid_mapping which is RMSD based.
    The rigid_mapping is now the basis for global superposition based
    scores such as RMSD and GDT. And here's the thing, OpenStructure got
    a brand new GDT implementation! Scanning of best possible superpositions
    is a bit simpler than in LGA but the results are similar.
    Benchmarking against CASP15 TS models returns GDT_TS score where 99.2%
    are within 3 GDT points with the LGA results and the maximum observed
    difference is 7.39. There is a tendency for slightly lower scores in
    OpenStructure meaning LGA sometimes found better superpositions.
    
    BUT: Oligo/RNA support comes for free in the Scorer object!
gdt.cc 13.51 KiB
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2024 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
//------------------------------------------------------------------------------

#include <ost/mol/alg/gdt.hh>
#include <ost/message.hh>
#include <Eigen/Dense>

namespace {

// get RMSD and rotation matrix using the Theobald method
// Both position matrices are expected to have the same size
// and to have their average position at the origin 
void TheobaldRMSD(const Eigen::Matrix<double,Eigen::Dynamic,3>& pos_one,
                  const Eigen::Matrix<double,Eigen::Dynamic,3>& pos_two,
                  Real& rmsd, Eigen::Matrix<double,3,3>& rot){

  if(pos_one.rows() < 3){
    throw ost::Error("Observed superposition with < 3 positions to "
                         "superpose!");
  }

  Eigen::Matrix<double,3,3> M = pos_one.transpose() * pos_two;

  // using floats for the squared norm is fine
  double GA = pos_one.squaredNorm();
  double GB = pos_two.squaredNorm();

  Eigen::Matrix<double,4,4> K;
  K(0,0)  =  M(0,0) + M(1,1) + M(2,2);
  K(0,1)  =  M(1,2) - M(2,1);
  K(0,2)  =  M(2,0) - M(0,2);
  K(0,3)  =  M(0,1) - M(1,0);

  K(1,0)  =  K(0,1);
  K(1,1)  =  M(0,0) - M(1,1) - M(2,2);
  K(1,2)  =  M(0,1) + M(1,0);
  K(1,3)  =  M(0,2) + M(2,0);

  K(2,0)  =  K(0,2);
  K(2,1)  =  K(1,2);
  K(2,2)  = -M(0,0) + M(1,1) - M(2,2);
  K(2,3)  =  M(1,2) + M(2,1);

  K(3,0)  =  K(0,3);
  K(3,1)  =  K(1,3);
  K(3,2)  =  K(2,3);
  K(3,3)  = -M(0,0) - M(1,1) + M(2,2);

  double C0 = K.determinant();
  double C1 = -8.0*M.determinant();
  double C2 = -2.0*M.squaredNorm();
  double lambda = 0.5 * (GA + GB);
  double a, b, d, lambda_2;
  for(int i = 0; i < 50; ++i){
    lambda_2 = lambda * lambda;
    b = (lambda_2 + C2) * lambda;
    a = b + C1;
    d = (a*lambda + C0) / (2.0*lambda_2*lambda + b + a);
    lambda -= d;
    if(std::abs(d) < 1e-6){
      break;
    }
  }

  double msd = (GA + GB - 2.0 * lambda) / pos_one.rows();
  if(msd < 1e-4){
    // The algorithm never really goes to zero... if msd is super small we just
    // assign zero. 1e-4 corresponds to an rmsd of 0.01
    rmsd = 0.0;
  }
  else{
    rmsd = std::sqrt(msd);
  }

  K -= lambda*Eigen::Matrix<double,4,4>::Identity();

  double helper[6];
  helper[0] = K(2,2)*K(3,3) - K(3,2)*K(2,3);
  helper[1] = K(2,1)*K(3,3) - K(3,1)*K(2,3);
  helper[2] = K(2,1)*K(3,2) - K(3,1)*K(2,2);
  helper[3] = K(2,0)*K(3,3) - K(3,0)*K(2,3);
  helper[4] = K(2,0)*K(3,2) - K(3,0)*K(2,2);
  helper[5] = K(2,0)*K(3,1) - K(3,0)*K(2,1);

  double q1 =  K(1,1)*helper[0] - K(1,2)*helper[1] + K(1,3)*helper[2];
  double q2 = -K(1,0)*helper[0] + K(1,2)*helper[3] - K(1,3)*helper[4];
  double q3 =  K(1,0)*helper[1] - K(1,1)*helper[3] + K(1,3)*helper[5];
  double q4 = -K(1,0)*helper[2] + K(1,1)*helper[4] - K(1,2)*helper[5]; 
  double norm = q1*q1 + q2*q2 + q3*q3 + q4*q4;

  if(norm < 1e-6){
    q1 =  K(0,1)*helper[0] - K(0,2)*helper[1] + K(0,3)*helper[2];
    q2 = -K(0,0)*helper[0] + K(0,2)*helper[3] - K(0,3)*helper[4];
    q3 =  K(0,0)*helper[1] - K(0,1)*helper[3] + K(0,3)*helper[5];
    q4 = -K(0,0)*helper[2] + K(0,1)*helper[4] - K(0,2)*helper[5];
    norm = q1*q1 + q2*q2 +q3*q3 + q4*q4;

    if (norm < 1e-6){
      helper[0] = K(0,2)*K(1,3) - K(0,3)*K(1,2);
      helper[1] = K(0,1)*K(1,3) - K(0,3)*K(1,1);
      helper[2] = K(0,1)*K(1,2) - K(0,2)*K(1,1);
      helper[3] = K(0,0)*K(1,3) - K(0,3)*K(1,0);
      helper[4] = K(0,0)*K(1,2) - K(0,2)*K(1,0);
      helper[5] = K(0,0)*K(1,1) - K(0,1)*K(1,0);

      q1 =  K(3,1)*helper[0] - K(3,2)*helper[1] + K(3,3)*helper[2];
      q2 = -K(3,0)*helper[0] + K(3,2)*helper[3] - K(3,3)*helper[4];
      q3 =  K(3,0)*helper[1] - K(3,1)*helper[3] + K(3,3)*helper[5];
      q4 = -K(3,0)*helper[2] + K(3,1)*helper[4] - K(3,2)*helper[5];
      norm = q1*q1 + q2*q2 + q3*q3 + q4*q4;

      if (norm < 1e-6){
        q1 =  K(2,1)*helper[0] - K(2,2)*helper[1] + K(2,3)*helper[2];
        q2 = -K(2,0)*helper[0] + K(2,2)*helper[3] - K(2,3)*helper[4];
        q3 =  K(2,0)*helper[1] - K(2,1)*helper[3] + K(2,3)*helper[5];
        q4 = -K(2,0)*helper[2] + K(2,1)*helper[4] - K(2,2)*helper[5];
        norm = q1*q1 + q2*q2 + q3*q3 + q4*q4;
        if (norm < 1e-6){
          // this should not happen
          rot = Eigen::Matrix<double,3,3>::Identity();
          return;
        }
      }
    }
  }

  norm = 1.0 / std::sqrt(norm);
  q1 *= norm; q2 *= norm; q3 *= norm; q4 *= norm;
  rot(0,0) = 1.0 - 2.0*(q3*q3 + q4*q4);
  rot(0,1) =       2.0*(q2*q3 - q1*q4);
  rot(0,2) =       2.0*(q2*q4 + q3*q1);
  rot(1,0) =       2.0*(q2*q3 + q1*q4);
  rot(1,1) = 1.0 - 2.0*(q2*q2 + q4*q4);
  rot(1,2) =       2.0*(q3*q4 - q2*q1);
  rot(2,0) =       2.0*(q2*q4 - q3*q1);
  rot(2,1) =       2.0*(q3*q4 + q2*q1);
  rot(2,2) = 1.0 - 2.0*(q2*q2 + q3*q3);
}

void Superpose(Eigen::Matrix<double,Eigen::Dynamic,3>& pos_one,
               Eigen::Matrix<double,Eigen::Dynamic,3>& pos_two,
               Eigen::Matrix<double,1,3>& avg_one,
               Eigen::Matrix<double,1,3>& avg_two,
               Real& rmsd,
               Eigen::Matrix<double,3,3>& rotation){

  if(pos_one.rows() != pos_two.rows()){
    throw ost::Error("Cannot superpose positions of different size!");
  }


  avg_one = Eigen::Matrix<double,1,3>::Zero();
  for (uint i = 0; i < pos_one.rows(); ++i) {
    avg_one += pos_one.row(i);
  }
  avg_one = avg_one / pos_one.rows();

  avg_two = Eigen::Matrix<double,1,3>::Zero();
  for (uint i = 0; i < pos_two.rows(); ++i) {
    avg_two += pos_two.row(i);
  }
  avg_two = avg_two / pos_two.rows();

  // TheobaldRMSD only determines the rotational component of the superposition
  // we need to shift the centers of the two point sets onto origin
  for (uint i = 0; i < pos_one.rows(); ++i){
    pos_one.row(i) -= avg_one;
    pos_two.row(i) -= avg_two;
  }

  TheobaldRMSD(pos_one, pos_two, rmsd, rotation);
}

void SuperposeIterative(const Eigen::Matrix<double,Eigen::Dynamic,3>& pos_one,
                        const Eigen::Matrix<double,Eigen::Dynamic,3>& pos_two,
                        int max_iterations, 
                        Real distance_thresh,
                        std::vector<int>& indices,
                        Eigen::Matrix<double,1,3>& avg_one,
                        Eigen::Matrix<double,1,3>& avg_two,
                        Eigen::Matrix<double,3,3>& rotation){

  if(pos_one.rows() != pos_two.rows()){
    throw ost::Error("Position data must be of consistent size!");
  }

  if(max_iterations <= 0){
    throw ost::Error("max_iterations must be at least 1!");
  }

  int num_rows = pos_one.rows();

  if(indices.empty()){
    //there are no idx, so we use all positions for the initial superposition
    indices.resize(num_rows);
    for(int i = 0; i < num_rows; ++i){
      indices[i] = i;
    }
  }
  else{
    //its not empty! let's quickly check whether there are at least 3 positions
    //and whether the indices are all valid
    if(indices.size() < 3){
      throw ost::Error("Must have at least 3 start indices for iterative " 
                       "Superposition!");
    }
    for(auto i: indices) {
      if(i >= num_rows){
        throw ost::Error("Invalid index in iterative Superposition!");
      }
    }
  }

  Real squared_dist_thresh = distance_thresh * distance_thresh;
  std::vector<int> new_indices;
  new_indices.reserve(num_rows);
  std::vector<Real> squared_distances(num_rows);
  Eigen::Matrix<double,1,3> temp_vec;
  Real rmsd;

  // keep track of the indices which give the superposition with maximum
  // number of superposed positions. Thats not necessarily the last superposition
  // when the algorithm converges
  int max_n = -1;
  std::vector<int> temp_indices = indices;
  Eigen::Matrix<double,1,3> temp_avg_one = Eigen::Matrix<double,1,3>::Zero();
  Eigen::Matrix<double,1,3> temp_avg_two = Eigen::Matrix<double,1,3>::Zero();
  Eigen::Matrix<double,3,3> temp_rotation = Eigen::Matrix<double,3,3>::Identity();

  for(int iteration = 0; iteration < max_iterations; ++iteration){

    if(temp_indices.size() < 3) break; //the thing is not really superposable...

    Eigen::Matrix<double,Eigen::Dynamic,3> temp_pos_one = 
    Eigen::Matrix<double,Eigen::Dynamic,3>::Zero(temp_indices.size(), 3);
    Eigen::Matrix<double,Eigen::Dynamic,3> temp_pos_two = 
    Eigen::Matrix<double,Eigen::Dynamic,3>::Zero(temp_indices.size(), 3);

    for(uint i = 0; i < temp_indices.size(); ++i){
      temp_pos_one.row(i) = pos_one.row(temp_indices[i]);
      temp_pos_two.row(i) = pos_two.row(temp_indices[i]);
    }

    Superpose(temp_pos_one, temp_pos_two, temp_avg_one, temp_avg_two, rmsd,
              temp_rotation);

    for(int i = 0; i < num_rows; ++i){
      temp_vec = pos_one.row(i) - temp_avg_one;
      temp_vec = (temp_rotation * temp_vec.transpose()).transpose() + temp_avg_two;
      squared_distances[i] = (temp_vec - pos_two.row(i)).squaredNorm();
    }

    new_indices.clear();
    for(int i = 0; i < num_rows; ++i){
      if(squared_distances[i] < squared_dist_thresh){
        new_indices.push_back(i);
      }
    }

    if(static_cast<int>(new_indices.size()) > max_n) {
      max_n = new_indices.size();
      indices = new_indices;
      avg_one = temp_avg_one;
      avg_two = temp_avg_two;
      rotation = temp_rotation;
    }

    if(new_indices == temp_indices) break; //nothing changes anymore

    temp_indices = new_indices;
  }
}

}

namespace ost{ namespace mol{ namespace alg {


void GDT(const geom::Vec3List& mdl_pos, const geom::Vec3List& ref_pos,
         int window_size, int max_windows, Real distance_thresh,
         int& n_superposed, geom::Mat4& transform) {

  if(mdl_pos.size() != ref_pos.size()){
    throw ost::Error("Position data must be of consistent size!");
  }

  int n_pos = mdl_pos.size();

  if(window_size > n_pos) {
    throw ost::Error("Window size in GDT algorithm is larger than positions");
  }

  Eigen::Matrix<double, Eigen::Dynamic, 3> eigen_mdl_pos = \
  Eigen::Matrix<double, Eigen::Dynamic, 3>::Zero(n_pos, 3);
  Eigen::Matrix<double, Eigen::Dynamic, 3> eigen_ref_pos = \
  Eigen::Matrix<double, Eigen::Dynamic, 3>::Zero(n_pos, 3);
  for(int i = 0; i < n_pos; ++i) {
  	eigen_mdl_pos(i, 0) = mdl_pos[i][0];
  	eigen_mdl_pos(i, 1) = mdl_pos[i][1];
  	eigen_mdl_pos(i, 2) = mdl_pos[i][2];
  	eigen_ref_pos(i, 0) = ref_pos[i][0];
  	eigen_ref_pos(i, 1) = ref_pos[i][1];
  	eigen_ref_pos(i, 2) = ref_pos[i][2];
  }

  std::vector<int> start_indices;
  int last_window_idx = n_pos - window_size;
  int n_windows = last_window_idx + 1;

  if(n_windows <= max_windows) {
    start_indices.resize(n_windows);
    for(int i = 0; i < n_windows; ++i) {
      start_indices[i] = i;
    }
  } else {
    start_indices.resize(max_windows);
    Real tmp = 0.0;
    Real delta = static_cast<Real>(last_window_idx) / (max_windows - 1);
    for(int i = 0; i < max_windows; ++i) {
      start_indices[i] = std::round(tmp);
      tmp += delta;
    }
  }
  
  size_t max_n = 0;
  Eigen::Matrix<double,1,3> eigen_avg_mdl = Eigen::Matrix<double,1,3>::Zero();
  Eigen::Matrix<double,1,3> eigen_avg_ref = Eigen::Matrix<double,1,3>::Zero();
  Eigen::Matrix<double,3,3> eigen_rotation = Eigen::Matrix<double,3,3>::Identity();
  Eigen::Matrix<double,1,3> eigen_avg_mdl_tmp = Eigen::Matrix<double,1,3>::Zero();
  Eigen::Matrix<double,1,3> eigen_avg_ref_tmp = Eigen::Matrix<double,1,3>::Zero();
  Eigen::Matrix<double,3,3> eigen_rotation_tmp = Eigen::Matrix<double,3,3>::Identity();
  
  for(int start_idx: start_indices) {
    std::vector<int> indices(window_size);
    for(int i = 0; i < window_size; ++i) {
      indices[i] = start_idx + i;
    }

    SuperposeIterative(eigen_mdl_pos, eigen_ref_pos,
                       10, distance_thresh, indices,
                       eigen_avg_mdl_tmp,
                       eigen_avg_ref_tmp,
                       eigen_rotation_tmp);

    if(indices.size() > max_n) {
      max_n = indices.size();
      eigen_avg_mdl = eigen_avg_mdl_tmp;
      eigen_avg_ref = eigen_avg_ref_tmp;
      eigen_rotation = eigen_rotation_tmp;
    }
  }

  // construct transform

  // there are three transformation to be applied to reach ref_pos from mdl_pos:
  // 1: shift mdl_pos to center
  // 2: apply estimated rotation
  // 3: shift onto average of ref_pos
  Eigen::Matrix<double,1,3> translation = eigen_rotation *
                                          (-eigen_avg_mdl.transpose()) + 
                                          eigen_avg_ref.transpose();


  transform = geom::Mat4();
  transform(0, 3) = translation(0, 0);
  transform(1, 3) = translation(0, 1);
  transform(2, 3) = translation(0, 2);
  transform(0, 0) = eigen_rotation(0, 0);
  transform(0, 1) = eigen_rotation(0, 1);
  transform(0, 2) = eigen_rotation(0, 2);
  transform(1, 0) = eigen_rotation(1, 0);
  transform(1, 1) = eigen_rotation(1, 1);
  transform(1, 2) = eigen_rotation(1, 2);
  transform(2, 0) = eigen_rotation(2, 0);
  transform(2, 1) = eigen_rotation(2, 1);
  transform(2, 2) = eigen_rotation(2, 2);

  n_superposed = max_n;
}

}}} // ns