// scoring_weights.cc
// Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and
//                          Biozentrum - University of Basel
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//   http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include <promod3/modelling/scoring_weights.hh>
#include <promod3/core/message.hh>

namespace promod3 { namespace modelling {

namespace {

// Looks up the weight stored under 'key' in 'weights'.
// - weights: map from score name to weight
// - key: score name to look up
// Returns the weight mapped to 'key'.
// Throws promod3::Error if 'weights' has no entry for 'key'.
Real GetWeight(const std::map<String, Real>& weights, const String& key) {
  std::map<String, Real>::const_iterator found = weights.find(key);
  if (found == weights.end()) {
    throw promod3::Error("Internal error: Key '" + key
                         + "' not found in ScoringWeights!");
  }
  return found->second;
}

} // anon ns

// Fills all default weight sets and the lists of scoring keys.
//
// Four variants exist (named as in the members below):
// - bb_db_aa: returned by GetWeights for with_db = true,  with_aa = true
// - bb_db:    returned by GetWeights for with_db = true,  with_aa = false
// - bb_aa:    returned by GetWeights for with_db = false, with_aa = true
// - bb:       returned by GetWeights for with_db = false, with_aa = false
// Each variant has one length agnostic set and a table of length dependent
// sets keyed by loop length (0-14). The table entry with key -1 is a copy of
// the length agnostic set and is what GetWeights falls back to for any loop
// length without an own entry.
ScoringWeights::ScoringWeights() {
  // historical note:
  // - until Dec. 21, 2016: these were the weights trained by Niklaus Johner:
  //   weights["hbond"] = 1
  //   weights["clash"] = 0.08880086
  //   weights["torsion"] = 0.69712072
  //   weights["reduced"] = 2.13535565
  //   weights["cbeta"] = 3.59177335
  //   weights["cb_packing"] = 1.17280667
  //   weights["pairwise"] = -0.7

  // weights trained with scripts in extras/scoring_weight_training
  // BB_DB_AANR
  weights_bb_db_aa_["aa_clash"] = 0.0171401;
  weights_bb_db_aa_["aa_interaction"] = 0.00256632;
  weights_bb_db_aa_["aa_packing"] = 0.0383423;
  weights_bb_db_aa_["cb_packing"] = 1.15539;
  weights_bb_db_aa_["cbeta"] = 0.262183;
  weights_bb_db_aa_["clash"] = 0.0134694;
  weights_bb_db_aa_["hbond"] = 1;
  weights_bb_db_aa_["reduced"] = 0.201682;
  weights_bb_db_aa_["seq_prof_score"] = -0.338147;
  weights_bb_db_aa_["stem_rmsd"] = 0.0814165;
  weights_bb_db_aa_["str_prof_score"] = -1.75976;
  weights_bb_db_aa_["torsion"] = 0.809906;
  weights_bb_db_aa_["pairwise"] = -0.7;
  // BB_DB
  weights_bb_db_["cb_packing"] = 0.843167;
  weights_bb_db_["cbeta"] = 0.371329;
  weights_bb_db_["clash"] = 0.0319755;
  weights_bb_db_["hbond"] = 1;
  weights_bb_db_["reduced"] = 0.229909;
  weights_bb_db_["seq_prof_score"] = -0.313212;
  weights_bb_db_["stem_rmsd"] = 0.0580353;
  weights_bb_db_["str_prof_score"] = -1.32155;
  weights_bb_db_["torsion"] = 0.602527;
  weights_bb_db_["pairwise"] = -0.7;
  // BB_AANR
  weights_bb_aa_["aa_clash"] = 0.0268085;
  weights_bb_aa_["aa_interaction"] = 0.00464609;
  weights_bb_aa_["aa_packing"] = 0.0890843;
  weights_bb_aa_["cb_packing"] = 1.13975;
  weights_bb_aa_["cbeta"] = 0.383702;
  weights_bb_aa_["clash"] = 0.0320327;
  weights_bb_aa_["hbond"] = 1;
  weights_bb_aa_["reduced"] = 0.229512;
  weights_bb_aa_["torsion"] = 0.839257;
  weights_bb_aa_["pairwise"] = -0.7;
  // BB
  weights_bb_["cb_packing"] = 0.793275;
  weights_bb_["cbeta"] = 0.500879;
  weights_bb_["clash"] = 0.0801205;
  weights_bb_["hbond"] = 1;
  weights_bb_["reduced"] = 0.238478;
  weights_bb_["torsion"] = 0.729424;
  weights_bb_["pairwise"] = -0.7;

  // Length dependent weights: weight_map is a scratch map that is refilled
  // for every bin of loop lengths and then copied into all lengths of that
  // bin. Keys that are not reassigned for a bin keep their previous value.
  //
  // "pairwise" is listed in bb_scoring_keys_ (see below), so every weight set
  // must provide it or GetBackboneWeights throws. It is set once per variant
  // and carried through all bins by the reuse of weight_map.
  // NOTE(review): -0.7 is the value used by all length agnostic sets above;
  // confirm that it is also the intended value for the length dependent sets.

  // BB_DB_AANR length dependent
  std::map<String, Real> weight_map;
  weight_map["pairwise"] = -0.7;
  // loop lengths 0-4
  weight_map["aa_clash"] = 0.0120408;
  weight_map["aa_interaction"] = 0.0114092;
  weight_map["aa_packing"] = 0.000125384;
  weight_map["cb_packing"] = 0.040684;
  weight_map["cbeta"] = 0.356074;
  weight_map["clash"] = 0.848652;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.0470799;
  weight_map["seq_prof_score"] = -0.036081;
  weight_map["stem_rmsd"] = 0.144414;
  weight_map["str_prof_score"] = -0.302214;
  weight_map["torsion"] = 0.32667;
  length_dependent_weights_bb_db_aa_[0] = weight_map;
  length_dependent_weights_bb_db_aa_[1] = weight_map;
  length_dependent_weights_bb_db_aa_[2] = weight_map;
  length_dependent_weights_bb_db_aa_[3] = weight_map;
  length_dependent_weights_bb_db_aa_[4] = weight_map;

  // loop lengths 5-6
  weight_map["aa_clash"] = 0.0301478;
  weight_map["aa_interaction"] = 0.00839642;
  weight_map["aa_packing"] = 0.038416;
  weight_map["cb_packing"] = 0.250168;
  weight_map["cbeta"] = 0.1976;
  weight_map["clash"] = 0.0756704;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.135068;
  weight_map["seq_prof_score"] = -0.111772;
  weight_map["stem_rmsd"] = 0.0624659;
  weight_map["str_prof_score"] = -0.856625;
  weight_map["torsion"] = 0.443652;
  length_dependent_weights_bb_db_aa_[5] = weight_map;
  length_dependent_weights_bb_db_aa_[6] = weight_map;

  // loop lengths 7-8
  weight_map["aa_clash"] = 0.0266467;
  weight_map["aa_interaction"] = 0.00495266;
  weight_map["aa_packing"] = 0.0570628;
  weight_map["cb_packing"] = 0.55212;
  weight_map["cbeta"] = 0.331541;
  weight_map["clash"] = 0.0341554;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.189735;
  weight_map["seq_prof_score"] = -0.293038;
  weight_map["stem_rmsd"] = 0.0809159;
  weight_map["str_prof_score"] = -0.851421;
  weight_map["torsion"] = 0.548451;
  length_dependent_weights_bb_db_aa_[7] = weight_map;
  length_dependent_weights_bb_db_aa_[8] = weight_map;

  // loop lengths 9-10
  weight_map["aa_clash"] = 0.012002;
  weight_map["aa_interaction"] = 0.00860129;
  weight_map["aa_packing"] = 0.0903933;
  weight_map["cb_packing"] = 1.63694;
  weight_map["cbeta"] = 0.171588;
  weight_map["clash"] = 4.51029e-05;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.299024;
  weight_map["seq_prof_score"] = -0.368841;
  weight_map["stem_rmsd"] = 0.103716;
  weight_map["str_prof_score"] = -2.17521;
  weight_map["torsion"] = 1.12767;
  length_dependent_weights_bb_db_aa_[9] = weight_map;
  length_dependent_weights_bb_db_aa_[10] = weight_map;

  // loop lengths 11-12
  weight_map["aa_clash"] = 0.0025594;
  weight_map["aa_interaction"] = 0.00290232;
  weight_map["aa_packing"] = 0.0819373;
  weight_map["cb_packing"] = 1.52471;
  weight_map["cbeta"] = 0.274828;
  weight_map["clash"] = 0.0161732;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.124048;
  weight_map["seq_prof_score"] = -0.407456;
  weight_map["stem_rmsd"] = 0.074869;
  weight_map["str_prof_score"] = -2.70179;
  weight_map["torsion"] = 0.972806;
  length_dependent_weights_bb_db_aa_[11] = weight_map;
  length_dependent_weights_bb_db_aa_[12] = weight_map;

  // loop lengths 13-14
  weight_map["aa_clash"] = 0.000384705;
  weight_map["aa_interaction"] = 0.00239001;
  weight_map["aa_packing"] = 0.0020213;
  weight_map["cb_packing"] = 2.16407;
  weight_map["cbeta"] = 0.288299;
  weight_map["clash"] = 0.00155468;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.238341;
  weight_map["seq_prof_score"] = -0.1732;
  weight_map["stem_rmsd"] = 0.0975998;
  weight_map["str_prof_score"] = -3.22575;
  weight_map["torsion"] = 1.06902;
  length_dependent_weights_bb_db_aa_[13] = weight_map;
  length_dependent_weights_bb_db_aa_[14] = weight_map;
  // fallback for all other loop lengths
  length_dependent_weights_bb_db_aa_[-1] = weights_bb_db_aa_;

  // BB_DB length dependent
  weight_map.clear();
  weight_map["pairwise"] = -0.7;
  // loop lengths 0-4
  weight_map["cb_packing"] = 0.426734;
  weight_map["cbeta"] = 0.116532;
  weight_map["clash"] = 0.963033;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.173565;
  weight_map["seq_prof_score"] = -0.0381537;
  weight_map["stem_rmsd"] = 0.150966;
  weight_map["str_prof_score"] = -0.219757;
  weight_map["torsion"] = 0.338468;
  length_dependent_weights_bb_db_[0] = weight_map;
  length_dependent_weights_bb_db_[1] = weight_map;
  length_dependent_weights_bb_db_[2] = weight_map;
  length_dependent_weights_bb_db_[3] = weight_map;
  length_dependent_weights_bb_db_[4] = weight_map;

  // loop lengths 5-6
  weight_map["cb_packing"] = 0.102926;
  weight_map["cbeta"] = 0.32014;
  weight_map["clash"] = 0.15599;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.228402;
  weight_map["seq_prof_score"] = -0.173496;
  weight_map["stem_rmsd"] = 0.0571533;
  weight_map["str_prof_score"] = -0.622599;
  weight_map["torsion"] = 0.356714;
  length_dependent_weights_bb_db_[5] = weight_map;
  length_dependent_weights_bb_db_[6] = weight_map;

  // loop lengths 7-8
  weight_map["cb_packing"] = 0.448069;
  weight_map["cbeta"] = 0.525517;
  weight_map["clash"] = 0.0448608;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.192292;
  weight_map["seq_prof_score"] = -0.332023;
  weight_map["stem_rmsd"] = 0.0882085;
  weight_map["str_prof_score"] = -0.645988;
  weight_map["torsion"] = 0.493079;
  length_dependent_weights_bb_db_[7] = weight_map;
  length_dependent_weights_bb_db_[8] = weight_map;

  // loop lengths 9-10
  weight_map["cb_packing"] = 1.07076;
  weight_map["cbeta"] = 0.531162;
  weight_map["clash"] = 0.079354;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.503881;
  weight_map["seq_prof_score"] = -0.49335;
  weight_map["stem_rmsd"] = 0.110282;
  weight_map["str_prof_score"] = -2.06894;
  weight_map["torsion"] = 0.897905;
  length_dependent_weights_bb_db_[9] = weight_map;
  length_dependent_weights_bb_db_[10] = weight_map;

  // loop lengths 11-12
  weight_map["cb_packing"] = 1.99384;
  weight_map["cbeta"] = 0.713817;
  weight_map["clash"] = 0.046714;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.500903;
  weight_map["seq_prof_score"] = -0.761715;
  weight_map["stem_rmsd"] = 0.102398;
  weight_map["str_prof_score"] = -2.71036;
  weight_map["torsion"] = 1.03345;
  length_dependent_weights_bb_db_[11] = weight_map;
  length_dependent_weights_bb_db_[12] = weight_map;

  // loop lengths 13-14
  weight_map["cb_packing"] = 1.99907;
  weight_map["cbeta"] = 0.278163;
  weight_map["clash"] = 0.0423564;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.20795;
  weight_map["seq_prof_score"] = -0.106216;
  weight_map["stem_rmsd"] = 0.0810602;
  weight_map["str_prof_score"] = -2.98924;
  weight_map["torsion"] = 1.06368;
  length_dependent_weights_bb_db_[13] = weight_map;
  length_dependent_weights_bb_db_[14] = weight_map;
  // fallback for all other loop lengths
  length_dependent_weights_bb_db_[-1] = weights_bb_db_;

  // BB_AANR length dependent
  weight_map.clear();
  weight_map["pairwise"] = -0.7;
  // loop lengths 0-4
  weight_map["aa_clash"] = 0.283751;
  weight_map["aa_interaction"] = 0.017465;
  weight_map["aa_packing"] = 0.241799;
  weight_map["cb_packing"] = 0.127742;
  weight_map["cbeta"] = 0.352376;
  weight_map["clash"] = 0.704888;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.149259;
  weight_map["torsion"] = 0.490918;
  length_dependent_weights_bb_aa_[0] = weight_map;
  length_dependent_weights_bb_aa_[1] = weight_map;
  length_dependent_weights_bb_aa_[2] = weight_map;
  length_dependent_weights_bb_aa_[3] = weight_map;
  length_dependent_weights_bb_aa_[4] = weight_map;

  // loop lengths 5-6
  weight_map["aa_clash"] = 0.036239;
  weight_map["aa_interaction"] = 0.00620728;
  weight_map["aa_packing"] = 0.0614946;
  weight_map["cb_packing"] = 0.288402;
  weight_map["cbeta"] = 0.272684;
  weight_map["clash"] = 0.0750436;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.0720939;
  weight_map["torsion"] = 0.348388;
  length_dependent_weights_bb_aa_[5] = weight_map;
  length_dependent_weights_bb_aa_[6] = weight_map;

  // loop lengths 7-8
  weight_map["aa_clash"] = 0.0616729;
  weight_map["aa_interaction"] = 0.00672076;
  weight_map["aa_packing"] = 0.0850889;
  weight_map["cb_packing"] = 0.617773;
  weight_map["cbeta"] = 0.252535;
  weight_map["clash"] = 0.0521921;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.244583;
  weight_map["torsion"] = 0.760732;
  length_dependent_weights_bb_aa_[7] = weight_map;
  length_dependent_weights_bb_aa_[8] = weight_map;

  // loop lengths 9-10
  weight_map["aa_clash"] = 0.0171586;
  weight_map["aa_interaction"] = 0.0057374;
  weight_map["aa_packing"] = 0.132462;
  weight_map["cb_packing"] = 1.05146;
  weight_map["cbeta"] = 0.0751599;
  weight_map["clash"] = 0.0412901;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.233706;
  weight_map["torsion"] = 0.932057;
  length_dependent_weights_bb_aa_[9] = weight_map;
  length_dependent_weights_bb_aa_[10] = weight_map;

  // loop lengths 11-12
  weight_map["aa_clash"] = 0.015698;
  weight_map["aa_interaction"] = 0.0080754;
  weight_map["aa_packing"] = 0.0604609;
  weight_map["cb_packing"] = 2.82079;
  weight_map["cbeta"] = 0.289487;
  weight_map["clash"] = 0.0174437;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.367916;
  weight_map["torsion"] = 1.84005;
  length_dependent_weights_bb_aa_[11] = weight_map;
  length_dependent_weights_bb_aa_[12] = weight_map;

  // loop lengths 13-14
  // The all atom keys must be the plain "aa_*" names listed in
  // aa_scoring_keys_; with any other key the values of the previous bin
  // would silently stay in place for these loop lengths.
  weight_map["aa_clash"] = 0.00320302;
  weight_map["aa_interaction"] = 0.00431749;
  weight_map["aa_packing"] = 0.0794247;
  weight_map["cb_packing"] = 2.55054;
  weight_map["cbeta"] = 0.0981701;
  weight_map["clash"] = 0.0181231;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.243598;
  weight_map["torsion"] = 1.72607;
  length_dependent_weights_bb_aa_[13] = weight_map;
  length_dependent_weights_bb_aa_[14] = weight_map;
  // fallback for all other loop lengths
  length_dependent_weights_bb_aa_[-1] = weights_bb_aa_;

  // BB length dependent
  weight_map.clear();
  weight_map["pairwise"] = -0.7;
  // loop lengths 0-4
  weight_map["cb_packing"] = 0.224327;
  weight_map["cbeta"] = 0.226355;
  weight_map["clash"] = 0.424003;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.076963;
  weight_map["torsion"] = 0.253095;
  length_dependent_weights_bb_[0] = weight_map;
  length_dependent_weights_bb_[1] = weight_map;
  length_dependent_weights_bb_[2] = weight_map;
  length_dependent_weights_bb_[3] = weight_map;
  length_dependent_weights_bb_[4] = weight_map;

  // loop lengths 5-6
  weight_map["cb_packing"] = 0.137013;
  weight_map["cbeta"] = 0.194254;
  weight_map["clash"] = 0.283518;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.12837;
  weight_map["torsion"] = 0.318854;
  length_dependent_weights_bb_[5] = weight_map;
  length_dependent_weights_bb_[6] = weight_map;

  // loop lengths 7-8
  weight_map["cb_packing"] = 0.618752;
  weight_map["cbeta"] = 0.50412;
  weight_map["clash"] = 0.108268;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.183393;
  weight_map["torsion"] = 0.636929;
  length_dependent_weights_bb_[7] = weight_map;
  length_dependent_weights_bb_[8] = weight_map;

  // loop lengths 9-10
  weight_map["cb_packing"] = 0.943519;
  weight_map["cbeta"] = 0.946992;
  weight_map["clash"] = 0.0956615;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.251022;
  weight_map["torsion"] = 0.921721;
  length_dependent_weights_bb_[9] = weight_map;
  length_dependent_weights_bb_[10] = weight_map;

  // loop lengths 11-12
  weight_map["cb_packing"] = 1.06123;
  weight_map["cbeta"] = 0.469593;
  weight_map["clash"] = 0.0831533;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.268347;
  weight_map["torsion"] = 0.88921;
  length_dependent_weights_bb_[11] = weight_map;
  length_dependent_weights_bb_[12] = weight_map;

  // loop lengths 13-14
  weight_map["cb_packing"] = 2.05415;
  weight_map["cbeta"] = 0.604888;
  weight_map["clash"] = 0.024504;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.367079;
  weight_map["torsion"] = 1.34862;
  length_dependent_weights_bb_[13] = weight_map;
  length_dependent_weights_bb_[14] = weight_map;
  // fallback for all other loop lengths
  length_dependent_weights_bb_[-1] = weights_bb_;

  // key naming consistent with default scorers and weights above
  stem_rmsd_key_ = "stem_rmsd";
  seq_prof_score_key_ = "seq_prof_score";
  str_prof_score_key_ = "str_prof_score";
  // keys extracted by GetBackboneWeights
  bb_scoring_keys_.push_back("cb_packing");
  bb_scoring_keys_.push_back("cbeta");
  bb_scoring_keys_.push_back("clash");
  bb_scoring_keys_.push_back("hbond");
  bb_scoring_keys_.push_back("pairwise");
  bb_scoring_keys_.push_back("reduced");
  bb_scoring_keys_.push_back("torsion");
  // keys extracted by GetAllAtomWeights
  aa_scoring_keys_.push_back("aa_clash");
  aa_scoring_keys_.push_back("aa_interaction");
  aa_scoring_keys_.push_back("aa_packing");
}

const std::map<String, Real>& ScoringWeights::GetWeights(bool with_db,
                                                         bool with_aa,
                                                         bool length_dependent,
                                                         int loop_length) const {

  if(!length_dependent) {
    // its not length dependent, so lets return the length agnostic weights!
    if (with_db) {
      if (with_aa) {
        return weights_bb_db_aa_;
      }
      else {        
        return weights_bb_db_;
      }
    } else {
      if (with_aa) {
        return weights_bb_aa_;
      }
      else {        
        return weights_bb_;
      }
    }
  } else {
    // its length dependent!
    std::map<int, std::map<String, Real> >::const_iterator it;
    if (with_db) {
      if (with_aa) {
        it = length_dependent_weights_bb_db_aa_.find(loop_length);
        if(it == length_dependent_weights_bb_db_aa_.end()) {
          it = length_dependent_weights_bb_db_aa_.find(-1);
        } 
        return it->second;
      }
      else {  
        it = length_dependent_weights_bb_db_.find(loop_length);
        if(it == length_dependent_weights_bb_db_.end()) {
          it = length_dependent_weights_bb_db_.find(-1);
        } 
        return it->second;
      }
    } else {
      if (with_aa) {
        it = length_dependent_weights_bb_aa_.find(loop_length);
        if(it == length_dependent_weights_bb_aa_.end()) {
          it = length_dependent_weights_bb_aa_.find(-1);
        } 
        return it->second;
      }
      else {     
        it = length_dependent_weights_bb_.find(loop_length);
        if(it == length_dependent_weights_bb_.end()) {
          it = length_dependent_weights_bb_.find(-1);
        } 
        return it->second;
      }
    }
  }
}

void ScoringWeights::SetWeights(bool with_db, bool with_aa,
                                const std::map<String, Real>& weights,
                                bool length_dependent, int loop_length) {

  if(!length_dependent){
    // its not length dependent, so lets set the length agnostic weights!
    if (with_db) {
      if (with_aa) {
        weights_bb_db_aa_ = weights;
      }
      else {
        weights_bb_db_ = weights;
      }
    } else {
      if (with_aa) {
        weights_bb_aa_ = weights;
      }
      else {
        weights_bb_ = weights;
      }
    }
  } else {
    // its length dependent!
    if (with_db) {
      if (with_aa) {
        length_dependent_weights_bb_db_aa_[loop_length] = weights;
      }
      else {
        length_dependent_weights_bb_db_[loop_length] = weights;
      }
    } else {
      if (with_aa) {
        length_dependent_weights_bb_aa_[loop_length] = weights;
      }
      else {
        length_dependent_weights_bb_[loop_length] = weights;
      }
    }
  }
}

std::map<String, Real> ScoringWeights::GetBackboneWeights(bool with_db,
                                                        bool with_aa,
                                                        bool length_dependent,
                                                        int loop_length) const {
  std::map<String, Real> result;
  const std::map<String, Real>& all_weights = GetWeights(with_db, with_aa, 
                                                         length_dependent,
                                                         loop_length);
  for (uint i = 0; i < bb_scoring_keys_.size(); ++i) {
    result[bb_scoring_keys_[i]] = GetWeight(all_weights, bb_scoring_keys_[i]);
  }
  return result;
}

std::map<String, Real> ScoringWeights::GetAllAtomWeights(bool with_db,
                                                        bool length_dependent,
                                                        int loop_length) const {
  std::map<String, Real> result;
  const std::map<String, Real>& all_weights = GetWeights(with_db, true,
                                                         length_dependent,
                                                         loop_length);
  for (uint i = 0; i < aa_scoring_keys_.size(); ++i) {
    result[aa_scoring_keys_[i]] = GetWeight(all_weights, aa_scoring_keys_[i]);
  }
  return result;
}

}} // ns