// scoring_weights.cc
// Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and
// Biozentrum - University of Basel
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <promod3/modelling/scoring_weights.hh>
#include <promod3/core/message.hh>
namespace promod3 { namespace modelling {
namespace {
// Returns the weight stored under 'key' in 'weights'.
// Throws promod3::Error if 'key' is not present in the map.
Real GetWeight(const std::map<String, Real>& weights, const String& key) {
  std::map<String, Real>::const_iterator found = weights.find(key);
  if (found == weights.end()) {
    throw promod3::Error("Internal error: Key '" + key
                         + "' not found in ScoringWeights!");
  }
  return found->second;
}
} // anon ns
// Fills all default weight sets and the key lists used to split them.
//
// Four length-agnostic sets are defined (BB, BB_AANR, BB_DB, BB_DB_AANR).
// The *_DB sets additionally hold "seq_prof_score", "stem_rmsd" and
// "str_prof_score"; the *_AANR sets additionally hold the "aa_*" keys.
// For each of the four there is also a length-dependent table keyed by loop
// length (0..14). Key -1 in each table is a copy of the matching
// length-agnostic set and is what GetWeights falls back to when the requested
// loop length has no entry of its own.
ScoringWeights::ScoringWeights() {
  // historical note:
  // - until Dec. 21, 2016: these were the weights trained by Niklaus Johner:
  //   weights["hbond"] = 1
  //   weights["clash"] = 0.08880086
  //   weights["torsion"] = 0.69712072
  //   weights["reduced"] = 2.13535565
  //   weights["cbeta"] = 3.59177335
  //   weights["cb_packing"] = 1.17280667
  //   weights["pairwise"] = -0.7
  // weights trained with scripts in extras/scoring_weight_training

  // BB_DB_AANR
  weights_bb_db_aa_["aa_clash"] = 0.0171401;
  weights_bb_db_aa_["aa_interaction"] = 0.00256632;
  weights_bb_db_aa_["aa_packing"] = 0.0383423;
  weights_bb_db_aa_["cb_packing"] = 1.15539;
  weights_bb_db_aa_["cbeta"] = 0.262183;
  weights_bb_db_aa_["clash"] = 0.0134694;
  weights_bb_db_aa_["hbond"] = 1;
  weights_bb_db_aa_["reduced"] = 0.201682;
  weights_bb_db_aa_["seq_prof_score"] = -0.338147;
  weights_bb_db_aa_["stem_rmsd"] = 0.0814165;
  weights_bb_db_aa_["str_prof_score"] = -1.75976;
  weights_bb_db_aa_["torsion"] = 0.809906;
  weights_bb_db_aa_["pairwise"] = -0.7;

  // BB_DB
  weights_bb_db_["cb_packing"] = 0.843167;
  weights_bb_db_["cbeta"] = 0.371329;
  weights_bb_db_["clash"] = 0.0319755;
  weights_bb_db_["hbond"] = 1;
  weights_bb_db_["reduced"] = 0.229909;
  weights_bb_db_["seq_prof_score"] = -0.313212;
  weights_bb_db_["stem_rmsd"] = 0.0580353;
  weights_bb_db_["str_prof_score"] = -1.32155;
  weights_bb_db_["torsion"] = 0.602527;
  weights_bb_db_["pairwise"] = -0.7;

  // BB_AANR
  weights_bb_aa_["aa_clash"] = 0.0268085;
  weights_bb_aa_["aa_interaction"] = 0.00464609;
  weights_bb_aa_["aa_packing"] = 0.0890843;
  weights_bb_aa_["cb_packing"] = 1.13975;
  weights_bb_aa_["cbeta"] = 0.383702;
  weights_bb_aa_["clash"] = 0.0320327;
  weights_bb_aa_["hbond"] = 1;
  weights_bb_aa_["reduced"] = 0.229512;
  weights_bb_aa_["torsion"] = 0.839257;
  weights_bb_aa_["pairwise"] = -0.7;

  // BB
  weights_bb_["cb_packing"] = 0.793275;
  weights_bb_["cbeta"] = 0.500879;
  weights_bb_["clash"] = 0.0801205;
  weights_bb_["hbond"] = 1;
  weights_bb_["reduced"] = 0.238478;
  weights_bb_["torsion"] = 0.729424;
  weights_bb_["pairwise"] = -0.7;

  // Scratch map reused for all length-dependent tables below: each bucket
  // overwrites the trained keys and then stores a copy for its loop lengths.
  std::map<String, Real> weight_map;

  // BB_DB_AANR length dependent
  // "pairwise" is part of bb_scoring_keys_, so every weight set handed to
  // GetBackboneWeights must contain it. It is set once per table (it is never
  // overwritten by the buckets) to the same value as in the length-agnostic
  // sets above.
  weight_map["pairwise"] = -0.7;
  // loop lengths 0-4
  weight_map["aa_clash"] = 0.0120408;
  weight_map["aa_interaction"] = 0.0114092;
  weight_map["aa_packing"] = 0.000125384;
  weight_map["cb_packing"] = 0.040684;
  weight_map["cbeta"] = 0.356074;
  weight_map["clash"] = 0.848652;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.0470799;
  weight_map["seq_prof_score"] = -0.036081;
  weight_map["stem_rmsd"] = 0.144414;
  weight_map["str_prof_score"] = -0.302214;
  weight_map["torsion"] = 0.32667;
  for (int len = 0; len <= 4; ++len) {
    length_dependent_weights_bb_db_aa_[len] = weight_map;
  }
  // loop lengths 5-6
  weight_map["aa_clash"] = 0.0301478;
  weight_map["aa_interaction"] = 0.00839642;
  weight_map["aa_packing"] = 0.038416;
  weight_map["cb_packing"] = 0.250168;
  weight_map["cbeta"] = 0.1976;
  weight_map["clash"] = 0.0756704;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.135068;
  weight_map["seq_prof_score"] = -0.111772;
  weight_map["stem_rmsd"] = 0.0624659;
  weight_map["str_prof_score"] = -0.856625;
  weight_map["torsion"] = 0.443652;
  length_dependent_weights_bb_db_aa_[5] = weight_map;
  length_dependent_weights_bb_db_aa_[6] = weight_map;
  // loop lengths 7-8
  weight_map["aa_clash"] = 0.0266467;
  weight_map["aa_interaction"] = 0.00495266;
  weight_map["aa_packing"] = 0.0570628;
  weight_map["cb_packing"] = 0.55212;
  weight_map["cbeta"] = 0.331541;
  weight_map["clash"] = 0.0341554;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.189735;
  weight_map["seq_prof_score"] = -0.293038;
  weight_map["stem_rmsd"] = 0.0809159;
  weight_map["str_prof_score"] = -0.851421;
  weight_map["torsion"] = 0.548451;
  length_dependent_weights_bb_db_aa_[7] = weight_map;
  length_dependent_weights_bb_db_aa_[8] = weight_map;
  // loop lengths 9-10
  weight_map["aa_clash"] = 0.012002;
  weight_map["aa_interaction"] = 0.00860129;
  weight_map["aa_packing"] = 0.0903933;
  weight_map["cb_packing"] = 1.63694;
  weight_map["cbeta"] = 0.171588;
  weight_map["clash"] = 4.51029e-05;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.299024;
  weight_map["seq_prof_score"] = -0.368841;
  weight_map["stem_rmsd"] = 0.103716;
  weight_map["str_prof_score"] = -2.17521;
  weight_map["torsion"] = 1.12767;
  length_dependent_weights_bb_db_aa_[9] = weight_map;
  length_dependent_weights_bb_db_aa_[10] = weight_map;
  // loop lengths 11-12
  weight_map["aa_clash"] = 0.0025594;
  weight_map["aa_interaction"] = 0.00290232;
  weight_map["aa_packing"] = 0.0819373;
  weight_map["cb_packing"] = 1.52471;
  weight_map["cbeta"] = 0.274828;
  weight_map["clash"] = 0.0161732;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.124048;
  weight_map["seq_prof_score"] = -0.407456;
  weight_map["stem_rmsd"] = 0.074869;
  weight_map["str_prof_score"] = -2.70179;
  weight_map["torsion"] = 0.972806;
  length_dependent_weights_bb_db_aa_[11] = weight_map;
  length_dependent_weights_bb_db_aa_[12] = weight_map;
  // loop lengths 13-14
  weight_map["aa_clash"] = 0.000384705;
  weight_map["aa_interaction"] = 0.00239001;
  weight_map["aa_packing"] = 0.0020213;
  weight_map["cb_packing"] = 2.16407;
  weight_map["cbeta"] = 0.288299;
  weight_map["clash"] = 0.00155468;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.238341;
  weight_map["seq_prof_score"] = -0.1732;
  weight_map["stem_rmsd"] = 0.0975998;
  weight_map["str_prof_score"] = -3.22575;
  weight_map["torsion"] = 1.06902;
  length_dependent_weights_bb_db_aa_[13] = weight_map;
  length_dependent_weights_bb_db_aa_[14] = weight_map;
  // fallback for all other loop lengths
  length_dependent_weights_bb_db_aa_[-1] = weights_bb_db_aa_;

  // BB_DB length dependent
  weight_map.clear();
  weight_map["pairwise"] = -0.7;  // see note on "pairwise" above
  // loop lengths 0-4
  weight_map["cb_packing"] = 0.426734;
  weight_map["cbeta"] = 0.116532;
  weight_map["clash"] = 0.963033;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.173565;
  weight_map["seq_prof_score"] = -0.0381537;
  weight_map["stem_rmsd"] = 0.150966;
  weight_map["str_prof_score"] = -0.219757;
  weight_map["torsion"] = 0.338468;
  for (int len = 0; len <= 4; ++len) {
    length_dependent_weights_bb_db_[len] = weight_map;
  }
  // loop lengths 5-6
  weight_map["cb_packing"] = 0.102926;
  weight_map["cbeta"] = 0.32014;
  weight_map["clash"] = 0.15599;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.228402;
  weight_map["seq_prof_score"] = -0.173496;
  weight_map["stem_rmsd"] = 0.0571533;
  weight_map["str_prof_score"] = -0.622599;
  weight_map["torsion"] = 0.356714;
  length_dependent_weights_bb_db_[5] = weight_map;
  length_dependent_weights_bb_db_[6] = weight_map;
  // loop lengths 7-8
  weight_map["cb_packing"] = 0.448069;
  weight_map["cbeta"] = 0.525517;
  weight_map["clash"] = 0.0448608;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.192292;
  weight_map["seq_prof_score"] = -0.332023;
  weight_map["stem_rmsd"] = 0.0882085;
  weight_map["str_prof_score"] = -0.645988;
  weight_map["torsion"] = 0.493079;
  length_dependent_weights_bb_db_[7] = weight_map;
  length_dependent_weights_bb_db_[8] = weight_map;
  // loop lengths 9-10
  weight_map["cb_packing"] = 1.07076;
  weight_map["cbeta"] = 0.531162;
  weight_map["clash"] = 0.079354;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.503881;
  weight_map["seq_prof_score"] = -0.49335;
  weight_map["stem_rmsd"] = 0.110282;
  weight_map["str_prof_score"] = -2.06894;
  weight_map["torsion"] = 0.897905;
  length_dependent_weights_bb_db_[9] = weight_map;
  length_dependent_weights_bb_db_[10] = weight_map;
  // loop lengths 11-12
  weight_map["cb_packing"] = 1.99384;
  weight_map["cbeta"] = 0.713817;
  weight_map["clash"] = 0.046714;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.500903;
  weight_map["seq_prof_score"] = -0.761715;
  weight_map["stem_rmsd"] = 0.102398;
  weight_map["str_prof_score"] = -2.71036;
  weight_map["torsion"] = 1.03345;
  length_dependent_weights_bb_db_[11] = weight_map;
  length_dependent_weights_bb_db_[12] = weight_map;
  // loop lengths 13-14
  weight_map["cb_packing"] = 1.99907;
  weight_map["cbeta"] = 0.278163;
  weight_map["clash"] = 0.0423564;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.20795;
  weight_map["seq_prof_score"] = -0.106216;
  weight_map["stem_rmsd"] = 0.0810602;
  weight_map["str_prof_score"] = -2.98924;
  weight_map["torsion"] = 1.06368;
  length_dependent_weights_bb_db_[13] = weight_map;
  length_dependent_weights_bb_db_[14] = weight_map;
  // fallback for all other loop lengths
  length_dependent_weights_bb_db_[-1] = weights_bb_db_;

  // BB_AANR length dependent
  weight_map.clear();
  weight_map["pairwise"] = -0.7;  // see note on "pairwise" above
  // loop lengths 0-4
  weight_map["aa_clash"] = 0.283751;
  weight_map["aa_interaction"] = 0.017465;
  weight_map["aa_packing"] = 0.241799;
  weight_map["cb_packing"] = 0.127742;
  weight_map["cbeta"] = 0.352376;
  weight_map["clash"] = 0.704888;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.149259;
  weight_map["torsion"] = 0.490918;
  for (int len = 0; len <= 4; ++len) {
    length_dependent_weights_bb_aa_[len] = weight_map;
  }
  // loop lengths 5-6
  weight_map["aa_clash"] = 0.036239;
  weight_map["aa_interaction"] = 0.00620728;
  weight_map["aa_packing"] = 0.0614946;
  weight_map["cb_packing"] = 0.288402;
  weight_map["cbeta"] = 0.272684;
  weight_map["clash"] = 0.0750436;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.0720939;
  weight_map["torsion"] = 0.348388;
  length_dependent_weights_bb_aa_[5] = weight_map;
  length_dependent_weights_bb_aa_[6] = weight_map;
  // loop lengths 7-8
  weight_map["aa_clash"] = 0.0616729;
  weight_map["aa_interaction"] = 0.00672076;
  weight_map["aa_packing"] = 0.0850889;
  weight_map["cb_packing"] = 0.617773;
  weight_map["cbeta"] = 0.252535;
  weight_map["clash"] = 0.0521921;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.244583;
  weight_map["torsion"] = 0.760732;
  length_dependent_weights_bb_aa_[7] = weight_map;
  length_dependent_weights_bb_aa_[8] = weight_map;
  // loop lengths 9-10
  weight_map["aa_clash"] = 0.0171586;
  weight_map["aa_interaction"] = 0.0057374;
  weight_map["aa_packing"] = 0.132462;
  weight_map["cb_packing"] = 1.05146;
  weight_map["cbeta"] = 0.0751599;
  weight_map["clash"] = 0.0412901;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.233706;
  weight_map["torsion"] = 0.932057;
  length_dependent_weights_bb_aa_[9] = weight_map;
  length_dependent_weights_bb_aa_[10] = weight_map;
  // loop lengths 11-12
  weight_map["aa_clash"] = 0.015698;
  weight_map["aa_interaction"] = 0.0080754;
  weight_map["aa_packing"] = 0.0604609;
  weight_map["cb_packing"] = 2.82079;
  weight_map["cbeta"] = 0.289487;
  weight_map["clash"] = 0.0174437;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.367916;
  weight_map["torsion"] = 1.84005;
  length_dependent_weights_bb_aa_[11] = weight_map;
  length_dependent_weights_bb_aa_[12] = weight_map;
  // loop lengths 13-14
  // These three values were previously stored under "*_no_relax" keys, which
  // no lookup in this file uses; the plain "aa_*" keys then silently kept the
  // values of the 11-12 bucket. Store them under the keys listed in
  // aa_scoring_keys_ instead.
  weight_map["aa_clash"] = 0.00320302;
  weight_map["aa_interaction"] = 0.00431749;
  weight_map["aa_packing"] = 0.0794247;
  weight_map["cb_packing"] = 2.55054;
  weight_map["cbeta"] = 0.0981701;
  weight_map["clash"] = 0.0181231;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.243598;
  weight_map["torsion"] = 1.72607;
  length_dependent_weights_bb_aa_[13] = weight_map;
  length_dependent_weights_bb_aa_[14] = weight_map;
  // fallback for all other loop lengths
  length_dependent_weights_bb_aa_[-1] = weights_bb_aa_;

  // BB length dependent
  weight_map.clear();
  weight_map["pairwise"] = -0.7;  // see note on "pairwise" above
  // loop lengths 0-4
  weight_map["cb_packing"] = 0.224327;
  weight_map["cbeta"] = 0.226355;
  weight_map["clash"] = 0.424003;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.076963;
  weight_map["torsion"] = 0.253095;
  for (int len = 0; len <= 4; ++len) {
    length_dependent_weights_bb_[len] = weight_map;
  }
  // loop lengths 5-6
  weight_map["cb_packing"] = 0.137013;
  weight_map["cbeta"] = 0.194254;
  weight_map["clash"] = 0.283518;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.12837;
  weight_map["torsion"] = 0.318854;
  length_dependent_weights_bb_[5] = weight_map;
  length_dependent_weights_bb_[6] = weight_map;
  // loop lengths 7-8
  weight_map["cb_packing"] = 0.618752;
  weight_map["cbeta"] = 0.50412;
  weight_map["clash"] = 0.108268;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.183393;
  weight_map["torsion"] = 0.636929;
  length_dependent_weights_bb_[7] = weight_map;
  length_dependent_weights_bb_[8] = weight_map;
  // loop lengths 9-10
  weight_map["cb_packing"] = 0.943519;
  weight_map["cbeta"] = 0.946992;
  weight_map["clash"] = 0.0956615;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.251022;
  weight_map["torsion"] = 0.921721;
  length_dependent_weights_bb_[9] = weight_map;
  length_dependent_weights_bb_[10] = weight_map;
  // loop lengths 11-12
  weight_map["cb_packing"] = 1.06123;
  weight_map["cbeta"] = 0.469593;
  weight_map["clash"] = 0.0831533;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.268347;
  weight_map["torsion"] = 0.88921;
  length_dependent_weights_bb_[11] = weight_map;
  length_dependent_weights_bb_[12] = weight_map;
  // loop lengths 13-14
  weight_map["cb_packing"] = 2.05415;
  weight_map["cbeta"] = 0.604888;
  weight_map["clash"] = 0.024504;
  weight_map["hbond"] = 1;
  weight_map["reduced"] = 0.367079;
  weight_map["torsion"] = 1.34862;
  length_dependent_weights_bb_[13] = weight_map;
  length_dependent_weights_bb_[14] = weight_map;
  // fallback for all other loop lengths
  length_dependent_weights_bb_[-1] = weights_bb_;

  // key naming consistent with default scorers and weights above
  stem_rmsd_key_ = "stem_rmsd";
  seq_prof_score_key_ = "seq_prof_score";
  str_prof_score_key_ = "str_prof_score";
  // keys extracted by GetBackboneWeights
  bb_scoring_keys_.push_back("cb_packing");
  bb_scoring_keys_.push_back("cbeta");
  bb_scoring_keys_.push_back("clash");
  bb_scoring_keys_.push_back("hbond");
  bb_scoring_keys_.push_back("pairwise");
  bb_scoring_keys_.push_back("reduced");
  bb_scoring_keys_.push_back("torsion");
  // keys extracted by GetAllAtomWeights
  aa_scoring_keys_.push_back("aa_clash");
  aa_scoring_keys_.push_back("aa_interaction");
  aa_scoring_keys_.push_back("aa_packing");
}
const std::map<String, Real>& ScoringWeights::GetWeights(bool with_db,
bool with_aa,
bool length_dependent,
int loop_length) const {
if(!length_dependent) {
// its not length dependent, so lets return the length agnostic weights!
if (with_db) {
if (with_aa) {
return weights_bb_db_aa_;
}
else {
return weights_bb_db_;
}
} else {
if (with_aa) {
return weights_bb_aa_;
}
else {
return weights_bb_;
}
}
} else {
// its length dependent!
std::map<int, std::map<String, Real> >::const_iterator it;
if (with_db) {
if (with_aa) {
it = length_dependent_weights_bb_db_aa_.find(loop_length);
if(it == length_dependent_weights_bb_db_aa_.end()) {
it = length_dependent_weights_bb_db_aa_.find(-1);
}
return it->second;
}
else {
it = length_dependent_weights_bb_db_.find(loop_length);
if(it == length_dependent_weights_bb_db_.end()) {
it = length_dependent_weights_bb_db_.find(-1);
}
return it->second;
}
} else {
if (with_aa) {
it = length_dependent_weights_bb_aa_.find(loop_length);
if(it == length_dependent_weights_bb_aa_.end()) {
it = length_dependent_weights_bb_aa_.find(-1);
}
return it->second;
}
else {
it = length_dependent_weights_bb_.find(loop_length);
if(it == length_dependent_weights_bb_.end()) {
it = length_dependent_weights_bb_.find(-1);
}
return it->second;
}
}
}
}
void ScoringWeights::SetWeights(bool with_db, bool with_aa,
const std::map<String, Real>& weights,
bool length_dependent, int loop_length) {
if(!length_dependent){
// its not length dependent, so lets set the length agnostic weights!
if (with_db) {
if (with_aa) {
weights_bb_db_aa_ = weights;
}
else {
weights_bb_db_ = weights;
}
} else {
if (with_aa) {
weights_bb_aa_ = weights;
}
else {
weights_bb_ = weights;
}
}
} else {
// its length dependent!
if (with_db) {
if (with_aa) {
length_dependent_weights_bb_db_aa_[loop_length] = weights;
}
else {
length_dependent_weights_bb_db_[loop_length] = weights;
}
} else {
if (with_aa) {
length_dependent_weights_bb_aa_[loop_length] = weights;
}
else {
length_dependent_weights_bb_[loop_length] = weights;
}
}
}
}
std::map<String, Real> ScoringWeights::GetBackboneWeights(bool with_db,
bool with_aa,
bool length_dependent,
int loop_length) const {
std::map<String, Real> result;
const std::map<String, Real>& all_weights = GetWeights(with_db, with_aa,
length_dependent,
loop_length);
for (uint i = 0; i < bb_scoring_keys_.size(); ++i) {
result[bb_scoring_keys_[i]] = GetWeight(all_weights, bb_scoring_keys_[i]);
}
return result;
}
std::map<String, Real> ScoringWeights::GetAllAtomWeights(bool with_db,
bool length_dependent,
int loop_length) const {
std::map<String, Real> result;
const std::map<String, Real>& all_weights = GetWeights(with_db, true,
length_dependent,
loop_length);
for (uint i = 0; i < aa_scoring_keys_.size(); ++i) {
result[aa_scoring_keys_[i]] = GetWeight(all_weights, aa_scoring_keys_[i]);
}
return result;
}
}} // ns