Something went wrong on our end
-
Gerardo Tauriello authoredGerardo Tauriello authored
model.cc 37.95 KiB
// Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and
// Biozentrum - University of Basel
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <ctype.h>
#include <ost/log.hh>
#include <ost/dyn_cast.hh>
#include <ost/mol/mol.hh>
#include <ost/seq/aligned_column.hh>
#include <ost/conop/amino_acids.hh>
#include <ost/conop/rule_based.hh>
#include <ost/conop/heuristic.hh>
#include <ost/conop/conop.hh>
#include <ost/conop/compound_lib.hh>
#include <promod3/core/geom_base.hh>
#include <promod3/core/runtime_profiling.hh>
#include <promod3/scoring/scoring_object_loader.hh>
#include <promod3/modelling/model.hh>
using namespace ost::mol;
using namespace ost;
using namespace ost::seq;
using namespace ost::conop;
using namespace promod3::scoring;
namespace promod3 { namespace modelling {
namespace {
bool CheckBackboneAtoms(ResidueView res)
{
String atom_names[] = {"N", "CA", "C", "O"};
std::vector<String> missing;
for (int i = 0; i < 4; ++i) {
if (!res.FindAtom(atom_names[i])) {
missing.push_back(atom_names[i]);
}
}
if (!missing.empty()) {
std::stringstream ss;
ss << "residue " << res.GetQualifiedName() << " is missing atoms ";
for (std::vector<String>::const_iterator
i = missing.begin(), e = missing.end(); i != e; ++i) {
if (i != missing.begin()) {
ss << ", ";
}
ss << *i;
}
LOG_WARNING(ss.str());
return false;
}
return true;
}
void CleanupAtomConflicts(ResidueView& res) {
// remove atoms which are on top of any other atoms in the tpl entity
EntityView tpl = res.GetEntity();
AtomViewList atom_views = res.GetAtomList();
for (uint i = 0; i < atom_views.size(); ++i) {
AtomViewList on_top_atoms = tpl.FindWithin(atom_views[i].GetPos(), 0.0);
if (on_top_atoms.size() > 1) {
res.RemoveAtom(atom_views[i]);
// report
std::stringstream ss;
ss << "residue " << res.GetQualifiedName() << " has conflicting atom "
<< atom_views[i].GetName() << " - skipping atom.";
LOG_WARNING(ss.str());
}
}
}
} // anon ns
ModellingHandle ModellingHandle::Copy() const{
ModellingHandle r_mhandle;
// The model itself is copied with a deep copy
r_mhandle.model = model.Copy();
// All gaps have to be transferred to the new EntityHandle
for(StructuralGapList::const_iterator i = gaps.begin(); i != gaps.end(); ++i){
String chain_name = i->GetChainName();
mol::ChainHandle ch = r_mhandle.model.FindChain(chain_name);
r_mhandle.gaps.push_back(*i);
r_mhandle.gaps.back().Transfer(ch);
}
r_mhandle.seqres = seqres;
r_mhandle.psipred_predictions = psipred_predictions;
r_mhandle.profiles = profiles;
return r_mhandle;
}
int CountEnclosedGaps(const ModellingHandle& mhandle, const StructuralGap& gap,
bool insertions_only)
{
core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped(
"modelling::CountEnclosedGaps", 2);
int num_gaps = 0;
// extract info for this gap
String my_chain_name = gap.GetChainName();
ResNum my_b_res(0);
if (!gap.IsNTerminal()) my_b_res = gap.before.GetNumber();
ResNum my_a_res = my_b_res + gap.GetLength() + 1;
for (StructuralGapList::const_iterator i = mhandle.gaps.begin();
i != mhandle.gaps.end(); ++i) {
// skip different chains
if (i->GetChainName() != my_chain_name) continue;
// get res_nums
ResNum b_res(0);
if (!i->IsNTerminal()) b_res = i->before.GetNumber();
ResNum a_res = b_res + i->GetLength() + 1;
// check full overlap
if (b_res >= my_b_res && a_res <= my_a_res) {
if (!insertions_only || i->GetLength() > 0) ++num_gaps;
}
}
return num_gaps;
}
int ClearGaps(ModellingHandle& mhandle, const StructuralGap& gap) {
core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped(
"modelling::ClearGaps", 2);
// return -1 if no more gaps
int next_gap = -1;
// extract info for this gap
String my_chain_name = gap.GetChainName();
ResNum my_b_res(0);
if (!gap.IsNTerminal()) my_b_res = gap.before.GetNumber();
ResNum my_a_res = my_b_res + gap.GetLength() + 1;
for (StructuralGapList::iterator i = mhandle.gaps.begin();
i != mhandle.gaps.end(); ) {
// skip different chains
if (i->GetChainName() != my_chain_name) {
++i;
continue;
}
// get res_nums
ResNum b_res(0);
if (!i->IsNTerminal()) b_res = i->before.GetNumber();
ResNum a_res = b_res + i->GetLength() + 1;
// check full overlap
if (b_res >= my_b_res && a_res <= my_a_res) {
i = mhandle.gaps.erase(i);
if (i == mhandle.gaps.end()) {
next_gap = -1;
} else {
next_gap = i - mhandle.gaps.begin();
}
} else {
// check partial overlaps
if ( (my_b_res > b_res && my_b_res < a_res)
|| (my_a_res > b_res && my_a_res < a_res)) {
std::stringstream ss;
ss << "Tried to clear gap " << *i << " but " << gap
<< " is only overlapping not fully enclosing!";
throw promod3::Error(ss.str());
}
// moving on
++i;
}
}
return next_gap;
}
void MergeGaps(ModellingHandle& mhandle, uint index) {
core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped(
"modelling::MergeGaps", 2);
if(index >= mhandle.gaps.size()-1){
throw promod3::Error("Invalid index provided when merging gaps!");
}
if(mhandle.gaps[index].GetChain() != mhandle.gaps[index+1].GetChain()){
throw promod3::Error("Subsequent gap must be of the same chain to be merged in!");
}
if(mhandle.gaps[index].IsNTerminal() && mhandle.gaps[index+1].IsCTerminal()){
std::stringstream ss;
ss << "You don't want to merge an N-terminal gap with a C-terminal gap... ";
ss << "You might want to use the RemoveTerminalGaps function to get rid of them!";
throw promod3::Error(ss.str());
}
//assumes, that the gaps are sequential!
String full_seq = mhandle.seqres[mhandle.gaps[index].GetChainIndex()].GetGaplessString();
int before_number, after_number;
if(mhandle.gaps[index].IsNTerminal()) before_number = 0;
else before_number = mhandle.gaps[index].before.GetNumber().GetNum();
if(mhandle.gaps[index+1].IsCTerminal()) after_number = full_seq.size()+1;
else after_number = mhandle.gaps[index+1].after.GetNumber().GetNum();
String gap_seq = full_seq.substr(before_number,after_number-before_number-1);
//kill all residues in between the two new stems
mol::XCSEditor ed = mhandle.model.EditXCS(mol::BUFFERED_EDIT);
mol::ChainHandle chain = mhandle.gaps[index].GetChain();
mol::ResNum i(before_number+1);
mol::ResNum e(after_number);
for(; i < e; ++i){
mol::ResidueHandle res = chain.FindResidue(i);
if(res.IsValid()) ed.DeleteResidue(res);
}
StructuralGap new_gap(mhandle.gaps[index].before,
mhandle.gaps[index+1].after,
gap_seq);
mhandle.gaps[index] = new_gap;
mhandle.gaps.erase(mhandle.gaps.begin()+index+1);
}
int RemoveTerminalGaps(ModellingHandle& mhandle) {
core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped(
"modelling::RemoveTerminalGaps", 2);
int removed_gaps = 0;
for(StructuralGapList::iterator i = mhandle.gaps.begin(); i != mhandle.gaps.end(); ){
if(i->IsNTerminal() || i->IsCTerminal()){
i = mhandle.gaps.erase(i);
++removed_gaps;
}
else ++i;
}
LOG_INFO("Removed " << removed_gaps << " terminal gap(s).");
return removed_gaps;
}
void ReorderGaps(ModellingHandle& mhandle){
std::vector<std::vector<uint> > gap_data;
for(uint i = 0; i < mhandle.gaps.size(); ++i){
gap_data.push_back(std::vector<uint>());
//first element is chain index
gap_data.back().push_back(mhandle.gaps[i].GetChainIndex());
//second element is start resnum
uint start_resnum;
if(mhandle.gaps[i].IsNTerminal()){
start_resnum = 1;
}
else{
start_resnum = mhandle.gaps[i].before.GetNumber().GetNum() + 1;
}
gap_data.back().push_back(start_resnum);
//last element is the idx of the current gap
gap_data.back().push_back(i);
}
std::sort(gap_data.begin(), gap_data.end());
StructuralGapList sorted_gaps;
for(uint i = 0; i < gap_data.size(); ++i){
sorted_gaps.push_back(mhandle.gaps[gap_data[i].back()].Copy());
}
mhandle.gaps = sorted_gaps;
}
void SetupDefaultBackboneScoring(ModellingHandle& mhandle){
// setup environment
BackboneScoreEnvPtr env(new BackboneScoreEnv(mhandle.seqres));
env->SetInitialEnvironment(mhandle.model);
BackboneOverallScorerPtr scorer =
promod3::scoring::LoadDefaultBackboneOverallScorer();
scorer->AttachEnvironment(*env);
mhandle.backbone_scorer_env = env;
mhandle.backbone_scorer = scorer;
LOG_INFO("Initialized default backbone scoring for modelling.");
}
void SetupDefaultAllAtomScoring(ModellingHandle& mhandle){
// setup scoring
loop::AllAtomEnvPtr env(new loop::AllAtomEnv(mhandle.seqres));
env->SetInitialEnvironment(mhandle.model);
AllAtomOverallScorerPtr scorer =
promod3::scoring::LoadDefaultAllAtomOverallScorer();
scorer->AttachEnvironment(*env);
// setup sidechaining needed for scoring
loop::AllAtomEnvPtr env_sc(new loop::AllAtomEnv(mhandle.seqres));
env_sc->SetInitialEnvironment(mhandle.model);
SidechainReconstructorPtr sc_rec;
sc_rec.reset(new SidechainReconstructor);
sc_rec->AttachEnvironment(*env_sc);
// set mhandle values
mhandle.all_atom_scorer_env = env;
mhandle.all_atom_scorer = scorer;
mhandle.all_atom_sidechain_env = env_sc;
mhandle.sidechain_reconstructor = sc_rec;
LOG_INFO("Initialized default all atom scoring for modelling.");
}
void SetSequenceProfiles(ModellingHandle& mhandle,
const seq::ProfileHandleList& profiles){
// check whether the profiles match the internal SEQRES
if(static_cast<int>(profiles.size()) != mhandle.seqres.GetCount()){
throw promod3::Error("Must have one profile per chain!");
}
for(uint i = 0; i < profiles.size(); ++i){
if(profiles[i]->GetSequence() != mhandle.seqres[i].GetString()){
throw promod3::Error("Sequence in profile must match sequence in SEQRES!");
}
}
// set the profiles
mhandle.profiles = profiles;
}
void SetPsipredPredictions(ModellingHandle& mhandle,
const promod3::loop::PsipredPredictionList&
psipred_predictions){
// check whether the predictions match the internal SEQRES
if(static_cast<int>(psipred_predictions.size()) != mhandle.seqres.GetCount()){
throw promod3::Error("Must have one prediction per chain!");
}
for(uint i = 0; i < psipred_predictions.size(); ++i){
// there is no sequence to check, let's at least check the size of the
// psipred prediction
if(static_cast<int>(psipred_predictions[i]->size()) !=
mhandle.seqres[i].GetLength()){
throw promod3::Error("Prediction must have same size than SEQRES!");
}
}
// set the predictions
mhandle.psipred_predictions = psipred_predictions;
}
void MergeMHandle(const ModellingHandle& source_mhandle,
ModellingHandle& target_mhandle,
uint source_chain_idx, uint target_chain_idx,
uint start_resnum, uint end_resnum,
const geom::Mat4& transform){
//check the input data
if(source_chain_idx >= static_cast<uint>(source_mhandle.seqres.GetCount()) ||
target_chain_idx >= static_cast<uint>(target_mhandle.seqres.GetCount())){
throw promod3::Error("Invalid chain idx encountered!");
}
if(source_mhandle.seqres[source_chain_idx].GetString() !=
target_mhandle.seqres[target_chain_idx].GetString()){
throw promod3::Error("The SEQRES of the specified chains must match!");
}
if(start_resnum <= 0){
throw promod3::Error("start_resnum must be at least 1!");
}
if(start_resnum > end_resnum){
throw promod3::Error("start_resnum must be smaller or equal end_resnum!");
}
const String& seqres = source_mhandle.seqres[source_chain_idx].GetString();
if(end_resnum > seqres.size()){
throw promod3::Error("end_resnum is too big for the underlying SEQRES of "
"the chain you specified!");
}
//start_resnum and end_resnum must not lie within a gap in the source mhandle
for(uint gap_idx = 0; gap_idx < source_mhandle.gaps.size(); ++gap_idx){
const StructuralGap& gap = source_mhandle.gaps[gap_idx];
if(source_chain_idx == gap.GetChainIndex()){
uint gap_before_resnum = 0;
uint gap_after_resnum = seqres.size() + 1;
if(gap.before.IsValid()){
gap_before_resnum = gap.before.GetNumber().GetNum();
}
if(gap.after.IsValid()){
gap_after_resnum = gap.after.GetNumber().GetNum();
}
if(gap_before_resnum < start_resnum && gap_after_resnum > start_resnum){
std::stringstream ss;
ss <<"start_resnum ("<<start_resnum << ") is enclosed by: " << gap;
throw promod3::Error(ss.str());
}
if(gap_before_resnum < end_resnum && gap_after_resnum > end_resnum){
std::stringstream ss;
ss <<"end_resnum ("<<end_resnum << ") is enclosed by: " << gap;
throw promod3::Error(ss.str());
}
}
}
mol::ChainHandle source_chain = source_mhandle.model.GetChainList()[source_chain_idx];
mol::ChainHandle target_chain = target_mhandle.model.GetChainList()[target_chain_idx];
//residues with start_resnum and end_resnum must be there in source
mol::ResNum num(start_resnum);
mol::ResidueHandle source_start_res = source_chain.FindResidue(num);
num.SetNum(end_resnum);
mol::ResidueHandle source_end_res = source_chain.FindResidue(num);
if(!(source_start_res.IsValid() && source_end_res.IsValid())){
throw promod3::Error("Residues with start_resnum and end_resnum must be "
"present in specified chain of source_mhandle!");
}
//clear all residues in target mhandle in specified range
mol::XCSEditor ed = target_mhandle.model.EditXCS(mol::BUFFERED_EDIT);
for(uint i = start_resnum; i <= end_resnum; ++i){
num.SetNum(i);
mol::ResidueHandle r = target_chain.FindResidue(num);
if(r.IsValid()) ed.DeleteResidue(r);
}
//copy over coordinates
mol::ResidueHandle source_r, target_r, last_target_r;
num.SetNum(start_resnum-1);
last_target_r = target_chain.FindResidue(num);
for(uint i = start_resnum; i <= end_resnum; ++i){
num.SetNum(i);
source_r = source_chain.FindResidue(num);
if(source_r.IsValid()){
mol::AtomHandleList source_atoms = source_r.GetAtomList();
target_r = ed.AppendResidue(target_chain, source_r);
for(mol::AtomHandleList::iterator at_it = source_atoms.begin();
at_it != source_atoms.end(); ++at_it){
geom::Vec4 t_pos = geom::Vec4(at_it->GetPos());
t_pos = transform * t_pos;
geom::Vec3 pos(t_pos[0], t_pos[1], t_pos[2]);
ed.InsertAtom(target_r, at_it->GetName(), pos, at_it->GetElement(),
at_it->GetOccupancy(), at_it->GetBFactor(),
at_it->IsHetAtom());
}
//the atoms are now copied over and the positions are transformed
//but we still need the bonds
for(mol::AtomHandleList::iterator at_it = source_atoms.begin();
at_it != source_atoms.end(); ++at_it){
//iterate over all connected atoms and generate a bond if they're
//from the same residue. Peptide bonds are built seperately
mol::AtomHandleList connected_atoms = at_it->GetBondPartners();
for(mol::AtomHandleList::iterator connected_at_it =
connected_atoms.begin(); connected_at_it != connected_atoms.end();
++connected_at_it){
if(at_it->GetResidue() == connected_at_it->GetResidue()){
mol::AtomHandle at_one = target_r.FindAtom(at_it->GetName());
mol::AtomHandle at_two = target_r.FindAtom(connected_at_it->GetName());
if(at_one.IsValid() && at_two.IsValid()) ed.Connect(at_one, at_two);
}
}
}
//build peptide bond if the last and the current target residues are
//consecutive in terms of residue number
if(last_target_r.IsValid()){
if(last_target_r.GetNumber() + 1 == target_r.GetNumber()){
mol::AtomHandle c = last_target_r.FindAtom("C");
mol::AtomHandle n = target_r.FindAtom("N");
if(c.IsValid() && n.IsValid()) ed.Connect(c, n);
}
}
last_target_r = target_r;
}
}
//we potentially still have to build the last peptide bond
num.SetNum(end_resnum + 1);
target_r = target_chain.FindResidue(num);
if(target_r.IsValid() && last_target_r.IsValid() &&
last_target_r.GetNumber() + 1 == target_r.GetNumber()){
mol::AtomHandle c = last_target_r.FindAtom("C");
mol::AtomHandle n = target_r.FindAtom("N");
if(c.IsValid() && n.IsValid()) ed.Connect(c, n);
}
ed.ReorderResidues(target_chain);
//fix torsions
conop::AssignBackboneTorsions(target_chain);
//delete the gaps in the target mhandle or shorten them...
num.SetNum(start_resnum);
mol::ResidueHandle target_start_res = target_chain.FindResidue(num);
num.SetNum(end_resnum);
mol::ResidueHandle target_end_res = target_chain.FindResidue(num);
StructuralGapList gaps_to_add;
for(uint gap_idx = 0; gap_idx < target_mhandle.gaps.size(); ++gap_idx){
const StructuralGap& gap = target_mhandle.gaps[gap_idx];
bool delete_gap = false;
if(gap.GetChainIndex() == target_chain_idx){
uint gap_before_resnum = 0;
uint gap_after_resnum = seqres.size() + 1;
if(gap.before.IsValid()){
gap_before_resnum = gap.before.GetNumber().GetNum();
}
if(gap.after.IsValid()){
gap_after_resnum = gap.after.GetNumber().GetNum();
}
if(gap_before_resnum >= start_resnum &&
gap_after_resnum <= end_resnum){
//the gap is fully enclosed by start and end => can be silently deleted
delete_gap = true;
}
if(gap_before_resnum < start_resnum && gap_after_resnum >= start_resnum){
//the gap extends towards nter
uint new_length = start_resnum - gap_before_resnum - 1;
String gap_seq = seqres.substr(gap_before_resnum, new_length);
gaps_to_add.push_back(StructuralGap(gap.before,
target_start_res,
gap_seq));
delete_gap = true;
}
if(gap_after_resnum > end_resnum && gap_before_resnum <= end_resnum){
//the gap extends towards cter
uint new_length = gap_after_resnum - end_resnum - 1;
String gap_seq = seqres.substr(end_resnum, new_length);
gaps_to_add.push_back(StructuralGap(target_end_res,
gap.after,
gap_seq));
delete_gap = true;
}
}
if(delete_gap){
target_mhandle.gaps.erase(target_mhandle.gaps.begin() + gap_idx);
--gap_idx;
}
}
//add all original gaps, that had to be processed before
for(StructuralGapList::iterator i = gaps_to_add.begin();
i != gaps_to_add.end(); ++i){
target_mhandle.gaps.push_back(*i);
}
//copy over all enclosed gaps from the source
for(StructuralGapList::const_iterator i = source_mhandle.gaps.begin();
i != source_mhandle.gaps.end(); ++i){
if(i->GetChainIndex() == source_chain_idx){
uint gap_before_resnum = 0;
uint gap_after_resnum = seqres.size() + 1;
if(i->before.IsValid()){
gap_before_resnum = i->before.GetNumber().GetNum();
}
if(i->after.IsValid()){
gap_after_resnum = i->after.GetNumber().GetNum();
}
if(gap_before_resnum >= start_resnum && gap_after_resnum <= end_resnum){
StructuralGap copied_gap = i->Copy();
copied_gap.Transfer(target_chain);
target_mhandle.gaps.push_back(copied_gap);
}
}
}
//finally reorder the gaps to ensure consistency
ReorderGaps(target_mhandle);
if (IsBackboneScoringSetUp(target_mhandle)) {
target_mhandle.backbone_scorer_env->SetInitialEnvironment(target_mhandle.model);
}
if(IsAllAtomScoringSetUp(target_mhandle)){
target_mhandle.all_atom_sidechain_env->SetInitialEnvironment(target_mhandle.model);
}
}
void InsertLoop(ModellingHandle& mhandle, const loop::BackboneList& bb_list,
uint start_resnum, uint chain_idx) {
// update model
mol::ChainHandleList chain_list = mhandle.model.GetChainList();
bb_list.InsertInto(chain_list[chain_idx], start_resnum);
// update scoring
if (IsBackboneScoringSetUp(mhandle)) {
mhandle.backbone_scorer_env->SetEnvironment(bb_list, start_resnum,
chain_idx);
}
if (IsAllAtomScoringSetUp(mhandle)) {
mhandle.all_atom_sidechain_env->SetEnvironment(bb_list, start_resnum,
chain_idx);
}
}
int InsertLoopClearGaps(ModellingHandle& mhandle,
const loop::BackboneList& bb_list,
const StructuralGap& gap) {
// check consistency
if (bb_list.size() != gap.GetFullSeq().size()) {
throw promod3::Error("Inconsistent gap for given loop to insert.");
}
// update model
uint chain_idx = gap.GetChainIndex();
uint start_resnum = 1;
if (!gap.IsNTerminal()) start_resnum = gap.before.GetNumber().GetNum();
InsertLoop(mhandle, bb_list, start_resnum, chain_idx);
return ClearGaps(mhandle, gap);
}
bool CopyConserved(ResidueView src_res, ResidueHandle dst_res, XCSEditor& edi,
bool& has_cbeta)
{
// check if the residue name of dst and src are the same. In the easy
// case, the two residue names match and we just copy over all atoms.
// If they don't match, we are dealing with modified residues.
if (dst_res.GetName() == src_res.GetName()) {
return CopyIdentical(src_res, dst_res, edi, has_cbeta);
} else if (src_res.GetName() == "MSE") {
return CopyMSE(src_res, dst_res, edi, has_cbeta);
} else {
return CopyModified(src_res, dst_res, edi, has_cbeta);
}
}
bool CopyIdentical(ResidueView src_res, ResidueHandle dst_res, XCSEditor& edi,
bool& has_cbeta)
{
AtomViewList atoms = src_res.GetAtomList();
for (AtomViewList::const_iterator
a = atoms.begin(), e = atoms.end(); a != e; ++a) {
if (a->GetName() == "CB") {
has_cbeta = true;
}
if (a->GetName() == "OXT") {
continue;
}
if (a->GetElement() == "D" || a->GetElement() == "H") {
continue;
}
edi.InsertAtom(dst_res, a->GetName(), a->GetPos(), a->GetElement(),
1.0, a->GetBFactor());
}
return true;
}
bool CopyMSE(ResidueView src_res, ResidueHandle dst_res, XCSEditor& edi,
bool& has_cbeta)
{
// selenium methionine is easy. We copy all atoms and substitute the SE
// with SD
AtomViewList atoms=src_res.GetAtomList();
for (AtomViewList::const_iterator
a = atoms.begin(), e = atoms.end(); a != e; ++a) {
if (a->GetName() == "CB") {
has_cbeta = true;
}
if (a->GetName() == "OXT" || a->GetElement() == "D" || a->GetElement() == "H") {
continue;
}
if (a->GetName() == "SE") {
edi.InsertAtom(dst_res, "SD", a->GetPos(), "S",
1.0, a->GetBFactor());
} else {
edi.InsertAtom(dst_res, a->GetName(), a->GetPos(), a->GetElement(),
1.0, a->GetBFactor());
}
}
return true;
}
bool CopyModified(ResidueView src_res, ResidueHandle dst_res,
XCSEditor& edi, bool& has_cbeta)
{
// FIXME: for now this functions ignores chirality changes of sidechain
// chiral atoms. To detect those changes, we would need to store the
// chirality of those centers in the compound library.
// For now, this function just handles cases where the src_res contains
// additional atoms, but the dst_atom doesn't contain any atoms the src_res
// doesn't have. It these two requirements are not met, we fall back to
// CopyNonConserved.
// first let's get our hands on the component library
conop::CompoundLibPtr comp_lib = conop::Conopology::Instance().GetDefaultLib();
CompoundPtr src_compound = comp_lib->FindCompound(src_res.GetName(),
Compound::PDB);
if (!src_compound) {
// OK, that's bad. Let's copy the backbone and be done with it!
return CopyNonConserved(src_res, dst_res, edi, has_cbeta);
}
// since dst_res is one of the 20 amino acids, we don't have to check for
// existence of the compound. We know it is there!
CompoundPtr dst_compound = comp_lib->FindCompound(dst_res.GetName(),
Compound::PDB);
std::set<String> dst_atoms;
std::set<String> src_atoms;
// to compare the atoms of dst_res with those of src_res, let's create two
// sets containing all heavy atoms.
for (AtomSpecList::const_iterator i = dst_compound->GetAtomSpecs().begin(),
e = dst_compound->GetAtomSpecs().end(); i != e; ++i) {
if (i->element == "H" || i->element == "D") {
continue;
}
dst_atoms.insert(i->name);
}
for (AtomSpecList::const_iterator i = src_compound->GetAtomSpecs().begin(),
e = src_compound->GetAtomSpecs().end(); i != e; ++i) {
if (i->element == "H" || i->element == "D") {
continue;
}
src_atoms.insert(i->name);
}
for (std::set<String>::const_iterator i = dst_atoms.begin(),
e = dst_atoms.end(); i != e; ++i) {
if (src_atoms.find(*i) == src_atoms.end()) {
return CopyNonConserved(src_res, dst_res, edi, has_cbeta);
}
}
// Muahaha, all is good. Let's copy the atoms. Muahaha
AtomViewList atoms = src_res.GetAtomList();
for (AtomViewList::const_iterator
a = atoms.begin(), e = atoms.end(); a != e; ++a) {
if (a->GetName() == "CB") {
if (dst_res.GetName() == "GLY") {
continue;
}
has_cbeta = true;
}
if (a->GetName() == "OXT") {
continue;
}
if (a->GetElement() == "D" || a->GetElement() == "H") {
continue;
}
if (dst_atoms.find(a->GetName()) == dst_atoms.end()) {
continue;
}
edi.InsertAtom(dst_res, a->GetName(), a->GetPos(), a->GetElement(),
1.0, a->GetBFactor());
}
return true;
}
bool CopyNonConserved(ResidueView src_res, ResidueHandle dst_res,
XCSEditor& edi, bool& has_cbeta)
{
AtomViewList atoms = src_res.GetAtomList();
for (AtomViewList::const_iterator
a = atoms.begin(), e = atoms.end(); a != e; ++a) {
String name=a->GetName();
if (name == "CA" || name == "N" || name == "C" || name == "O" || name == "CB") {
if (name == "CB") {
if (dst_res.GetName() == "GLY") {
continue;
}
has_cbeta = true;
}
if (a->GetElement() == "D" || a->GetElement() == "H") {
continue;
}
edi.InsertAtom(dst_res, a->GetName(), a->GetPos(), a->GetElement(),
1.0, a->GetBFactor());
}
}
return false;
}
// HELPER: for export SPDBV projects in SWISS-MODEL
// -> there it is expected that residues of insertions are present as dummy
// residues which have atoms with coordinates all set to 9999.999.
bool InsertDummyAtoms(ResidueHandle dst_res, XCSEditor& edi) {
conop::CompoundLibPtr comp_lib=conop::Conopology::Instance().GetDefaultLib();
CompoundPtr dst_compound=comp_lib->FindCompound(dst_res.GetName(),
Compound::PDB);
if (!dst_compound) {
throw promod3::Error("compound for residue '"+dst_res.GetName()+
"' does not exist");
}
Real dummy_coord = 9999.999;
for (AtomSpecList::const_iterator i = dst_compound->GetAtomSpecs().begin(),
e = dst_compound->GetAtomSpecs().end(); i != e; ++i) {
if (dst_res.FindAtom(i->name)) {
continue;
}
if (i->element == "D" || i->element == "H" || i->is_leaving) {
continue;
}
edi.InsertAtom(dst_res, i->name,
geom::Vec3(dummy_coord, dummy_coord, dummy_coord),
i->element, 1.0, 99.99, false);
}
return true;
}
ModellingHandle BuildRawModel(const seq::AlignmentHandle& aln,
bool include_ligands,
const String& chain_names,
bool spdbv_style)
{
seq::AlignmentList l;
l.push_back(aln);
return BuildRawModel(l,
include_ligands,
chain_names,
spdbv_style);
}
ModellingHandle BuildRawModel(const seq::AlignmentList& aln,
bool include_ligands,
const String& chain_names,
bool spdbv_style)
{
core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped(
"modelling::BuildRawModel", 2);
ModellingHandle result;
EntityHandle model=CreateEntity();
XCSEditor edi=model.EditXCS(BUFFERED_EDIT);
result.model=model;
uint chain_name_idx = 0;
for(seq::AlignmentList::const_iterator it=aln.begin();
it!=aln.end(); ++it, ++chain_name_idx) {
if(chain_name_idx >= chain_names.size()) {
throw promod3::Error("running out of chain names");
}
StructuralGapList gap_list;
BuildRawChain(*it, edi, gap_list, chain_names[chain_name_idx], spdbv_style);
result.gaps.insert(result.gaps.end(), gap_list.begin(), gap_list.end());
seq::SequenceHandle seqres = seq::CreateSequence(
chain_names.substr(chain_name_idx, 1),
it->GetSequence(0).GetGaplessString());
result.seqres.AddSequence(seqres);
}
// handle ligands
if (include_ligands) {
AddLigands(aln, edi, model);
}
HeuristicProcessor heu_proc;
// This is on purpose!
// The heuristic processor would connect residues by distance criteria.
// Residues flanking an insertion would therefore be connected!
// We rather build the peptide bonds in BuildRawChain.
heu_proc.SetConnectAminoAcids(false);
heu_proc.Process(model);
result.model=model;
return result;
}
void AddLigands(const seq::AlignmentList& aln, XCSEditor& edi,
EntityHandle& model)
{
core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped(
"modelling::AddLigands", 2);
// make sure we handle duplicates
std::set<mol::EntityHandle> unique_handles;
std::vector<mol::ResidueViewList> ligand_views;
for(seq::AlignmentList::const_iterator it = aln.begin();
it != aln.end(); ++it) {
mol::EntityHandle handle = it->GetSequence(1).GetAttachedView().GetHandle();
// did we already do that?
if (unique_handles.find(handle) != unique_handles.end()) continue;
mol::EntityView ligand_view = handle.Select("ligand=true or cname='_'");
mol::ResidueViewList ligands = ligand_view.GetResidueList();
if (!ligands.empty()) {
ligand_views.push_back(ligands);
}
unique_handles.insert(handle);
}
if (ligand_views.size()>0) {
mol::ChainHandle ligand_chain = edi.InsertChain("_");
for(std::vector<mol::ResidueViewList>::iterator ligand_view_it =
ligand_views.begin();
ligand_view_it != ligand_views.end(); ++ligand_view_it) {
for (mol::ResidueViewList::iterator
i = ligand_view_it->begin(), e = ligand_view_it->end(); i != e; ++i) {
ResidueView src_ligand = *i;
AtomViewList atoms = src_ligand.GetAtomList();
bool within = false;
for (AtomViewList::iterator j = atoms.begin(),
e2 = atoms.end(); j != e2; ++j) {
// check if there is at least one residue within 6.0 A of the
// ligand. If not, don't bother to add the ligand.
AtomHandleList atoms_within = model.FindWithin(j->GetPos(), 6.0);
for (AtomHandleList::iterator
k = atoms_within.begin(), e3 = atoms_within.end(); k != e3; ++k) {
if (k->GetName() == "CA") {
within = true;
break;
}
}
if (within) {
break;
}
}
if (!within) {
continue;
}
// deep copy of residue into ligand chain
mol::ResidueHandle dst_res = edi.AppendResidue(ligand_chain,
src_ligand.GetHandle(),
true);
dst_res.SetIsLigand(true);
}
}
}
}
void BuildRawChain(const seq::AlignmentHandle& aln,
XCSEditor& edi,
StructuralGapList& gap_list,
char chain_name,
bool spdbv_style)
{
core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped(
"modelling::BuildRawChain", 2);
if (aln.GetCount() != 2) {
throw promod3::Error("alignment must contain exactly two sequences");
}
if (!aln.GetSequence(1).HasAttachedView()) {
throw promod3::Error("second sequence must have a view attached");
}
if (aln.GetSequence(0).GetOffset() != 0) {
throw promod3::Error("we cannot honor non-zero offsets for targets");
}
ChainHandle chain=edi.InsertChain(String(1,chain_name));
// we enforce res. numbering for target to start at 1
int res_num = 0;
int last_num = res_num;
int last_index = -1;
ResidueHandle before;
String gap_seq;
for (int i = 0; i < aln.GetLength(); ++i) {
AlignedColumn col = aln[i];
const char trg_olc = col[0];
if (trg_olc == '-') {
continue;
}
res_num += 1;
ResidueView src_res = col.GetResidue(1);
if (!src_res.IsValid()) {
gap_seq += trg_olc;
if (spdbv_style) {
String rname = conop::OneLetterCodeToResidueName(trg_olc);
ResidueHandle dst_res = edi.AppendResidue(chain, rname, ResNum(res_num));
InsertDummyAtoms(dst_res, edi);
}
continue;
}
const char src_olc = src_res.GetOneLetterCode();
// sanity check that src_res matches aligned sequence
if (src_olc != col[1]) {
std::stringstream ss;
ss << "Alignment-structure mismatch at pos " << i << " in chain " <<
chain_name <<
", alignment is '" << col[1] << "' structure residue is '" <<
src_olc << "'";
// We often have non-standard AA residues changing OLC mapping over time.
// This can make it likely for this check to fail and hence we only
// check the 20 standard amino acids.
if (conop::ResidueToAminoAcid(src_res.GetHandle()) == conop::XXX) {
ss << ". Ok for modified residue " << src_res.GetName() << ".";
LOG_WARNING(ss.str())
} else {
ss << ". Not ok for standard amino acid " << src_res.GetName() << "!";
throw promod3::Error(ss.str());
}
}
// remove atoms with conflicting positions (i.e. on top of each other)
CleanupAtomConflicts(src_res);
// check for complete backbone, or in case of Calpha only model building,
// if the src_res has a Calpha atom
if (!CheckBackboneAtoms(src_res)) {
LOG_INFO(src_res << " has incomplete backbone. skipping");
gap_seq += trg_olc;
continue;
}
// check if src_res is L_PEPTIDE_LINKING or PEPTIDE_LINKING.
if (src_res.GetChemClass() == mol::ChemClass::D_PEPTIDE_LINKING) {
LOG_INFO(src_res << " is d-peptide-linking. skipping");
gap_seq += trg_olc;
continue;
}
String name = conop::OneLetterCodeToResidueName(trg_olc);
ResidueHandle dst_res = edi.AppendResidue(chain, name, ResNum(res_num));
dst_res.SetIsProtein(true);
bool consecutive_residues = false;
if (last_num + 1 != res_num) {
// that's an insertion
gap_list.push_back(StructuralGap(before, dst_res, gap_seq));
gap_seq = "";
} else if (last_index > -1 && last_index + 1 != i) {
// that's a deletion
gap_list.push_back(StructuralGap(before, dst_res, ""));
} else {
consecutive_residues = true;
}
bool has_cbeta = false;
if (toupper(trg_olc) == toupper(src_olc)) {
CopyConserved(src_res, dst_res, edi, has_cbeta);
} else {
CopyNonConserved(src_res, dst_res, edi, has_cbeta);
}
dst_res.SetSecStructure(src_res.GetSecStructure());
// insert Cbeta, unless dst residue is glycine.
if (!has_cbeta && !(trg_olc == 'G' || trg_olc == 'g')) {
mol::AtomHandle ca = dst_res.FindAtom("CA");
mol::AtomHandle n = dst_res.FindAtom("N");
mol::AtomHandle c = dst_res.FindAtom("C");
geom::Vec3 cbeta_pos;
promod3::core::ConstructCBetaPos(n.GetPos(), ca.GetPos(), c.GetPos(),
cbeta_pos);
edi.InsertAtom(dst_res, "CB", cbeta_pos, "C");
}
if (spdbv_style) {
InsertDummyAtoms(dst_res, edi);
}
if (consecutive_residues && before.IsValid()) {
AtomHandle c_before = before.FindAtom("C");
AtomHandle n_this = dst_res.FindAtom("N");
if(conop::IsBondFeasible(c_before, n_this)) {
edi.Connect(c_before, n_this);
} else {
// not possible to generate feasible peptide bond
// add gap to enforce rebuilding during the modelling process
gap_list.push_back(StructuralGap(before, dst_res, ""));
}
}
last_num = res_num;
last_index = i;
before = dst_res;
}
// check if we have added anything at all
if (last_num == 0) {
LOG_ERROR("No residues added for chain " << chain_name << "! "
"This typically indicates that the template is a Calpha trace "
"or only contains D-peptides.")
} else if (last_num != res_num) {
// add trailing gap (only if there are residues at all)
gap_list.push_back(StructuralGap(before, ResidueHandle(), gap_seq));
}
}
}}