Skip to content
Snippets Groups Projects
model.cc 37.95 KiB
// Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and
//                          Biozentrum - University of Basel
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//   http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include <ctype.h>
#include <ost/log.hh>
#include <ost/dyn_cast.hh>
#include <ost/mol/mol.hh>
#include <ost/seq/aligned_column.hh>
#include <ost/conop/amino_acids.hh>
#include <ost/conop/rule_based.hh>
#include <ost/conop/heuristic.hh>
#include <ost/conop/conop.hh>
#include <ost/conop/compound_lib.hh>
#include <promod3/core/geom_base.hh>
#include <promod3/core/runtime_profiling.hh>
#include <promod3/scoring/scoring_object_loader.hh>
#include <promod3/modelling/model.hh>

using namespace ost::mol;
using namespace ost;
using namespace ost::seq;
using namespace ost::conop;
using namespace promod3::scoring;

namespace promod3 { namespace modelling {

namespace {

bool CheckBackboneAtoms(ResidueView res)
{
  String atom_names[] = {"N", "CA", "C", "O"};
  std::vector<String> missing;
  for (int i = 0; i < 4; ++i) {
    if (!res.FindAtom(atom_names[i])) {
      missing.push_back(atom_names[i]);
    }
  }
  if (!missing.empty()) {
    std::stringstream ss;
    ss << "residue " << res.GetQualifiedName() << " is missing atoms ";
    for (std::vector<String>::const_iterator
         i = missing.begin(), e = missing.end(); i != e; ++i) {
      if (i != missing.begin()) {
        ss << ", ";
      }
      ss << *i;
    }
    LOG_WARNING(ss.str());
    return false;
  }
  return true;
}

void CleanupAtomConflicts(ResidueView& res) {
  // remove atoms which are on top of any other atoms in the tpl entity
  EntityView tpl = res.GetEntity();
  AtomViewList atom_views = res.GetAtomList();
  for (uint i = 0; i < atom_views.size(); ++i) {
    AtomViewList on_top_atoms = tpl.FindWithin(atom_views[i].GetPos(), 0.0);
    if (on_top_atoms.size() > 1) {
      res.RemoveAtom(atom_views[i]);
      // report
      std::stringstream ss;
      ss << "residue " << res.GetQualifiedName() << " has conflicting atom "
         << atom_views[i].GetName() << " - skipping atom.";
      LOG_WARNING(ss.str());
    }
  }
}

} // anon ns

ModellingHandle ModellingHandle::Copy() const{
  
  ModellingHandle r_mhandle;

  // The model itself is copied with a deep copy
  r_mhandle.model = model.Copy();

  // All gaps have to be transferred to the new EntityHandle  
  for(StructuralGapList::const_iterator i = gaps.begin(); i != gaps.end(); ++i){
    String chain_name = i->GetChainName();
    mol::ChainHandle ch = r_mhandle.model.FindChain(chain_name);
    r_mhandle.gaps.push_back(*i);
    r_mhandle.gaps.back().Transfer(ch);
  }

  r_mhandle.seqres = seqres;
  r_mhandle.psipred_predictions = psipred_predictions;
  r_mhandle.profiles = profiles;

  return r_mhandle;
}


int CountEnclosedGaps(const ModellingHandle& mhandle, const StructuralGap& gap,
                      bool insertions_only)
{
  core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped(
                                "modelling::CountEnclosedGaps", 2);

  int num_gaps = 0;
  
  // extract info for this gap
  String my_chain_name = gap.GetChainName();
  ResNum my_b_res(0);
  if (!gap.IsNTerminal()) my_b_res = gap.before.GetNumber();
  ResNum my_a_res = my_b_res + gap.GetLength() + 1;

  for (StructuralGapList::const_iterator i = mhandle.gaps.begin();
       i != mhandle.gaps.end(); ++i) {
    
    // skip different chains
    if (i->GetChainName() != my_chain_name) continue;

    // get res_nums
    ResNum b_res(0);
    if (!i->IsNTerminal()) b_res = i->before.GetNumber();
    ResNum a_res = b_res + i->GetLength() + 1;

    // check full overlap
    if (b_res >= my_b_res && a_res <= my_a_res) {
      if (!insertions_only || i->GetLength() > 0) ++num_gaps;
    }
  }
  return num_gaps;
}

int ClearGaps(ModellingHandle& mhandle, const StructuralGap& gap) {
  
  core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped(
                                "modelling::ClearGaps", 2);

  // return -1 if no more gaps
  int next_gap = -1;
  
  // extract info for this gap
  String my_chain_name = gap.GetChainName();
  ResNum my_b_res(0);
  if (!gap.IsNTerminal()) my_b_res = gap.before.GetNumber();
  ResNum my_a_res = my_b_res + gap.GetLength() + 1;

  for (StructuralGapList::iterator i = mhandle.gaps.begin();
       i != mhandle.gaps.end(); ) {
    
    // skip different chains
    if (i->GetChainName() != my_chain_name) {
      ++i;
      continue;
    }

    // get res_nums
    ResNum b_res(0);
    if (!i->IsNTerminal()) b_res = i->before.GetNumber();
    ResNum a_res = b_res + i->GetLength() + 1;

    // check full overlap
    if (b_res >= my_b_res && a_res <= my_a_res) {
      i = mhandle.gaps.erase(i);
      if (i == mhandle.gaps.end()) {
        next_gap = -1;
      } else {
        next_gap = i - mhandle.gaps.begin();
      }
    } else {
      // check partial overlaps
      if (   (my_b_res > b_res && my_b_res < a_res)
          || (my_a_res > b_res && my_a_res < a_res)) {
        std::stringstream ss;
        ss << "Tried to clear gap " << *i << " but " << gap
           << " is only overlapping not fully enclosing!";
        throw promod3::Error(ss.str());
      }
      // moving on
      ++i;
    }
  }
  return next_gap;
}

void MergeGaps(ModellingHandle& mhandle, uint index) {

  core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped(
                                "modelling::MergeGaps", 2);

  if(index >= mhandle.gaps.size()-1){
    throw promod3::Error("Invalid index provided when merging gaps!");
  }

  if(mhandle.gaps[index].GetChain() != mhandle.gaps[index+1].GetChain()){
    throw promod3::Error("Subsequent gap must be of the same chain to be merged in!");
  }

  if(mhandle.gaps[index].IsNTerminal() && mhandle.gaps[index+1].IsCTerminal()){
    std::stringstream ss;
    ss << "You don't want to merge an N-terminal gap with a C-terminal gap... ";
    ss << "You might want to use the RemoveTerminalGaps function to get rid of them!";
    throw promod3::Error(ss.str());
  }

  //assumes, that the gaps are sequential!
  String full_seq = mhandle.seqres[mhandle.gaps[index].GetChainIndex()].GetGaplessString();
  int before_number, after_number;

  if(mhandle.gaps[index].IsNTerminal()) before_number = 0;
  else before_number = mhandle.gaps[index].before.GetNumber().GetNum();
  
  if(mhandle.gaps[index+1].IsCTerminal()) after_number = full_seq.size()+1;
  else after_number = mhandle.gaps[index+1].after.GetNumber().GetNum();

  String gap_seq = full_seq.substr(before_number,after_number-before_number-1);

  //kill all residues in between the two new stems
  mol::XCSEditor ed = mhandle.model.EditXCS(mol::BUFFERED_EDIT);
  mol::ChainHandle chain = mhandle.gaps[index].GetChain();
  mol::ResNum i(before_number+1);
  mol::ResNum e(after_number);
  for(; i < e; ++i){
    mol::ResidueHandle res = chain.FindResidue(i);
    if(res.IsValid()) ed.DeleteResidue(res);
  }

  StructuralGap new_gap(mhandle.gaps[index].before,
                        mhandle.gaps[index+1].after,
                        gap_seq);

  mhandle.gaps[index] = new_gap;

  mhandle.gaps.erase(mhandle.gaps.begin()+index+1);
}

int RemoveTerminalGaps(ModellingHandle& mhandle) {

  core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped(
                                "modelling::RemoveTerminalGaps", 2);

  int removed_gaps = 0;
  for(StructuralGapList::iterator i = mhandle.gaps.begin(); i != mhandle.gaps.end(); ){
    if(i->IsNTerminal() || i->IsCTerminal()){
      i = mhandle.gaps.erase(i);
      ++removed_gaps;
    }
    else ++i;
  }
  LOG_INFO("Removed " << removed_gaps << " terminal gap(s).");
  return removed_gaps;
}

void ReorderGaps(ModellingHandle& mhandle){

  std::vector<std::vector<uint> > gap_data;
  for(uint i = 0; i < mhandle.gaps.size(); ++i){
    gap_data.push_back(std::vector<uint>());

    //first element is chain index
    gap_data.back().push_back(mhandle.gaps[i].GetChainIndex());

    //second element is start resnum
    uint start_resnum;
    if(mhandle.gaps[i].IsNTerminal()){
      start_resnum = 1;
    }
    else{
      start_resnum = mhandle.gaps[i].before.GetNumber().GetNum() + 1;
    }
    gap_data.back().push_back(start_resnum);
    //last element is the idx of the current gap
    gap_data.back().push_back(i);
  }

  std::sort(gap_data.begin(), gap_data.end());

  StructuralGapList sorted_gaps;
  for(uint i = 0; i < gap_data.size(); ++i){
    sorted_gaps.push_back(mhandle.gaps[gap_data[i].back()].Copy());
  }

  mhandle.gaps = sorted_gaps;
}

void SetupDefaultBackboneScoring(ModellingHandle& mhandle){
  
  // setup environment
  BackboneScoreEnvPtr env(new BackboneScoreEnv(mhandle.seqres));
  env->SetInitialEnvironment(mhandle.model);

  BackboneOverallScorerPtr scorer = 
  promod3::scoring::LoadDefaultBackboneOverallScorer();

  scorer->AttachEnvironment(*env);

  mhandle.backbone_scorer_env = env;
  mhandle.backbone_scorer = scorer;
  LOG_INFO("Initialized default backbone scoring for modelling.");
}

void SetupDefaultAllAtomScoring(ModellingHandle& mhandle){
  
  // setup scoring
  loop::AllAtomEnvPtr env(new loop::AllAtomEnv(mhandle.seqres));
  env->SetInitialEnvironment(mhandle.model);
  AllAtomOverallScorerPtr scorer =
  promod3::scoring::LoadDefaultAllAtomOverallScorer();

  scorer->AttachEnvironment(*env);

  // setup sidechaining needed for scoring
  loop::AllAtomEnvPtr env_sc(new loop::AllAtomEnv(mhandle.seqres));
  env_sc->SetInitialEnvironment(mhandle.model);
  SidechainReconstructorPtr sc_rec;
  sc_rec.reset(new SidechainReconstructor);
  sc_rec->AttachEnvironment(*env_sc);

  // set mhandle values
  mhandle.all_atom_scorer_env = env;
  mhandle.all_atom_scorer = scorer;
  mhandle.all_atom_sidechain_env = env_sc;
  mhandle.sidechain_reconstructor = sc_rec;

  LOG_INFO("Initialized default all atom scoring for modelling.");
}

void SetSequenceProfiles(ModellingHandle& mhandle, 
                         const seq::ProfileHandleList& profiles){

  // check whether the profiles match the internal SEQRES
  if(static_cast<int>(profiles.size()) != mhandle.seqres.GetCount()){
    throw promod3::Error("Must have one profile per chain!");
  }

  for(uint i = 0; i < profiles.size(); ++i){
    if(profiles[i]->GetSequence() != mhandle.seqres[i].GetString()){
      throw promod3::Error("Sequence in profile must match sequence in SEQRES!");
    }
  }
  // set the profiles
  mhandle.profiles = profiles;
}

void SetPsipredPredictions(ModellingHandle& mhandle,
                           const promod3::loop::PsipredPredictionList& 
                           psipred_predictions){

  // check whether the predictions match the internal SEQRES
  if(static_cast<int>(psipred_predictions.size()) != mhandle.seqres.GetCount()){
    throw promod3::Error("Must have one prediction per chain!");
  }

  for(uint i = 0; i < psipred_predictions.size(); ++i){
    // there is no sequence to check, let's at least check the size of the
    // psipred prediction
    if(static_cast<int>(psipred_predictions[i]->size()) != 
       mhandle.seqres[i].GetLength()){
      throw promod3::Error("Prediction must have same size than SEQRES!");
    }
  }

  // set the predictions
  mhandle.psipred_predictions = psipred_predictions;
}

void MergeMHandle(const ModellingHandle& source_mhandle,
                  ModellingHandle& target_mhandle,
                  uint source_chain_idx, uint target_chain_idx,
                  uint start_resnum, uint end_resnum,
                  const geom::Mat4& transform){

  //check the input data
  if(source_chain_idx >= static_cast<uint>(source_mhandle.seqres.GetCount()) || 
     target_chain_idx >= static_cast<uint>(target_mhandle.seqres.GetCount())){
    throw promod3::Error("Invalid chain idx encountered!");
  }

  if(source_mhandle.seqres[source_chain_idx].GetString() != 
     target_mhandle.seqres[target_chain_idx].GetString()){
    throw promod3::Error("The SEQRES of the specified chains must match!");
  }

  if(start_resnum <= 0){
    throw promod3::Error("start_resnum must be at least 1!");
  }

  if(start_resnum > end_resnum){
    throw promod3::Error("start_resnum must be smaller or equal end_resnum!");
  }

  const String& seqres = source_mhandle.seqres[source_chain_idx].GetString();

  if(end_resnum > seqres.size()){
    throw promod3::Error("end_resnum is too big for the underlying SEQRES of "
                         "the chain you specified!");
  }

  //start_resnum and end_resnum must not lie within a gap in the source mhandle 
  for(uint gap_idx = 0; gap_idx < source_mhandle.gaps.size(); ++gap_idx){

    const StructuralGap& gap = source_mhandle.gaps[gap_idx];

    if(source_chain_idx == gap.GetChainIndex()){

      uint gap_before_resnum = 0;
      uint gap_after_resnum = seqres.size() + 1;

      if(gap.before.IsValid()){
        gap_before_resnum = gap.before.GetNumber().GetNum();
      }

      if(gap.after.IsValid()){
        gap_after_resnum = gap.after.GetNumber().GetNum();
      }

      if(gap_before_resnum < start_resnum && gap_after_resnum > start_resnum){
         std::stringstream ss;
        ss <<"start_resnum ("<<start_resnum << ") is enclosed by: " << gap;
        throw promod3::Error(ss.str());
      }

      if(gap_before_resnum < end_resnum && gap_after_resnum > end_resnum){
         std::stringstream ss;
         ss <<"end_resnum ("<<end_resnum << ") is enclosed by: " << gap;
         throw promod3::Error(ss.str());
      }
    }
  }

  mol::ChainHandle source_chain = source_mhandle.model.GetChainList()[source_chain_idx];
  mol::ChainHandle target_chain = target_mhandle.model.GetChainList()[target_chain_idx];

  //residues with start_resnum and end_resnum must be there in source
  mol::ResNum num(start_resnum);
  mol::ResidueHandle source_start_res = source_chain.FindResidue(num);
  num.SetNum(end_resnum);
  mol::ResidueHandle source_end_res = source_chain.FindResidue(num);

  if(!(source_start_res.IsValid() && source_end_res.IsValid())){
    throw promod3::Error("Residues with start_resnum and end_resnum must be "
                         "present in specified chain of source_mhandle!");
  }

  //clear all residues in target mhandle in specified range
  mol::XCSEditor ed = target_mhandle.model.EditXCS(mol::BUFFERED_EDIT);
  
  for(uint i = start_resnum; i <= end_resnum; ++i){
    num.SetNum(i);
    mol::ResidueHandle r = target_chain.FindResidue(num);
    if(r.IsValid()) ed.DeleteResidue(r);
  }

  //copy over coordinates
  mol::ResidueHandle source_r, target_r, last_target_r;
  num.SetNum(start_resnum-1);
  last_target_r = target_chain.FindResidue(num);
  
  for(uint i = start_resnum; i <= end_resnum; ++i){

    num.SetNum(i);

    source_r = source_chain.FindResidue(num);

    if(source_r.IsValid()){

      mol::AtomHandleList source_atoms = source_r.GetAtomList();
      target_r = ed.AppendResidue(target_chain, source_r);
      for(mol::AtomHandleList::iterator at_it = source_atoms.begin();
          at_it != source_atoms.end(); ++at_it){
        geom::Vec4 t_pos = geom::Vec4(at_it->GetPos());
        t_pos = transform * t_pos;
        geom::Vec3 pos(t_pos[0], t_pos[1], t_pos[2]);
        ed.InsertAtom(target_r, at_it->GetName(), pos, at_it->GetElement(),
                      at_it->GetOccupancy(), at_it->GetBFactor(), 
                      at_it->IsHetAtom());
      }

      //the atoms are now copied over and the positions are transformed 
      //but we still need the bonds
      for(mol::AtomHandleList::iterator at_it = source_atoms.begin();
          at_it != source_atoms.end(); ++at_it){
  
        //iterate over all connected atoms and generate a bond if they're 
        //from the same residue. Peptide bonds are built seperately
        mol::AtomHandleList connected_atoms = at_it->GetBondPartners();
        for(mol::AtomHandleList::iterator connected_at_it = 
            connected_atoms.begin(); connected_at_it != connected_atoms.end(); 
            ++connected_at_it){
  
          if(at_it->GetResidue() == connected_at_it->GetResidue()){
            mol::AtomHandle at_one = target_r.FindAtom(at_it->GetName());
            mol::AtomHandle at_two = target_r.FindAtom(connected_at_it->GetName());
            if(at_one.IsValid() && at_two.IsValid()) ed.Connect(at_one, at_two);
          }
        }
      }  
      
      //build peptide bond if the last and the current target residues are
      //consecutive in terms of residue number
      if(last_target_r.IsValid()){
        if(last_target_r.GetNumber() + 1 == target_r.GetNumber()){
          mol::AtomHandle c = last_target_r.FindAtom("C");
          mol::AtomHandle n = target_r.FindAtom("N");
          if(c.IsValid() && n.IsValid()) ed.Connect(c, n);
        }
      }

      last_target_r = target_r;
    }
  }

  //we potentially still have to build the last peptide bond
  num.SetNum(end_resnum + 1);
  target_r = target_chain.FindResidue(num);

  if(target_r.IsValid() && last_target_r.IsValid() &&
     last_target_r.GetNumber() + 1 == target_r.GetNumber()){
    mol::AtomHandle c = last_target_r.FindAtom("C");
    mol::AtomHandle n = target_r.FindAtom("N");
    if(c.IsValid() && n.IsValid()) ed.Connect(c, n);
  }
  ed.ReorderResidues(target_chain);
  //fix torsions
  conop::AssignBackboneTorsions(target_chain);

  //delete the gaps in the target mhandle or shorten them...
  num.SetNum(start_resnum);
  mol::ResidueHandle target_start_res = target_chain.FindResidue(num);
  num.SetNum(end_resnum);
  mol::ResidueHandle target_end_res = target_chain.FindResidue(num);

  StructuralGapList gaps_to_add;

  for(uint gap_idx = 0; gap_idx < target_mhandle.gaps.size(); ++gap_idx){

    const StructuralGap& gap = target_mhandle.gaps[gap_idx];
    bool delete_gap = false;

    if(gap.GetChainIndex() == target_chain_idx){

      uint gap_before_resnum = 0;
      uint gap_after_resnum = seqres.size() + 1;

      if(gap.before.IsValid()){
        gap_before_resnum = gap.before.GetNumber().GetNum();
      }

      if(gap.after.IsValid()){
        gap_after_resnum = gap.after.GetNumber().GetNum();
      }

      if(gap_before_resnum >= start_resnum && 
         gap_after_resnum <= end_resnum){
         //the gap is fully enclosed by start and end => can be silently deleted
         delete_gap = true;
      }

      if(gap_before_resnum < start_resnum && gap_after_resnum >= start_resnum){
         //the gap extends towards nter
         uint new_length = start_resnum - gap_before_resnum - 1;
         String gap_seq = seqres.substr(gap_before_resnum, new_length);
         gaps_to_add.push_back(StructuralGap(gap.before,
                                             target_start_res,
                                             gap_seq));
         delete_gap = true;
      }

      if(gap_after_resnum > end_resnum && gap_before_resnum <= end_resnum){
        //the gap extends towards cter
        uint new_length = gap_after_resnum - end_resnum - 1;
        String gap_seq = seqres.substr(end_resnum, new_length);
        gaps_to_add.push_back(StructuralGap(target_end_res,
                                            gap.after,
                                            gap_seq)); 
        delete_gap = true;
      }
    }

    if(delete_gap){
      target_mhandle.gaps.erase(target_mhandle.gaps.begin() + gap_idx);
      --gap_idx;
    }
  }

  //add all original gaps, that had to be processed before
  for(StructuralGapList::iterator i = gaps_to_add.begin();
      i != gaps_to_add.end(); ++i){
    target_mhandle.gaps.push_back(*i);
  }

  //copy over all enclosed gaps from the source
  for(StructuralGapList::const_iterator i = source_mhandle.gaps.begin(); 
      i != source_mhandle.gaps.end(); ++i){
    if(i->GetChainIndex() == source_chain_idx){

      uint gap_before_resnum = 0;
      uint gap_after_resnum = seqres.size() + 1;

      if(i->before.IsValid()){
        gap_before_resnum = i->before.GetNumber().GetNum();
      }

      if(i->after.IsValid()){
        gap_after_resnum = i->after.GetNumber().GetNum();
      }

      if(gap_before_resnum >= start_resnum && gap_after_resnum <= end_resnum){
        StructuralGap copied_gap = i->Copy();
        copied_gap.Transfer(target_chain);
        target_mhandle.gaps.push_back(copied_gap);
      }
    }
  }  

  //finally reorder the gaps to ensure consistency
  ReorderGaps(target_mhandle);  

  if (IsBackboneScoringSetUp(target_mhandle)) {
    target_mhandle.backbone_scorer_env->SetInitialEnvironment(target_mhandle.model); 
  }

  if(IsAllAtomScoringSetUp(target_mhandle)){
    target_mhandle.all_atom_sidechain_env->SetInitialEnvironment(target_mhandle.model);
  }
}

void InsertLoop(ModellingHandle& mhandle, const loop::BackboneList& bb_list,
                uint start_resnum, uint chain_idx) {
  // update model
  mol::ChainHandleList chain_list = mhandle.model.GetChainList();
  bb_list.InsertInto(chain_list[chain_idx], start_resnum);
  // update scoring
  if (IsBackboneScoringSetUp(mhandle)) {
    mhandle.backbone_scorer_env->SetEnvironment(bb_list, start_resnum,
                                                chain_idx);
  }
  if (IsAllAtomScoringSetUp(mhandle)) {
    mhandle.all_atom_sidechain_env->SetEnvironment(bb_list, start_resnum,
                                                   chain_idx);
  }
}

int InsertLoopClearGaps(ModellingHandle& mhandle,
                        const loop::BackboneList& bb_list,
                        const StructuralGap& gap) {
  // check consistency
  if (bb_list.size() != gap.GetFullSeq().size()) {
    throw promod3::Error("Inconsistent gap for given loop to insert.");
  }
  // update model
  uint chain_idx = gap.GetChainIndex();
  uint start_resnum = 1;
  if (!gap.IsNTerminal()) start_resnum = gap.before.GetNumber().GetNum();
  InsertLoop(mhandle, bb_list, start_resnum, chain_idx);
  return ClearGaps(mhandle, gap);
}

bool CopyConserved(ResidueView src_res, ResidueHandle dst_res, XCSEditor& edi,
                   bool& has_cbeta)
{
  // check if the residue name of dst and src are the same. In the easy 
  // case, the two residue names match and we just copy over all atoms.
  // If they don't match, we are dealing with modified residues.
  if (dst_res.GetName() == src_res.GetName()) {
    return CopyIdentical(src_res, dst_res, edi, has_cbeta);
  } else if (src_res.GetName() == "MSE") {
    return CopyMSE(src_res, dst_res, edi, has_cbeta);
  } else {
    return CopyModified(src_res, dst_res, edi, has_cbeta);
  }
}

bool CopyIdentical(ResidueView src_res, ResidueHandle dst_res, XCSEditor& edi, 
                   bool& has_cbeta)
{
  AtomViewList atoms = src_res.GetAtomList();
  for (AtomViewList::const_iterator 
       a = atoms.begin(), e = atoms.end(); a != e; ++a) {
    if (a->GetName() == "CB") {
      has_cbeta = true;
    }
    if (a->GetName() == "OXT") {
      continue;
    }
    if (a->GetElement() == "D" || a->GetElement() == "H") {
      continue;
    }
    edi.InsertAtom(dst_res, a->GetName(), a->GetPos(), a->GetElement(), 
                   1.0, a->GetBFactor());
  }
  return true;
}


bool CopyMSE(ResidueView src_res, ResidueHandle dst_res, XCSEditor& edi, 
             bool& has_cbeta)
{
  // selenium methionine is easy. We copy all atoms and substitute the SE 
  // with SD
  AtomViewList atoms=src_res.GetAtomList();
  for (AtomViewList::const_iterator 
       a = atoms.begin(), e = atoms.end(); a != e; ++a) {
    if (a->GetName() == "CB") {
      has_cbeta = true;
    }
    if (a->GetName() == "OXT" || a->GetElement() == "D" || a->GetElement() == "H") {
      continue;
    }
    if (a->GetName() == "SE") {
      edi.InsertAtom(dst_res, "SD", a->GetPos(), "S", 
                     1.0, a->GetBFactor());             
    } else {
      edi.InsertAtom(dst_res, a->GetName(), a->GetPos(), a->GetElement(), 
                     1.0, a->GetBFactor());            
    }
  }
  return true;
}

bool CopyModified(ResidueView src_res, ResidueHandle dst_res, 
                  XCSEditor& edi, bool& has_cbeta)
{
  // FIXME: for now this functions ignores chirality changes of sidechain 
  //        chiral atoms. To detect those changes, we would need to store the 
  //        chirality of those centers in the compound library.
  
  // For now, this function just handles cases where the src_res contains 
  // additional atoms, but the dst_atom doesn't contain any atoms the src_res 
  // doesn't have. It these two requirements are not met, we fall back to 
  // CopyNonConserved.
  
  // first let's get our hands on the component library
  conop::CompoundLibPtr comp_lib = conop::Conopology::Instance().GetDefaultLib();

  CompoundPtr src_compound = comp_lib->FindCompound(src_res.GetName(), 
                                                    Compound::PDB);
  if (!src_compound) {
    // OK, that's bad. Let's copy the backbone and be done with it!
    return CopyNonConserved(src_res, dst_res, edi, has_cbeta);
  }
  // since dst_res is one of the 20 amino acids, we don't have to check for 
  // existence of the compound. We know it is there!
  CompoundPtr dst_compound = comp_lib->FindCompound(dst_res.GetName(),
                                                    Compound::PDB);
  std::set<String> dst_atoms;
  std::set<String> src_atoms;
  // to compare the atoms of dst_res with those of src_res, let's create two 
  // sets containing all heavy atoms.
  for (AtomSpecList::const_iterator i = dst_compound->GetAtomSpecs().begin(), 
       e = dst_compound->GetAtomSpecs().end(); i != e; ++i) {
    if (i->element == "H" || i->element == "D") {
      continue;
    }
    dst_atoms.insert(i->name);
  }
  for (AtomSpecList::const_iterator i = src_compound->GetAtomSpecs().begin(), 
       e = src_compound->GetAtomSpecs().end(); i != e; ++i) {
    if (i->element == "H" || i->element == "D") {
      continue;
    }
    src_atoms.insert(i->name);
  }
  for (std::set<String>::const_iterator i = dst_atoms.begin(), 
       e = dst_atoms.end(); i != e; ++i) {
    if (src_atoms.find(*i) == src_atoms.end()) {
      return CopyNonConserved(src_res, dst_res, edi, has_cbeta);
    }
  }
  // Muahaha, all is good. Let's copy the atoms. Muahaha
  AtomViewList atoms = src_res.GetAtomList();
  for (AtomViewList::const_iterator 
       a = atoms.begin(), e = atoms.end(); a != e; ++a) {
    if (a->GetName() == "CB") {
      if (dst_res.GetName() == "GLY") {
        continue;
      }
      has_cbeta = true;
    }
    if (a->GetName() == "OXT") {
      continue;
    }
    if (a->GetElement() == "D" || a->GetElement() == "H") {
      continue;
    }
    if (dst_atoms.find(a->GetName()) == dst_atoms.end()) {
      continue;
    }
    edi.InsertAtom(dst_res, a->GetName(), a->GetPos(), a->GetElement(), 
                   1.0, a->GetBFactor());
  }
  return true;
}

bool CopyNonConserved(ResidueView src_res, ResidueHandle dst_res, 
                      XCSEditor& edi, bool& has_cbeta) 
{  
  AtomViewList atoms = src_res.GetAtomList();
  for (AtomViewList::const_iterator 
       a = atoms.begin(), e = atoms.end(); a != e; ++a) {
    String name=a->GetName();
    if (name == "CA" || name == "N" || name == "C" || name == "O" || name == "CB") {
      if (name == "CB") {
        if (dst_res.GetName() == "GLY") {
          continue;
        }
        has_cbeta = true;
      }
      if (a->GetElement() == "D" || a->GetElement() == "H") {
        continue;
      }
      edi.InsertAtom(dst_res, a->GetName(), a->GetPos(), a->GetElement(), 
                     1.0, a->GetBFactor());     
    }
  }
  return false;
}

// HELPER: for export SPDBV projects in SWISS-MODEL
// -> there it is expected that residues of insertions are present as dummy
//    residues which have atoms with coordinates all set to 9999.999.
bool InsertDummyAtoms(ResidueHandle dst_res, XCSEditor& edi) {
  conop::CompoundLibPtr comp_lib=conop::Conopology::Instance().GetDefaultLib();
  CompoundPtr dst_compound=comp_lib->FindCompound(dst_res.GetName(), 
                                                  Compound::PDB);
  if (!dst_compound) {
    throw promod3::Error("compound for residue '"+dst_res.GetName()+
                             "' does not exist");
  }
  Real dummy_coord = 9999.999;
  for (AtomSpecList::const_iterator i = dst_compound->GetAtomSpecs().begin(),
       e = dst_compound->GetAtomSpecs().end(); i != e; ++i) {
    if (dst_res.FindAtom(i->name)) {
      continue;
    }
    if (i->element == "D" || i->element == "H" || i->is_leaving) {
      continue;
    }
    edi.InsertAtom(dst_res, i->name, 
                   geom::Vec3(dummy_coord, dummy_coord, dummy_coord),
                   i->element, 1.0, 99.99, false);
  }
  return true;
}

ModellingHandle BuildRawModel(const seq::AlignmentHandle& aln, 
                              bool include_ligands,
                              const String& chain_names,
                              bool spdbv_style)
{
  seq::AlignmentList l;
  l.push_back(aln);
  return BuildRawModel(l,
                       include_ligands,
                       chain_names,
                       spdbv_style);
}

ModellingHandle BuildRawModel(const seq::AlignmentList& aln, 
                              bool include_ligands, 
                              const String& chain_names,
                              bool spdbv_style)
{
  core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped(
                                "modelling::BuildRawModel", 2);

  ModellingHandle result;
  EntityHandle model=CreateEntity();
  XCSEditor edi=model.EditXCS(BUFFERED_EDIT);
  result.model=model;

  uint chain_name_idx = 0;

  for(seq::AlignmentList::const_iterator it=aln.begin(); 
      it!=aln.end(); ++it, ++chain_name_idx) {
    if(chain_name_idx >= chain_names.size()) {
      throw promod3::Error("running out of chain names");
    }
    StructuralGapList gap_list;
    BuildRawChain(*it, edi, gap_list, chain_names[chain_name_idx], spdbv_style);
    result.gaps.insert(result.gaps.end(), gap_list.begin(), gap_list.end());
    seq::SequenceHandle seqres = seq::CreateSequence(
                                    chain_names.substr(chain_name_idx, 1),
                                    it->GetSequence(0).GetGaplessString());
    result.seqres.AddSequence(seqres);
  }

  // handle ligands
  if (include_ligands) {
    AddLigands(aln, edi, model);
  }

  HeuristicProcessor heu_proc;
  // This is on purpose!
  // The heuristic processor would connect residues by distance criteria.
  // Residues flanking an insertion would therefore be connected!
  // We rather build the peptide bonds in BuildRawChain.
  heu_proc.SetConnectAminoAcids(false);
  heu_proc.Process(model);
  result.model=model;

  return result;
}

void AddLigands(const seq::AlignmentList& aln, XCSEditor& edi,
                EntityHandle& model)
{
  
  core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped(
                                "modelling::AddLigands", 2);

  // make sure we handle duplicates
  std::set<mol::EntityHandle> unique_handles;
  std::vector<mol::ResidueViewList> ligand_views;
  for(seq::AlignmentList::const_iterator it = aln.begin();
      it != aln.end(); ++it) {
    mol::EntityHandle handle = it->GetSequence(1).GetAttachedView().GetHandle();
    // did we already do that?
    if (unique_handles.find(handle) != unique_handles.end()) continue;
    mol::EntityView ligand_view = handle.Select("ligand=true or cname='_'");
    mol::ResidueViewList ligands = ligand_view.GetResidueList();
    if (!ligands.empty()) {
      ligand_views.push_back(ligands);
    }
    unique_handles.insert(handle);
  }

  if (ligand_views.size()>0) {
    mol::ChainHandle ligand_chain = edi.InsertChain("_");

    for(std::vector<mol::ResidueViewList>::iterator ligand_view_it =
          ligand_views.begin();
        ligand_view_it != ligand_views.end(); ++ligand_view_it) {
      for (mol::ResidueViewList::iterator 
           i = ligand_view_it->begin(), e = ligand_view_it->end(); i != e; ++i) {
        ResidueView src_ligand = *i;
        AtomViewList atoms = src_ligand.GetAtomList();
        bool within = false;
        for (AtomViewList::iterator j = atoms.begin(), 
             e2 = atoms.end(); j != e2; ++j) {
          // check if there is at least one residue within 6.0 A of the 
          // ligand. If not, don't bother to add the ligand.               
          AtomHandleList atoms_within = model.FindWithin(j->GetPos(), 6.0);
          for (AtomHandleList::iterator 
               k = atoms_within.begin(), e3 = atoms_within.end(); k != e3; ++k) {
            if (k->GetName() == "CA") {
              within = true;
              break;
            }
          }
          if (within) {
            break;
          }
        }
        if (!within) {
          continue;
        }

        // deep copy of residue into ligand chain
        mol::ResidueHandle dst_res = edi.AppendResidue(ligand_chain,
                                                       src_ligand.GetHandle(),
                                                       true);
        dst_res.SetIsLigand(true);
      }
    }
  }
}

void BuildRawChain(const seq::AlignmentHandle& aln,
                   XCSEditor& edi, 
                   StructuralGapList& gap_list,
                   char chain_name,
                   bool spdbv_style)
{
  
  core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped(
                                "modelling::BuildRawChain", 2);

  if (aln.GetCount() != 2) {
    throw promod3::Error("alignment must contain exactly two sequences");
  }
  if (!aln.GetSequence(1).HasAttachedView()) {
    throw promod3::Error("second sequence must have a view attached");
  }
  if (aln.GetSequence(0).GetOffset() != 0) {
    throw promod3::Error("we cannot honor non-zero offsets for targets");
  }
  
  ChainHandle chain=edi.InsertChain(String(1,chain_name));

  // we enforce res. numbering for target to start at 1
  int res_num = 0;
  int last_num = res_num;
  int last_index = -1;
  ResidueHandle before;
  String gap_seq;

  for (int i = 0; i < aln.GetLength(); ++i) {
    AlignedColumn col = aln[i];
    const char trg_olc = col[0];
    if (trg_olc == '-') {
      continue;
    }
    res_num += 1;
    ResidueView src_res = col.GetResidue(1);
    if (!src_res.IsValid()) {
      gap_seq += trg_olc;
      if (spdbv_style) {
        String rname = conop::OneLetterCodeToResidueName(trg_olc);
        ResidueHandle dst_res = edi.AppendResidue(chain, rname, ResNum(res_num));
        InsertDummyAtoms(dst_res, edi);
      }
      continue;
    }
    const char src_olc = src_res.GetOneLetterCode();
    // sanity check that src_res matches aligned sequence
    if (src_olc != col[1]) {
      std::stringstream ss;
      ss << "Alignment-structure mismatch at pos " << i << " in chain " <<
        chain_name <<
        ", alignment is '" << col[1] << "' structure residue is '" <<
        src_olc << "'";
      // We often have non-standard AA residues changing OLC mapping over time.
      // This can make it likely for this check to fail and hence we only
      // check the 20 standard amino acids.
      if (conop::ResidueToAminoAcid(src_res.GetHandle()) == conop::XXX) {
        ss << ". Ok for modified residue " << src_res.GetName() << ".";
        LOG_WARNING(ss.str())
      } else {
        ss << ". Not ok for standard amino acid " << src_res.GetName() << "!";
        throw promod3::Error(ss.str());
      }
    }
    // remove atoms with conflicting positions (i.e. on top of each other)
    CleanupAtomConflicts(src_res);
    // check for complete backbone, or in case of Calpha only model building,
    // if the src_res has a Calpha atom
    if (!CheckBackboneAtoms(src_res)) {
      LOG_INFO(src_res << " has incomplete backbone. skipping");
      gap_seq += trg_olc;
      continue;
    }
    // check if src_res is L_PEPTIDE_LINKING or PEPTIDE_LINKING.
    if (src_res.GetChemClass() == mol::ChemClass::D_PEPTIDE_LINKING) {
      LOG_INFO(src_res << " is d-peptide-linking. skipping");
      gap_seq += trg_olc;
      continue;
    }
    String name = conop::OneLetterCodeToResidueName(trg_olc);    
    ResidueHandle dst_res = edi.AppendResidue(chain, name, ResNum(res_num));
    dst_res.SetIsProtein(true);
    bool consecutive_residues = false;
    if (last_num + 1 != res_num) {
      // that's an insertion
      gap_list.push_back(StructuralGap(before, dst_res, gap_seq));
      gap_seq = "";
    } else if (last_index > -1 && last_index + 1 != i) {
      // that's a deletion
      gap_list.push_back(StructuralGap(before, dst_res, ""));
    } else {
      consecutive_residues = true;
    }
    bool has_cbeta = false;
    if (toupper(trg_olc) == toupper(src_olc)) {
      CopyConserved(src_res, dst_res, edi, has_cbeta);
    } else {
      CopyNonConserved(src_res, dst_res, edi, has_cbeta);
    }

    dst_res.SetSecStructure(src_res.GetSecStructure());

    // insert Cbeta, unless dst residue is glycine.
    if (!has_cbeta && !(trg_olc == 'G' || trg_olc == 'g')) {
      mol::AtomHandle ca = dst_res.FindAtom("CA");
      mol::AtomHandle n = dst_res.FindAtom("N");
      mol::AtomHandle c = dst_res.FindAtom("C");
      geom::Vec3 cbeta_pos;
      promod3::core::ConstructCBetaPos(n.GetPos(), ca.GetPos(), c.GetPos(),
                                       cbeta_pos);

      edi.InsertAtom(dst_res, "CB", cbeta_pos, "C");
    }
    if (spdbv_style) {
      InsertDummyAtoms(dst_res, edi);
    }
    if (consecutive_residues && before.IsValid()) {
      AtomHandle c_before = before.FindAtom("C");
      AtomHandle n_this = dst_res.FindAtom("N");
      if(conop::IsBondFeasible(c_before, n_this)) {
        edi.Connect(c_before, n_this);
      } else {
        // not possible to generate feasible peptide bond
        // add gap to enforce rebuilding during the modelling process
        gap_list.push_back(StructuralGap(before, dst_res, ""));
      }
    }

    last_num = res_num;
    last_index = i;
    before = dst_res;
  }
  // check if we have added anything at all
  if (last_num == 0) {
    LOG_ERROR("No residues added for chain " << chain_name << "! "
              "This typically indicates that the template is a Calpha trace "
              "or only contains D-peptides.")
  } else if (last_num != res_num) {
    // add trailing gap (only if there are residues at all)
    gap_list.push_back(StructuralGap(before, ResidueHandle(), gap_seq));
  }
}

}}