diff --git a/sidechain/doc/disulfid.rst b/sidechain/doc/disulfid.rst index 199482985781ce51fc1a0c0adf8b99890a8204f3..6bf6078b9dc311a000be25d2fbf9c408d20aa781 100644 --- a/sidechain/doc/disulfid.rst +++ b/sidechain/doc/disulfid.rst @@ -10,7 +10,11 @@ reconstruction regarding cysteins when finding and separately handle disulfid bonds. PROMOD3 implements a simple geometrical description [canutescu2003b]_ . The paper proposes two rotamers to be in a disulfid bonded state, if the resulting disulfid score plus the self energies of the -involved rotamers is below 45. +involved rotamers is below 45. If there are several cysteines close together, +this problem gets another layer of complexity. One has to assure, that +every cysteine only participates in one disulfid bond but the network +of disulfid bonds is geometrically optimal. SCWRL4 proposes an approach, +that has also been implemented here. .. method:: DisulfidRawScore(ca_pos_one, cb_pos_one, sg_pos_one, \ ca_pos_two, cb_pos_two, sg_pos_two) @@ -65,7 +69,37 @@ involved rotamers is below 45. exactly (in case of :class:`RRMRotamer`) or at least (in case of :class:`FRMRotamer`) one particle representing the gamma sulfur. - :returns: The result of the raw score plus the self energies of - the input rotamers + :returns: The result of the raw score plus the average + self energies of the input rotamers + + +.. method:: ResolveCysteins(rotamer_groups, ca_positions, cb_positions,\ + [score_threshold=45.0]) + + Tries to optimize disulfid bond network. In a first step, all disulfid bonds + get detected using :func:`DisulfidScore`. If the value between two rotamers is + below **score_threshold**, they're assumed to be bonded. The function then + tries to detect the largest possible set of disulfide bonds, with no + cysteine being part of more than one bond. If several largest sets are + possible, the one with the optimal sum of scores gets estimated. + + :param rotamer_groups: Every group represents a cysteine + :param ca_positions: The CA positions of the according rotamers + :param cb_positions: The CB positions of the according rotamers + :param score_threshold: The score two rotamers must have to be considered + as a disulfid bond + + :type rotamer_groups: :class:`list` of + :class:`FRMRotamerGroup`/:class:`RRMRotamerGroup` + :type ca_positions: :class:`list` of :class:`ost.geom.Vec3` + :type cb_positions: :class:`list` of :class:`ost.geom.Vec3` + :type score_threshold: :class:`float` + + :returns: A :class:`tuple` containing two :class:`list` objects with equal + length. Both lists contain :class:`tuple` objects with two elements. + The tuples in the first list describe the indices of cysteins + participating in disulfid bonds. The tuples in the second list + describe the optimal rotamers in the according rotamer groups. + .. [canutescu2003b] Canutescu AA, Shelenkov AA, Dunbrack RL Jr. (2003). A graph-theory algorithm for rapid protein side-chain prediction. Protein Sci (2003). diff --git a/sidechain/doc/reconstruct.rst b/sidechain/doc/reconstruct.rst index 9ebbcc4ce344dd06785203b8352a6d83fe6123aa..d5361b7bf072b9a8582184314ab6ecbb310161fa 100644 --- a/sidechain/doc/reconstruct.rst +++ b/sidechain/doc/reconstruct.rst @@ -22,7 +22,6 @@ SidechainReconstructor Class .. class:: SidechainReconstructor(keep_sidechains=True, build_disulfids=True, \ cutoff=20, graph_max_complexity=100000000, \ graph_intial_epsilon=0.02, \ - disulfid_max_distance=8, \ disulfid_score_thresh=45) Reconstruct sidechains for single loops. Must be linked to an all atom env. @@ -52,9 +51,6 @@ SidechainReconstructor Class :meth:`RotamerGraph.TreeSolve`. :type graph_intial_epsilon: :class:`float` - :param disulfid_max_distance: Max. distance for 2 CYS-CA atoms to be - considered close enough to check for bridges. - :type disulfid_max_distance: :class:`float` :param disulfid_score_thresh: If :meth:`DisulfidScore` between two CYS is below this threshold, we consider them to be disulfid-bonded. diff --git a/sidechain/pymod/_reconstruct_sidechains.py b/sidechain/pymod/_reconstruct_sidechains.py index ce043c8affd8af53f0b0518ee2292356d84412d7..b9e13dbd689f1200263fc2a923ca38f04f798ccf 100644 --- a/sidechain/pymod/_reconstruct_sidechains.py +++ b/sidechain/pymod/_reconstruct_sidechains.py @@ -306,9 +306,10 @@ def _GetDisulfidBridges(frame_residues, cystein_indices, res_list, rotamer_libra frame = sidechain.Frame(frame_residues) # some info we have to keep track of when evaluating disulfid bonds - cystein_rotamers = list() + cystein_rot_groups = list() cys_ca_positions = list() cys_cb_positions = list() + indices_with_rot_groups = list() for i in cystein_indices: # check ca, cb @@ -324,53 +325,26 @@ def _GetDisulfidBridges(frame_residues, cystein_indices, res_list, rotamer_libra rotamer_constructor, phi_angles[i], psi_angles[i], use_frm, bbdep) rot_group.AddFrameEnergy(frame) - cystein_rotamers.append(rot_group) + cystein_rot_groups.append(rot_group) + indices_with_rot_groups.append(i) + + bond_result, \ + rotamer_result = sidechain.ResolveCysteins(cystein_rot_groups, + cys_ca_positions, + cys_cb_positions, 45.0) # get CYS with disulfid bonds and the chosen rotamers disulfid_indices = list() disulfid_rotamers = list() - for i in range(len(cystein_rotamers)): - for j in range(i+1, len(cystein_rotamers)): - - # too far for a disulfid bond? - if geom.Distance(cys_ca_positions[i], cys_ca_positions[j]) > 8.0: - continue - - # already done? NOTE: new one might be better, but here we do - # first come, first served - if cystein_indices[i] in disulfid_indices \ - or cystein_indices[j] in disulfid_indices: - continue - - min_score = float("inf") - min_index_k = -1 - min_index_l = -1 - - for k in range(len(cystein_rotamers[i])): - for l in range(len(cystein_rotamers[j])): - score = sidechain.DisulfidScore(cystein_rotamers[i][k], - cystein_rotamers[j][l], - cys_ca_positions[i], - cys_cb_positions[i], - cys_ca_positions[j], - cys_cb_positions[j]) - if score < min_score: - min_index_k = k - min_index_l = l - min_score = score - - if min_score < 45.0: - # update indices - cys_idx_i = cystein_indices[i] - cys_idx_j = cystein_indices[j] - cys_rot_i = cystein_rotamers[i][min_index_k] - cys_rot_j = cystein_rotamers[j][min_index_l] - disulfid_indices.append(cys_idx_i) - disulfid_indices.append(cys_idx_j) - disulfid_rotamers.append(cys_rot_i) - disulfid_rotamers.append(cys_rot_j) + + for a, b in zip(bond_result, rotamer_result): + disulfid_indices.append(indices_with_rot_groups[a[0]]) + disulfid_indices.append(indices_with_rot_groups[a[1]]) + disulfid_rotamers.append(cystein_rot_groups[a[0]][b[0]]) + disulfid_rotamers.append(cystein_rot_groups[a[1]][b[1]]) return disulfid_indices, disulfid_rotamers + ############################################################################### diff --git a/sidechain/pymod/export_disulfid.cc b/sidechain/pymod/export_disulfid.cc index dbf95e81f869d3e57ca6245e044a053e538adbd6..b9000e9a21aac42c1a1810b0f3961f066ea6111b 100644 --- a/sidechain/pymod/export_disulfid.cc +++ b/sidechain/pymod/export_disulfid.cc @@ -1,7 +1,9 @@ #include <boost/python.hpp> #include <boost/python/iterator.hpp> #include <boost/python/register_ptr_to_python.hpp> +#include <exception> +#include <promod3/core/export_helper.hh> #include <promod3/sidechain/disulfid.hh> using namespace promod3; @@ -26,8 +28,51 @@ Real WrapDisulfidTwo(FRMRotamerPtr rot_one, FRMRotamerPtr rot_two, ca_pos_two, cb_pos_two); } +boost::python::tuple WrapResolveCysteins(const boost::python::list& rot_groups, + const boost::python::list& ca_pos, + const boost::python::list& cb_pos, + Real score_threshold){ + boost::python::list disulfid_return_list; + boost::python::list rotamer_return_list; + if(boost::python::len(rot_groups) == 0){ + return boost::python::make_tuple(disulfid_return_list, + rotamer_return_list); + } + std::vector<geom::Vec3> v_ca_pos; + std::vector<geom::Vec3> v_cb_pos; + promod3::core::ConvertListToVector(ca_pos, v_ca_pos); + promod3::core::ConvertListToVector(cb_pos, v_cb_pos); + std::vector<std::pair<uint, uint> > disulfid_indices; + std::vector<std::pair<uint, uint> > rotamer_indices; + // find out what we're dealing with by only looking at the first element + // since we're only dealing with smart users, this will be sufficient... + boost::python::extract<FRMRotamerGroupPtr> extractor(rot_groups[0]); + + if(extractor.check()){ + std::vector<promod3::sidechain::FRMRotamerGroupPtr> v_rot_groups; + promod3::core::ConvertListToVector(rot_groups, v_rot_groups); + ResolveCysteins(v_rot_groups, v_ca_pos, v_cb_pos, score_threshold, + disulfid_indices, rotamer_indices); + } + else{ + std::vector<promod3::sidechain::RRMRotamerGroupPtr> v_rot_groups; + promod3::core::ConvertListToVector(rot_groups, v_rot_groups); + ResolveCysteins(v_rot_groups, v_ca_pos, v_cb_pos, score_threshold, + disulfid_indices, rotamer_indices); + } + + for(uint i = 0; i < disulfid_indices.size(); ++i){ + disulfid_return_list.append(boost::python::make_tuple(disulfid_indices[i].first, + disulfid_indices[i].second)); + + rotamer_return_list.append(boost::python::make_tuple(rotamer_indices[i].first, + rotamer_indices[i].second)); + } + + return boost::python::make_tuple(disulfid_return_list, rotamer_return_list); +} } @@ -47,4 +92,9 @@ void export_Disulfid(){ def("DisulfidScore",&WrapDisulfidTwo,(arg("rotamer_one"),arg("rotamer_two"), arg("ca_pos_one"),arg("cb_pos_one"), arg("ca_pos_two"),arg("cb_pos_two"))); + + def("ResolveCysteins", &WrapResolveCysteins, (arg("rotamer_groups"), + arg("ca_positions"), + arg("cb_positions"), + arg("score_threshold") = 45.0)); } \ No newline at end of file diff --git a/sidechain/pymod/export_sidechain_reconstructor.cc b/sidechain/pymod/export_sidechain_reconstructor.cc index 1ec066d6aa1bc88d2093f87b7221bb5501371a9c..4f97d55a4cf714539b90dba820d96c625e5ca61e 100644 --- a/sidechain/pymod/export_sidechain_reconstructor.cc +++ b/sidechain/pymod/export_sidechain_reconstructor.cc @@ -91,10 +91,10 @@ void export_SidechainReconstructor() { class_<SidechainReconstructor, SidechainReconstructorPtr> ("SidechainReconstructor", no_init) - .def(init<bool, bool, Real, uint64_t, Real, Real, Real>( + .def(init<bool, bool, Real, uint64_t, Real, Real>( (arg("keep_sidechains")=true, arg("build_disulfids")=true, arg("cutoff")=20, arg("graph_max_complexity")=100000000, - arg("graph_intial_epsilon")=0.02, arg("disulfid_max_distance")=8, + arg("graph_intial_epsilon")=0.02, arg("disulfid_score_thresh")=45))) .def("Reconstruct", WrapReconstruct, (arg("start_resnum"), arg("num_residues"), arg("chain_idx")=0)) diff --git a/sidechain/src/disulfid.cc b/sidechain/src/disulfid.cc index a187ca812e15cd010015f215fe1d64fe8bd066ae..f7837bafefa8782953b8a8bacb8260a49832d9fd 100644 --- a/sidechain/src/disulfid.cc +++ b/sidechain/src/disulfid.cc @@ -1,6 +1,8 @@ #include <promod3/sidechain/disulfid.hh> #include <promod3/sidechain/rotamer.hh> #include <promod3/sidechain/particle.hh> +#include <promod3/core/graph.hh> +#include <promod3/core/enumerator.hh> #include <vector> namespace{ @@ -25,6 +27,170 @@ void ExtractCYSSG(T rot_one, T rot_two, } } +void Resolve(const std::vector<Real>& scores, + const promod3::core::Graph& graph, + std::vector<int>& active_bonds){ + + int size = scores.size(); + std::vector<int> current_combination(size, 0); + std::vector<int> stupid_enumerator_input(size, 2); + + promod3::core::Enumerator<int> enumerator(current_combination, + stupid_enumerator_input); + + + Real optimal_score = std::numeric_limits<Real>::max(); + + for(int enum_idx = 0; enum_idx < enumerator.GetMaxEnumerations(); ++enum_idx){ + + Real current_score = 0.0; + + for(int i = 0; i < size; ++i){ + current_score += current_combination[i] * scores[i]; + for(int j = 0; j < size; ++j){ + if(i != j) current_score += graph.IsConnected(i, j) * + current_combination[i] * + current_combination[j] * Real(1000000.0); + } + } + + if(current_score < optimal_score){ + optimal_score = current_score; + active_bonds = current_combination; + } + + enumerator.Next(); + } +} + + +template <typename T> +void CysteineResolve(const std::vector<T>& rot_groups, + const std::vector<geom::Vec3>& ca_pos, + const std::vector<geom::Vec3>& cb_pos, + Real disulfid_score_thresh, + std::vector<std::pair<uint, uint> >& disulfid_indices, + std::vector<std::pair<uint, uint> >& rotamer_indices){ + + // do the trivial cases... + if(rot_groups.size() <= 1){ + return; + } + + // do some input checks + if(ca_pos.size() != cb_pos.size()){ + throw promod3::Error("Inconsistency in size of input lists observed!"); + } + + if(ca_pos.size() != rot_groups.size()){ + throw promod3::Error("Inconsistency in size of input lists observed!"); + } + + // buildup disulfids, they might still be ambiguous! + std::vector<Real> scores; + std::vector<std::pair<uint, uint> > disulfids; + std::vector<std::pair<uint, uint> > optimal_rotamers; + uint num_rotamer_groups = rot_groups.size(); + + for(uint i = 0; i < num_rotamer_groups; ++i){ + for(uint j = i + 1; j < num_rotamer_groups; ++j){ + if(geom::Distance(ca_pos[i], ca_pos[j]) < Real(8.0)){ + // they are close and potentially build a disulfid bond... + Real min_score = std::numeric_limits<Real>::max(); + uint min_k = 0; + uint min_l = 0; + Real score; + for(uint k = 0; k < rot_groups[i]->size(); ++k){ + for(uint l = 0; l < rot_groups[j]->size(); ++l){ + score = promod3::sidechain::DisulfidScore((*rot_groups[i])[k], + (*rot_groups[j])[l], + ca_pos[i], cb_pos[i], + ca_pos[j], cb_pos[j]); + if(score < min_score){ + min_score = score; + min_k = k; + min_l = l; + } + } + } + + if(min_score < disulfid_score_thresh){ + // just found a potential disulfid bond! + scores.push_back(min_score - disulfid_score_thresh); + disulfids.push_back(std::make_pair(i, j)); + optimal_rotamers.push_back(std::make_pair(min_k, min_l)); + } + } + } + } + + // in here we fill all indices of the active disulfid bonds... + std::vector<int> final_disulfids; + + if(disulfids.size() > 1){ + // resolve disambiguities + promod3::core::Graph graph(disulfids.size()); + for(uint i = 0; i < disulfids.size(); ++i){ + for(uint j = i + 1; j < disulfids.size(); ++j){ + // check whether same rotamer group is involved in two disulfid bonds + if(disulfids[j].first == disulfids[i].first || + disulfids[j].first == disulfids[i].second || + disulfids[j].second == disulfids[i].first || + disulfids[j].second == disulfids[i].second){ + graph.Connect(i, j); + } + } + } + + std::vector<std::vector<int> > connected_components; + graph.ConnectedComponents(connected_components); + + for(uint component_idx = 0; component_idx < connected_components.size(); + ++component_idx){ + + const std::vector<int>& connected_component = + connected_components[component_idx]; + + if(connected_component.size() > 1){ + + std::vector<Real> component_scores; + for(uint i = 0; i < connected_component.size(); ++i){ + component_scores.push_back(scores[connected_component[i]]); + } + promod3::core::Graph component_graph = + graph.SubGraph(connected_component); + std::vector<int> active_bonds; + //OPTIMIZE THIS FUNCTION!!! + Resolve(component_scores, component_graph, active_bonds); + //OPTIMIZE THIS FUNCTION!!! + for(uint i = 0; i < active_bonds.size(); ++i){ + if(active_bonds[i] == 1){ + final_disulfids.push_back(connected_component[i]); + } + } + } + else{ + final_disulfids.push_back(connected_component[0]); + } + } + } + else if(disulfids.size() == 1){ + final_disulfids.push_back(0); + } + + // fill output + for(std::vector<int>::iterator i = final_disulfids.begin(); + i != final_disulfids.end(); ++i){ + uint group_idx_one = disulfids[*i].first; + uint group_idx_two = disulfids[*i].second; + uint rot_idx_one = optimal_rotamers[*i].first; + uint rot_idx_two = optimal_rotamers[*i].second; + disulfid_indices.push_back(std::make_pair(group_idx_one, group_idx_two)); + rotamer_indices.push_back(std::make_pair(rot_idx_one, rot_idx_two)); + } +} + + } @@ -55,7 +221,7 @@ Real DisulfidRawScore(const geom::Vec3& ca_pos_one, / Real(0.17453) + std::min(std::abs(x4-Real(1.3963)), std::abs(x4-Real(M_PI))) / Real(0.17453) - + std::abs(x3-Real(M_PI)/2) / Real(0.34907); + + std::abs(x3-Real(M_PI)*Real(0.5)) / Real(0.34907); } @@ -63,7 +229,6 @@ Real DisulfidScore(RRMRotamerPtr rot_one, RRMRotamerPtr rot_two, const geom::Vec3& ca_pos_one, const geom::Vec3& cb_pos_one, const geom::Vec3& ca_pos_two, const geom::Vec3& cb_pos_two){ - std::vector<geom::Vec3> sg_pos_one, sg_pos_two; ExtractCYSSG(*rot_one, *rot_two, sg_pos_one, sg_pos_two); @@ -75,6 +240,7 @@ Real DisulfidScore(RRMRotamerPtr rot_one, RRMRotamerPtr rot_two, Real raw_score = DisulfidRawScore(ca_pos_one, cb_pos_one, sg_pos_one[0], ca_pos_two, cb_pos_two, sg_pos_two[0]); Real self_energy = rot_one->GetSelfEnergy() + rot_two->GetSelfEnergy(); + self_energy *= Real(0.5); return raw_score + self_energy; } @@ -103,7 +269,37 @@ Real DisulfidScore(FRMRotamerPtr rot_one, FRMRotamerPtr rot_two, } Real self_energy = rot_one->GetSelfEnergy() + rot_two->GetSelfEnergy(); + self_energy *= Real(0.5); return min_raw_score + self_energy; } +// template functionility of following functions has been hidden in +// the source file to avoid bloating the header... +void ResolveCysteins( + const std::vector<RRMRotamerGroupPtr>& rot_groups, + const std::vector<geom::Vec3>& ca_pos, + const std::vector<geom::Vec3>& cb_pos, + Real disulfid_score_thresh, + std::vector<std::pair<uint, uint> >& disulfid_indices, + std::vector<std::pair<uint, uint> >& rotamer_indices){ + + CysteineResolve<RRMRotamerGroupPtr>(rot_groups, ca_pos, cb_pos, + disulfid_score_thresh, disulfid_indices, + rotamer_indices); + +} + +void ResolveCysteins(const std::vector<FRMRotamerGroupPtr>& rot_groups, + const std::vector<geom::Vec3>& ca_pos, + const std::vector<geom::Vec3>& cb_pos, + Real disulfid_score_thresh, + std::vector<std::pair<uint, uint> >& disulfid_indices, + std::vector<std::pair<uint, uint> >& rotamer_indices){ + + CysteineResolve<FRMRotamerGroupPtr>(rot_groups, ca_pos, cb_pos, + disulfid_score_thresh, disulfid_indices, + rotamer_indices); +} + + }} //ns diff --git a/sidechain/src/disulfid.hh b/sidechain/src/disulfid.hh index 53efd01f4447166ab95840ba4400df33bceef936..40613ca192d58f655d319de66b68803a2df30890 100644 --- a/sidechain/src/disulfid.hh +++ b/sidechain/src/disulfid.hh @@ -25,6 +25,19 @@ Real DisulfidScore(FRMRotamerPtr rot_one, FRMRotamerPtr rot_two, const geom::Vec3& ca_pos_one, const geom::Vec3& cb_pos_one, const geom::Vec3& ca_pos_two, const geom::Vec3& cb_pos_two); + void ResolveCysteins(const std::vector<FRMRotamerGroupPtr>& rot_groups, + const std::vector<geom::Vec3>& ca_pos, + const std::vector<geom::Vec3>& cb_pos, + Real disulfid_score_thresh, + std::vector<std::pair<uint, uint> >& disulfid_indices, + std::vector<std::pair<uint, uint> >& rotamer_indices); + + void ResolveCysteins(const std::vector<RRMRotamerGroupPtr>& rot_groups, + const std::vector<geom::Vec3>& ca_pos, + const std::vector<geom::Vec3>& cb_pos, + Real disulfid_score_thresh, + std::vector<std::pair<uint, uint> >& disulfid_indices, + std::vector<std::pair<uint, uint> >& rotamer_indices); }}//ns diff --git a/sidechain/src/sidechain_reconstructor.cc b/sidechain/src/sidechain_reconstructor.cc index 8f506f6e697ca1e8b2a05487d07b9ddcec410ac9..71cb3ccf7f3cd89d81bb23801ca0679afa579429 100644 --- a/sidechain/src/sidechain_reconstructor.cc +++ b/sidechain/src/sidechain_reconstructor.cc @@ -211,84 +211,92 @@ template<typename RotamerGroup> void SidechainReconstructor::BuildDisulfids_( typedef typename RotamerGroup::value_type RotamerPtr; loop::AllAtomPositions& out_pos = *(res->env_pos->all_pos); - // collect rotamers and assign frame energies with what we know so far + // collect rotamers and data required to resolve the disulfid bridges FramePtr frame(new Frame(frame_residues)); - std::vector<RotamerGroupPtr> cys_rotamer_groups(cys_indices.size()); - for (uint i = 0; i < cys_indices.size(); ++i) { - const uint res_idx = res_indices[cys_indices[i]]; - env_->GetCydRotamerGroup(cys_rotamer_groups[i], res_idx); - if (cys_rotamer_groups[i]) cys_rotamer_groups[i]->SetFrameEnergy(frame); - } + std::vector<RotamerGroupPtr> cys_rotamer_groups; + std::vector<geom::Vec3> ca_pos; + std::vector<geom::Vec3> cb_pos; + std::vector<uint> cys_with_rot_groups; + + uint num_cys_indices = cys_indices.size(); + cys_rotamer_groups.reserve(num_cys_indices); + ca_pos.reserve(num_cys_indices); + cb_pos.reserve(num_cys_indices); + cys_with_rot_groups.reserve(num_cys_indices); - // look for bridges (cys_rotamers[i] only non-null if bridge found) - std::vector<RotamerPtr> cys_rotamers(cys_indices.size(), RotamerPtr()); - geom::Vec3 ca_pos_i, ca_pos_j, cb_pos_i, cb_pos_j; for (uint i = 0; i < cys_indices.size(); ++i) { - const uint cys_idx = cys_indices[i]; // for out_pos, has_sidechain - const uint res_idx = res_indices[cys_idx]; // for env_, all_pos_ - // only handle if rot. group there and not handled yet - if (cys_rotamer_groups[i] && !cys_rotamers[i]) { - ca_pos_i = all_pos_->GetPos(res_idx, loop::BB_CA_INDEX); - cb_pos_i = all_pos_->GetPos(res_idx, loop::BB_CB_INDEX); - for (uint j = i+1; j < cys_indices.size(); ++j) { - // NOTE: we go first come, first served - // -> if we already found a disulfid bridge we keep it... - if (cys_rotamer_groups[j] && !cys_rotamers[j]) { - const uint cys_idx_j = cys_indices[j]; - const uint res_idx_j = res_indices[cys_idx_j]; - ca_pos_j = all_pos_->GetPos(res_idx_j, loop::BB_CA_INDEX); - cb_pos_j = all_pos_->GetPos(res_idx_j, loop::BB_CB_INDEX); - if (geom::Distance(ca_pos_i, ca_pos_j) <= disulfid_max_distance_) { - // get best disulfid score - RotamerGroup& rot_group_i = *(cys_rotamer_groups[i]); - RotamerGroup& rot_group_j = *(cys_rotamer_groups[j]); - Real min_score = disulfid_score_thresh_; - uint min_index_k = 0; - uint min_index_l = 0; - for (uint k = 0; k < rot_group_i.size(); ++k) { - for (uint l = 0; l < rot_group_j.size(); ++l) { - const Real score = DisulfidScore(rot_group_i[k], - rot_group_j[l], - ca_pos_i, cb_pos_i, - ca_pos_j, cb_pos_j); - if (score < min_score) { - min_score = score; - min_index_k = k; - min_index_l = l; - } - } - } - // set cys_rotamers[i] and cys_rotamers[j] if bridge found - if (min_score < disulfid_score_thresh_) { - cys_rotamers[i] = rot_group_i[min_index_k]; - cys_rotamers[j] = rot_group_j[min_index_l]; - res->disulfid_bridges.push_back(std::make_pair(cys_idx, cys_idx_j)); - } - } - } - } + const uint global_idx = res_indices[cys_indices[i]]; + RotamerGroupPtr p; + env_->GetCydRotamerGroup(p, global_idx); + if (p) { + p->SetFrameEnergy(frame); + cys_rotamer_groups.push_back(p); + ca_pos.push_back(all_pos_->GetPos(global_idx, loop::BB_CA_INDEX)); + cb_pos.push_back(all_pos_->GetPos(global_idx, loop::BB_CB_INDEX)); + cys_with_rot_groups.push_back(cys_indices[i]); } - // check if we did find a bridge for this one... - if (cys_rotamers[i]) { - // add new frame residue (1 particle for CYS) - std::vector<Particle> particles(1); - particles[0] = *(cys_rotamers[i]->begin()); - FrameResiduePtr frame_res(new FrameResidue(particles, res_idx)); - frame_residues.push_back(frame_res); - // apply solution - cys_rotamers[i]->ApplyOnResidue(out_pos, cys_idx); - has_sidechain[cys_idx] = true; - } else { - // do we need to reconstruct it? - if (keep_sidechains_) { - FrameResiduePtr frame_res = env_->GetScFrameResidue(res_idx); - if (frame_res) { + } + + std::vector<std::pair<uint, uint> > bond_solution; + std::vector<std::pair<uint, uint> > rot_solution; + + ResolveCysteins(cys_rotamer_groups, ca_pos, cb_pos, + disulfid_score_thresh_, bond_solution, rot_solution); + + std::vector<int> is_bonded(cys_rotamer_groups.size(), 0); + + for(uint solution_idx = 0; solution_idx < bond_solution.size(); + ++solution_idx) { + + const std::pair<uint, uint>& bond_pair = bond_solution[solution_idx]; + const std::pair<uint, uint>& rot_pair = rot_solution[solution_idx]; + + // fetch required data + RotamerPtr r_one = (*(cys_rotamer_groups[bond_pair.first]))[rot_pair.first]; + RotamerPtr r_two = (*(cys_rotamer_groups[bond_pair.second]))[rot_pair.second]; + uint idx_one = cys_with_rot_groups[bond_pair.first]; + uint idx_two = cys_with_rot_groups[bond_pair.second]; + uint global_idx_one = res_indices[idx_one]; + uint global_idx_two = res_indices[idx_two]; + std::vector<Particle> particles(1); + + // update the rotamer groups participating in disulfid bonds + is_bonded[bond_pair.first] = 1; + is_bonded[bond_pair.second] = 1; + + // do first disulfid bond partner + particles[0] = *(r_one->begin()); + FrameResiduePtr frame_res_one(new FrameResidue(particles, global_idx_one)); + frame_residues.push_back(frame_res_one); + r_one->ApplyOnResidue(out_pos, idx_one); + has_sidechain[idx_one] = true; + + // do second disulfid bond partner + particles[0] = *(r_two->begin()); + FrameResiduePtr frame_res_two(new FrameResidue(particles, global_idx_two)); + frame_residues.push_back(frame_res_two); + r_two->ApplyOnResidue(out_pos, idx_two); + has_sidechain[idx_two] = true; + + res->disulfid_bridges.push_back(std::make_pair(idx_one, idx_two)); + } + + // in case of keep_sidechains_, we add all complete cysteins not participating + // to any disulfid bond also to the frame + if(keep_sidechains_) { + for(uint i = 0; i < is_bonded.size(); ++i){ + if(is_bonded[i] == 0){ + const uint idx = cys_with_rot_groups[i]; + const uint global_idx = res_indices[idx]; + FrameResiduePtr frame_res = env_->GetScFrameResidue(global_idx); + if(frame_res) { frame_residues.push_back(frame_res); - has_sidechain[cys_idx] = true; + has_sidechain[idx] = true; } } } } + } template<typename RotamerGroupPtr> diff --git a/sidechain/src/sidechain_reconstructor.hh b/sidechain/src/sidechain_reconstructor.hh index df65f521f56a9f31c6cea593c8a238669ac77295..2e052b386fbec9bbc8cf1492a40a3cf8d4ca3ec0 100644 --- a/sidechain/src/sidechain_reconstructor.hh +++ b/sidechain/src/sidechain_reconstructor.hh @@ -41,14 +41,12 @@ public: bool build_disulfids = true, Real cutoff = 20, uint64_t graph_max_complexity = 100000000, Real graph_intial_epsilon = 0.02, - Real disulfid_max_distance = 8, Real disulfid_score_thresh = 45) : keep_sidechains_(keep_sidechains) , build_disulfids_(build_disulfids) , cutoff_(cutoff) , graph_max_complexity_(graph_max_complexity) , graph_intial_epsilon_(graph_intial_epsilon) - , disulfid_max_distance_(disulfid_max_distance) , disulfid_score_thresh_(disulfid_score_thresh) , attached_environment_(false) { } @@ -109,7 +107,6 @@ private: Real cutoff_; uint64_t graph_max_complexity_; Real graph_intial_epsilon_; - Real disulfid_max_distance_; Real disulfid_score_thresh_; // environment specific information, set upon calling AttachEnvironment