diff --git a/sidechain/src/disulfid.cc b/sidechain/src/disulfid.cc
index 233db4b5754ad79838ec9df2dba0446e0c2ecd08..388894b0ecc25573898b7326d107c10647a08eaf 100644
--- a/sidechain/src/disulfid.cc
+++ b/sidechain/src/disulfid.cc
@@ -27,19 +27,17 @@ void ExtractCYSSG(T rot_one, T rot_two,
}
}
-void Resolve(const std::vector<Real>& scores,
- const promod3::core::Graph& graph,
- std::vector<int>& active_bonds){
+void SingleResolve(const std::vector<Real>& scores,
+ const promod3::core::Graph& graph,
+ std::vector<int>& active_bonds) {
int size = scores.size();
-
std::vector<int> stupid_enumerator_input(size, 2);
promod3::core::Enumerator<int> enumerator(stupid_enumerator_input);
std::vector<int>& current_combination = enumerator.CurrentEnum();
Real optimal_score = std::numeric_limits<Real>::max();
-
do {
Real current_score = 0.0;
@@ -61,6 +59,107 @@ void Resolve(const std::vector<Real>& scores,
} while(enumerator.Next());
}
+void Resolve(const std::vector<Real>& scores,
+ const promod3::core::Graph& graph,
+ uint max_problem_size,
+ std::vector<int>& active_bonds){
+
+ uint size = scores.size();
+
+ if(size <= max_problem_size) {
+ SingleResolve(scores, graph, active_bonds);
+ return;
+ }
+
+ // If we reach this point, the thing is not really solvable...
+ // The idea is to always kill the disulfid bond with the worst energy
+ // in the largest component.
+ // This should result in smaller and smaller connected components that
+ // become solvable.
+
+ std::vector<std::vector<int> > connected_components(1);
+
+ // at the beginning everything is in the same component
+ for(int i = 0; i < graph.NumNodes(); ++i){
+ connected_components.back().push_back(i);
+ }
+
+ while(true) {
+
+ uint largest_size = 0;
+ uint largest_idx = 0;
+
+ // search for the largest component
+ for(uint i = 0; i < connected_components.size(); ++i){
+ uint current_size = connected_components[i].size();
+ if(current_size > largest_size){
+ largest_size = current_size;
+ largest_idx = i;
+ }
+ }
+
+ // is it small enough?
+ if(largest_size <= max_problem_size) break;
+
+ const std::vector<int>& largest_component = connected_components[largest_idx];
+
+ // let's search for the item with the worst score
+ Real worst_score = -std::numeric_limits<Real>::max();
+ Real worst_score_idx = 0;
+
+ for(uint i = 0; i < largest_component.size(); ++i){
+ Real actual_score = scores[largest_component[i]];
+ if(actual_score > worst_score){
+ worst_score = actual_score;
+ worst_score_idx = i;
+ }
+ }
+
+ std::vector<int> indices_to_keep;
+ for(uint i = 0; i < worst_score_idx; ++i){
+ indices_to_keep.push_back(largest_component[i]);
+ }
+ for(uint i = worst_score_idx + 1; i < largest_size; ++i){
+ indices_to_keep.push_back(largest_component[i]);
+ }
+
+ std::vector<std::vector<int> > new_components;
+ promod3::core::Graph subgraph = graph.SubGraph(indices_to_keep);
+ subgraph.ConnectedComponents(new_components);
+
+ // reorganize connected components
+ for(uint i = 0; i < new_components.size(); ++i){
+ connected_components.push_back(std::vector<int>());
+ for(uint j = 0; j < new_components[i].size(); ++j){
+ int idx_to_add = indices_to_keep[new_components[i][j]];
+ connected_components.back().push_back(idx_to_add);
+ }
+ }
+ connected_components.erase(connected_components.begin() + largest_idx);
+ }
+
+ // all components are now solvable... let's do it!
+ active_bonds.assign(scores.size(), 0);
+ for(uint i = 0; i < connected_components.size(); ++i){
+ uint component_size = connected_components[i].size();
+ if(component_size == 1){
+ active_bonds[connected_components[i][0]] = 1;
+ }
+ else{
+ std::vector<Real> component_scores(component_size, 0.0);
+ for(uint j = 0; j < component_size; ++j){
+ component_scores[j] = scores[connected_components[i][j]];
+ }
+ promod3::core::Graph component_graph = graph.SubGraph(connected_components[i]);
+ std::vector<int> component_active_bonds;
+ SingleResolve(component_scores, component_graph, component_active_bonds);
+ for(uint j = 0; j < component_size; ++j){
+ active_bonds[connected_components[i][j]] = component_active_bonds[j];
+ }
+ }
+ }
+}
+
template <typename T>
void CysteineResolve(const std::vector<T>& rot_groups,
@@ -158,17 +257,7 @@ void CysteineResolve(const std::vector<T>& rot_groups,
promod3::core::Graph component_graph =
graph.SubGraph(connected_component);
std::vector<int> active_bonds;
- //OPTIMIZE THIS FUNCTION!!!
-
- // If there are ambiguous disulfid bonds, they get resolved by
- // evaluating all possible combinations of active / inactive disulfids.
- // This should never be a problem, since those clusters should not
- // contain too many bonds in any applied case. However, if somebody is
- // stupid enough to create an articial protein only consisting of
- // cysteines, this function might become rather slow... A graph approach
- // would be nice.
- Resolve(component_scores, component_graph, active_bonds);
- //OPTIMIZE THIS FUNCTION!!!
+ Resolve(component_scores, component_graph, 15, active_bonds);
for(uint i = 0; i < active_bonds.size(); ++i){
if(active_bonds[i] == 1){
final_disulfids.push_back(connected_component[i]);