diff --git a/doc/tests/scripts/sidechain_steps.py b/doc/tests/scripts/sidechain_steps.py index 67d890d6c348c2e25daf714686e5072f4e94fba7..738ffdd20957bcb8ba814c560f90d8a821a00e89 100644 --- a/doc/tests/scripts/sidechain_steps.py +++ b/doc/tests/scripts/sidechain_steps.py @@ -68,8 +68,8 @@ for i,r in enumerate(prot.residues): # buildup a graph given the rotamer groups graph = sidechain.RotamerGraph.CreateFromFRMList(rotamer_groups) -# and get a solution out of it -solution = graph.Solve() +# and get a solution out of it using a minimal width tree approach +solution = graph.TreeSolve() # let's finally apply the solution to the residues for (i, rot_group, sol) in zip(aa_with_rotamers, diff --git a/sidechain/doc/graph.rst b/sidechain/doc/graph.rst index 5fc2299161d79fc7f9a2025f53a5df2dde4ad5cc..196b35bee5e1ccd839f4239331416312c04597a5 100644 --- a/sidechain/doc/graph.rst +++ b/sidechain/doc/graph.rst @@ -38,13 +38,17 @@ The Rotamer Graph :type rotamer_groups: :class:`list` - .. method:: Solve([max_complexity=inf, initial_epsilon=0.02]) - - The method solves a rotamer graph by iteratively performing dead end - elimination and edge decomposition until the number of possible - permutations, that have to be enumerated in the resulting tree, fall below - **max_complexity**. In every iteration the epsilon value gets doubled to - enforce more edges to be approximated by edge decomposition. This function + .. method:: TreeSolve([max_complexity=inf, initial_epsilon=0.02]) + + The method solves a rotamer graph using a minimal width tree decomposition + approach in an iterative manner. In every iteration, the algorithm performs + a pruning step (DEE / Edge Decomposition) and subsequently tries to solve + each connected component in the graph separately. + If the number of possible enumerations in the tree constructetd from a + particular connected component is is larger **max_complexity**, + this component is solved in a later iteration. The algorithm iterates until + all connected components are solved and steadily increases the epsilon value + resulting in a more and more agressive edge decomposition. This function assumes all self energies of the rotamers to be calculated and properly set! :param max_complexity: Max number of possible permutations, that have to diff --git a/sidechain/pymod/_reconstruct_sidechains.py b/sidechain/pymod/_reconstruct_sidechains.py index 97162d05f3cd5118d1e1bfdf863f053a1982780d..76c991b0eb494cc4f248f4aaf767b2ef26179ca3 100644 --- a/sidechain/pymod/_reconstruct_sidechains.py +++ b/sidechain/pymod/_reconstruct_sidechains.py @@ -501,7 +501,7 @@ def Reconstruct(ent, keep_sidechains=False, build_disulfids=True, else: graph = sidechain.RotamerGraph.CreateFromRRMList(rotamer_groups) - solution = graph.Solve(100000000,0.02) + solution = graph.TreeSolve(100000000,0.02) # update structure for i,rot_group,sol in zip(residues_with_rotamer_group,rotamer_groups,solution): diff --git a/sidechain/pymod/export_graph.cc b/sidechain/pymod/export_graph.cc index 87f647daa1a20ada25889b68cf8de204c3d42f8e..fdbb4839cbf54a7117e40aba29798bd566982465 100644 --- a/sidechain/pymod/export_graph.cc +++ b/sidechain/pymod/export_graph.cc @@ -27,9 +27,9 @@ RotamerGraphPtr WrapFRMList(boost::python::list& rotamer_groups, return RotamerGraph::CreateFromFRMList(v_rotamer_groups,v_rt_operators); } -boost::python::list WrapSolve(RotamerGraphPtr graph, uint64_t max_complexity, - Real initial_epsilon) { - std::vector<int> solution = graph->Solve(max_complexity,initial_epsilon); +boost::python::list WrapTreeSolve(RotamerGraphPtr graph, uint64_t max_complexity, + Real initial_epsilon) { + std::vector<int> solution = graph->TreeSolve(max_complexity,initial_epsilon); boost::python::list return_list; core::AppendVectorToList(solution, return_list); return return_list; @@ -51,7 +51,7 @@ void export_Graph() .def("GetNumEdges", &RotamerGraph::GetNumEdges) .def("GetNumActiveNodes", &RotamerGraph::GetNumActiveNodes) .def("GetNumActiveEdges", &RotamerGraph::GetNumActiveEdges) - .def("Solve",&WrapSolve,(arg("max_complexity")=std::numeric_limits<uint64_t>::max(),arg("initial_epsilon")=0.02)) + .def("TreeSolve",&WrapTreeSolve,(arg("max_complexity")=std::numeric_limits<uint64_t>::max(),arg("initial_epsilon")=0.02)) ; register_ptr_to_python<RotamerGraphPtr>(); diff --git a/sidechain/src/CMakeLists.txt b/sidechain/src/CMakeLists.txt index 011b2dec847e1490aa044c75afd83d8b04cc99a0..e957c001923200e64ddeb88ca6a80c3b33a4adde 100644 --- a/sidechain/src/CMakeLists.txt +++ b/sidechain/src/CMakeLists.txt @@ -21,7 +21,6 @@ set(SIDECHAIN_HEADERS sidechain_lookup.hh sidechain_object_loader.hh tetrahedral_polytope.hh - tree.hh ) set(SIDECHAIN_SOURCES @@ -46,7 +45,6 @@ set(SIDECHAIN_SOURCES sidechain_lookup.cc sidechain_object_loader.cc tetrahedral_polytope.cc - tree.cc ) module(NAME sidechain HEADERS ${SIDECHAIN_HEADERS} SOURCES ${SIDECHAIN_SOURCES} diff --git a/sidechain/src/graph.cc b/sidechain/src/graph.cc index d70e75613a98d671562c3758ecfa55bc82df91ee..f4de7f18c437a1cec4d13a3e5f2540f184d6d7c2 100644 --- a/sidechain/src/graph.cc +++ b/sidechain/src/graph.cc @@ -1,15 +1,489 @@ +#include <algorithm> + #include <promod3/sidechain/graph.hh> -#include <promod3/sidechain/tree.hh> +#include <promod3/core/tree.hh> #include <promod3/core/runtime_profiling.hh> +#include <promod3/core/enumerator.hh> +#include <promod3/core/message.hh> + + namespace{ - bool active_rotamer_sorter(std::pair<uint,Real> a, std::pair<uint,Real> b){ - return a.second < b.second; +bool active_rotamer_sorter(std::pair<uint,Real> a, std::pair<uint,Real> b){ + return a.second < b.second; +} + + + +//////////////////////////////////////////////////////////////// +//FUNCITONS AND DATA STRUCTURES FOR THE TREE SOLVING ALGORITHM// +//////////////////////////////////////////////////////////////// + +typedef promod3::core::TreeBag<int> TreeBag; + +//calculates a linear idx for a certain combination in the lset +struct LSetIdxCalculator{ + + LSetIdxCalculator() { } + + LSetIdxCalculator(const std::vector<int>& num_candidates){ + lset_size = num_candidates.size(); + for(int i = 0; i < lset_size; ++i){ + int temp = 1; + for(int j = i + 1; j < lset_size; ++j){ + temp *= num_candidates[j]; + } + idx_helper.push_back(temp); + } + idx_helper.push_back(1); + } + + inline int GetLIdx(const std::vector<int>& indices) const{ + int idx = 0; + for(int i = 0; i < lset_size; ++i){ + idx += idx_helper[i] * indices[i]; + } + return idx; + } + + inline int GetIdxHelper(int idx){ + return idx_helper[idx]; + } + + std::vector<int> idx_helper; + int lset_size; +}; + +//wrapper around pairwise rotamer energies considering the fact, +//that the node with lower idx is by construction defining the row, +//whereas the other defines the column +struct PairwiseEWrapper{ + + PairwiseEWrapper(int node_one, + int node_two, + int num_rotamers_one, + int num_rotamers_two, + Real* e) { + + if(node_two > node_one) { + multiplicator_one = num_rotamers_two; + multiplicator_two = 1; + } + else { + multiplicator_one = 1; + multiplicator_two = num_rotamers_one; + } + + energies = e; + } + + inline Real GetEnergy(int rotamer_one, int rotamer_two){ + return energies[rotamer_one * multiplicator_one + + rotamer_two * multiplicator_two]; + } + + int multiplicator_one; + int multiplicator_two; + Real* energies; +}; + + + +struct BagData{ + //members of the bag that are also present in parent bag + std::vector<int> lset; + //members of the bag that are not present in parent bag + std::vector<int> rset; + + //number of rotamers for every entry in lset/rset + uint num_l_combinations; + uint num_r_combinations; + std::vector<int> num_l_rotamers; + std::vector<int> num_r_rotamers; + + //optimal solution in r set given a certain combination in l + std::vector<std::vector<int> > optimal_rset_combination; + //the according score + std::vector<Real> optimal_r_score; + + //calculates an index in optimal_r_combination/optimal_r_score + //given a certain combination in l + LSetIdxCalculator lset_idx_calculator; + + //the bag data indices for the children attached to this bag + std::vector<int> children_indices; + int num_children; + + //super complicated mapping structure of getting stuff out from the children + //the idea is, that we can calculate the lset idx in several steps in the + //enumeration procedure... + + //for every child we have a vector of with indices describing the + //elements in the current lset being part of the childrens lset + std::vector<std::vector<int> > lset_child_l_indices; + //the number it has to be multiplied to construct the index for + //that particular child lset + std::vector<std::vector<int> > lset_child_l_index_helpers; + //store the size of the stuff above for every child + std::vector<int> num_lset_child_l; + + //equivalent for the current rset + std::vector<std::vector<int> > rset_child_l_indices; + std::vector<std::vector<int> > rset_child_l_index_helpers; + std::vector<int> num_rset_child_l; + + //this data structure will be altered during the calculation and is a + //placeholder to query optimal solutions for certain lsets of the children + std::vector<std::vector<int> > children_l_combinations; + + //self energies of the rset + std::vector<Real*> r_self_energies; + + //stuff for lr and rl pairwise energies + std::vector<PairwiseEWrapper> lr_energies; + std::vector<int> lr_index_in_I; + std::vector<int> lr_index_in_R; + + //stuff for rr pairwise energies + std::vector<PairwiseEWrapper> rr_energies; + std::vector<int> rr_index_in_R_one; + std::vector<int> rr_index_in_R_two; +}; + +//Fills only the bare minimum of data into the BagData vector and already +//estimates the complexity of the tree solving procedure. All other data +//gets filled in the FillBagData function +uint64_t FillRawBagData(const std::vector<TreeBag*>& traversal, + const std::vector<int>& num_active_rotamers, + std::vector<BagData>& bag_data){ + + uint64_t complexity = 0; + + bag_data.clear(); + bag_data.resize(traversal.size()); + + for(uint bag_idx = 0; bag_idx < traversal.size(); ++bag_idx){ + + TreeBag* bag = traversal[bag_idx]; + TreeBag* parent = traversal[bag_idx]->GetParent(); + + //assign rset and lset + if(parent == NULL){ + //it's the root, lets shuffle all to rset + bag_data[bag_idx].rset.assign(bag->members_begin(), + bag->members_end()); + } + else{ + for(TreeBag::const_member_iterator it = bag->members_begin(); + it != bag->members_end(); ++it){ + if(parent->HasMember(*it)) bag_data[bag_idx].lset.push_back(*it); + else bag_data[bag_idx].rset.push_back(*it); + } + } + + //figure out how many rotamers there are + bag_data[bag_idx].num_l_combinations = 1; + bag_data[bag_idx].num_r_combinations = 1; + + for(std::vector<int>::iterator i = bag_data[bag_idx].lset.begin(); + i != bag_data[bag_idx].lset.end(); ++i){ + bag_data[bag_idx].num_l_rotamers.push_back(num_active_rotamers[*i]); + bag_data[bag_idx].num_l_combinations *= num_active_rotamers[*i]; + } + + for(std::vector<int>::iterator i = bag_data[bag_idx].rset.begin(); + i != bag_data[bag_idx].rset.end(); ++i){ + bag_data[bag_idx].num_r_rotamers.push_back(num_active_rotamers[*i]); + bag_data[bag_idx].num_r_combinations *= num_active_rotamers[*i]; + } + + complexity += (bag_data[bag_idx].num_l_combinations * + bag_data[bag_idx].num_r_combinations); + } + + return complexity; +} + + +void FillBagData(const std::vector<TreeBag*>& traversal, + std::vector<Real*> self_energies, + std::map<std::pair<int,int>, Real*> pairwise_energies, + std::vector<BagData>& bag_data){ + + for(uint bag_idx = 0; bag_idx < traversal.size(); ++bag_idx){ + + TreeBag* bag = traversal[bag_idx]; + BagData& current_data = bag_data[bag_idx]; + + //allocate required space for optimal r combinations for every possible lset + uint l_combinations = current_data.num_l_combinations; + current_data.optimal_rset_combination.resize(l_combinations); + current_data.optimal_r_score.resize(l_combinations); + + //generate a new lset idxgenerator... + current_data.lset_idx_calculator = + LSetIdxCalculator(current_data.num_l_rotamers); + + //get all the children stuff right + for(TreeBag::const_child_iterator it = bag->children_begin(); + it != bag->children_end(); ++it){ + current_data.children_indices.push_back((*it)->GetIdx()); + } + current_data.num_children = current_data.children_indices.size(); + + //handle current lset + current_data.lset_child_l_indices.resize(current_data.num_children); + current_data.lset_child_l_index_helpers.resize(current_data.num_children); + for(uint i = 0; i < current_data.lset.size(); ++i){ + int val = current_data.lset[i]; + //go through all children + for(int j = 0; j < current_data.num_children; ++j){ + int ch_idx = current_data.children_indices[j]; + //we can be sure of the children lsets being set due to the postorder + //traversal of the tree + const std::vector<int>& child_lset = bag_data[ch_idx].lset; + std::vector<int>::const_iterator it = std::find(child_lset.begin(), + child_lset.end(), + val); + if(it != child_lset.end()){ + //its in the childs lset! + int idx_in_child_l = it - child_lset.begin(); + current_data.lset_child_l_indices[j].push_back(i); + int helper = bag_data[ch_idx].lset_idx_calculator.GetIdxHelper(idx_in_child_l); + current_data.lset_child_l_index_helpers[j].push_back(helper); + } + } + } + for(int i = 0; i < current_data.num_children; ++i){ + current_data.num_lset_child_l.push_back(current_data.lset_child_l_indices[i].size()); + } + + //do the same for the rset + current_data.rset_child_l_indices.resize(current_data.num_children); + current_data.rset_child_l_index_helpers.resize(current_data.num_children); + for(uint i = 0; i < current_data.rset.size(); ++i){ + int val = current_data.rset[i]; + //go through all children + for(int j = 0; j < current_data.num_children; ++j){ + int ch_idx = current_data.children_indices[j]; + //we can be sure of the children lsets being set due to the postorder + //traversal of the tree + const std::vector<int>& child_lset = bag_data[ch_idx].lset; + std::vector<int>::const_iterator it = std::find(child_lset.begin(), + child_lset.end(), + val); + if(it != child_lset.end()){ + //its in the childs lset! + int idx_in_child_l = it - child_lset.begin(); + current_data.rset_child_l_indices[j].push_back(i); + int helper = bag_data[ch_idx].lset_idx_calculator.GetIdxHelper(idx_in_child_l); + current_data.rset_child_l_index_helpers[j].push_back(helper); + } + } + } + for(int i = 0; i < current_data.num_children; ++i){ + current_data.num_rset_child_l.push_back(current_data.rset_child_l_indices[i].size()); + } + + //prepare the lset combinations + for(std::vector<int>::iterator i = current_data.children_indices.begin(); + i != current_data.children_indices.end(); ++i){ + int size = bag_data[*i].lset.size(); + current_data.children_l_combinations.push_back(std::vector<int>(size,0)); + } + + //fill the rset self energies + for(std::vector<int>::iterator i = current_data.rset.begin(); + i != current_data.rset.end(); ++i){ + current_data.r_self_energies.push_back(self_energies[*i]); + } + + //fill the lr pairwise energies + for(uint i = 0; i < current_data.lset.size(); ++i){ + for(uint j = 0; j < current_data.rset.size(); ++j){ + std::pair<int,int> idx_pair = std::make_pair(current_data.lset[i], + current_data.rset[j]); + std::map<std::pair<int,int>, Real*>::iterator e_it = + pairwise_energies.find(idx_pair); + if(e_it != pairwise_energies.end()){ + current_data.lr_energies.push_back(PairwiseEWrapper(current_data.lset[i], + current_data.rset[j], + current_data.num_l_rotamers[i], + current_data.num_r_rotamers[j], + e_it->second)); + current_data.lr_index_in_I.push_back(i); + current_data.lr_index_in_R.push_back(j); + continue; + } + idx_pair = std::make_pair(current_data.rset[j], current_data.lset[i]); + e_it = pairwise_energies.find(idx_pair); + if(e_it != pairwise_energies.end()){ + current_data.lr_energies.push_back(PairwiseEWrapper(current_data.lset[i], + current_data.rset[j], + current_data.num_l_rotamers[i], + current_data.num_r_rotamers[j], + e_it->second)); + current_data.lr_index_in_I.push_back(i); + current_data.lr_index_in_R.push_back(j); + } + } + } + + //fill the rr pairwise energies + for(uint i = 0; i < current_data.rset.size(); ++i){ + for(uint j = i + 1; j < current_data.rset.size(); ++j){ + + std::pair<int,int> idx_pair = std::make_pair(current_data.rset[i], + current_data.rset[j]); + std::map<std::pair<int,int>, Real*>::iterator e_it = + pairwise_energies.find(idx_pair); + if(e_it != pairwise_energies.end()){ + current_data.rr_energies.push_back(PairwiseEWrapper(current_data.rset[i], + current_data.rset[j], + current_data.num_r_rotamers[i], + current_data.num_r_rotamers[j], + e_it->second)); + current_data.rr_index_in_R_one.push_back(i); + current_data.rr_index_in_R_two.push_back(j); + continue; + } + idx_pair = std::make_pair(current_data.rset[j], current_data.rset[i]); + e_it = pairwise_energies.find(idx_pair); + if(e_it != pairwise_energies.end()){ + current_data.rr_energies.push_back(PairwiseEWrapper(current_data.rset[i], + current_data.rset[j], + current_data.num_r_rotamers[i], + current_data.num_r_rotamers[j], + e_it->second)); + current_data.rr_index_in_R_one.push_back(i); + current_data.rr_index_in_R_two.push_back(j); + } + } + } + } +} + +void EnumerateBagData(std::vector<BagData>& bag_data){ + + for(uint bag_idx = 0; bag_idx < bag_data.size(); ++bag_idx){ + + //data of the current bag + BagData& data = bag_data[bag_idx]; + int lset_size = data.lset.size(); + int rset_size = data.rset.size(); + int num_lr_energies = data.lr_energies.size(); + int num_rr_energies = data.rr_energies.size(); + + //the lset of this bag we have to enumerate + std::vector<int> current_l_enum(lset_size, 0); + promod3::core::Enumerator l_set_enumerator(current_l_enum, + &data.num_l_rotamers[0]); + + //let's iterate over all possible lset combinations + //in case of an empty lset (root node) we enforce one iteration to + //optimize the rset + int num_iterations = std::max(l_set_enumerator.GetMaxEnumerations(), 1); + int num_children = data.children_indices.size(); + + int partial_l_indices[num_children]; + + for(int i = 0; i < num_iterations; ++i){ + + memset(partial_l_indices, 0 , num_children * sizeof(int)); + for(int j = 0; j < num_children; ++j){ + for(int k = 0; k < data.num_lset_child_l[j]; ++k){ + partial_l_indices[j] += + (current_l_enum[data.lset_child_l_indices[j][k]] * + data.lset_child_l_index_helpers[j][k]); + } + } + + std::vector<int> current_r_enum(rset_size,0); + promod3::core::Enumerator r_set_enumerator(current_r_enum, + &data.num_r_rotamers[0]); + Real score; + Real best_score = std::numeric_limits<Real>::max(); + std::vector<int> best_rset = current_r_enum; + + //let's find the optimal r combination given the current l combination... + for(int j = 0; j < r_set_enumerator.GetMaxEnumerations(); ++j){ + + score = 0; + + //sum up the r self energies + for(int k = 0; k < rset_size; ++k){ + score += data.r_self_energies[k][current_r_enum[k]]; + } + + //sum up the lr pairwise energies + for(int k = 0; k < num_lr_energies; ++k){ + score += + data.lr_energies[k].GetEnergy(current_l_enum[data.lr_index_in_I[k]], + current_r_enum[data.lr_index_in_R[k]]); + } + + //sum up the rr pairwise energies + for(int k = 0; k < num_rr_energies; ++k){ + score += + data.rr_energies[k].GetEnergy(current_r_enum[data.rr_index_in_R_one[k]], + current_r_enum[data.rr_index_in_R_two[k]]); + } + + for(int k = 0; k < num_children; ++k){ + int l_idx = partial_l_indices[k]; + for(int l = 0; l < data.num_rset_child_l[k]; ++l){ + l_idx += + (current_r_enum[data.rset_child_l_indices[k][l]] * + data.rset_child_l_index_helpers[k][l]); + } + score += bag_data[data.children_indices[k]].optimal_r_score[l_idx]; + } + + if(score < best_score){ + best_score = score; + best_rset = current_r_enum; + } + + r_set_enumerator.Next(); + } + int l_idx = data.lset_idx_calculator.GetLIdx(current_l_enum); + data.optimal_rset_combination[l_idx] = best_rset; + data.optimal_r_score[l_idx] = best_score; + l_set_enumerator.Next(); + } + } +} + +void CollectSolutionFromBagData(const std::vector<BagData>& bag_data, + int bag_idx, std::vector<int>& solution){ + + const BagData& bag = bag_data[bag_idx]; + + //collect the lset values from the solution + std::vector<int> l_solution(bag.lset.size(), 0); + for(uint i = 0; i < l_solution.size(); ++i){ + l_solution[i] = solution[bag.lset[i]]; + } + + //get the index of the ideal r_combination given that l solution + int r_idx = bag.lset_idx_calculator.GetLIdx(l_solution); + //and fill the according values into the solution + for(uint i = 0; i < bag.rset.size(); ++i){ + solution[bag.rset[i]] = bag.optimal_rset_combination[r_idx][i]; } + //recursively call this function for all the children + for(std::vector<int>::const_iterator i = bag.children_indices.begin(); + i != bag.children_indices.end(); ++i){ + CollectSolutionFromBagData(bag_data, *i, solution); + } } + + +} // anon ns + namespace promod3{ namespace sidechain{ RotamerGraphEdge::RotamerGraphEdge(uint index, @@ -217,19 +691,27 @@ void RotamerGraphNode::Reset(){ } } -void RotamerGraphNode::Deactivate(){ +void RotamerGraphNode::Deactivate(int index){ if(!active_) return; active_ = false; - //lets first figure out which is the lowest energy - //rotamer and set it to the only active rotamer - Real e_min = std::numeric_limits<Real>::max(); - uint index = 0; - for(uint i = 0; i < active_rotamers_.size(); ++i){ - if(self_energies_[active_rotamers_[i]] < e_min){ - e_min = self_energies_[active_rotamers_[i]]; - index = active_rotamers_[i]; + + //if the given index is -1 (default), we simply search for the + //the rotamer with the lowest self energy as the last active rotamer, + //a.k.a. the solution for this node. + //In all other cases we take the rotamer at given index as solution. + if(index == -1){ + //lets first figure out which is the lowest energy + //rotamer and set it to the only active rotamer + Real e_min = std::numeric_limits<Real>::max(); + index = 0; + for(uint i = 0; i < active_rotamers_.size(); ++i){ + if(self_energies_[active_rotamers_[i]] < e_min){ + e_min = self_energies_[active_rotamers_[i]]; + index = active_rotamers_[i]; + } } } + active_rotamers_.resize(1); active_rotamers_[0] = index; @@ -265,10 +747,7 @@ void RotamerGraphNode::RemoveFromActiveEdges(RotamerGraphEdge* edge){ if(edge == edges_[counter]){ iterator i = std::find(active_edges_.begin(), active_edges_.end(), counter); - if(i != active_edges_.end()){ - active_edges_.erase(i); - } - if(active_edges_.empty()) this->Deactivate(); + if(i != active_edges_.end()) active_edges_.erase(i); return; } } @@ -633,35 +1112,180 @@ void RotamerGraph::Reset(){ } } -std::vector<int> RotamerGraph::Solve(uint64_t max_complexity, +promod3::core::Graph RotamerGraph::ToRawGraph() const{ + promod3::core::Graph raw_graph(this->GetNumNodes()); + std::map<RotamerGraphNode*, int> ptr_mapper; + for(uint i = 0; i < nodes_.size(); ++i){ + ptr_mapper[nodes_[i]] = i; + } + for(std::vector<RotamerGraphEdge*>::const_iterator i = edges_.begin(); + i != edges_.end(); ++i){ + if((*i)->IsActive()){ + int idx_one = ptr_mapper[(*i)->GetNode1()]; + int idx_two = ptr_mapper[(*i)->GetNode2()]; + raw_graph.Connect(idx_one,idx_two); + } + } + return raw_graph; +} + +std::vector<int> RotamerGraph::TreeSolve(uint64_t max_complexity, Real initial_epsilon) { core::ScopedTimerPtr prof = core::StaticRuntimeProfiler::StartScoped( - "Graph::Solve", 2); + "RotamerGraph::TreeSolve", 2); + + //all nodes must be active in the beginning + for(std::vector<RotamerGraphNode*>::iterator i = nodes_.begin(); + i != nodes_.end(); ++i){ + if(!(*i)->IsActive()){ + String err = "All nodes of graph must be active for TreeSolving."; + err += " Either construct a new graph or reset the current one!"; + throw promod3::Error(err); + } + } + + std::vector<int> overall_solution(this->GetNumNodes(), 0); + + //handle the case where graph is empty + if(this->GetNumNodes() == 0){ + return overall_solution; + } + + //handle the case where graph only contains one item + if(this->GetNumNodes() == 1){ + nodes_[0]->Deactivate(); + overall_solution[0] = nodes_[0]->GetOverallIndex(0); + return overall_solution; + } - TreePtr t; bool first_iteration = true; + + do{ + this->Prune(initial_epsilon, first_iteration); + + //build raw graph and extract connected components + promod3::core::Graph raw_graph = this->ToRawGraph(); + + //generate the connected components + std::vector<std::vector<int> > connected_components; + raw_graph.ConnectedComponents(connected_components); + + //solve the connected components seperately if the according + //complexity is feasible + for(uint component_idx = 0; component_idx < connected_components.size(); + ++component_idx){ + + const std::vector<int>& component = connected_components[component_idx]; + + if(component.size() == 1){ + if(nodes_[component[0]]->IsActive()){ + nodes_[component[0]]->Deactivate(); + int overall_idx = nodes_[component[0]]->GetOverallIndex(0); + overall_solution[component[0]] = overall_idx; + } + continue; + } + + promod3::core::Graph component_graph = raw_graph.SubGraph(component); + + TreeBag* component_tree = + promod3::core::GenerateMinimalWidthTree(component_graph); + + //generate post order traversal of previously constructed tree + std::vector<TreeBag* > traversal = + promod3::core::PostOrderTraversal(component_tree); + + //create initial data required to solve the enumeration problem + std::vector<int> num_active_rotamers(component.size()); + for(uint i = 0; i < component.size(); ++i){ + num_active_rotamers[i] = nodes_[component[i]]->GetNumActiveRotamers(); + } + std::vector<BagData> bag_data; + uint64_t component_complexity = FillRawBagData(traversal, + num_active_rotamers, + bag_data); + + //check whether complexity of current connected component is solvable... + //if not, we simply perform another iteration of pruning. + if(component_complexity > max_complexity) continue; + + //The complexity is low enough... let's fill remaining data and + //solve it! + + //first we have to extract the energies + std::vector<Real*> self_energies; + std::map<std::pair<int,int>, Real*> pairwise_energies; + + for(uint i = 0; i < component.size(); ++i){ + Real* self_e = new Real[num_active_rotamers[i]]; + nodes_[component[i]]->FillActiveSelfEnergies(self_e); + self_energies.push_back(self_e); + } + + for(uint i = 0; i < edges_.size(); ++i){ + RotamerGraphEdge* edge = edges_[i]; + //does it affect the current component? + int a = edge->GetNode1()->GetIndex(); + int b = edge->GetNode2()->GetIndex(); + + std::vector<int>::const_iterator a_it = std::find(component.begin(), + component.end(), a); + if(a_it == component.end()) continue; //it's not part of the component + + std::vector<int>::const_iterator b_it = std::find(component.begin(), + component.end(), b); + if(b_it == component.end()) continue; //it's not part of the component + + int a_in_component = a_it - component.begin(); + int b_in_component = b_it - component.begin(); + + Real* pairwise_e = new Real[num_active_rotamers[a_in_component] * + num_active_rotamers[b_in_component]]; + edge->FillActiveEnergies(pairwise_e); + + std::pair<int,int> edge_pair = std::make_pair(a_in_component, + b_in_component); + + pairwise_energies[edge_pair] = pairwise_e; + } + + //let's fill in remaining data + FillBagData(traversal, self_energies, pairwise_energies, bag_data); - while(true){ - this->Prune(initial_epsilon,first_iteration); - t = TreePtr(new Tree(this)); - if(t->GenerateTree(this) < max_complexity){ - break; + //let's do the whole enumeration game + EnumerateBagData(bag_data); + + //let's get the solution out + std::vector<int> component_solution(component.size(), 0); + CollectSolutionFromBagData(bag_data, bag_data.size() - 1, + component_solution); + + //alter the graph accordingly + for(uint i = 0; i < component.size(); ++i){ + uint overall_idx = + nodes_[component[i]]->GetOverallIndex(component_solution[i]); + nodes_[component[i]]->Deactivate(overall_idx); + overall_solution[component[i]] = overall_idx; + } + + //the energies are not needed anymore + for(std::vector<Real*>::iterator i = self_energies.begin(); + i != self_energies.end(); ++i){ + delete [] (*i); + } + + for(std::map<std::pair<int,int>, Real* >::iterator i = + pairwise_energies.begin(); i != pairwise_energies.end(); ++i){ + delete [] i->second; + } } + first_iteration = false; initial_epsilon *= 2; - } + }while(this->GetNumActiveNodes() > 0); - std::vector<int> return_vec(t->GetNumNodes(),0); - t->Solve(this,return_vec); - //the indices are based on the active rotamers in the rotamer groups - //we have to map that - int actual_pos = 0;; - for(node_iterator i = this->nodes_begin(); - i != this->nodes_end(); ++i, ++actual_pos){ - return_vec[actual_pos] = (*i)->GetOverallIndex(return_vec[actual_pos]); - } - return return_vec; + return overall_solution; } void RotamerGraph::Invert(Real* energies, Real* inverted_energies, diff --git a/sidechain/src/graph.hh b/sidechain/src/graph.hh index 31d83a4e7d6d6d5d9cb1071618f8a5cc0f0ac63e..9773aeb543c4bc9d9ab10166f7a47e5661480da4 100644 --- a/sidechain/src/graph.hh +++ b/sidechain/src/graph.hh @@ -1,21 +1,13 @@ #ifndef PROMOD3_ROTAMER_GRAPH_COMPONENTS_HH #define PROMOD3_ROTAMER_GRAPH_COMPONENTS_HH -#include <math.h> #include <vector> #include <limits.h> -#include <ctime> -#include <algorithm> #include <boost/shared_ptr.hpp> -#include <promod3/sidechain/tetrahedral_polytope.hh> -#include <promod3/sidechain/pairwise_terms.hh> -#include <promod3/sidechain/particle.hh> -#include <promod3/sidechain/frame.hh> #include <promod3/sidechain/rotamer_group.hh> -#include <promod3/core/message.hh> - +#include <promod3/core/graph.hh> namespace promod3{ namespace sidechain{ @@ -29,7 +21,7 @@ class RotamerGraphEdge{ public: RotamerGraphEdge(uint index, RotamerGraphNode* node1, RotamerGraphNode* node2, - uint num_i, uint num_j, Real* pairwise_energies_); + uint num_i, uint num_j, Real* pairwise_energies_); ~RotamerGraphEdge(); @@ -72,19 +64,19 @@ class RotamerGraphNode{ public: RotamerGraphNode(uint index, - uint num_rotamers, - Real* internal_energies, - Real* frame_energies); + uint num_rotamers, + Real* internal_energies, + Real* frame_energies); ~RotamerGraphNode(); void Reset(); - void Deactivate(); + void Deactivate(int index = -1); - bool IsActive() { return active_; } + bool IsActive() const { return active_; } - uint GetIndex() { return index_; } + uint GetIndex() const { return index_; } void AddEdge(RotamerGraphEdge* edge); @@ -171,8 +163,10 @@ public: void Reset(); - std::vector<int> Solve(uint64_t max_complexity = std::numeric_limits<uint64_t>::max(), - Real intial_epsilon = 0.02); + promod3::core::Graph ToRawGraph() const; + + std::vector<int> TreeSolve(uint64_t max_complexity = std::numeric_limits<uint64_t>::max(), + Real intial_epsilon = 0.02); RotamerGraphNode* GetNode(uint index) { return nodes_.at(index); } diff --git a/sidechain/src/tree.cc b/sidechain/src/tree.cc deleted file mode 100644 index e348b64c46465d1464309479ff6d05766c43bfa8..0000000000000000000000000000000000000000 --- a/sidechain/src/tree.cc +++ /dev/null @@ -1,856 +0,0 @@ -#include <promod3/sidechain/tree.hh> -#include <promod3/core/enumerator.hh> - -namespace{ - - //sums up every row to give an estimate of how many connections - //each node has. Be aware, that the diagonal element is one as well... - void SummedConnections(int* adjacency, int* sum, int n){ - int row_sum; - int* data_ptr = adjacency; - int j; - - for(int i =0; i < n; ++i){ - row_sum = 0; - for(j = 0; j < n; ++j){ - row_sum += *data_ptr; - ++data_ptr; - } - sum[i] = row_sum; - } - } - - //clears node idx by setting the corresponding row and col to zero - void ClearNode(int* adjacency, int idx, int n){ - //clear row - memset(&adjacency[idx*n],0,n*sizeof(int)); - //clear col - int* col_ptr = & adjacency[idx]; - for(int i = 0; i < n; ++i){ - *col_ptr = 0; - col_ptr += n; - } - } - - void BuildClique(int* adjacency, const std::vector<int>& indices, int n){ - //let's connect them all - for(uint i = 0; i < indices.size(); ++i){ - for(uint j = i+1; j < indices.size(); ++j){ - adjacency[indices[i]*n+indices[j]] = 1; - adjacency[indices[j]*n+indices[i]] = 1; - } - } - } - - promod3::sidechain::BagPtr TreePostProcessing(std::vector<std::pair<int,promod3::sidechain::BagPtr> >& bags){ - - bool something_happened = true; - promod3::sidechain::BagPtr actual_bag; - promod3::sidechain::Bag* actual_parent; - - while(something_happened){ - something_happened = false; - for(std::vector<std::pair<int, promod3::sidechain::BagPtr> >::iterator i = bags.begin(); - i != bags.end(); ++i){ - actual_bag = i->second; - actual_parent = actual_bag->GetParent(); - if(actual_parent != NULL){ - //if all members in actual bag are contained in the parent, we can - //delete the actual bag and attach all children to the parent - bool everything_there = true; - for(promod3::sidechain::Bag::member_iterator j = actual_bag->members_begin(); - j != actual_bag->members_end(); ++j){ - if(!actual_parent->HasMember(*j)){ - everything_there = false; - break; - } - } - if(everything_there){ - for(promod3::sidechain::Bag::child_iterator j = actual_bag->children_begin(); - j != actual_bag->children_end(); ++j){ - actual_parent->AttachBag(*j); - } - actual_parent->RemoveBag(actual_bag); - i = bags.erase(i); - something_happened = true; - if(i == bags.end()) break; - //we start with the second check already in the same iteration! - actual_bag = i->second; - actual_parent = actual_bag->GetParent(); - } - } - if(actual_parent != NULL){ - //if all members in actual parent are contained in actual bag, the actual parent - //can be replaced by actual_bag - bool everything_there = true; - for(promod3::sidechain::Bag::member_iterator j = actual_parent->members_begin(); - j != actual_parent->members_end(); ++j){ - if(!actual_bag->HasMember(*j)){ - everything_there = false; - break; - } - } - if(everything_there){ - //let's attach all children of parent to actual_bag (except itself) - for(promod3::sidechain::Bag::child_iterator j = actual_parent->children_begin(); - j != actual_parent->children_end(); ++j){ - if(*j == actual_bag) continue; - actual_bag->AttachBag(*j); - } - //lets find the index of the parent - bool found_parent = false; - for(uint j = 0; j < bags.size(); ++j){ - if(bags[j].second.get() == actual_parent){ - //place the actual_bag there - promod3::sidechain::Bag* parents_parent = actual_parent->GetParent(); - if(parents_parent != NULL){ - parents_parent->RemoveBag(actual_parent); - parents_parent->AttachBag(actual_bag); - } - else{ - actual_bag->SetParent(NULL); - } - i = bags.erase(bags.begin()+j); - something_happened = true; - found_parent = true; - break; - } - } - - --i; - if(!found_parent){ - throw "fuuuuuuck"; - } - } - } - } - } - - for(std::vector<std::pair<int, promod3::sidechain::BagPtr> >::iterator i = bags.begin(); - i != bags.end(); ++i){ - if(i->second->GetParent() == NULL) return i->second; - } - - throw "fuuuuck"; - - - } - - void ConnectedComponents(int* adjacency, int n, - std::vector<std::vector<int> >& components){ - - components.clear(); - - bool visited[n]; - memset(visited,0,n*sizeof(bool)); - - for(int i = 0; i < n; ++i){ - if(visited[i]) continue; - //it's not visited, so let's start with a new connected component... - components.push_back(std::vector<int>()); - std::stack<int> queue; - queue.push(i); - int actual_element; - while(!queue.empty()){ - actual_element = queue.top(); - queue.pop(); - if(!visited[actual_element]){ - //the actual element is not yet visited - // =>let's add it to the actual component and - //put all nodes it is connected to at the back of the - //queue - visited[actual_element] = true; - components.back().push_back(actual_element); - int* adj_ptr = &adjacency[actual_element * n]; - for(int j = 0; j < n; ++j){ - if(*adj_ptr == 1) queue.push(j); - ++adj_ptr; - } - } - } - std::sort(components.back().begin(), components.back().end()); - } - } - - promod3::sidechain::BagPtr TreeDecomp(int* matrix, int n){ - - //bags, that will be produced in a first stage of the algorithm - std::stack<promod3::sidechain::BagPtr> bags; - - //some variable needed in the while loop - int min_sum; - int idx = 0; - int* int_ptr; - std::vector<int> bag_members; - - //the number of adjacent nodes will be saved in here - int* sum = new int[n]; - - while(true){ - //lets get the number of adjacent edges for every node - SummedConnections(matrix,sum,n); - //find the minimal value and the corresponding node index - min_sum = std::numeric_limits<int>::max(); - int_ptr = sum; - for(int i = 0; i < n; ++i, ++int_ptr){ - if(*int_ptr == 0) continue; // the node is deactivated - if(*int_ptr < min_sum){ - min_sum = *int_ptr; - idx = i; - } - } - //check, whether there are any active edges remaining - if(min_sum == std::numeric_limits<int>::max()) break; - //let's add all nodes connected to the selected node to the same bag - bag_members.clear(); - int_ptr = &matrix[idx * n]; - for(int i = 0; i < n; ++i, ++int_ptr){ - if(*int_ptr == 1) bag_members.push_back(i); - } - - //let's connect all neighbouring vertices to a clique - BuildClique(matrix,bag_members,n); - //let's remove everything regarding node idx from matrix - ClearNode(matrix,idx,n); - //and finally add everything to the other bags - bags.push(promod3::sidechain::BagPtr(new promod3::sidechain::Bag(bag_members))); - } - - //this won't be needed anymore - delete [] sum; - - //we need to know which nodes are already added, to ensure a valid - //tree construction - std::set<int> nodes_in_tree; - - //in here we store all bags, where childs can be connected - //To ensure a low depth, they are sorted by the distance to - //the top bag => the root - std::vector<std::pair<int, promod3::sidechain::BagPtr> > bags_to_attach; - bags_to_attach.push_back(std::make_pair(0,bags.top())); - for(promod3::sidechain::Bag::member_iterator i = bags.top()->members_begin(); - i != bags.top()->members_end(); ++i){ - nodes_in_tree.insert(*i); - } - bags.pop(); - - std::vector<int> common_nodes; - while(!bags.empty()){ - promod3::sidechain::BagPtr actual_bag = bags.top(); - bags.pop(); - //we first need to know which of the bags node are already in the tree - common_nodes.clear(); - for(promod3::sidechain::Bag::member_iterator i = actual_bag->members_begin(); - i != actual_bag->members_end(); ++i){ - if(nodes_in_tree.find(*i) != nodes_in_tree.end()) common_nodes.push_back(*i); - } - //find first possible bag to attach actual bag - uint parent_bag_index = 0; - for(std::vector<std::pair<int,promod3::sidechain::BagPtr> >::iterator i = bags_to_attach.begin(); - i != bags_to_attach.end(); ++i){ - //all nodes in common_nodes must be present in parent bag - bool add = true; - for(std::vector<int>::iterator j = common_nodes.begin(); - j != common_nodes.end(); ++j){ - if(!i->second->HasMember(*j)){ - add = false; - break; - } - } - if(add) break; //thats the bag - ++parent_bag_index; - } - if(parent_bag_index == bags_to_attach.size()){ - throw "fuck"; - } - //attach the actual_bag - int depth = bags_to_attach[parent_bag_index].first + 1; - bags_to_attach[parent_bag_index].second->AttachBag(actual_bag); - - //insert the actual bag into the bags to attach vector - //insertion depends on the depth... - bool inserted_bag = false; - for(std::vector<std::pair<int,promod3::sidechain::BagPtr> >::iterator i = bags_to_attach.begin(); - i != bags_to_attach.end(); ++i){ - if(i->first > depth){ - bags_to_attach.insert(i,std::make_pair(depth,actual_bag)); - inserted_bag = true; - break; - } - } - if(!inserted_bag) bags_to_attach.push_back(std::make_pair(depth,actual_bag)); - - //we have to update the nodes present in the tree - for(promod3::sidechain::Bag::member_iterator i = actual_bag->members_begin(); - i != actual_bag->members_end(); ++i){ - nodes_in_tree.insert(*i); - } - } - - promod3::sidechain::BagPtr root = TreePostProcessing(bags_to_attach); - - return root; - } - -} - - -namespace promod3{ namespace sidechain{ - -Tree::~Tree(){ - for(std::vector<Real*>::iterator i = self_energies_.begin(); - i != self_energies_.end(); ++i){ - delete [] *i; - } - for(std::vector<std::map<int,Real*> >::iterator i = pairwise_energies_.begin(); - i != pairwise_energies_.end(); ++i){ - for(std::map<int,Real*>::iterator j = i->begin(); j != i->end(); ++j){ - delete [] j->second; - } - } - //note, that we don't have to delete anything in the bags, since - //they only contain pointers to the memory adresses just deleted -} - -uint64_t Tree::GenerateTree(RotamerGraph* graph){ - if(graph->GetNumNodes() != num_nodes_){ - std::stringstream ss; - ss << "Cannot do the tree decomposition, given graph object differs in number of nodes "; - ss << "to the graph object the tree object has been created with"; - throw promod3::Error(ss.str()); - } - - components_.clear(); - root_bags_.clear(); - isolated_residues_.clear(); - - //let's first generate an overall adjacency matrix - int* adjacency_matrix = new int[num_nodes_*num_nodes_]; - memset(adjacency_matrix,0,num_nodes_*num_nodes_*sizeof(int)); - //the diagonal elements get set to one, setting them to zero - //will be equivalent to deactivate the corresponging nodes - //in later calculation steps - for(uint i = 0; i < num_nodes_; ++i){ - adjacency_matrix[i*num_nodes_+i] = 1; - } - - //to generate the matrix we need a ptr/index mapper... - //could be implemented a bit more efficient - std::map<RotamerGraphNode*,int> ptr_index_map; - int counter = 0; - for(RotamerGraph::node_iterator i = graph->nodes_begin(); - i != graph->nodes_end(); ++i){ - ptr_index_map[*i] = counter; - ++counter; - } - - int a,b; - for(RotamerGraph::edge_iterator i = graph->edges_begin(); - i != graph->edges_end(); ++i){ - if(!(*i)->IsActive()) continue; - a = ptr_index_map[(*i)->GetNode1()]; - b = ptr_index_map[(*i)->GetNode2()]; - adjacency_matrix[a*num_nodes_+b] = 1; - adjacency_matrix[b*num_nodes_+a] = 1; - } - - //every connected component gets solved separately - ConnectedComponents(adjacency_matrix, num_nodes_, components_); - - //we build up an adjacency matrix for every component - size_t adjacency_matrices_size = 0; - for(std::vector<std::vector<int> >::iterator i = components_.begin(); - i != components_.end(); ++i){ - adjacency_matrices_size += i->size() * i->size(); - } - int* adjacency_matrices = new int[adjacency_matrices_size]; - - if(components_.size() == 1){ - memcpy(adjacency_matrices, adjacency_matrix, - adjacency_matrices_size * sizeof(int)); - } - else{ - memset(adjacency_matrices, 0, adjacency_matrices_size*sizeof(int)); - //lets copy over the values from the overall adjacency matrix - int component_start = 0; - for(std::vector<std::vector<int> >::iterator i = components_.begin(); - i != components_.end(); ++i){ - for(uint j = 0; j < i->size(); ++j){ - for(uint k = j; k < i->size(); ++k){ - int value = adjacency_matrix[(*i)[j]*num_nodes_ + (*i)[k]]; - if(value == 0) continue; - adjacency_matrices[component_start + j*i->size() + k] = value; - adjacency_matrices[component_start + k*i->size() + j] = value; - } - } - component_start += (i->size()*i->size()); - } - } - - int actual_pos = 0; - for(std::vector<std::vector<int> >::iterator i = components_.begin(); - i != components_.end(); ++i){ - if(i->size() == 1){ - isolated_residues_.push_back((*i)[0]); - i = components_.erase(i); - --i; - ++actual_pos; - } - else{ - root_bags_.push_back(TreeDecomp(&adjacency_matrices[actual_pos],i->size())); - actual_pos += i->size()*i->size(); - } - } - - delete [] adjacency_matrix; - delete [] adjacency_matrices; - //let's finally calculate the complexity for the return value - uint64_t complexity = 0; - std::vector<int> num_rotamers; - graph->GetNumActiveRotamers(num_rotamers); - for(uint i = 0; i < root_bags_.size(); ++i){ - complexity += root_bags_[i]->GetComplexity(num_rotamers,components_[i]); - } - - return complexity; -} - -void Tree::Solve(RotamerGraph* graph, std::vector<int>& overall_solution){ - if(graph->GetNumNodes() != num_nodes_){ - std::stringstream ss; - ss << "Cannot get complexity, given graph object differs in number of nodes "; - ss << "to the graph object the tree has been created with"; - throw promod3::Error(ss.str()); - } - - self_energies_.resize(num_nodes_); - pairwise_energies_.resize(num_nodes_); - - std::vector<int> num_rotamers; - graph->GetNumActiveRotamers(num_rotamers); - - //fill up self energies - uint pos = 0; - for(RotamerGraph::const_node_iterator i = graph->nodes_begin(); - i != graph->nodes_end(); ++i, ++pos){ - Real* self_energies = new Real[num_rotamers[pos]]; - (*i)->FillActiveSelfEnergies(self_energies); - self_energies_[pos] = self_energies; - } - - //fill up pairwise energies - for(uint i = 0; i < num_nodes_; ++i){ - pairwise_energies_[i].clear(); - } - std::map<RotamerGraphNode*,int> ptr_index_map; - int counter = 0; - for(RotamerGraph::node_iterator i = graph->nodes_begin(); - i != graph->nodes_end(); ++i){ - ptr_index_map[*i] = counter; - ++counter; - } - int a,b; - for(RotamerGraph::edge_iterator i = graph->edges_begin(); - i != graph->edges_end(); ++i){ - if(!(*i)->IsActive()) continue; - a = ptr_index_map[(*i)->GetNode1()]; - b = ptr_index_map[(*i)->GetNode2()]; - Real* energies = new Real[num_rotamers[a] * num_rotamers[b]]; - (*i)->FillActiveEnergies(energies); - pairwise_energies_[a][b] = energies; - } - - //fill required data into bags - for(uint i = 0; i < root_bags_.size(); ++i){ - root_bags_[i]->FillData(pairwise_energies_,self_energies_, - num_rotamers,components_[i]); - } - - //solve the thing - for(std::vector<BagPtr>::iterator i = root_bags_.begin(); - i != root_bags_.end(); ++i){ - (*i)->Enumerate(); - } - - //fill the results from the trees into the solution vector - for(uint i = 0; i < components_.size(); ++i){ - std::vector<int> subtree_solution(components_[i].size(),0); - root_bags_[i]->FillSolution(subtree_solution); - for(uint j = 0; j < components_[i].size(); ++j){ - overall_solution[components_[i][j]] = subtree_solution[j]; - } - } - - //fill the results for the isolated residues into the solution vector - Real e_min; - int index; - for(std::vector<int>::iterator i = isolated_residues_.begin(); - i != isolated_residues_.begin(); ++i){ - e_min = std::numeric_limits<Real>::max(); - for(int j = 0; j < num_rotamers[*i]; ++j){ - if(self_energies_[*i][j] < e_min){ - e_min = self_energies_[*i][j]; - index = j; - } - } - overall_solution[*i] = index; - } - -} - -Bag::Bag(const std::vector<int>& members, Bag* parent): members_(members), - parent_(parent) { - std::sort(members_.begin(), members_.end()); // just to be really sure! -} - -void Bag::FillData(const std::vector<std::map<int,Real*> >& pairwise_energies, - const std::vector<Real*>& self_energies, - const std::vector<int>& num_rotamers, - const std::vector<int>& component_mapping){ - //just to be sure... - lset_.clear(); - rset_.clear(); - r_self_energies_.clear(); - - lr_energies_.clear(); - lr_index_in_I_.clear(); - lr_index_in_R_.clear(); - lr_num_rot_.clear(); - - rl_energies_.clear(); - rl_index_in_I_.clear(); - rl_index_in_R_.clear(); - rl_num_rot_.clear(); - - rr_energies_.clear(); - rr_index_in_R_one_.clear(); - rr_index_in_R_two_.clear(); - rr_num_rot_.clear(); - - es_mapper_l_.clear(); - es_mapper_r_.clear(); - - num_rotamers_.clear(); - - if(parent_ != NULL){ - //not root - for(uint i = 0; i < members_.size(); ++i){ - if(parent_->HasMember(members_[i])) lset_.push_back(i); - else rset_.push_back(i); - } - } - else{ - for(uint i = 0; i < members_.size(); ++i){ - rset_.push_back(i); - } - } - - //we need a mapping... - //the energy data comes from the overall graph. This overall graph gets separated - //into connected components. Every tree refers to one of those components. - //The problem is, that the indices in these components start from one again, - //so we need a mapping, the definition of this particular component respectively - int ind[members_.size()]; - for(uint i = 0; i < members_.size(); ++i) ind[i] = component_mapping[members_[i]]; - - //we need the self energies for all residues in r - for(std::vector<int>::iterator i = rset_.begin(); i < rset_.end(); ++i){ - r_self_energies_.push_back(self_energies[ind[*i]]); - } - - //the number of rotamers is required for all members - for(uint i = 0; i < members_.size(); ++i){ - num_rotamers_.push_back(num_rotamers[ind[i]]); - } - - //let's specify all pairwise energies from l to r, r to l respectively - for(uint i = 0; i < lset_.size(); ++i){ - for(uint j = 0; j < rset_.size(); ++j){ - std::map<int,Real*>::const_iterator k = pairwise_energies[ind[lset_[i]]].find(ind[rset_[j]]); - if(k != pairwise_energies[ind[lset_[i]]].end()){ - //let's add it!! - lr_energies_.push_back(k->second); - lr_index_in_I_.push_back(i); - lr_index_in_R_.push_back(j); - lr_num_rot_.push_back(num_rotamers_[rset_[j]]); - } - } - } - - for(uint i = 0; i < rset_.size(); ++i){ - for(uint j = 0; j < lset_.size(); ++j){ - std::map<int,Real*>::const_iterator k = pairwise_energies[ind[rset_[i]]].find(ind[lset_[j]]); - if(k != pairwise_energies[ind[rset_[i]]].end()){ - //let's add it!! - rl_energies_.push_back(k->second); - rl_index_in_I_.push_back(j); - rl_index_in_R_.push_back(i); - rl_num_rot_.push_back(num_rotamers_[lset_[j]]); - } - } - } - - //let's specify all pairwise energies within r - for(uint i = 0; i < rset_.size(); ++i){ - for(uint j = i+1; j < rset_.size(); ++j){ - std::map<int,Real*>::const_iterator k = pairwise_energies[ind[rset_[i]]].find(ind[rset_[j]]); - if(k != pairwise_energies[ind[rset_[i]]].end()){ - //let's add it - rr_energies_.push_back(k->second); - rr_index_in_R_one_.push_back(i); - rr_index_in_R_two_.push_back(j); - rr_num_rot_.push_back(num_rotamers_[rset_[j]]); - } - } - } - - //let's call the function for all children - for(child_iterator i = this->children_begin(); i < this->children_end(); ++i){ - (*i)->FillData(pairwise_energies, self_energies, - num_rotamers,component_mapping); - } - - for(child_iterator i = this->children_begin(); - i != this->children_end(); ++i){ - std::vector<std::pair<int,int> > current_es_l; - std::vector<std::pair<int,int> > current_es_r; - int index_in_l; - for(uint j = 0; j < lset_.size(); ++j){ - (*i)->IndexInL(members_[lset_[j]],index_in_l); - if(index_in_l != -1){ - current_es_l.push_back(std::make_pair(j,index_in_l)); - } - } - for(uint j = 0; j < rset_.size(); ++j){ - (*i)->IndexInL(members_[rset_[j]],index_in_l); - if(index_in_l != -1){ - current_es_r.push_back(std::make_pair(j,index_in_l)); - } - } - - es_mapper_l_.push_back(current_es_l); - es_mapper_r_.push_back(current_es_r); - } - - solution_index_multiplicator_.resize(lset_.size(),1); - num_solutions_ = 1; - for(uint i = 0; i < lset_.size(); ++i){ - int actual_num = 1; - for(uint j = i+1; j < lset_.size(); ++j){ - actual_num *= num_rotamers_[lset_[j]]; - } - solution_index_multiplicator_[i] = actual_num; - num_solutions_ *= num_rotamers_[lset_[i]]; - } - - solutions_.resize(num_solutions_); - r_memory_size_ = sizeof(int)*rset_.size(); -} - -void Bag::AttachBag(BagPtr bag){ - bag->SetParent(this); - children_.push_back(bag); -} - -void Bag::RemoveBag(BagPtr bag){ - for(child_iterator i = this->children_begin(); - i != this->children_end(); ++i){ - if(*i == bag){ - children_.erase(i); - break; - } - } -} - -void Bag::RemoveBag(Bag* bag){ - for(child_iterator i = this->children_begin(); - i != this->children_end(); ++i){ - if(i->get() == bag){ - children_.erase(i); - break; - } - } -} - -bool Bag::HasMember(int member) const{ - return std::find(members_.begin(), members_.end(), member) != members_.end(); -} - -uint64_t Bag::GetComplexity(const std::vector<int>& num_rotamers, - const std::vector<int>& component_mapping) const{ - uint64_t local_complexity = 1; - for(const_member_iterator i = this->members_begin(); - i != this->members_end(); ++i){ - local_complexity *= num_rotamers[component_mapping[*i]]; - } - - if(this->IsLeaf()) return local_complexity; - - for(const_child_iterator i = this->children_begin(); i != children_end(); ++i){ - local_complexity += (*i)->GetComplexity(num_rotamers, component_mapping); - } - - return local_complexity; -} - -void Bag::Enumerate(){ - - for(child_iterator i = this->children_begin(); - i != this->children_end(); ++i){ - (*i)->Enumerate(); - } - - std::vector<int> I(lset_.size(),0); - std::vector<int> R(rset_.size(),0); - int num[rset_.size()]; - for(uint i = 0; i < rset_.size(); ++i){ - num[i] = num_rotamers_[rset_[i]]; - } - promod3::core::Enumerator r_enumerator(R,&num[0]); - - int* r_partial_indices = new int[children_.size() * r_enumerator.GetMaxEnumerations()]; - int* l_partial_indices = new int[children_.size()]; - - int* data_ptr = r_partial_indices; - for(int i = 0; i < r_enumerator.GetMaxEnumerations(); ++i){ - for(uint j = 0; j < children_.size(); ++j){ - children_[j]->PartialSolutionIndex(es_mapper_r_[j],R,*data_ptr); - ++data_ptr; - } - r_enumerator.Next(); - } - - if(parent_!=NULL){ - - int num[lset_.size()]; - for(uint i = 0; i < lset_.size(); ++i){ - num[i] = num_rotamers_[lset_[i]]; - } - promod3::core::Enumerator enumerator(I,&num[0]); - - for(int i = 0; i < num_solutions_; ++i){ - - for(uint j = 0; j < children_.size(); ++j){ - children_[j]->PartialSolutionIndex(es_mapper_l_[j],I,l_partial_indices[j]); - } - - this->OptimizeR(I,solutions_[i].first,solutions_[i].second, - l_partial_indices,r_partial_indices); - enumerator.Next(); - } - } - else{ - - for(uint i = 0; i < children_.size(); ++i){ - children_[i]->PartialSolutionIndex(es_mapper_l_[i],I,l_partial_indices[i]); - } - - this->OptimizeR(I,solutions_[0].first,solutions_[0].second, - l_partial_indices,r_partial_indices); - } - - delete [] r_partial_indices; - delete [] l_partial_indices; -} - -void Bag::FillSolution(std::vector<int>& solution){ - std::vector<int> I(lset_.size(),0); - for(uint i = 0; i < lset_.size(); ++i){ - I[i] = solution[members_[lset_[i]]]; - } - - int index = 0; - for(uint i = 0; i < I.size(); ++i){ - index += solution_index_multiplicator_[i]*I[i]; - } - - std::pair<std::vector<int>,Real> local_solution = solutions_[index]; - for(uint i = 0; i < local_solution.first.size(); ++i){ - solution[members_[rset_[i]]] = local_solution.first[i]; - } - - for(child_iterator i = this->children_begin(); - i != this->children_end(); ++i){ - (*i)->FillSolution(solution); - } -} - -void Bag::OptimizeR(const std::vector<int>& I, std::vector<int>& final_R, Real& final_e, - int* l_partial_indices, int* r_partial_indices) const{ - - if(rset_.size() == 0){ - final_e = 0; - final_R.clear(); - return; - } - - final_e = std::numeric_limits<Real>::max(); - Real actual_e; - - int lr_const_value[lr_energies_.size()]; - for(uint i = 0; i < lr_energies_.size(); ++i){ - lr_const_value[i] = I[lr_index_in_I_[i]]*lr_num_rot_[i]; - } - - int* R_temp[rset_.size()]; - memset(R_temp,0,r_memory_size_); - std::vector<int> R(rset_.size(),0); - int num[rset_.size()]; - for(uint i = 0; i < rset_.size(); ++i){ - num[i] = num_rotamers_[rset_[i]]; - } - promod3::core::Enumerator enumerator(R,&num[0]); - - int* r_data_ptr = r_partial_indices; - - uint lr_energies_size = lr_energies_.size(); - uint rl_energies_size = rl_energies_.size(); - uint rset_size = rset_.size(); - uint rr_energies_size = rr_energies_.size(); - uint children_size = children_.size(); - uint num_enumerations = enumerator.GetMaxEnumerations(); - - for(uint enum_counter = 0; enum_counter < num_enumerations; ++enum_counter){ - actual_e = 0.0; - - //evaluate EL term - - //let's add the lr pairwise energies - for(uint i = 0; i < lr_energies_size; ++i){ - actual_e += lr_energies_[i][lr_const_value[i]+R[lr_index_in_R_[i]]]; - } - - //let's add the rl pairwise energies - for(uint i = 0; i < rl_energies_size; ++i){ - actual_e += rl_energies_[i][R[rl_index_in_R_[i]]*rl_num_rot_[i]+I[rl_index_in_I_[i]]]; - } - - //evaluate ER term - - //let's add the self energies - for(uint i = 0; i < rset_size; ++i){ - actual_e += r_self_energies_[i][R[i]]; - } - //let's add the pairwise energies - for(uint i = 0; i < rr_energies_size; ++i){ - actual_e += rr_energies_[i][R[rr_index_in_R_one_[i]]*rr_num_rot_[i]+R[rr_index_in_R_two_[i]]]; - } - - //evaluate ES term - - for(uint i = 0; i < children_size; ++i){ - children_[i]->AddE(l_partial_indices[i]+(*r_data_ptr),actual_e); - ++r_data_ptr; - } - if(actual_e < final_e){ - memcpy(R_temp, &R[0], r_memory_size_); - final_e = actual_e; - } - enumerator.Next(); - } - - final_R.resize(rset_size); - memcpy(&final_R[0],R_temp,r_memory_size_); -} - -}} //ns diff --git a/sidechain/src/tree.hh b/sidechain/src/tree.hh deleted file mode 100644 index 5cd4405e0dcc94ac85db41694bd0eebf037407c6..0000000000000000000000000000000000000000 --- a/sidechain/src/tree.hh +++ /dev/null @@ -1,179 +0,0 @@ -#ifndef PROMOD3_TREE_HH -#define PROMOD3_TREE_HH - -#include <math.h> -#include <vector> -#include <stack> -#include <map> -#include <set> -#include <algorithm> -#include <limits.h> -#include <ctime> - -#include <boost/shared_ptr.hpp> -#include <promod3/sidechain/graph.hh> - - -namespace promod3{ namespace sidechain{ - -class Tree; -class Bag; -typedef boost::shared_ptr<Tree> TreePtr; -typedef boost::shared_ptr<Bag> BagPtr; - -class Tree{ - -public: - Tree(RotamerGraph* graph): num_nodes_(graph->GetNumNodes()) { } - - ~Tree(); - - size_t GetNumNodes() const { return num_nodes_; } - - uint64_t GenerateTree(RotamerGraph* graph); - - void Solve(RotamerGraph* graph, std::vector<int>& overall_solution); - -private: - uint num_nodes_; - std::vector<std::vector<int> > components_; - std::vector<Real*> self_energies_; - std::vector<std::map<int,Real*> > pairwise_energies_; - std::vector<BagPtr> root_bags_; - std::vector<int> isolated_residues_; -}; - -class Bag{ -public: - - Bag(const std::vector<int>& members, Bag* parent = NULL); - - void FillData(const std::vector<std::map<int,Real*> >& pairwise_energies, - const std::vector<Real*>& self_energies, - const std::vector<int>& num_rotamers, - const std::vector<int>& component_mapping); - - void SetParent(Bag* parent) { parent_ = parent; } - - Bag* GetParent() { return parent_; } - - void AttachBag(BagPtr bag); - - void RemoveBag(BagPtr bag); - - void RemoveBag(Bag* bag); - - bool IsLeaf() const { return children_.empty(); } - - bool HasMember(int member) const; - - uint64_t GetComplexity(const std::vector<int>& num_rotamers, - const std::vector<int>& component_mapping) const; - - void Enumerate(); - - void FillSolution(std::vector<int>& solution); - - typedef std::vector<int>::iterator member_iterator; - typedef std::vector<int>::const_iterator const_member_iterator; - - typedef std::vector<BagPtr>::iterator child_iterator; - typedef std::vector<BagPtr>::const_iterator const_child_iterator; - - member_iterator members_begin() { return members_.begin(); } - - member_iterator members_end() { return members_.end(); } - - const_member_iterator members_begin() const { return members_.begin(); } - - const_member_iterator members_end() const { return members_.end(); } - - child_iterator children_begin() { return children_.begin(); } - - child_iterator children_end() { return children_.end(); } - - const_child_iterator children_begin() const { return children_.begin(); } - - const_child_iterator children_end() const { return children_.end(); } - -private: - - inline void AddE(int solution_index, Real& e) const{ - e += solutions_[solution_index].second; - } - - void PartialSolutionIndex(const std::vector<std::pair<int,int> >& mapper, - const std::vector<int>& values, int& index){ - index = 0; - for(uint i = 0; i < mapper.size(); ++i){ - index += solution_index_multiplicator_[mapper[i].second] * values[mapper[i].first]; - } - } - - void IndexInL(int member, int& index) const{ - for(uint i = 0; i < lset_.size(); ++i){ - if(members_[lset_[i]] == member){ - index = i; - return; - } - } - index = -1; - } - - void OptimizeR(const std::vector<int>& I, std::vector<int>& final_R, - Real& final_e, int* l_partial_indices, - int* r_partial_indices) const; - - //members of bag as global indices - std::vector<int> members_; - - //members of the two sets, all members in l are also present in parent - //lset_ and rset_ contain the corresponding indices in members_ not the - //global members - std::vector<int> lset_; - std::vector<int> rset_; - - //solution for all possible l combinations - std::vector<std::pair<std::vector<int>,Real> > solutions_; - - //self explaining - std::vector<Real*> r_self_energies_; - - //stuff for lr pairwise energies - std::vector<Real*> lr_energies_; - std::vector<int> lr_index_in_I_; - std::vector<int> lr_index_in_R_; - std::vector<int> lr_num_rot_; - - //stuff for rl pairwise energies - std::vector<Real*> rl_energies_; - std::vector<int> rl_index_in_I_; - std::vector<int> rl_index_in_R_; - std::vector<int> rl_num_rot_; - - //stuff for rr pairwise energies - std::vector<Real*> rr_energies_; - std::vector<int> rr_index_in_R_one_; - std::vector<int> rr_index_in_R_two_; - std::vector<int> rr_num_rot_; - - //stores for every child node the indices of their l members - //in the current bags l and r set - std::vector<std::vector<std::pair<int,int> > > es_mapper_l_; - std::vector<std::vector<std::pair<int,int> > > es_mapper_r_; - - //self explaining stuff - std::vector<int> num_rotamers_; - Bag* parent_; //raw pointer - std::vector<BagPtr> children_; - - //helper variables - int r_memory_size_; - std::vector<int> solution_index_multiplicator_; - int num_solutions_; - -}; - -}}//ns - -#endif