Skip to content
Snippets Groups Projects
Commit 682f66ee authored by Gerardo Tauriello's avatar Gerardo Tauriello
Browse files

SCHWED-1740: handle multiple loops and overlaps in IdxHandler

parent 003af288
Branches
Tags
No related merge requests found
#include <promod3/loop/idx_handler.hh> #include <promod3/loop/idx_handler.hh>
#include <promod3/core/message.hh> #include <promod3/core/message.hh>
#include <ost/log.hh>
namespace promod3 { namespace loop { namespace promod3 { namespace loop {
...@@ -67,4 +68,108 @@ void IdxHandler::SetupScoreCalculation(uint start_resnum, uint num_residues, ...@@ -67,4 +68,108 @@ void IdxHandler::SetupScoreCalculation(uint start_resnum, uint num_residues,
} }
} }
void IdxHandler::ToLoopIndices(const std::vector<uint>& start_resnums,
const std::vector<uint>& num_residues,
const std::vector<uint>& chain_indices,
std::vector<uint>& loop_start_indices,
std::vector<uint>& loop_lengths,
bool enable_log) const {
// check consistency
const uint num_loops = start_resnums.size();
if (num_loops != num_residues.size()) {
throw promod3::Error("Size inconsistency of input loops!");
}
if (num_loops != chain_indices.size()) {
throw promod3::Error("Size inconsistency of input loops!");
}
// get first guess of loops
loop_start_indices.resize(num_loops);
loop_lengths = num_residues;
for (uint i_loop = 0; i_loop < num_loops; ++i_loop) {
const uint start_resnum = start_resnums[i_loop];
const uint chain_idx = chain_indices[i_loop];
// set start index
loop_start_indices[i_loop] = ToIdx(start_resnum, chain_idx);
// check size
if (start_resnum-1 + num_residues[i_loop] > chain_sizes_[chain_idx]) {
throw promod3::Error("Invalid End ResNum!");
}
}
// resolve overlaps
ResolveOverlaps(loop_start_indices, loop_lengths, enable_log);
}
void IdxHandler::ResolveOverlaps(std::vector<uint>& loop_start_indices,
std::vector<uint>& loop_lengths,
bool enable_log) {
// strategy for each loop
// - compare to previously processed loops
// -> if no overlap, nothing happens
// (code is almost as fast as only checking overlaps)
// -> if overlap, other loop deleted and merged into current one
// - if merges needed, write new loop_vectors and swap after
// -> things to be removed get loop_lengths == 0
// keep track of loops to keep
const uint num_loops = loop_start_indices.size();
uint num_loops_new = num_loops;
// check all loops
for (uint i_loop = 0; i_loop < num_loops; ++i_loop) {
// check for dummy loops (skipped)
if (loop_lengths[i_loop] == 0) {
// kill and skip it
--num_loops_new;
if (enable_log) {
LOG_INFO("Removed empty loop at index " << loop_start_indices[i_loop]);
}
continue;
}
// compare with others
for (uint i_loop_other = 0; i_loop_other < i_loop; ++i_loop_other) {
// loop from start_idx to start_idx + loop_lengths[i_loop] - 1
const uint start_idx = loop_start_indices[i_loop];
const uint end_idx = start_idx + loop_lengths[i_loop];
const uint start_idx_other = loop_start_indices[i_loop_other];
const uint end_idx_other = start_idx_other + loop_lengths[i_loop_other];
// if not (end_idx <= start_idx_other || start_idx >= end_idx_other) ...
if (end_idx > start_idx_other && start_idx < end_idx_other) {
// update current loop
if (start_idx_other < start_idx) {
loop_start_indices[i_loop] = start_idx_other;
}
loop_lengths[i_loop] = std::max(end_idx, end_idx_other)
- loop_start_indices[i_loop];
// kill other one
loop_lengths[i_loop_other] = 0;
--num_loops_new;
if (enable_log) {
LOG_INFO("Merged loops [" << start_idx << ", " << end_idx << "[ and ["
<< start_idx_other << ", " << end_idx_other << "[");
}
}
}
}
// build new vectors as needed
if (num_loops_new != num_loops) {
// new vectors containing only elements with length > 0
std::vector<uint> loop_start_indices_new(num_loops_new);
std::vector<uint> loop_lengths_new(num_loops_new);
uint i_loop_new = 0;
for (uint i_loop = 0; i_loop < num_loops; ++i_loop) {
if (loop_lengths[i_loop] > 0) {
assert(i_loop_new < num_loops_new); // guaranteed by algo above
loop_start_indices_new[i_loop_new] = loop_start_indices[i_loop];
loop_lengths_new[i_loop_new] = loop_lengths[i_loop];
++i_loop_new;
}
}
// swap them (fast!)
loop_start_indices.swap(loop_start_indices_new);
loop_lengths.swap(loop_lengths_new);
}
}
}} //ns }} //ns
...@@ -38,10 +38,10 @@ public: ...@@ -38,10 +38,10 @@ public:
uint ToIdx(uint resnum, uint chain_idx) const; uint ToIdx(uint resnum, uint chain_idx) const;
// returns vector of indices for given loop
std::vector<uint> ToIdxVector(uint start_resnum, uint num_residues, std::vector<uint> ToIdxVector(uint start_resnum, uint num_residues,
uint chain_idx) const; uint chain_idx) const;
// check input and set commonly used variables // check input and set commonly used variables
void SetupScoreCalculation(const loop::BackboneList& bb_list, void SetupScoreCalculation(const loop::BackboneList& bb_list,
uint start_resnum, uint chain_idx, uint start_resnum, uint chain_idx,
...@@ -51,6 +51,26 @@ public: ...@@ -51,6 +51,26 @@ public:
uint chain_idx, uint& start_idx, uint chain_idx, uint& start_idx,
uint& end_idx) const; uint& end_idx) const;
// returns start indices (as in ToIdx) and loops with resolved overlaps
// -> enable_log is passed to ResolveOverlaps
void ToLoopIndices(const std::vector<uint>& start_resnums,
const std::vector<uint>& num_residues,
const std::vector<uint>& chain_indices,
std::vector<uint>& loop_start_indices,
std::vector<uint>& loop_lengths,
bool enable_log = false) const;
// resolves overlaps in given loops
// notes:
// - loops of length 0 are removed
// - loops are allowed to touch without being merged
// - vectors are only modified if overlaps are found
// - this is fast code and can safely be used with minimal overhead
// - enable_log = true triggers LOG_INFO calls for resolved loops
static void ResolveOverlaps(std::vector<uint>& loop_start_indices,
std::vector<uint>& loop_lengths,
bool enable_log = false);
private: private:
size_t num_chains_; size_t num_chains_;
......
...@@ -10,6 +10,22 @@ BOOST_AUTO_TEST_SUITE( loop ); ...@@ -10,6 +10,22 @@ BOOST_AUTO_TEST_SUITE( loop );
using namespace promod3::loop; using namespace promod3::loop;
namespace {
bool HasLoop(const std::vector<uint>& loop_start_indices,
const std::vector<uint>& loop_lengths,
uint exp_start_idx, uint exp_length) {
// check if exp_... appear in vector
const uint num_loops = loop_start_indices.size();
for (uint i_loop = 0; i_loop < num_loops; ++i_loop) {
if (loop_start_indices[i_loop] == exp_start_idx
&& loop_lengths[i_loop] == exp_length) return true;
}
return false;
}
} // anon ns
BOOST_AUTO_TEST_CASE(test_idx_handler) { BOOST_AUTO_TEST_CASE(test_idx_handler) {
// setup // setup
std::vector<size_t> chain_sizes; std::vector<size_t> chain_sizes;
...@@ -92,4 +108,153 @@ BOOST_AUTO_TEST_CASE(test_idx_handler_bb_list) { ...@@ -92,4 +108,153 @@ BOOST_AUTO_TEST_CASE(test_idx_handler_bb_list) {
end_idx), promod3::Error); end_idx), promod3::Error);
} }
BOOST_AUTO_TEST_CASE(test_idx_handler_overlaps) {
// setup
std::vector<uint> loop_start_indices;
std::vector<uint> loop_lengths;
loop_start_indices.push_back(10);
loop_start_indices.push_back(5);
loop_start_indices.push_back(15);
loop_lengths.push_back(2);
loop_lengths.push_back(2);
loop_lengths.push_back(3);
// check
std::vector<uint> loop_start_indices_res = loop_start_indices;
std::vector<uint> loop_lengths_res = loop_lengths;
IdxHandler::ResolveOverlaps(loop_start_indices_res, loop_lengths_res);
BOOST_CHECK(loop_start_indices == loop_start_indices_res);
BOOST_CHECK(loop_lengths == loop_lengths_res);
// check empty loop
loop_lengths[1] = 0;
loop_start_indices_res = loop_start_indices;
loop_lengths_res = loop_lengths;
IdxHandler::ResolveOverlaps(loop_start_indices_res, loop_lengths_res);
BOOST_CHECK_EQUAL(loop_start_indices_res.size(), 2u);
BOOST_CHECK_EQUAL(loop_lengths_res.size(), 2u);
BOOST_CHECK(HasLoop(loop_start_indices_res, loop_lengths_res, 10, 2));
BOOST_CHECK(HasLoop(loop_start_indices_res, loop_lengths_res, 15, 3));
// check touch (no overlap)
loop_lengths[1] = 5;
loop_start_indices_res = loop_start_indices;
loop_lengths_res = loop_lengths;
IdxHandler::ResolveOverlaps(loop_start_indices_res, loop_lengths_res);
BOOST_CHECK(loop_start_indices == loop_start_indices_res);
BOOST_CHECK(loop_lengths == loop_lengths_res);
// check partial overlap
loop_lengths[1] = 6;
loop_start_indices_res = loop_start_indices;
loop_lengths_res = loop_lengths;
IdxHandler::ResolveOverlaps(loop_start_indices_res, loop_lengths_res);
BOOST_CHECK_EQUAL(loop_start_indices_res.size(), 2u);
BOOST_CHECK_EQUAL(loop_lengths_res.size(), 2u);
BOOST_CHECK(HasLoop(loop_start_indices_res, loop_lengths_res, 5, 7));
BOOST_CHECK(HasLoop(loop_start_indices_res, loop_lengths_res, 15, 3));
// check full overlap
loop_lengths[1] = 8;
loop_start_indices_res = loop_start_indices;
loop_lengths_res = loop_lengths;
IdxHandler::ResolveOverlaps(loop_start_indices_res, loop_lengths_res);
BOOST_CHECK_EQUAL(loop_start_indices_res.size(), 2u);
BOOST_CHECK_EQUAL(loop_lengths_res.size(), 2u);
BOOST_CHECK(HasLoop(loop_start_indices_res, loop_lengths_res, 5, 8));
BOOST_CHECK(HasLoop(loop_start_indices_res, loop_lengths_res, 15, 3));
// check double overlap
loop_lengths[1] = 20;
loop_start_indices_res = loop_start_indices;
loop_lengths_res = loop_lengths;
IdxHandler::ResolveOverlaps(loop_start_indices_res, loop_lengths_res);
BOOST_CHECK_EQUAL(loop_start_indices_res.size(), 1u);
BOOST_CHECK_EQUAL(loop_lengths_res.size(), 1u);
BOOST_CHECK(HasLoop(loop_start_indices_res, loop_lengths_res, 5, 20));
// check double overlap with loop in loop
loop_lengths[1] = 12;
loop_start_indices_res = loop_start_indices;
loop_lengths_res = loop_lengths;
IdxHandler::ResolveOverlaps(loop_start_indices_res, loop_lengths_res);
BOOST_CHECK_EQUAL(loop_start_indices_res.size(), 1u);
BOOST_CHECK_EQUAL(loop_lengths_res.size(), 1u);
BOOST_CHECK(HasLoop(loop_start_indices_res, loop_lengths_res, 5, 13));
}
BOOST_AUTO_TEST_CASE(test_idx_handler_multi_loops) {
// setup
std::vector<size_t> chain_sizes;
chain_sizes.push_back(10);
chain_sizes.push_back(0);
chain_sizes.push_back(8);
IdxHandler idx_handler(chain_sizes);
// input data
std::vector<uint> start_resnums;
std::vector<uint> num_residues;
std::vector<uint> chain_indices;
start_resnums.push_back(1);
num_residues.push_back(2);
chain_indices.push_back(2);
start_resnums.push_back(6);
num_residues.push_back(2);
chain_indices.push_back(0);
start_resnums.push_back(6);
num_residues.push_back(3);
chain_indices.push_back(2);
// output data
std::vector<uint> loop_start_indices;
std::vector<uint> loop_lengths;
// check
idx_handler.ToLoopIndices(start_resnums, num_residues, chain_indices,
loop_start_indices, loop_lengths);
BOOST_CHECK_EQUAL(loop_start_indices.size(), 3u);
BOOST_CHECK_EQUAL(loop_lengths.size(), 3u);
BOOST_CHECK(HasLoop(loop_start_indices, loop_lengths, 10, 2));
BOOST_CHECK(HasLoop(loop_start_indices, loop_lengths, 5, 2));
BOOST_CHECK(HasLoop(loop_start_indices, loop_lengths, 15, 3));
// check overlap fixing (just one of the cases)
num_residues[0] = 6;
idx_handler.ToLoopIndices(start_resnums, num_residues, chain_indices,
loop_start_indices, loop_lengths);
BOOST_CHECK_EQUAL(loop_start_indices.size(), 2u);
BOOST_CHECK_EQUAL(loop_lengths.size(), 2u);
BOOST_CHECK(HasLoop(loop_start_indices, loop_lengths, 10, 8));
BOOST_CHECK(HasLoop(loop_start_indices, loop_lengths, 5, 2));
num_residues[0] = 2;
// exceptions
start_resnums[0] = 9;
BOOST_CHECK_THROW(idx_handler.ToLoopIndices(start_resnums, num_residues,
chain_indices, loop_start_indices,
loop_lengths), promod3::Error);
start_resnums[0] = 8;
BOOST_CHECK_THROW(idx_handler.ToLoopIndices(start_resnums, num_residues,
chain_indices, loop_start_indices,
loop_lengths), promod3::Error);
start_resnums[0] = 0;
BOOST_CHECK_THROW(idx_handler.ToLoopIndices(start_resnums, num_residues,
chain_indices, loop_start_indices,
loop_lengths), promod3::Error);
start_resnums[0] = 1;
// bad chain
chain_indices[0] = 1;
BOOST_CHECK_THROW(idx_handler.ToLoopIndices(start_resnums, num_residues,
chain_indices, loop_start_indices,
loop_lengths), promod3::Error);
chain_indices[0] = 3;
BOOST_CHECK_THROW(idx_handler.ToLoopIndices(start_resnums, num_residues,
chain_indices, loop_start_indices,
loop_lengths), promod3::Error);
chain_indices[0] = 2;
// bad vector sizes
start_resnums.push_back(1);
BOOST_CHECK_THROW(idx_handler.ToLoopIndices(start_resnums, num_residues,
chain_indices, loop_start_indices,
loop_lengths), promod3::Error);
}
BOOST_AUTO_TEST_SUITE_END(); BOOST_AUTO_TEST_SUITE_END();
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment