diff --git a/loop/src/idx_handler.cc b/loop/src/idx_handler.cc index cb5a218428ee2c1fd5fa8ff196a9dcd525d6414c..40558c48a9338389aceb3f878724a7f9aa86e2ee 100644 --- a/loop/src/idx_handler.cc +++ b/loop/src/idx_handler.cc @@ -1,5 +1,6 @@ #include <promod3/loop/idx_handler.hh> #include <promod3/core/message.hh> +#include <ost/log.hh> namespace promod3 { namespace loop { @@ -67,4 +68,108 @@ void IdxHandler::SetupScoreCalculation(uint start_resnum, uint num_residues, } } +void IdxHandler::ToLoopIndices(const std::vector<uint>& start_resnums, + const std::vector<uint>& num_residues, + const std::vector<uint>& chain_indices, + std::vector<uint>& loop_start_indices, + std::vector<uint>& loop_lengths, + bool enable_log) const { + // check consistency + const uint num_loops = start_resnums.size(); + if (num_loops != num_residues.size()) { + throw promod3::Error("Size inconsistency of input loops!"); + } + if (num_loops != chain_indices.size()) { + throw promod3::Error("Size inconsistency of input loops!"); + } + + // get first guess of loops + loop_start_indices.resize(num_loops); + loop_lengths = num_residues; + for (uint i_loop = 0; i_loop < num_loops; ++i_loop) { + const uint start_resnum = start_resnums[i_loop]; + const uint chain_idx = chain_indices[i_loop]; + // set start index + loop_start_indices[i_loop] = ToIdx(start_resnum, chain_idx); + // check size + if (start_resnum-1 + num_residues[i_loop] > chain_sizes_[chain_idx]) { + throw promod3::Error("Invalid End ResNum!"); + } + } + // resolve overlaps + ResolveOverlaps(loop_start_indices, loop_lengths, enable_log); +} + +void IdxHandler::ResolveOverlaps(std::vector<uint>& loop_start_indices, + std::vector<uint>& loop_lengths, + bool enable_log) { + + // strategy for each loop + // - compare to previously processed loops + // -> if no overlap, nothing happens + // (code is almost as fast as only checking overlaps) + // -> if overlap, other loop deleted and merged into current one + // - if merges needed, write new loop_vectors and swap after + // -> things to be removed get loop_lengths == 0 + + // keep track of loops to keep + const uint num_loops = loop_start_indices.size(); + uint num_loops_new = num_loops; + // check all loops + for (uint i_loop = 0; i_loop < num_loops; ++i_loop) { + // check for dummy loops (skipped) + if (loop_lengths[i_loop] == 0) { + // kill and skip it + --num_loops_new; + if (enable_log) { + LOG_INFO("Removed empty loop at index " << loop_start_indices[i_loop]); + } + continue; + } + // compare with others + for (uint i_loop_other = 0; i_loop_other < i_loop; ++i_loop_other) { + // loop from start_idx to start_idx + loop_lengths[i_loop] - 1 + const uint start_idx = loop_start_indices[i_loop]; + const uint end_idx = start_idx + loop_lengths[i_loop]; + const uint start_idx_other = loop_start_indices[i_loop_other]; + const uint end_idx_other = start_idx_other + loop_lengths[i_loop_other]; + // if not (end_idx <= start_idx_other || start_idx >= end_idx_other) ... + if (end_idx > start_idx_other && start_idx < end_idx_other) { + // update current loop + if (start_idx_other < start_idx) { + loop_start_indices[i_loop] = start_idx_other; + } + loop_lengths[i_loop] = std::max(end_idx, end_idx_other) + - loop_start_indices[i_loop]; + // kill other one + loop_lengths[i_loop_other] = 0; + --num_loops_new; + if (enable_log) { + LOG_INFO("Merged loops [" << start_idx << ", " << end_idx << "[ and [" + << start_idx_other << ", " << end_idx_other << "["); + } + } + } + } + + // build new vectors as needed + if (num_loops_new != num_loops) { + // new vectors containing only elements with length > 0 + std::vector<uint> loop_start_indices_new(num_loops_new); + std::vector<uint> loop_lengths_new(num_loops_new); + uint i_loop_new = 0; + for (uint i_loop = 0; i_loop < num_loops; ++i_loop) { + if (loop_lengths[i_loop] > 0) { + assert(i_loop_new < num_loops_new); // guaranteed by algo above + loop_start_indices_new[i_loop_new] = loop_start_indices[i_loop]; + loop_lengths_new[i_loop_new] = loop_lengths[i_loop]; + ++i_loop_new; + } + } + // swap them (fast!) + loop_start_indices.swap(loop_start_indices_new); + loop_lengths.swap(loop_lengths_new); + } +} + }} //ns diff --git a/loop/src/idx_handler.hh b/loop/src/idx_handler.hh index 97c64e9702df6f8336bf9e539b6105cd6f744cbe..e91423f4d7be5b175cd42b776e9bf25262e12533 100644 --- a/loop/src/idx_handler.hh +++ b/loop/src/idx_handler.hh @@ -38,10 +38,10 @@ public: uint ToIdx(uint resnum, uint chain_idx) const; + // returns vector of indices for given loop std::vector<uint> ToIdxVector(uint start_resnum, uint num_residues, uint chain_idx) const; - // check input and set commonly used variables void SetupScoreCalculation(const loop::BackboneList& bb_list, uint start_resnum, uint chain_idx, @@ -51,6 +51,26 @@ public: uint chain_idx, uint& start_idx, uint& end_idx) const; + // returns start indices (as in ToIdx) and loops with resolved overlaps + // -> enable_log is passed to ResolveOverlaps + void ToLoopIndices(const std::vector<uint>& start_resnums, + const std::vector<uint>& num_residues, + const std::vector<uint>& chain_indices, + std::vector<uint>& loop_start_indices, + std::vector<uint>& loop_lengths, + bool enable_log = false) const; + + // resolves overlaps in given loops + // notes: + // - loops of length 0 are removed + // - loops are allowed to touch without being merged + // - vectors are only modified if overlaps are found + // - this is fast code and can safely be used with minimal overhead + // - enable_log = true triggers LOG_INFO calls for resolved loops + static void ResolveOverlaps(std::vector<uint>& loop_start_indices, + std::vector<uint>& loop_lengths, + bool enable_log = false); + private: size_t num_chains_; diff --git a/loop/tests/test_idx_handler.cc b/loop/tests/test_idx_handler.cc index d83eb076117962e42a9c1b3dca7311eaf85e6bf3..a93ca4607120f38e21977b9d4c010ec7b9560f8d 100644 --- a/loop/tests/test_idx_handler.cc +++ b/loop/tests/test_idx_handler.cc @@ -10,6 +10,22 @@ BOOST_AUTO_TEST_SUITE( loop ); using namespace promod3::loop; +namespace { + +bool HasLoop(const std::vector<uint>& loop_start_indices, + const std::vector<uint>& loop_lengths, + uint exp_start_idx, uint exp_length) { + // check if exp_... appear in vector + const uint num_loops = loop_start_indices.size(); + for (uint i_loop = 0; i_loop < num_loops; ++i_loop) { + if (loop_start_indices[i_loop] == exp_start_idx + && loop_lengths[i_loop] == exp_length) return true; + } + return false; +} + +} // anon ns + BOOST_AUTO_TEST_CASE(test_idx_handler) { // setup std::vector<size_t> chain_sizes; @@ -92,4 +108,153 @@ BOOST_AUTO_TEST_CASE(test_idx_handler_bb_list) { end_idx), promod3::Error); } +BOOST_AUTO_TEST_CASE(test_idx_handler_overlaps) { + // setup + std::vector<uint> loop_start_indices; + std::vector<uint> loop_lengths; + loop_start_indices.push_back(10); + loop_start_indices.push_back(5); + loop_start_indices.push_back(15); + loop_lengths.push_back(2); + loop_lengths.push_back(2); + loop_lengths.push_back(3); + + // check + std::vector<uint> loop_start_indices_res = loop_start_indices; + std::vector<uint> loop_lengths_res = loop_lengths; + IdxHandler::ResolveOverlaps(loop_start_indices_res, loop_lengths_res); + BOOST_CHECK(loop_start_indices == loop_start_indices_res); + BOOST_CHECK(loop_lengths == loop_lengths_res); + + // check empty loop + loop_lengths[1] = 0; + loop_start_indices_res = loop_start_indices; + loop_lengths_res = loop_lengths; + IdxHandler::ResolveOverlaps(loop_start_indices_res, loop_lengths_res); + BOOST_CHECK_EQUAL(loop_start_indices_res.size(), 2u); + BOOST_CHECK_EQUAL(loop_lengths_res.size(), 2u); + BOOST_CHECK(HasLoop(loop_start_indices_res, loop_lengths_res, 10, 2)); + BOOST_CHECK(HasLoop(loop_start_indices_res, loop_lengths_res, 15, 3)); + + // check touch (no overlap) + loop_lengths[1] = 5; + loop_start_indices_res = loop_start_indices; + loop_lengths_res = loop_lengths; + IdxHandler::ResolveOverlaps(loop_start_indices_res, loop_lengths_res); + BOOST_CHECK(loop_start_indices == loop_start_indices_res); + BOOST_CHECK(loop_lengths == loop_lengths_res); + + // check partial overlap + loop_lengths[1] = 6; + loop_start_indices_res = loop_start_indices; + loop_lengths_res = loop_lengths; + IdxHandler::ResolveOverlaps(loop_start_indices_res, loop_lengths_res); + BOOST_CHECK_EQUAL(loop_start_indices_res.size(), 2u); + BOOST_CHECK_EQUAL(loop_lengths_res.size(), 2u); + BOOST_CHECK(HasLoop(loop_start_indices_res, loop_lengths_res, 5, 7)); + BOOST_CHECK(HasLoop(loop_start_indices_res, loop_lengths_res, 15, 3)); + + // check full overlap + loop_lengths[1] = 8; + loop_start_indices_res = loop_start_indices; + loop_lengths_res = loop_lengths; + IdxHandler::ResolveOverlaps(loop_start_indices_res, loop_lengths_res); + BOOST_CHECK_EQUAL(loop_start_indices_res.size(), 2u); + BOOST_CHECK_EQUAL(loop_lengths_res.size(), 2u); + BOOST_CHECK(HasLoop(loop_start_indices_res, loop_lengths_res, 5, 8)); + BOOST_CHECK(HasLoop(loop_start_indices_res, loop_lengths_res, 15, 3)); + + // check double overlap + loop_lengths[1] = 20; + loop_start_indices_res = loop_start_indices; + loop_lengths_res = loop_lengths; + IdxHandler::ResolveOverlaps(loop_start_indices_res, loop_lengths_res); + BOOST_CHECK_EQUAL(loop_start_indices_res.size(), 1u); + BOOST_CHECK_EQUAL(loop_lengths_res.size(), 1u); + BOOST_CHECK(HasLoop(loop_start_indices_res, loop_lengths_res, 5, 20)); + + // check double overlap with loop in loop + loop_lengths[1] = 12; + loop_start_indices_res = loop_start_indices; + loop_lengths_res = loop_lengths; + IdxHandler::ResolveOverlaps(loop_start_indices_res, loop_lengths_res); + BOOST_CHECK_EQUAL(loop_start_indices_res.size(), 1u); + BOOST_CHECK_EQUAL(loop_lengths_res.size(), 1u); + BOOST_CHECK(HasLoop(loop_start_indices_res, loop_lengths_res, 5, 13)); +} + +BOOST_AUTO_TEST_CASE(test_idx_handler_multi_loops) { + // setup + std::vector<size_t> chain_sizes; + chain_sizes.push_back(10); + chain_sizes.push_back(0); + chain_sizes.push_back(8); + IdxHandler idx_handler(chain_sizes); + // input data + std::vector<uint> start_resnums; + std::vector<uint> num_residues; + std::vector<uint> chain_indices; + start_resnums.push_back(1); + num_residues.push_back(2); + chain_indices.push_back(2); + start_resnums.push_back(6); + num_residues.push_back(2); + chain_indices.push_back(0); + start_resnums.push_back(6); + num_residues.push_back(3); + chain_indices.push_back(2); + // output data + std::vector<uint> loop_start_indices; + std::vector<uint> loop_lengths; + + // check + idx_handler.ToLoopIndices(start_resnums, num_residues, chain_indices, + loop_start_indices, loop_lengths); + BOOST_CHECK_EQUAL(loop_start_indices.size(), 3u); + BOOST_CHECK_EQUAL(loop_lengths.size(), 3u); + BOOST_CHECK(HasLoop(loop_start_indices, loop_lengths, 10, 2)); + BOOST_CHECK(HasLoop(loop_start_indices, loop_lengths, 5, 2)); + BOOST_CHECK(HasLoop(loop_start_indices, loop_lengths, 15, 3)); + + // check overlap fixing (just one of the cases) + num_residues[0] = 6; + idx_handler.ToLoopIndices(start_resnums, num_residues, chain_indices, + loop_start_indices, loop_lengths); + BOOST_CHECK_EQUAL(loop_start_indices.size(), 2u); + BOOST_CHECK_EQUAL(loop_lengths.size(), 2u); + BOOST_CHECK(HasLoop(loop_start_indices, loop_lengths, 10, 8)); + BOOST_CHECK(HasLoop(loop_start_indices, loop_lengths, 5, 2)); + num_residues[0] = 2; + + // exceptions + start_resnums[0] = 9; + BOOST_CHECK_THROW(idx_handler.ToLoopIndices(start_resnums, num_residues, + chain_indices, loop_start_indices, + loop_lengths), promod3::Error); + start_resnums[0] = 8; + BOOST_CHECK_THROW(idx_handler.ToLoopIndices(start_resnums, num_residues, + chain_indices, loop_start_indices, + loop_lengths), promod3::Error); + start_resnums[0] = 0; + BOOST_CHECK_THROW(idx_handler.ToLoopIndices(start_resnums, num_residues, + chain_indices, loop_start_indices, + loop_lengths), promod3::Error); + start_resnums[0] = 1; + // bad chain + chain_indices[0] = 1; + BOOST_CHECK_THROW(idx_handler.ToLoopIndices(start_resnums, num_residues, + chain_indices, loop_start_indices, + loop_lengths), promod3::Error); + chain_indices[0] = 3; + BOOST_CHECK_THROW(idx_handler.ToLoopIndices(start_resnums, num_residues, + chain_indices, loop_start_indices, + loop_lengths), promod3::Error); + chain_indices[0] = 2; + // bad vector sizes + start_resnums.push_back(1); + BOOST_CHECK_THROW(idx_handler.ToLoopIndices(start_resnums, num_residues, + chain_indices, loop_start_indices, + loop_lengths), promod3::Error); +} + BOOST_AUTO_TEST_SUITE_END();