From 90d939f10ceae87964cbb83b0156b015c0d3aad9 Mon Sep 17 00:00:00 2001 From: Niklaus Johner <niklaus.johner@unibas.ch> Date: Wed, 26 Nov 2014 10:48:08 +0100 Subject: [PATCH] Added domain_find from swiss model to ost. --- CMakeLists.txt | 2 +- cmake_support/OST.cmake | 3 + modules/mol/alg/pymod/CMakeLists.txt | 1 + .../mol/alg/pymod/export_contact_overlap.cc | 102 ++++ modules/mol/alg/pymod/wrap_mol_alg.cc | 4 + modules/mol/alg/src/CMakeLists.txt | 12 +- modules/mol/alg/src/adjacency_bitmap.cc | 105 ++++ modules/mol/alg/src/adjacency_bitmap.hh | 165 +++++++ modules/mol/alg/src/contact_overlap.cc | 461 ++++++++++++++++++ modules/mol/alg/src/contact_overlap.hh | 108 ++++ modules/mol/alg/src/domain_find.cc | 383 +++++++++++++++ modules/mol/alg/src/domain_find.hh | 81 +++ modules/mol/alg/src/similarity_matrix.cc | 41 ++ modules/mol/alg/src/similarity_matrix.hh | 36 ++ 14 files changed, 1502 insertions(+), 2 deletions(-) create mode 100644 modules/mol/alg/pymod/export_contact_overlap.cc create mode 100644 modules/mol/alg/src/adjacency_bitmap.cc create mode 100644 modules/mol/alg/src/adjacency_bitmap.hh create mode 100644 modules/mol/alg/src/contact_overlap.cc create mode 100644 modules/mol/alg/src/contact_overlap.hh create mode 100644 modules/mol/alg/src/domain_find.cc create mode 100644 modules/mol/alg/src/domain_find.hh create mode 100644 modules/mol/alg/src/similarity_matrix.cc create mode 100644 modules/mol/alg/src/similarity_matrix.hh diff --git a/CMakeLists.txt b/CMakeLists.txt index 5734b0259..859e28053 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -244,7 +244,7 @@ if (ENABLE_STATIC) set(Boost_LIBRARIES) set(Boost_USE_STATIC_LIBS ON) find_package(Boost ${_BOOST_MIN_VERSION} - COMPONENTS filesystem system iostreams regex REQUIRED) + COMPONENTS filesystem system iostreams regex thread REQUIRED) find_package(ZLIB REQUIRED) if (UNIX AND NOT APPLE) diff --git a/cmake_support/OST.cmake b/cmake_support/OST.cmake index 7f801e25c..94cadd2ff 100644 --- a/cmake_support/OST.cmake +++ b/cmake_support/OST.cmake @@ -925,4 +925,7 @@ macro(setup_boost) find_package(Boost ${_BOOST_MIN_VERSION} COMPONENTS regex REQUIRED) set(BOOST_REGEX_LIBRARIES ${Boost_LIBRARIES}) set(Boost_LIBRARIES) + find_package(Boost ${_BOOST_MIN_VERSION} COMPONENTS thread REQUIRED) + set(BOOST_THREAD ${Boost_LIBRARIES}) + set(Boost_LIBRARIES) endmacro() diff --git a/modules/mol/alg/pymod/CMakeLists.txt b/modules/mol/alg/pymod/CMakeLists.txt index 6be6ca39d..7efbf8816 100644 --- a/modules/mol/alg/pymod/CMakeLists.txt +++ b/modules/mol/alg/pymod/CMakeLists.txt @@ -4,6 +4,7 @@ set(OST_MOL_ALG_PYMOD_SOURCES export_clash.cc export_trajectory_analysis.cc export_structure_analysis.cc + export_contact_overlap.cc ) set(OST_MOL_ALG_PYMOD_MODULES diff --git a/modules/mol/alg/pymod/export_contact_overlap.cc b/modules/mol/alg/pymod/export_contact_overlap.cc new file mode 100644 index 000000000..0f7c56c5f --- /dev/null +++ b/modules/mol/alg/pymod/export_contact_overlap.cc @@ -0,0 +1,102 @@ +#include <boost/python.hpp> +#include <boost/python/suite/indexing/vector_indexing_suite.hpp> + +#include <ost/mol/alg/contact_overlap.hh> +#include <ost/mol/alg/domain_find.hh> + +using namespace boost::python; +using namespace ost; +using namespace ost::mol::alg; + +SimilarityMatrix (*com_a)(const ost::seq::AlignmentHandle&, + Real, int, int)=&ContactOverlapMap; +SimilarityMatrix (*com_b)(const ost::seq::AlignmentHandle&, + Real, OverlapMode, Real)=&ContactOverlapMap; +SimilarityMatrix (*com_c)(const DistanceMatrix&, const DistanceMatrix&, + Real, OverlapMode, Real)=&ContactOverlapMap; + +void export_contact_overlap() +{ + class_<SimilarityMatrix>("SimilarityMatrix", init<int, Real>()) + .def("Get", &SimilarityMatrix::Get, + return_value_policy<copy_const_reference>()) + .def("Set", &SimilarityMatrix::Set) + .def("GetSize", &SimilarityMatrix::GetSize) + .add_property("size", &SimilarityMatrix::GetSize) + .def("AsImage", &SimilarityMatrix::AsImage) + .def("Save", &SimilarityMatrix::Save) + .def("Load", &SimilarityMatrix::Load).staticmethod("Load") + ; + + class_<DistanceMatrix, DistanceMatrixPtr>("DistanceMatrix", no_init) + .def("Create", &DistanceMatrix::Create).staticmethod("Create") + .def("FromAln", &DistanceMatrix::FromAln, + (arg("aln"), arg("index")=1)).staticmethod("FromAln") + .def("Get", &DistanceMatrix::Get, + return_value_policy<copy_const_reference>()) + .def("Set", &DistanceMatrix::Set) + + .def("GetSize", &DistanceMatrix::GetSize) + .add_property("size", &DistanceMatrix::GetSize) + ; + + enum_<OverlapMode>("OverlapMode") + .value("OM_RELATIVE", OM_RELATIVE) + .value("OM_ABSOLUTE", OM_ABSOLUTE) + .export_values() + ; + + def("DomainsFromCOM", &DomainsFromCOM, (arg("idx_a")=-1, arg("idx_b")=-1)); + + def("ContactOverlap", &ContactOverlap, (arg("dist_mat_a"), arg("dist_mat_b"), + arg("tolerance")=0.1, + arg("mode")=OM_RELATIVE, + arg("max_dist")=1e6)); + class_<ContactList>("ContactList", init<>()) + .def(vector_indexing_suite<ContactList>()) + ; + + def("Contacts", &Contacts); + def("CalcContactNodeDegree", &CalcContactNodeDegree); + def("LocalDistanceTest", &LocalDistanceTest); + def("ContactOverlapMap", com_a); + def("ContactOverlapMap", com_b, (arg("mode")=OM_RELATIVE, + arg("max_dist")=1e6)); + def("ContactOverlapMap", com_c, (arg("mode")=OM_RELATIVE, + arg("max_dist")=1e6)); + class_<Domain>("Domain", no_init) + .def_readwrite("views", &Domain::views) + .def_readwrite("views", &Domain::views) + .def_readwrite("inter_s", &Domain::inter_s) + .def_readwrite("inter_d", &Domain::inter_d) + .def_readwrite("intra_s", &Domain::intra_s) + .def_readwrite("intra_d", &Domain::intra_d) + ; + class_<std::vector<Domain> >("DomainList", init<>()) + .def(vector_indexing_suite<std::vector<Domain> >()) + ; + + class_<Domains, DomainsPtr>("Domains", no_init) + .def("FromCOM", Domains::FromCOM, + (arg("sim"), arg("aln"), arg("idx_a")=-1, + arg("idx_b")=-1)).staticmethod("FromCOM") + .def("FromAln", Domains::FromAln, + (arg("aln"), arg("tolerance")=1.5, + arg("radius")=1e6, arg("threshold")=0.5, arg("idx_a")=0, arg("idx_b")=1)).staticmethod("FromAln") + .def("FromDistMats", Domains::FromDistMats, + (arg("dmat_a"), arg("dmat_b"), arg("tolerance")=1.5, + arg("radius")=1e6, arg("threshold")=0.5, + arg("num_threads")=1, + arg("defined_only")=true, + arg("adj_map")=false)).staticmethod("FromDistMats") + .add_property("domains", make_function(&Domains::GetDomains, + return_value_policy<copy_const_reference>())) + .add_property("components", make_function(&Domains::GetComponents, + return_value_policy<copy_const_reference>())) + .add_property("adj_map", &Domains::GetAdjMap) + .add_property("num_free", &Domains::GetNumFreeResidues) + .add_property("free_d", &Domains::GetFreeD) + .add_property("free_s", &Domains::GetFreeS) + ; +} + diff --git a/modules/mol/alg/pymod/wrap_mol_alg.cc b/modules/mol/alg/pymod/wrap_mol_alg.cc index 909f41d71..87e5be6a6 100644 --- a/modules/mol/alg/pymod/wrap_mol_alg.cc +++ b/modules/mol/alg/pymod/wrap_mol_alg.cc @@ -27,9 +27,11 @@ #include <ost/mol/alg/filter_clashes.hh> #include <ost/mol/alg/consistency_checks.hh> #include <ost/mol/alg/pdbize.hh> +#include <ost/mol/alg/contact_overlap.hh> #include <ost/export_helper/pair_to_tuple_conv.hh> #include <ost/export_helper/vec_to_list_conv.hh> + using namespace boost::python; using namespace ost; @@ -37,6 +39,7 @@ void export_svdSuperPose(); void export_TrajectoryAnalysis(); void export_StructureAnalysis(); void export_Clash(); +void export_contact_overlap(); #if OST_IMG_ENABLED void export_entity_to_density(); #endif @@ -105,6 +108,7 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) export_TrajectoryAnalysis(); export_StructureAnalysis(); export_Clash(); + export_contact_overlap(); #if OST_IMG_ENABLED export_entity_to_density(); #endif diff --git a/modules/mol/alg/src/CMakeLists.txt b/modules/mol/alg/src/CMakeLists.txt index b6a7f2892..bc89272e0 100644 --- a/modules/mol/alg/src/CMakeLists.txt +++ b/modules/mol/alg/src/CMakeLists.txt @@ -13,6 +13,11 @@ set(OST_MOL_ALG_HEADERS structure_analysis.hh consistency_checks.hh pdbize.hh + adjacency_bitmap.hh + consistency_checks.hh + contact_overlap.hh + domain_find.hh + similarity_matrix.hh ) set(OST_MOL_ALG_SOURCES @@ -29,6 +34,11 @@ set(OST_MOL_ALG_SOURCES structure_analysis.cc consistency_checks.cc pdbize.cc + adjacency_bitmap.cc + consistency_checks.cc + contact_overlap.cc + domain_find.cc + similarity_matrix.cc ) set(MOL_ALG_DEPS ost_mol ost_seq) @@ -54,7 +64,7 @@ module(NAME mol_alg SOURCES ${OST_MOL_ALG_SOURCES} HEADERS ${OST_MOL_ALG_HEADERS} HEADER_OUTPUT_DIR ost/mol/alg DEPENDS_ON ${MOL_ALG_DEPS} - LINK ${BOOST_PROGRAM_OPTIONS}) + LINK ${BOOST_PROGRAM_OPTIONS} ${BOOST_THREAD}) copy_if_different("${CMAKE_CURRENT_SOURCE_DIR}" "${STAGE_DIR}/share/openstructure" "atom_scattering_properties.txt" "ATOM_SCATTERING_PROPS" diff --git a/modules/mol/alg/src/adjacency_bitmap.cc b/modules/mol/alg/src/adjacency_bitmap.cc new file mode 100644 index 000000000..e117e0864 --- /dev/null +++ b/modules/mol/alg/src/adjacency_bitmap.cc @@ -0,0 +1,105 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#include <ost/img/image_factory.hh> +#include <ost/mol/atom_view.hh> +#include <ost/mol/residue_view.hh> +#include <ost/mol/residue_handle.hh> +#include <ost/mol/residue_view.hh> +#include "adjacency_bitmap.hh" + +namespace ost { namespace mol { namespace alg { + +const uint8_t AdjacencyBitmap::NUMBER_OF_BITS_SET[] = { + 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 2, + 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 2, + 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 2, + 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 3, + 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 2, + 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 2, + 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 2, + 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 3, + 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 2, + 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 2, + 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 2, + 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 3, + 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 3, + 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 3, + 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 3, + 2, 2, 2, 3, 2, 2, 2, 3, 2, 2, 2, 3, 3, 3, 3, 4 +}; + +const uint8_t AdjacencyBitmap::NUMBER_OF_EDGES_SET[] = { + 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, + 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, + 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, + 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, + 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, + 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, + 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, + 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, + 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, + 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, + 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4, + 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4, + 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, + 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, + 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4, + 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4 +}; + +OverlapResult AdjacencyBitmap::Overlap(size_t vert_i, size_t vert_j) const { + // To determine the union of the direct neighbors of vert_i and vert_j, + // we calculated the bitwise or of the edge_bits and combine it with + // the union of the defined bits. For that purpose, we will need a mask + // for the edge bits. In binary the mask looks like 0101010101. + uint64_t edge_mask = 0x5555555555555555; + uint64_t* pi = data_+storage_size_*vert_i; + uint64_t* ei = pi + storage_size_; + uint64_t* pj = data_+storage_size_*vert_j; + size_t nominator = 0; + size_t denominator = 0; + size_t edges_set = 0; + for (; pi != ei; ++pi, ++pj) { + uint64_t intersection_ij = *pi & *pj; + uint64_t union_ij = ((*pi | *pj) & edge_mask) | intersection_ij; + for (size_t i = 0; i < sizeof(uint64_t); ++i) { + nominator += NUMBER_OF_BITS_SET[(intersection_ij >> 8*i) & 0xff]; + edges_set += NUMBER_OF_EDGES_SET[(intersection_ij >> 8*i) & 0xff]; + denominator += NUMBER_OF_BITS_SET[(union_ij >> 8*i) & 0xff]; + } + } + return OverlapResult(nominator, denominator, edges_set); +} + +void AdjacencyBitmap::dump() const { + + for (size_t i=0; i<this->size(); i+=10) { + for (size_t j=0;j<this->size(); j+=10) { + if (this->defined(i, j)) { + std::cout << (this->get(i, j) ? '*' : '.'); + } else { + std::cout << 'u'; + } + } + std::cout << std::endl; + } +} + +}}} + diff --git a/modules/mol/alg/src/adjacency_bitmap.hh b/modules/mol/alg/src/adjacency_bitmap.hh new file mode 100644 index 000000000..69eee65f2 --- /dev/null +++ b/modules/mol/alg/src/adjacency_bitmap.hh @@ -0,0 +1,165 @@ +#ifndef OST_MOL_ALG_ADJACENCY_BITMAP_HH +#define OST_MOL_ALG_ADJACENCY_BITMAP_HH + +#include <cstring> +#include <boost/shared_ptr.hpp> +#include <ost/img/image_handle.hh> +#include <ost/tri_matrix.hh> +#include <ost/seq/sequence_handle.hh> +#include <ost/seq/alignment_handle.hh> +#include "similarity_matrix.hh" +#include "contact_overlap.hh" +#include "module_config.hh" + +namespace ost { namespace mol { namespace alg { + + +struct OverlapResult { + OverlapResult(uint16_t nom, uint16_t denom, uint16_t def): + nominator(nom), denominator(denom), defined(def) {} + OverlapResult(): nominator(0), denominator(0), defined(0) {} + uint16_t nominator; + uint16_t denominator; + uint16_t defined; +}; + +// A highly optimized representation of the adjacency matrix +class AdjacencyBitmap { +public: + explicit AdjacencyBitmap(size_t n_vertices): + num_vertices_(n_vertices), + storage_size_(StorageSize(n_vertices)) + { + data_ = static_cast<uint64_t*>(malloc(this->num_bytes())); + end_ = data_+storage_size_*num_vertices_; + memset(data_, 0, this->num_bytes()); + } + + AdjacencyBitmap(const DistanceMatrix& dmat_a, + const DistanceMatrix& dmat_b, + Real threshold, Real radius, + int start, int end): + num_vertices_(end-start), + storage_size_(StorageSize(end-start)), + data_(static_cast<uint64_t*>(malloc(this->num_bytes()))), + end_(data_+storage_size_*num_vertices_) { + + for (int i = start; i < end; ++i) { + this->set(i-start, i-start, false, false); + for (int j = i+1; j < end; ++j) { + Real da = dmat_a.Get(i, j); + Real db = dmat_b.Get(i, j); + bool defined = da>=0 && db>=0 && (db < radius || da < radius); + bool agree = false; + if (defined) { + agree = std::abs(da-db)<threshold; + } + this->set(i-start, j-start, agree, defined); + this->set(j-start, i-start, agree, defined); + } + } + } + + AdjacencyBitmap(const SimilarityMatrix& sim, Real threshold): + num_vertices_(sim.GetSize()), + storage_size_(StorageSize(sim.GetSize())), + data_(static_cast<uint64_t*>(malloc(this->num_bytes()))), + end_(data_+storage_size_*num_vertices_) { + memset(data_, 0, this->num_bytes()); + for (int i = 0; i < sim.GetSize(); ++i) { + this->set(i, i, false, false); + for (int j = i+1; j < sim.GetSize(); ++j) { + this->set(i, j, sim.Get(i, j)>threshold, sim.Get(i, j)>=0.0); + this->set(j, i, sim.Get(i, j)>threshold, sim.Get(i, j)>=0.0); + } + } + } + + AdjacencyBitmap(const AdjacencyBitmap& rhs): + num_vertices_(rhs.num_vertices_), + storage_size_(rhs.storage_size_), + data_(static_cast<uint64_t*>(malloc(this->num_bytes()))), + end_(data_+storage_size_*num_vertices_) { + memcpy(data_, rhs.data_, this->num_bytes()); + } + + ~AdjacencyBitmap() { + if (data_) + free(data_); + } + + // calculates the overlap between the direct neighbours of vertex i + // and j using pure awesomeness and some bit trickery. + OverlapResult Overlap(size_t vert_i, size_t vert_j) const; + + void set(size_t i, size_t j, bool val, bool defined=true) { + size_t byte_index = i*storage_size_ + j/(4*sizeof(uint64_t)); + assert(byte_index < this->num_bytes()); + size_t bit_index = (2*j) % BITS; + // the following two variables are required to force use of + // 64 bit integer for bit operations. + uint64_t one = 1; + uint64_t two = 2; + assert(bit_index < sizeof(uint64_t)*8); + + if (val) { + data_[byte_index] |= (one << bit_index); + } else { + data_[byte_index] &= ~(one << bit_index); + } + + if (defined) { + data_[byte_index] |= (two << bit_index); + } else { + data_[byte_index] &= ~(two << bit_index); + } + } + + bool get(size_t i, size_t j) const { + uint64_t one = 1; + size_t byte_index = i*storage_size_ + j/(4*sizeof(uint64_t)); + assert(byte_index < storage_size_*num_vertices_); + size_t bit_index = (2*j) % BITS; + assert(bit_index < sizeof(uint64_t)*8); + return data_[byte_index] & (one << bit_index); + } + + bool defined(size_t i, size_t j) const { + uint64_t two = 2; + size_t byte_index = i*storage_size_ + j/(4*sizeof(uint64_t)); + assert(byte_index < storage_size_*num_vertices_); + size_t bit_index = (2*j) % BITS; + assert(bit_index < sizeof(uint64_t)*8); + return data_[byte_index] & (two << bit_index); + } + + size_t size() const { + return num_vertices_; + } + + size_t num_bytes() const { + return storage_size_*num_vertices_*sizeof(uint64_t); + } + void dump() const; +private: + + AdjacencyBitmap(); + AdjacencyBitmap& operator=(const AdjacencyBitmap&); + + const static size_t BITS=sizeof(uint64_t)*8; + size_t num_vertices_; + size_t storage_size_; + uint64_t* data_; + uint64_t* end_; + + static const uint8_t NUMBER_OF_BITS_SET[]; + static const uint8_t NUMBER_OF_EDGES_SET[]; + static size_t StorageSize(size_t verts) { + return verts/(4*sizeof(uint64_t)) + (verts % (sizeof(uint64_t)*4) ? 1 : 0); + } +}; + + +}}} +#endif + diff --git a/modules/mol/alg/src/contact_overlap.cc b/modules/mol/alg/src/contact_overlap.cc new file mode 100644 index 000000000..89246d9db --- /dev/null +++ b/modules/mol/alg/src/contact_overlap.cc @@ -0,0 +1,461 @@ +#include <ost/log.hh> +#include <ost/mol/mol.hh> +#include <ost/seq/alignment_handle.hh> +#include <ost/seq/alg/merge_pairwise_alignments.hh> +#include "contact_overlap.hh" + +using namespace ost; +using namespace ost::mol; +using namespace ost::seq; + +namespace ost { namespace mol { namespace alg { + +ContactList Contacts(const ost::mol::EntityView& ent, Real min_dist, + Real max_dist) +{ + Real min_sqr=min_dist*min_dist; + Real max_sqr=max_dist*max_dist; + ContactList contacts; + ResidueViewList residues=ent.GetResidueList(); + for (ResidueViewList::const_iterator + i=residues.begin(), e=residues.end(); i!=e; ++i) { + AtomView atom_a=i->FindAtom("CA"); + if (!atom_a.IsValid()) { + continue; + } + for (ResidueViewList::const_iterator + j=i+1, e2=residues.end(); j!=e2; ++j) { + AtomView atom_b=j->FindAtom("CA"); + if (!atom_b.IsValid()) { + continue; + } + Real d=geom::Length2(atom_a.GetPos()-atom_b.GetPos()); + if (min_sqr<d && max_sqr>d) { + contacts.push_back(Contact(atom_a.GetHandle(), atom_b.GetHandle())); + } + } + + } + return contacts; +} + +namespace { + +struct AtomHashCmp { + bool operator()(const AtomHandle& a, + const AtomHandle& b) const { + return a.GetHashCode()<b.GetHashCode(); + } +}; + +} + +void CalcContactNodeDegree(const ContactList& contacts) +{ + std::map<AtomHandle, int, AtomHashCmp> degrees; + for (ContactList::const_iterator + i=contacts.begin(), e=contacts.end(); i!=e; ++i) { + // yes, this is sane. upon first accessing a key, the value is initialised + // to 0 and *then* incremented. + degrees[i->atom_a]+=1; + degrees[i->atom_b]+=1; + } + for (std::map<AtomHandle, int>::iterator + i=degrees.begin(), e=degrees.end(); i!=e; ++i) { + AtomHandle atom=i->first; + atom.SetIntProp("degree", i->second); + } +} + +Real Reward(Real ref_dist, Real obs_dist, Real tolerance, + OverlapMode mode=OM_RELATIVE) { + assert(ref_dist>0.0); + Real norm_diff=mode==OM_RELATIVE ? + std::abs((obs_dist-ref_dist)/sqrt(obs_dist+ref_dist)) : + std::abs(ref_dist-obs_dist); + if (norm_diff<tolerance*0.5) { + return 1.0; + } + Real one_over_tol=0.5/tolerance; + return std::max(0.0, 1.0-(norm_diff-tolerance*0.5)*one_over_tol); +} + +Real LocalDistanceTest(const EntityView& mdl, const EntityView& ref, + Real max_dist, Real tolerance, bool only_complete) +{ + Real max_sqr=pow(max_dist+max_dist*tolerance, 2); + ContactList contacts; + ResidueViewList residues=ref.GetResidueList(); + Real in_ref=0.0; + Real in_mdl=0.0; + for (ResidueViewList::const_iterator + i=residues.begin(), e=residues.end(); i!=e; ++i) { + AtomView atom_a=i->FindAtom("CA"); + if (!atom_a.IsValid()) { + continue; + } + for (ResidueViewList::const_iterator + j=i+1, e2=residues.end(); j!=e2; ++j) { + AtomView atom_b=j->FindAtom("CA"); + if (!atom_b.IsValid()) { + continue; + } + Real d=geom::Length2(atom_a.GetPos()-atom_b.GetPos()); + if (max_sqr<d) { + continue; + } + if (!only_complete) { + in_ref+=1.0; + } + + AtomView atom_c=mdl.FindAtom(atom_a.GetResidue().GetChain().GetName(), + atom_a.GetResidue().GetNumber(), "CA"); + if (!atom_c.IsValid()) { + continue; + } + AtomView atom_d=mdl.FindAtom(atom_b.GetResidue().GetChain().GetName(), + atom_b.GetResidue().GetNumber(), "CA"); + if (!atom_d.IsValid()) { + continue; + } + if (only_complete) { + in_ref+=1.0; + } + in_mdl+=Reward(sqrt(d), geom::Length(atom_c.GetPos()-atom_d.GetPos()), + tolerance); + } + } + if (in_ref==0.0) { + return 0.0; + } + return in_mdl/in_ref; +} + +namespace { +void calculate_com(const ost::seq::AlignmentHandle& target_tpl_aln, + Real tolerance, int idx_a, int idx_b, + SimilarityMatrix& overlap, OverlapMode mode, + Real max_dist) +{ + ConstSequenceHandle s1=target_tpl_aln.GetSequence(idx_a); + ConstSequenceHandle s2=target_tpl_aln.GetSequence(idx_b); + if (!(s1.HasAttachedView() && s2.HasAttachedView())) { + throw Error("Both sequences must have a view attached"); + } + EntityView av=s1.GetAttachedView(); + for (int x=0; x<s1.GetLength(); ++x) { + if (s1[x]=='-' || s2[x]=='-') { + continue; + } + ResidueView res_a=s1.GetResidue(x); + if (!res_a) { + continue; + } + AtomView ca_a=res_a.FindAtom("CA"); + if (!ca_a) { + continue; + } + for (int y=x+1; y<s1.GetLength(); ++y) { + if (s1[y]=='-' || s2[y]=='-') { + continue; + } + ResidueView res_b=s1.GetResidue(y); + AtomView ca_b=res_b.FindAtom("CA"); + if (!ca_b.IsValid()) { + continue; + } + ResidueView res_c=s2.GetResidue(x); + AtomView ca_c=res_c.FindAtom("CA"); + if (!ca_c.IsValid()) { + continue; + } + ResidueView res_d=s2.GetResidue(y); + AtomView ca_d=res_d.FindAtom("CA"); + if (!ca_d.IsValid()) { + continue; + } + Real da=geom::Length(ca_a.GetPos()-ca_b.GetPos()); + Real db=geom::Length(ca_c.GetPos()-ca_d.GetPos()); + if (da>max_dist && db>max_dist) { + continue; + } + Real reward=Reward(da, db, tolerance, mode); + if (overlap.Get(x, y)>=0) { + reward=std::min(reward, overlap.Get(x, y)); + } + overlap.Set(x, y, reward); + } + } +} + +} +DistanceMatrixPtr DistanceMatrix::FromAln(const AlignmentHandle& aln, + int index) { + if (!aln.GetSequence(index).HasAttachedView()) { + throw Error("Sequence must have a view attached"); + } + return DistanceMatrixPtr(new DistanceMatrix(aln, index)); + +} + +DistanceMatrixPtr DistanceMatrix::Create(const ConstSequenceHandle& s) +{ + if (!s.HasAttachedView()) { + throw Error("Sequence must have a view attached"); + } + return DistanceMatrixPtr(new DistanceMatrix(s)); +} + +SimilarityMatrix ContactOverlapMap(const DistanceMatrix& dmat1, + const DistanceMatrix& dmat2, + Real tolerance, OverlapMode mode, + Real max_dist) +{ + if (dmat1.GetSize()!=dmat2.GetSize()) { + throw Error("Distance matrices must have equal size"); + } + SimilarityMatrix sim(dmat1.GetSize(), -1.0); + for (int i=0; i<dmat1.GetSize(); ++i) { + for (int j=i+1; j<dmat1.GetSize(); ++j) { + Real a=dmat1.Get(i, j); + Real b=dmat2.Get(i, j); + if ((a<0 || b<0.0) || (a>max_dist && a>max_dist)) { + continue; + } + sim(i, j)=Reward(a, b, tolerance, mode); + } + } + return sim; +} + +DistanceMatrix::DistanceMatrix(const AlignmentHandle& a, int index): + ost::TriMatrix<Real>(a.GetSequence(0).GetGaplessString().size(), -1.0) +{ + EntityView av=a.GetSequence(index).GetAttachedView(); + ConstSequenceHandle trg_seq = a.GetSequence(0); + ConstSequenceHandle tpl_seq = a.GetSequence(index); + for (int x=0; x<a.GetLength(); ++x) { + if (trg_seq[x]=='-' || tpl_seq[x]=='-') { + continue; + } + ResidueView res_a=tpl_seq.GetResidue(x); + if (!res_a) { + continue; + } + AtomView ca_a=res_a.FindAtom("CA"); + if (!ca_a) { + continue; + } + for (int y=x; y<a.GetLength(); ++y) { + if (trg_seq[y]=='-' || tpl_seq[y] =='-') { + continue; + } + ResidueView res_b=tpl_seq.GetResidue(y); + if (!res_b.IsValid()) { + continue; + } + AtomView ca_b=res_b.FindAtom("CA"); + if (!ca_b.IsValid()) { + continue; + } + this->Set(trg_seq.GetResidueIndex(x), + trg_seq.GetResidueIndex(y), + geom::Length(ca_a.GetPos()-ca_b.GetPos())); + } + } +} + +DistanceMatrix::DistanceMatrix(const ConstSequenceHandle& s): + ost::TriMatrix<Real>(s.GetLength(), -1.0) +{ + EntityView av=s.GetAttachedView(); + for (int x=0; x<s.GetLength(); ++x) { + if (s[x]=='-') { + continue; + } + ResidueView res_a=s.GetResidue(x); + if (!res_a) { + continue; + } + AtomView ca_a=res_a.FindAtom("CA"); + if (!ca_a) { + continue; + } + for (int y=x; y<s.GetLength(); ++y) { + if (s[y]=='-') { + continue; + } + ResidueView res_b=s.GetResidue(y); + if (!res_b.IsValid()) { + continue; + } + AtomView ca_b=res_b.FindAtom("CA"); + if (!ca_b.IsValid()) { + continue; + } + this->Set(x, y, geom::Length(ca_a.GetPos()-ca_b.GetPos())); + } + } +} + +SimilarityMatrix ContactOverlapMap(const AlignmentHandle& target_tpl_aln, + Real tolerance, OverlapMode mode, Real max_dist) +{ + SimilarityMatrix overlap(target_tpl_aln.GetLength(), -1.0); + for (int i=1; i<target_tpl_aln.GetCount(); ++i) { + calculate_com(target_tpl_aln, tolerance, 0, i, overlap, mode, max_dist); + } + return overlap; +} + +SimilarityMatrix ContactOverlapMap(const AlignmentHandle& target_tpl_aln, + Real tolerance, + int idx_a, int idx_b) +{ + SimilarityMatrix overlap(target_tpl_aln.GetLength(), -1.0); + calculate_com(target_tpl_aln, tolerance, idx_a, idx_b, overlap, + OM_ABSOLUTE, 1e6); + return overlap; +} + + +Real ContactOverlap(const DistanceMatrixPtr& lhs, + const DistanceMatrixPtr& rhs, + Real tolerance, OverlapMode mode, + Real dist_limit) +{ + Real nom=0.0, denom=0.0; + for (int i=0; i<lhs->GetSize(); ++i) { + for (int j=i+1; j<lhs->GetSize(); ++j) { + Real a=lhs->Get(i, j); + Real b=rhs->Get(i, j); + if (a>dist_limit && b>dist_limit) { + continue; + } + if (a>=0.0 && b>=0.0) { + denom+=1.0; + nom+=Reward(a, b, tolerance, mode); + } + } + } + return denom>0 ? nom/denom : 0.0; +} + +int LabelComponents(const SimilarityMatrix& sim, std::vector<int>& components, + int x, int comp_id, Real tolerance) +{ + int count=1; + components[x]=comp_id; + for (int y=x+1; y<sim.GetSize(); ++y) { + if (sim.Get(x, y)<tolerance || components[y]>-1) { + continue; + } + count+=LabelComponents(sim, components, y, comp_id, tolerance); + } + return count; +} + + +//img::ImageHandle +void DomainsFromCOM(const SimilarityMatrix& sim, + AlignmentHandle aln, + Real tolerance, int idx_a, int idx_b) +{ + const static int MAX_ITERATIONS=20; + const static Real EPSILON=1e-6; + SimilarityMatrix sim1=sim; + SimilarityMatrix sim2(sim1.GetSize(), 0.0); + SimilarityMatrix* sim1p=&sim1; + SimilarityMatrix* sim2p=&sim2; + img::ImageHandle im/*=img::CreateImage(img::Size(sim1.GetSize(), + sim1.GetSize(), MAX_ITERATIONS+1))*/; + + /*for (int i=0; i<sim.GetSize(); ++i) { + for (int j=i+1; j<sim.GetSize(); ++j) { + im.SetReal(img::Point(i, j, MAX_ITERATIONS), sim1.Get(i, j)); + } + }*/ + // iteratively update similarity based on direct neighbours + int n=MAX_ITERATIONS; + std::vector<bool> fixed_residues(sim1.GetSize(), false); + Real last_error=0.0; + while (n>0) { + --n; + Real error=0.0; + for (int i=0; i<sim.GetSize(); ++i) { + /*if (fixed_residues[i]) { + continue; + }*/ + fixed_residues[i]=true; + for (int j=i+1; j<sim.GetSize(); ++j) { + int nom=0, denom=0; + for (int k=0; k<sim.GetSize(); ++k) { + Real ik=sim1p->Get(i, k); + Real jk=sim1p->Get(j, k); + if (jk<0 || ik <0) { + continue; + } + if (ik>tolerance) { + denom+=1; + if (jk>tolerance) { + nom+=1; + } + } else if (jk>tolerance) { + denom+=1; + } + } + if (denom) { + Real newval=Real(nom)/denom; + if (!(newval>0.95 || newval<0.05)) { + fixed_residues[i]=false; + } + sim2p->Set(i, j, newval); + } else { + sim2p->Set(i, j, 0); + } + error+=std::abs(sim2p->Get(i, j)-sim1p->Get(i, j)); + //sim.SetReal(img::Point(i, j, n), sim2p->Get(i, j)); + } + } + error/=sim.GetSize()*(sim.GetSize()-1)/2; + std::swap(sim1p, sim2p); + if (std::abs(error-last_error)<EPSILON) { + break; + } + last_error=error; + } + // run connected component algorithm on similarity matrix to identify + // conserved domains. + std::vector<int> components(sim.GetSize(), -1); + int comp_id=0; + for (int i=0; i<sim.GetSize(); ++i) { + if (components[i]!=-1) { + continue; + } + if (LabelComponents(*sim2p, components, i, comp_id, tolerance)>1) { + comp_id+=1; + } { + components[i]=-1; + } + } + + for (int i=0; i<aln.GetLength(); ++i) { + if (idx_a>=0 && idx_b>=0) { + if (ResidueView r=aln.GetResidue(idx_a, i)) { + r.SetIntProp("comp_id", components[i]); + } + if (ResidueView r=aln.GetResidue(idx_b, i)) { + r.SetIntProp("comp_id", components[i]); + } + } else { + for (int j=0; j<aln.GetCount(); ++j) { + if (ResidueView r=aln.GetResidue(j, i)) { + r.SetIntProp("comp_id", components[i]); + } + } + } + + } +} + +}}} diff --git a/modules/mol/alg/src/contact_overlap.hh b/modules/mol/alg/src/contact_overlap.hh new file mode 100644 index 000000000..e7c470656 --- /dev/null +++ b/modules/mol/alg/src/contact_overlap.hh @@ -0,0 +1,108 @@ +#ifndef OST_MOL_ALG_CONTACT_HH +#define OST_MOL_ALG_CONTACT_HH + +#include <boost/shared_ptr.hpp> + +#include <ost/mol/entity_view.hh> +#include <ost/img/image.hh> +#include <ost/seq/alignment_handle.hh> + + +#include "module_config.hh" +#include "similarity_matrix.hh" + +namespace ost { namespace mol { namespace alg { + +struct Contact { + Contact(): atom_a(), atom_b() {} + Contact(ost::mol::AtomHandle a, ost::mol::AtomHandle b): + atom_a(a), atom_b(b) + { } + ost::mol::AtomHandle atom_a; + ost::mol::AtomHandle atom_b; + + bool operator==(const Contact& rhs) const + { + return atom_a==rhs.atom_a && atom_b==rhs.atom_b; + } + + bool operator!=(const Contact& rhs) const + { + return !this->operator==(rhs); + } +}; + +typedef std::vector<Contact> ContactList; + +/// \brief returns a a list of contacts between C-alpha atoms +ContactList DLLEXPORT_OST_MOL_ALG +Contacts(const ost::mol::EntityView& ent, Real min_dist, + Real max_dist); + +typedef enum { + OM_RELATIVE, + OM_ABSOLUTE +} OverlapMode; +/// \brief calculates and assigns the contact node degree as the int +/// property 'degree' for each atom +void DLLEXPORT_OST_MOL_ALG CalcContactNodeDegree(const ContactList& contacts); + +struct LDT { + Real ha; + Real ma; + Real la; +}; + +class DistanceMatrix; + +typedef boost::shared_ptr<DistanceMatrix> DistanceMatrixPtr; + +class DLLEXPORT_OST_MOL_ALG DistanceMatrix: public ost::TriMatrix<Real> { +public: + static DistanceMatrixPtr Create(const ost::seq::ConstSequenceHandle& s); + static DistanceMatrixPtr FromAln(const ost::seq::AlignmentHandle& a, + int index=1); +private: + DistanceMatrix(const ost::seq::ConstSequenceHandle& sequence); + DistanceMatrix(const ost::seq::AlignmentHandle& aln, int index); + +}; + + +Real DLLEXPORT_OST_MOL_ALG ContactOverlap(const DistanceMatrixPtr& lhs, + const DistanceMatrixPtr& rhs, + Real tolerance, + OverlapMode mode=OM_RELATIVE, + Real dist_limit=1e6); + + +Real DLLEXPORT_OST_MOL_ALG +LocalDistanceTest(const ost::mol::EntityView& mdl, + const ost::mol::EntityView& ref, + Real max_dist, + Real tolerance=0.02, + bool only_complete=false); + +SimilarityMatrix DLLEXPORT_OST_MOL_ALG +ContactOverlapMap(const ost::seq::AlignmentHandle& target_tpl_aln, + Real tolerance, int idx_a, int idx_b); + +SimilarityMatrix DLLEXPORT_OST_MOL_ALG +ContactOverlapMap(const ost::seq::AlignmentHandle& target_tpl_aln, + Real tolerance, OverlapMode mode=OM_RELATIVE, + Real max_dist=1e6); + +SimilarityMatrix DLLEXPORT_OST_MOL_ALG +ContactOverlapMap(const DistanceMatrix& dmat1, const DistanceMatrix& dmat2, + Real tolerance, OverlapMode mode=OM_RELATIVE, + Real max_dist=1e6); + + +//ost::img::ImageHandle +void DLLEXPORT_OST_MOL_ALG +DomainsFromCOM(const SimilarityMatrix& sim, ost::seq::AlignmentHandle aln, + Real tolerance, int idx_a=-1, int idx_b=-1); + +}}} + +#endif diff --git a/modules/mol/alg/src/domain_find.cc b/modules/mol/alg/src/domain_find.cc new file mode 100644 index 000000000..ffdfe5355 --- /dev/null +++ b/modules/mol/alg/src/domain_find.cc @@ -0,0 +1,383 @@ +#include <boost/thread.hpp> + +#include <ost/img/image_factory.hh> +#include <ost/mol/atom_view.hh> +#include <ost/mol/residue_view.hh> +#include <ost/mol/residue_handle.hh> +#include <ost/img/image_handle.hh> +#include <ost/mol/residue_view.hh> +#include <ost/seq/alignment_handle.hh> +#include <ost/mol/alg/domain_find.hh> +#include <ost/mol/alg/adjacency_bitmap.hh> + +using namespace ost::seq; +using namespace ost::mol; +using namespace ost::img; +using namespace ost; + +namespace ost { namespace mol { namespace alg { + +int clear_component(const AdjacencyBitmap& bmap, std::vector<int>& components, + int x, int comp_id, int offset) { + int count=1; + components[offset+x]=-1; + for (size_t y=x+1; y<bmap.size(); ++y) { + if (components[offset+y]==comp_id && bmap.get(x, y) && bmap.defined(x, y)) { + count+=clear_component(bmap, components, y, comp_id, offset); + } + } + return count; +} + +int label_component(const AdjacencyBitmap& bmap, std::vector<int>& components, + int x, int comp_id, int offset) { + int count=1; + components[offset+x]=comp_id; + for (size_t y=x+1; y<bmap.size(); ++y) { + if (components[offset+y]==-2 && bmap.get(x, y) && bmap.defined(x, y)) { + count+=label_component(bmap, components, y, comp_id, offset); + } + } + return count; +} + +// use template to force generation of two loops, one without image update +// and one with image update. we for sure don't want to have the image update +// when running DomainFind in fast mode +// +// this is the single-threaded variant +template <bool update_img> +void update_edge_weights_st(AdjacencyBitmap* adj1p, + AdjacencyBitmap* adj2p, + img::ImageHandle& im, + int n, Real tolerance, + int offset) { + for (size_t i=0; i<adj1p->size(); ++i) { + for (size_t j=i+1; j<adj1p->size(); ++j) { + OverlapResult ov = adj1p->Overlap(i, j); + if (ov.defined>10) { + // fixme: avoid promotion to float. totally unneccessary. + Real set = (static_cast<Real>(ov.nominator)/ + static_cast<Real>(ov.denominator ? ov.denominator : 1)); + adj2p->set(i, j, set>=tolerance); + adj2p->set(j, i, set>=tolerance); + if (update_img) { + im.SetReal(img::Point(i+offset, j+offset, n), set); + im.SetReal(img::Point(j+offset, i+offset, n), set); + } + } else { + adj2p->set(i, j, false, false); + adj2p->set(j, i, false, false); + if (update_img) { + im.SetReal(img::Point(i+offset, j+offset, n), 0); + im.SetReal(img::Point(j+offset, i+offset, n), 0); + } + } + } + } +} + +template <bool update_img> +struct update_edge_weights_mt { + + AdjacencyBitmap* adj1p; + AdjacencyBitmap* adj2p; + Real tolerance; + size_t start; + size_t end; + + img::ImageHandle im; + int n; + int offset; + update_edge_weights_mt(AdjacencyBitmap* a1p, AdjacencyBitmap* a2p, + Real tol, size_t s, size_t e, + img::ImageHandle i, int nth, + int o): + adj1p(a1p), adj2p(a2p), tolerance(tol), start(s), + end(std::min(a1p->size(), e)), im(i), n(nth), + offset(o) {} + + void operator()() { + for (size_t i=start; i<end; ++i) { + for (size_t j=0; j<adj1p->size(); ++j) { + if (i==j) continue; + OverlapResult ov = adj1p->Overlap(i, j); + if (ov.defined>10) { + // fixme: avoid promotion to float. totally unneccessary. + Real set = (static_cast<Real>(ov.nominator)/ + static_cast<Real>(ov.denominator ? ov.denominator : 1)); + adj2p->set(i, j, set>=tolerance); + if (update_img) { + im.SetReal(img::Point(i+offset, j+offset, n), set); + } + } else { + adj2p->set(i, j, false, false); + if (update_img) { + im.SetReal(img::Point(i+offset, j+offset, n), 0.0); + } + } + } + } + } +}; + +AdjacencyBitmap optimize_domains(AdjacencyBitmap adj1, Real tolerance, + img::ImageHandle& im, + bool update_img, + int num_threads, + int offset, + int full_size) { + + const static int MAX_ITERATIONS=10; + AdjacencyBitmap adj2(adj1.size()); + + AdjacencyBitmap* adj1p=&adj1; + AdjacencyBitmap* adj2p=&adj2; + int n=MAX_ITERATIONS; + if (update_img) { + im = img::CreateImage(img::Size(full_size, full_size, n+1)); + for (size_t i=0;i<adj1.size();++i) { + for (size_t j=0;j<adj1.size();++j) { + if (i==j) { + im.SetReal(img::Point(i+offset, j+offset, n), 0.0); + } else { + im.SetReal(img::Point(i+offset, j+offset, n), + adj1.get(i, j)); + } + } + } + } + + if (num_threads>1) { + while (n>0) { + --n; + boost::thread_group tg; + int rows_per_thread= ((adj1p->size()+1)/num_threads); + int adj_rows_per_thread = rows_per_thread; + if (int modulo = rows_per_thread % 64) { + adj_rows_per_thread = adj_rows_per_thread+64-modulo; + } + for (int i=0;i<num_threads;++i) { + if (i*adj_rows_per_thread>adj1p->size()) { + continue; + } + if (update_img) { + tg.create_thread(update_edge_weights_mt<true>(adj1p, adj2p, tolerance, + (i+0)*adj_rows_per_thread, + (i+1)*adj_rows_per_thread, + im, n, offset)); + } else { + tg.create_thread(update_edge_weights_mt<false>(adj1p, adj2p, tolerance, + (i+0)*adj_rows_per_thread, + (i+1)*adj_rows_per_thread, + im, n, offset)); + } + } + tg.join_all(); + std::swap(adj1p, adj2p); + } + return *adj1p; + } + while (n>0) { + --n; + if (update_img) { + update_edge_weights_st<true>(adj1p, adj2p, im, n, tolerance, offset); + } else { + update_edge_weights_st<false>(adj1p, adj2p, im, n, tolerance, offset); + } + std::swap(adj1p, adj2p); + } + return *adj1p; +} + +void connected_components(const AdjacencyBitmap& adj_bitmap, + std::vector<int>& components, int offset) { + int comp_id=0; + for (int i=0; i<offset; ++i) { + components[i] = -1; + } + for (size_t i=0; i<adj_bitmap.size(); ++i) { + if (components[offset+i]!=-2) { + continue; + } + int count = label_component(adj_bitmap, components, i, comp_id, offset); + if (count>16) { + comp_id+=1; + } else { + clear_component(adj_bitmap, components, i, comp_id, offset); + } + } + for (size_t i=offset+adj_bitmap.size(); i<components.size(); ++i) { + components[i] = -1; + } +} + +DomainsPtr Domains::FromAln(ost::seq::AlignmentHandle aln, + Real tolerance, Real radius, Real threshold, + int idx_a, int idx_b) { + SimilarityMatrix sim = ContactOverlapMap(aln, tolerance, OM_ABSOLUTE, + radius); + return Domains::FromCOM(sim, aln, threshold, idx_a, idx_b); +} + +DomainsPtr Domains::FromCOM(const SimilarityMatrix& sim, AlignmentHandle aln, + Real tolerance, int idx_a, int idx_b) +{ + bool update_img = false; + AdjacencyBitmap adj1(sim, 0.5); + img::ImageHandle im; + + AdjacencyBitmap final_adj(optimize_domains(adj1, tolerance, im, + update_img, 1, 0, + adj1.size())); + + // identify connected components in the graph + std::vector<int> components(adj1.size(), -2); + connected_components(final_adj, components, 0); + + DomainsMap domains_map; + if (idx_a>=0 && idx_b>=0) { + for (int i=0; i<aln.GetLength(); ++i) { + if (components[i]==-1) { + continue; + } + DomainsMap::iterator k = domains_map.find(components[i]); + if (k == domains_map.end()) { + std::pair<DomainsMap::iterator, bool> k2; + k2 = domains_map.insert(std::make_pair(components[i], + Domain(EntityViewList()))); + k = k2.first; + k->second.views.push_back(aln.GetSequence(idx_a).GetAttachedView().CreateEmptyView()); + k->second.views.push_back(aln.GetSequence(idx_b).GetAttachedView().CreateEmptyView()); + } + if (ResidueView r=aln.GetResidue(idx_a, i)) { + k->second.views[0].AddResidue(r.GetHandle(), ViewAddFlag::INCLUDE_ALL); + } + if (ResidueView r=aln.GetResidue(idx_b, i)) { + k->second.views[1].AddResidue(r.GetHandle(), ViewAddFlag::INCLUDE_ALL); + } + } + } + + int num_free = 0; + + Real free_d = 0, free_s = 0; + + for (size_t i=0; i<adj1.size(); ++i) { + if (components[i]==-1) { + num_free+=1; + } + for (size_t j=i+1; j<adj1.size(); ++j) { + if (!final_adj.defined(i, j)) { + continue; + } + if (components[i] == -1 || components[j] == -1) { + free_d+=1; + if (sim.Get(i, j)>=0) + free_s+=sim.Get(i, j); + continue; + } + Real s = sim.Get(i, j); + if (s<0) { + continue; + } + if (components[i] == components[j]) { + domains_map.find(components[i])->second.intra_s+= adj1.get(i, j); + domains_map.find(components[i])->second.intra_d+= 1; + } else { + domains_map.find(components[i])->second.inter_s+=adj1.get(i, j); + domains_map.find(components[i])->second.inter_d+= 1; + domains_map.find(components[j])->second.inter_s+= adj1.get(i, j); + domains_map.find(components[j])->second.inter_d+= 1; + } + } + } + + DomainsPtr domains(new Domains); + for (DomainsMap::iterator i = domains_map.begin(), + e = domains_map.end(); i!=e; ++i) { + domains->domains_.push_back(i->second); + } + domains->num_free_ = num_free; + domains->components_ = components; + domains->free_d_ = free_d; + domains->free_s_ = free_s; + if (im.IsValid()) + domains->adj_map_ = im; + return domains; +} + + +std::pair<int, int> get_defined_region(const DistanceMatrix& dmat_a, + const DistanceMatrix& dmat_b) { + int start = 0; + for (int i=0; i<dmat_a.GetSize(); ++i) { + if (dmat_a.Get(i, i)>=0.0 && dmat_b.Get(i, i)>=0.0) { + break; + } + start++; + } + if (start == dmat_a.GetSize()) { + // the two don't have an overlap + return std::make_pair(start, start); + } + int end = dmat_a.GetSize(); + for (int i=dmat_a.GetSize()-1; i>=0; --i) { + if (dmat_a.Get(i, i)>=0.0 && dmat_b.Get(i, i)>=0.0) { + break; + } + end--; + } + return std::make_pair(start, end); +} + +DomainsPtr Domains::FromDistMats(const DistanceMatrix& dmat_a, + const DistanceMatrix& dmat_b, + Real tolerance, Real radius, + Real threshold, int num_threads, + bool defined_only, + bool update_img) +{ + // for distance matrices derived from target-template alignments, C- + // and N-terminal regions of the distance map are undefined, since + // they are not covered in the alignment. To avoid doing a full-blown + // O(n^3) update on the full distance matrix, we first identify regions + // of the distance map which are defined and perform the update only on + // that part of the map. + std::pair<int, int> region(0, dmat_a.GetSize()); + if (defined_only) + region = get_defined_region(dmat_a, dmat_b); + + DomainsPtr domains(new Domains); + + if (region.first == region.second) { + // there is no overlap whatsoever between the two distance matrices. + // all present residues will thus be assigned to the unassigned + // component (designated as -1). + domains->components_.resize(dmat_a.GetSize(), -1); + return domains; + } + + // derive initial connectivity bitmap from the two distance matrices + AdjacencyBitmap adj1(dmat_a, dmat_b, tolerance, radius, region.first, + region.second); + + img::ImageHandle im; + + // optimize domains + AdjacencyBitmap final_adj(optimize_domains(adj1, threshold, im, + update_img, num_threads, + region.first, + dmat_a.GetSize())); + // identify connected components in the graph + std::vector<int> components(dmat_a.GetSize(), -2); + connected_components(final_adj, components, region.first); + domains->components_ = components; + if (im.IsValid()) + domains->adj_map_ = im; + return domains; + +} + +}}} + diff --git a/modules/mol/alg/src/domain_find.hh b/modules/mol/alg/src/domain_find.hh new file mode 100644 index 000000000..f3ce2b0dd --- /dev/null +++ b/modules/mol/alg/src/domain_find.hh @@ -0,0 +1,81 @@ +#ifndef OST_MOL_ALG_DOMAIN_FIND_HH +#define OST_MOL_ALG_DOMAIN_FIND_HH + +#include <ost/seq/alignment_handle.hh> + +#include "module_config.hh" +#include "contact_overlap.hh" + +namespace ost { namespace mol { namespace alg { + +struct Domain { + Domain(const ost::mol::EntityViewList& v): + inter_s(0), inter_d(0), intra_s(0), intra_d(0), views(v) {} + Domain(): inter_s(0), inter_d(0), intra_s(0), intra_d(0) {} + + Real inter_s; + Real inter_d; + Real intra_s; + Real intra_d; + + bool operator==(const Domain& rhs) const { + return views==rhs.views && inter_s == rhs.inter_s && + inter_d == rhs.inter_d && intra_s == rhs.intra_s && + intra_d == rhs.intra_d; + } + ost::mol::EntityViewList views; +}; + + +class Domains; + +typedef boost::shared_ptr<Domains> DomainsPtr; + +class DLLEXPORT_OST_MOL_ALG Domains { +public: + + static DomainsPtr FromAln(ost::seq::AlignmentHandle aln, + Real tolerance, Real radius, Real threshold, + int idx_a=-1, int idx_b=-1); + + static DomainsPtr FromCOM(const SimilarityMatrix& sim, + ost::seq::AlignmentHandle aln, + Real tolerance, int idx_a=-1, int idx_b=-1); + + // version of domain find which operates on two distances matrices + // we on purpose only return the connected components, since the + // distance matrices may not need to correpsond to an actual + // entity view. + static DomainsPtr FromDistMats(const DistanceMatrix& dmat_a, + const DistanceMatrix& dmat_b, + Real tolerance, + Real radius, + Real threshold, + int num_threads, + bool defined_only, + bool adj_map); + size_t GetNumFreeResidues() const { return num_free_; } + + Real GetFreeD() const { return free_d_; } + Real GetFreeS() const { return free_s_; } + + const std::vector<Domain>& GetDomains() const { return domains_; } + const std::vector<int>& GetComponents() const { return components_; } + + ost::img::ImageHandle GetAdjMap() const { return adj_map_; } +private: + Domains(): num_free_(0), free_s_(0), free_d_(0) {} + // holds the steps of the algorithm. mainly for debuggin purposes + ost::img::ImageHandle adj_map_; + int num_free_; + Real free_s_; + Real free_d_; + std::vector<Domain> domains_; + std::vector<int> components_; +}; + +typedef std::map<int, Domain> DomainsMap; + +}}} +#endif + diff --git a/modules/mol/alg/src/similarity_matrix.cc b/modules/mol/alg/src/similarity_matrix.cc new file mode 100644 index 000000000..e44ef5f67 --- /dev/null +++ b/modules/mol/alg/src/similarity_matrix.cc @@ -0,0 +1,41 @@ +#include "similarity_matrix.hh" + +namespace ost { namespace mol { namespace alg { + + +ost::img::ImageHandle SimilarityMatrix::AsImage(bool full) const +{ + ost::img::Size img_size(this->GetSize(), this->GetSize()); + ost::img::ImageHandle im=ost::img::CreateImage(img_size); + for (int i=0; i<this->GetSize(); ++i) { + + for (int j=full ? 0 : i+1; j<this->GetSize(); ++j) { + im.SetReal(ost::img::Point(i, j), this->Get(i, j)); + im.SetReal(ost::img::Point(j, i), this->Get(i, j)); + } + } + return im; +} + +SimilarityMatrix SimilarityMatrix::Load(const String& filename) +{ + std::ifstream istream(filename.c_str(), std::ios::binary); + size_t the_size; + istream.read(reinterpret_cast<char*>(&the_size), sizeof(size_t)); + SimilarityMatrix sim(the_size); + istream.read(reinterpret_cast<char*>(&sim.Data().front()), + sizeof(Real)*sim.Data().size()); + return sim; +} + +void SimilarityMatrix::Save(const String& filename) +{ + std::ofstream of(filename.c_str(), std::ios::binary); + size_t the_size=this->GetSize(); + of.write(reinterpret_cast<const char*>(&the_size), sizeof(size_t)); + of.write(reinterpret_cast<const char*>(&this->Data().front()), + sizeof(Real)*this->Data().size()); +} + +}}} + diff --git a/modules/mol/alg/src/similarity_matrix.hh b/modules/mol/alg/src/similarity_matrix.hh new file mode 100644 index 000000000..f87148586 --- /dev/null +++ b/modules/mol/alg/src/similarity_matrix.hh @@ -0,0 +1,36 @@ +#ifndef OST_MOL_ALG_SIMILARITY_MATRIX_HH +#define OST_MOL_ALG_SIMILARITY_MATRIX_HH + +#include <limits> +#include <fstream> +#include <ost/tri_matrix.hh> +#include <ost/img/image.hh> +#include "module_config.hh" + +namespace ost { namespace mol { namespace alg { + +// triangular distance matrix +class DLLEXPORT_OST_MOL_ALG SimilarityMatrix : public ost::TriMatrix<Real> { +public: + /// \brief initialise similarity matrix with n datapoints. + /// + /// The distances are initialised with the default value + /// + /// \param n is the number of datapoints + /// \param def_val is the default similarity + SimilarityMatrix(int n, Real def_val=std::numeric_limits<Real>::min()): + ost::TriMatrix<Real>(n, def_val) + { } + + ost::img::ImageHandle AsImage(bool full=false) const; + + static SimilarityMatrix Load(const String& filename); + + void Save(const String& filename); + +}; + +}}} + +#endif + -- GitLab