diff --git a/modules/seq/alg/pymod/wrap_seq_alg.cc b/modules/seq/alg/pymod/wrap_seq_alg.cc index 6bab6aa5f00236c7c60ce7baccf998c9d0d52405..3766ad7c7962c32e2a8de206d131c66332a7532f 100644 --- a/modules/seq/alg/pymod/wrap_seq_alg.cc +++ b/modules/seq/alg/pymod/wrap_seq_alg.cc @@ -187,6 +187,7 @@ void export_distance_analysis() def("CreateDistanceMap", &CreateDistanceMap, (arg("aln"))); def("CreateVarianceMap", &CreateVarianceMap, (arg("d_map"), arg("sigma")=25)); def("CreateDist2Mean", &CreateDist2Mean, (arg("d_map"))); + def("CreateMeanlDDTHA", &CreateMeanlDDTHA, (arg("d_map"))); class_<Distances>("Distances", no_init) .def("GetDataSize", &Distances::GetDataSize) @@ -233,6 +234,16 @@ void export_distance_analysis() .def("GetJsonString", &Dist2Mean::GetJsonString) .def("GetData", &DistToMeanGetData) ; + class_<MeanlDDT, MeanlDDTPtr, + boost::noncopyable>("MeanlDDT", no_init) + .def("Get", &MeanlDDT::Get, (arg("i_res"), arg("i_str"))) + .def("GetNumResidues", &MeanlDDT::GetNumResidues) + .def("GetNumStructures", &MeanlDDT::GetNumStructures) + .def("ExportDat", &MeanlDDT::ExportDat, (arg("file_name"))) + .def("ExportCsv", &MeanlDDT::ExportCsv, (arg("file_name"))) + .def("ExportJson", &MeanlDDT::ExportJson, (arg("file_name"))) + .def("GetJsonString", &MeanlDDT::GetJsonString) + ; } //////////////////////////////////////////////////////////////////// diff --git a/modules/seq/alg/src/variance_map.cc b/modules/seq/alg/src/variance_map.cc index c50527ac3a2b9169d11ab62c6b73135c5461fd6e..05d9eb04b1a814feeb8f961862975cdda725f0b3 100644 --- a/modules/seq/alg/src/variance_map.cc +++ b/modules/seq/alg/src/variance_map.cc @@ -24,6 +24,7 @@ #include <fstream> #include <iostream> #include <sstream> +#include <limits> #include <ost/mol/mol.hh> @@ -69,6 +70,59 @@ String GetJson(const T& data, uint num_rows, uint num_cols) { out_stream << "]"; return out_stream.str(); } + + +void FilllDDTValues(int len, const std::vector<Real>& dist_diff, + const std::vector<int>& below_15, + const std::vector<std::pair<int, int> >& idx_mapping, + std::vector<Real>& local_lddt) { + + std::vector<int> counts(len, 0); + std::vector<int> total_counts(len, 0); + + for(auto it = below_15.begin(); it != below_15.end(); ++it) { + Real diff = dist_diff[*it]; + int fulfilled = 0; + fulfilled += static_cast<int>(diff < static_cast<Real>(4.0)); + fulfilled += static_cast<int>(diff < static_cast<Real>(2.0)); + fulfilled += static_cast<int>(diff < static_cast<Real>(1.0)); + fulfilled += static_cast<int>(diff < static_cast<Real>(0.5)); + + const std::pair<int, int>& d_indices = idx_mapping[*it]; + counts[d_indices.first] += fulfilled; + counts[d_indices.second] += fulfilled; + total_counts[d_indices.first] += 4; + total_counts[d_indices.second] += 4; + } + + local_lddt.assign(len, std::numeric_limits<Real>::quiet_NaN()); + + for(int i = 0; i < len; ++i) { + if(counts[i] > 0) { + local_lddt[i] = static_cast<Real>(counts[i]) / total_counts[i]; + } + } + +} + + +void FilllDDTValues(int len, const std::vector<Real>& dist_one, + const std::vector<Real>& dist_two, + const std::vector<int>& below_15_one, + const std::vector<int>& below_15_two, + const std::vector<std::pair<int, int> >& idx_mapping, + std::vector<Real>& local_lddt_one, + std::vector<Real>& local_lddt_two) { + + std::vector<Real> dist_diff(dist_one.size()); + for(uint i = 0; i < dist_one.size(); ++i) { + dist_diff[i] = std::abs(dist_one[i] - dist_two[i]); + } + + FilllDDTValues(len, dist_diff, below_15_two, idx_mapping, local_lddt_one); + FilllDDTValues(len, dist_diff, below_15_one, idx_mapping, local_lddt_two); +} + } // anon ns /////////////////////////////////////////////////////////////////////////////// @@ -141,6 +195,39 @@ String Dist2Mean::GetJsonString() { return GetJson(*this, num_residues_, num_structures_); } +void MeanlDDT::ExportDat(const String& file_name) { + if (values_.size() == 0) throw IntegrityError("Matrix empty"); + // dump it + std::ofstream out_file(file_name.c_str()); + if (!out_file) throw Error("The file '" + file_name + "' cannot be opened."); + for (uint i_res = 0; i_res < num_residues_; ++i_res) { + out_file << (i_res+1); + for (uint i_str = 0; i_str < num_structures_; ++i_str) { + out_file << " " << this->Get(i_res, i_str); + } + out_file << std::endl; + } + out_file.close(); +} + +void MeanlDDT::ExportCsv(const String& file_name) { + if (values_.size() == 0) throw IntegrityError("Matrix empty"); + DumpCsv(file_name, *this, num_residues_, num_structures_); +} + +void MeanlDDT::ExportJson(const String& file_name) { + if (values_.size() == 0) throw IntegrityError("Matrix empty"); + std::ofstream out_file(file_name.c_str()); + if (!out_file) throw Error("The file '" + file_name + "' cannot be opened."); + out_file << this->GetJsonString() << std::endl; + out_file.close(); +} + +String MeanlDDT::GetJsonString() { + if (values_.size() == 0) throw IntegrityError("Matrix empty"); + return GetJson(*this, num_residues_, num_structures_); +} + /////////////////////////////////////////////////////////////////////////////// // Create maps VarianceMapPtr CreateVarianceMap(const DistanceMapPtr dmap, Real sigma) { @@ -192,5 +279,84 @@ Dist2MeanPtr CreateDist2Mean(const DistanceMapPtr dmap) { dist2mean->DivideBy(len); return dist2mean; } + + +MeanlDDTPtr CreateMeanlDDTHA(const DistanceMapPtr dmap) { + // setup/check + uint nstruc = dmap->GetNumStructures(); + uint len = dmap->GetSize(); + if (len <= 1 || nstruc == 0) { + throw IntegrityError("empty distance map provided"); + } + + // the relevant pairwise distances are the ones that are off-diagonal from the + // pairwise distance matrices + uint n_off_diagonal = (len*len-len)/2; + std::vector<std::vector<Real> > dist_data(dmap->GetNumStructures()); + std::vector<std::vector<int> > below_15(dmap->GetNumStructures()); + for(uint s_idx = 0; s_idx < nstruc; ++s_idx) { + // the default value is simply a very large number + // this will trigger a distance difference above any relevant threshold + dist_data[s_idx].assign(n_off_diagonal, 10000000.0); + } + + std::vector<std::pair<int,int> > off_diagonal_mapper(n_off_diagonal); + + uint off_diag_idx = 0; + for (uint i_res = 0; i_res < len; ++i_res) { + for (uint j_res = i_res + 1; j_res < len; ++j_res, ++off_diag_idx) { + off_diagonal_mapper[off_diag_idx] = std::make_pair(i_res, j_res); + const Distances& di = dmap->GetDistances(i_res, j_res); + const uint size = di.GetDataSize(); + for (uint k = 0; k < size; ++k) { + const std::pair<Real, int> ret = di.GetDataElement(k); + dist_data[ret.second-1][off_diag_idx] = ret.first; + if(ret.first <= 15.0) { + below_15[ret.second-1].push_back(off_diag_idx); + } + } + } + } + + std::vector<std::vector<Real> > avg_lddt_values(nstruc); + std::vector<std::vector<int> > avg_lddt_counts(nstruc); + + for(uint i = 0; i < nstruc; ++i) { + avg_lddt_values[i].assign(len, 0); + avg_lddt_counts[i].assign(len, 0); + } + + std::vector<Real> lddt_i, lddt_j; + for(uint i = 0; i < nstruc; ++i) { + for(uint j = i+1; j < nstruc; ++j) { + FilllDDTValues(len, dist_data[i], dist_data[j], below_15[i], below_15[j], + off_diagonal_mapper, lddt_i, lddt_j); + for(uint k = 0; k < len; ++k) { + if(!std::isnan(lddt_i[k])) { + avg_lddt_values[i][k] += lddt_i[k]; + avg_lddt_counts[i][k] += 1; + } + if(!std::isnan(lddt_j[k])) { + avg_lddt_values[j][k] += lddt_j[k]; + avg_lddt_counts[j][k] += 1; + } + } + } + } + + MeanlDDTPtr return_ptr(new MeanlDDT(len, nstruc)); + for(uint struc_idx = 0; struc_idx < nstruc; ++struc_idx) { + for(uint res_idx = 0; res_idx < len; ++res_idx) { + if(avg_lddt_counts[struc_idx][res_idx] > 0) { + return_ptr->Set(res_idx, struc_idx, + avg_lddt_values[struc_idx][res_idx]/avg_lddt_counts[struc_idx][res_idx]); + } + } + } + + return return_ptr; +} + + }}} diff --git a/modules/seq/alg/src/variance_map.hh b/modules/seq/alg/src/variance_map.hh index 6bf4780f9f7e1e35763343b1579e7189fbb36394..16738cf13c7a3a8384c006fa03daf1c9b88eabe1 100644 --- a/modules/seq/alg/src/variance_map.hh +++ b/modules/seq/alg/src/variance_map.hh @@ -36,8 +36,10 @@ namespace ost { namespace seq { namespace alg { class VarianceMap; class Dist2Mean; +class MeanlDDT; typedef boost::shared_ptr<VarianceMap> VarianceMapPtr; typedef boost::shared_ptr<Dist2Mean> Dist2MeanPtr; +typedef boost::shared_ptr<MeanlDDT> MeanlDDTPtr; /// \brief Container for variances for each entry in a distance map. /// Main functionality: Get/Set, Min, Max, ExportXXX @@ -119,6 +121,58 @@ private: std::vector<Real> values_; }; + +class DLLEXPORT_OST_SEQ_ALG MeanlDDT { +public: + // all values initialized to 0 in constructor! + MeanlDDT(uint num_residues, uint num_structures) + : num_residues_(num_residues), num_structures_(num_structures) + , values_(num_residues * num_structures, 0) { } + + void Set(uint i_res, uint i_str, Real val) { + values_[GetIndex(i_res, i_str)] = val; + } + + Real Get(uint i_res, uint i_str) const { + return values_[GetIndex(i_res, i_str)]; + } + + Real& operator()(uint i_res, uint i_str) { + return values_[GetIndex(i_res, i_str)]; + } + Real operator()(uint i_res, uint i_str) const { + return values_[GetIndex(i_res, i_str)]; + } + + std::vector<Real>& Data() { return values_; } + + uint GetNumResidues() const { return num_residues_; } + uint GetNumStructures() const { return num_structures_; } + + void Add(uint i_res, uint i_str, Real val) { + values_[GetIndex(i_res, i_str)] += val; + } + + void ExportDat(const String& file_name); + void ExportCsv(const String& file_name); + void ExportJson(const String& file_name); + String GetJsonString(); + +private: + uint GetIndex(uint i_res, uint i_str) const { + assert(i_res < num_residues_); + assert(i_str < num_structures_); + return (i_res * num_structures_ + i_str); + } + + uint num_residues_; + uint num_structures_; + std::vector<Real> values_; +}; + + + + /// \returns Variance measure for each entry in dmap. /// \param dmap Distance map as created with CreateDistanceMap. /// \param sigma Used for weighting of variance measure @@ -132,6 +186,9 @@ CreateVarianceMap(const DistanceMapPtr dmap, Real sigma=25); Dist2MeanPtr DLLEXPORT_OST_SEQ_ALG CreateDist2Mean(const DistanceMapPtr dmap); +MeanlDDTPtr DLLEXPORT_OST_SEQ_ALG +CreateMeanlDDTHA(const DistanceMapPtr dmap); + }}} #endif