diff --git a/CHANGELOG.txt b/CHANGELOG.txt index e7b55a2574e40bd3e792c5746be5ec7e95716d98..94038ac2be3a9443a8d3eb521a07d4bfd629c081 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,11 +1,16 @@ -Changes in Release 1.8 +Changes in Release <RELEASE NUMBER> -------------------------------------------------------------------------------- * nonstandard C++ module was moved from ost.conop to ost.mol.alg. This implies change in the API. Mapping functions CopyResidue, CopyConserved and CopyNonConserved that were previousely imported from ost.conop are now to be imported from ost.mol.alg. + * Removed habit of changing secondary structure of entities when loading + from mmCIF PDB files. Before, OST would turn secondary structure 'EEH' + into 'ECH' to make it look nicer in DNG. Now, 'EEH' stays 'EEH'. * Added Molck API to the ost.mol.alg module. + * Extended lDDT API in ost.mol.alg module to reproduce functionality of lddt + binary. Changes in Release 1.7.1 -------------------------------------------------------------------------------- diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index 730ca1b8e97ed554287158184c9d9eb20283da37..23dc32f461a08cf1633362e60bb3b9c54ecb09af 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -638,7 +638,7 @@ of the annotation available. .. method:: GetChainIntervalList() - See :attr:`chainintervalls` + See :attr:`chainintervals` .. method:: GetOperations() diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 27a7c9bc8406308409525862a7b1d906417d2c36..905e28442bab8bd68300426bdeb99df8c6d4cb0e 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -1543,17 +1543,7 @@ void MMCifReader::AssignSecStructure(mol::EntityHandle ent) continue; } mol::SecStructure alpha(mol::SecStructure::ALPHA_HELIX); - // some PDB files contain helix/strand entries that are adjacent to each - // other. To avoid visual artifacts, we effectively shorten the first of - // the two secondary structure segments to insert one residue of coil - // conformation. - mol::ResNum start = i->start, end = i->end; - if (helix_list_.end() != i+1 && // unit test - (*(i+1)).start.GetNum() <= end.GetNum()+1 && - (*(i+1)).end.GetNum() > end.GetNum()) { - end = mol::ResNum((*(i+1)).start.GetNum()-2); - } - chain.AssignSecondaryStructure(alpha, start, end); + chain.AssignSecondaryStructure(alpha, i->start, i->end); } for (MMCifHSVector::const_iterator i=strand_list_.begin(), @@ -1565,14 +1555,7 @@ void MMCifReader::AssignSecStructure(mol::EntityHandle ent) continue; } mol::SecStructure extended(mol::SecStructure::EXTENDED); - mol::ResNum start = i->start, end = i->end; - // see comment for helix assignment - if (strand_list_.end() != i+1 && // unit test - (*(i+1)).start.GetNum() <= end.GetNum()+1 && - (*(i+1)).end.GetNum() > end.GetNum()) { - end=mol::ResNum((*(i+1)).start.GetNum()-2); - } - chain.AssignSecondaryStructure(extended, start, end); + chain.AssignSecondaryStructure(extended, i->start, i->end); } } diff --git a/modules/io/src/mol/pdb_reader.cc b/modules/io/src/mol/pdb_reader.cc index a203234ecc275f82bceb87b75781ad1e626fb100..ffa04e6ecc0144a342bd477c84ab35b570f78e5a 100644 --- a/modules/io/src/mol/pdb_reader.cc +++ b/modules/io/src/mol/pdb_reader.cc @@ -520,16 +520,7 @@ void PDBReader::AssignSecStructure(mol::EntityHandle ent) continue; } mol::SecStructure alpha(mol::SecStructure::ALPHA_HELIX); - // some PDB files contain helix/strand entries that are adjacent to each - // other. To avoid visual artifacts, we effectively shorten the first of the - // two secondary structure segments to insert one residue of coil - // conformation. - mol::ResNum start=i->start, end=i->end; - if (helix_list_.end()!=i+1 && (*(i+1)).start.GetNum()<=end.GetNum()+1 && - (*(i+1)).end.GetNum()>end.GetNum()) { - end=mol::ResNum((*(i+1)).start.GetNum()-2); - } - chain.AssignSecondaryStructure(alpha, start, end); + chain.AssignSecondaryStructure(alpha, i->start, i->end); } for (HSList::const_iterator i=strand_list_.begin(), e=strand_list_.end(); @@ -540,13 +531,7 @@ void PDBReader::AssignSecStructure(mol::EntityHandle ent) continue; } mol::SecStructure extended(mol::SecStructure::EXTENDED); - mol::ResNum start=i->start, end=i->end; - // see comment for helix assignment - if (strand_list_.end()!=i+1 && (*(i+1)).start.GetNum()<=end.GetNum()+1 && - (*(i+1)).end.GetNum()>end.GetNum()) { - end=mol::ResNum((*(i+1)).start.GetNum()-2); - } - chain.AssignSecondaryStructure(extended, start, end); + chain.AssignSecondaryStructure(extended, i->start, i->end); } } diff --git a/modules/mol/alg/doc/molalg.rst b/modules/mol/alg/doc/molalg.rst index bd2683a2ec2b138a7440d28514638d3fdeba8a05..d4b71376c848fc14e9e0dcb1abecb7421e8debc1 100644 --- a/modules/mol/alg/doc/molalg.rst +++ b/modules/mol/alg/doc/molalg.rst @@ -1236,6 +1236,102 @@ Algorithms on Structures :class:`~ost.mol.EntityHandle` + +.. class:: FindMemParam + + Result object for the membrane detection algorithm described below + + .. attribute:: axis + + initial search axis from which optimal membrane slab could be found + + .. attribute:: tilt_axis + + Axis around which we tilt the membrane starting from the initial axis + + .. attribute:: tilt + + Angle to tilt around tilt axis + + .. attribute:: angle + + After the tilt operation we perform a rotation around the initial axis + with this angle to get the final membrane axis + + .. attribute:: membrane_axis + + The result of applying the tilt and rotation procedure described above. + The membrane_axis is orthogonal to the membrane plane and has unit length. + + .. attribute:: pos + + Real number that describes the membrane center point. To get the actual + position you can do: pos * membrane_axis + + .. attribute:: width + + Total width of the membrane in A + + .. attribute:: energy + + Pseudo energy of the implicit solvation model + + .. attribute:: membrane_representation + + Dummy atoms that represent the membrane. This entity is only valid if + the according flag has been set to True when calling FindMembrane. + + +.. method:: FindMembrane(ent, assign_membrane_representation=True, fast=False) + + Estimates the optimal membrane position of a protein by using an implicit + solvation model. The original algorithm and the used energy function are + described in: Lomize AL, Pogozheva ID, Lomize MA, Mosberg HI (2006) + Positioning of proteins in membranes: A computational approach. + + There are some modifications in this implementation and the procedure is + as follows: + + * Initial axis are constructed that build the starting point for initial + parameter grid searches. + + * For every axis, the protein is rotated so that the axis builds the z-axis + + * In order to exclude internal hydrophilic pores, only the outermost atoms + with respect the the z-axis enter an initial grid search + * The width and position of the membrane is optimized for different + combinations of tilt and rotation angles (further described in + :class:`FindMemParam`). The top 20 parametrizations + (only top parametrization if *fast* is True) are stored for further + processing. + + * The 20 best membrane parametrizations from the initial grid search + (only the best if *fast* is set to True) enter a final + minimization step using a Levenberg-Marquardt minimizer. + + + :param ent: Entity of a transmembrane protein, you'll get weird + results if this is not the case. The energy term + of the result is typically a good indicator whether + *ent* is an actual transmembrane protein. + :type ent: :class:`ost.mol.EntityHandle` / :class:`ost.mol.EntityView` + + :param assign_membrane_representation: Whether to construct a membrane + representation using dummy atoms + + :type assign_membrane_representation: :class:`bool` + + :param fast: If set to false, the 20 best results of the initial grid + search undergo a Levenberg-Marquardt minimization and + the parametrization with optimal minimized energy is + returned. + If set to yes, only the best result of the initial grid + search is selected and returned after + Levenberg-Marquardt minimization. + + :returns: The results object + :rtype: :class:`ost.mol.alg.FindMemParam` + .. _traj-analysis: Trajectory Analysis diff --git a/modules/mol/alg/pymod/CMakeLists.txt b/modules/mol/alg/pymod/CMakeLists.txt index 5519c6adf2741e213eba44cb82fbaf559f4c19e8..f4d1fdb71b68519b303d5df13dea34a2d529c230 100644 --- a/modules/mol/alg/pymod/CMakeLists.txt +++ b/modules/mol/alg/pymod/CMakeLists.txt @@ -9,6 +9,7 @@ set(OST_MOL_ALG_PYMOD_SOURCES export_sec_structure.cc export_non_standard.cc export_molck.cc + export_membrane.cc ) set(OST_MOL_ALG_PYMOD_MODULES diff --git a/modules/mol/alg/pymod/export_membrane.cc b/modules/mol/alg/pymod/export_membrane.cc new file mode 100644 index 0000000000000000000000000000000000000000..fe8ebe52de9e43c2a98499718f1951015794453f --- /dev/null +++ b/modules/mol/alg/pymod/export_membrane.cc @@ -0,0 +1,59 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2017 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + + +#include <boost/python.hpp> + +#include <ost/mol/alg/find_membrane.hh> + +using namespace boost::python; + +namespace{ + ost::mol::alg::FindMemParam FindMembraneView(ost::mol::EntityView& v, + bool assign_membrane_representation, + bool fast) { + return ost::mol::alg::FindMembrane(v, assign_membrane_representation, fast); + } + + ost::mol::alg::FindMemParam FindMembraneHandle(ost::mol::EntityHandle& h, + bool assign_membrane_representation, + bool fast) { + return ost::mol::alg::FindMembrane(h, assign_membrane_representation, fast); + } +} + +void export_find_membrane() { + + class_<ost::mol::alg::FindMemParam>("FindMemParam", no_init) + .def_readonly("tilt", &ost::mol::alg::FindMemParam::tilt) + .def_readonly("angle", &ost::mol::alg::FindMemParam::angle) + .def_readonly("width", &ost::mol::alg::FindMemParam::width) + .def_readonly("pos", &ost::mol::alg::FindMemParam::pos) + .def_readonly("energy", &ost::mol::alg::FindMemParam::energy) + .def_readonly("axis", &ost::mol::alg::FindMemParam::axis) + .def_readonly("tilt_axis", &ost::mol::alg::FindMemParam::tilt_axis) + .def_readonly("membrane_axis", &ost::mol::alg::FindMemParam::GetMembraneAxis) + .def_readonly("membrane_representation", &ost::mol::alg::FindMemParam::membrane_representation) + ; + + def("FindMembrane", FindMembraneView, (arg("ent"), arg("assign_membrane_representation")=true, + arg("fast")=false)); + def("FindMembrane", FindMembraneHandle, (arg("ent"), arg("assign_membrane_representation")=true, + arg("fast")=false)); +} diff --git a/modules/mol/alg/pymod/wrap_mol_alg.cc b/modules/mol/alg/pymod/wrap_mol_alg.cc index d8f8b46890e7154108f9f55173e7cce94d6adbc5..419dc042795b6d71ae27e7cc7591f1b6cd3e00a5 100644 --- a/modules/mol/alg/pymod/wrap_mol_alg.cc +++ b/modules/mol/alg/pymod/wrap_mol_alg.cc @@ -46,6 +46,7 @@ void export_Molck(); void export_contact_overlap(); void export_accessibility(); void export_sec_struct(); +void export_find_membrane(); #if OST_IMG_ENABLED void export_entity_to_density(); #endif @@ -350,6 +351,7 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) export_contact_overlap(); export_accessibility(); export_sec_struct(); + export_find_membrane(); #if OST_IMG_ENABLED export_entity_to_density(); #endif diff --git a/modules/mol/alg/src/CMakeLists.txt b/modules/mol/alg/src/CMakeLists.txt index ef2ae22ae10ccc9172285b7d79e96b298c7d6fec..42c571d79444add71b47a8da9916a106a2e1caf1 100644 --- a/modules/mol/alg/src/CMakeLists.txt +++ b/modules/mol/alg/src/CMakeLists.txt @@ -22,6 +22,7 @@ set(OST_MOL_ALG_HEADERS sec_struct.hh nonstandard.hh molck.hh + find_membrane.hh ) set(OST_MOL_ALG_SOURCES @@ -47,6 +48,7 @@ set(OST_MOL_ALG_SOURCES sec_struct.cc nonstandard.cc molck.cc + find_membrane.cc ) set(MOL_ALG_DEPS ost_mol ost_seq) diff --git a/modules/mol/alg/src/accessibility.cc b/modules/mol/alg/src/accessibility.cc index a1becdd7f83c3de0b939e22788d6bf4bb3f22926..276b53ba2256802e9c9afc52c2f4b49e308e796f 100644 --- a/modules/mol/alg/src/accessibility.cc +++ b/modules/mol/alg/src/accessibility.cc @@ -203,6 +203,11 @@ Real GetAtomAccessibilityNACCESS(Real x_pos, Real y_pos, Real z_pos, Real a = x_pos - x[i]; Real b = y_pos - y[i]; Real c = a*a + b*b; + + if(c == Real(0.0)) { + return 0.0; + } + dx[i] = a; dy[i] = b; dsqr[i] = c; diff --git a/modules/mol/alg/src/find_membrane.cc b/modules/mol/alg/src/find_membrane.cc new file mode 100644 index 0000000000000000000000000000000000000000..f48fb1149815f65e162e2f39a2c961dff494a0a1 --- /dev/null +++ b/modules/mol/alg/src/find_membrane.cc @@ -0,0 +1,1018 @@ +#include <ost/mol/alg/find_membrane.hh> +#include <ost/mol/alg/accessibility.hh> +#include <ost/geom/vecmat3_op.hh> +#include <ost/message.hh> + +#include <limits> +#include <exception> +#include <list> +#include <cmath> +#include <Eigen/Core> +#include <Eigen/Eigenvalues> + +namespace{ + +// Copyright notice of the Levenberg Marquardt minimizer we use... + +// Copyright (c) 2007, 2008, 2009 libmv authors. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. +// +// A simple implementation of levenberg marquardt. +// +// [1] K. Madsen, H. Nielsen, O. Tingleoff. Methods for Non-linear Least +// Squares Problems. +// http://www2.imm.dtu.dk/pubdb/views/edoc_download.php/3215/pdf/imm3215.pdf +// +// TODO(keir): Cite the Lourakis' dogleg paper. + +template<typename Function, + typename Jacobian, + typename Solver = Eigen::JacobiSVD< + Eigen::Matrix<typename Function::FMatrixType::RealScalar, + Function::XMatrixType::RowsAtCompileTime, + Function::XMatrixType::RowsAtCompileTime> > > +class LevenbergMarquardt { + public: + typedef typename Function::XMatrixType::RealScalar Scalar; + typedef typename Function::FMatrixType FVec; + typedef typename Function::XMatrixType Parameters; + typedef Eigen::Matrix<typename Function::FMatrixType::RealScalar, + Function::FMatrixType::RowsAtCompileTime, + Function::XMatrixType::RowsAtCompileTime> JMatrixType; + typedef Eigen::Matrix<typename JMatrixType::RealScalar, + JMatrixType::ColsAtCompileTime, + JMatrixType::ColsAtCompileTime> AMatrixType; + + // TODO(keir): Some of these knobs can be derived from each other and + // removed, instead of requiring the user to set them. + enum Status { + RUNNING, + GRADIENT_TOO_SMALL, // eps > max(J'*f(x)) + RELATIVE_STEP_SIZE_TOO_SMALL, // eps > ||dx|| / ||x|| + ERROR_TOO_SMALL, // eps > ||f(x)|| + HIT_MAX_ITERATIONS, + }; + + LevenbergMarquardt(const Function &f) + : f_(f), df_(f) {} + + struct SolverParameters { + SolverParameters() + : gradient_threshold(1e-20), + relative_step_threshold(1e-20), + error_threshold(1e-16), + initial_scale_factor(1e-3), + max_iterations(200) {} + Scalar gradient_threshold; // eps > max(J'*f(x)) + Scalar relative_step_threshold; // eps > ||dx|| / ||x|| + Scalar error_threshold; // eps > ||f(x)|| + Scalar initial_scale_factor; // Initial u for solving normal equations. + int max_iterations; // Maximum number of solver iterations. + }; + + struct Results { + Scalar error_magnitude; // ||f(x)|| + Scalar gradient_magnitude; // ||J'f(x)|| + int iterations; + Status status; + }; + + Status Update(const Parameters &x, const SolverParameters ¶ms, + JMatrixType *J, AMatrixType *A, FVec *error, Parameters *g) { + *J = df_(x); + *A = (*J).transpose() * (*J); + *error = -f_(x); + *g = (*J).transpose() * *error; + if (g->array().abs().maxCoeff() < params.gradient_threshold) { + return GRADIENT_TOO_SMALL; + } else if (error->norm() < params.error_threshold) { + return ERROR_TOO_SMALL; + } + return RUNNING; + } + + Results minimize(Parameters *x_and_min) { + SolverParameters params; + return minimize(params, x_and_min); + } + + Results minimize(const SolverParameters ¶ms, Parameters *x_and_min) { + Parameters &x = *x_and_min; + JMatrixType J; + AMatrixType A; + FVec error; + Parameters g; + + Results results; + results.status = Update(x, params, &J, &A, &error, &g); + + Scalar u = Scalar(params.initial_scale_factor*A.diagonal().maxCoeff()); + Scalar v = 2; + + Parameters dx, x_new; + int i; + for (i = 0; results.status == RUNNING && i < params.max_iterations; ++i) { +// LOG(INFO) << "iteration: " << i; +// LOG(INFO) << "||f(x)||: " << f_(x).norm(); +// LOG(INFO) << "max(g): " << g.array().abs().maxCoeff(); +// LOG(INFO) << "u: " << u; +// LOG(INFO) << "v: " << v; + AMatrixType A_augmented = A + u*AMatrixType::Identity(J.cols(), J.cols()); + Solver solver(A_augmented, Eigen::ComputeThinU | Eigen::ComputeThinV); + dx = solver.solve(g); + if (dx.norm() <= params.relative_step_threshold * x.norm()) { + results.status = RELATIVE_STEP_SIZE_TOO_SMALL; + break; + } + + x_new = x + dx; + // Rho is the ratio of the actual reduction in error to the reduction + // in error that would be obtained if the problem was linear. + // See [1] for details. + Scalar rho((error.squaredNorm() - f_(x_new).squaredNorm()) + / dx.dot(u*dx + g)); + if (rho > 0) { + // Accept the Gauss-Newton step because the linear model fits well. + x = x_new; + results.status = Update(x, params, &J, &A, &error, &g); + Scalar tmp = Scalar(2*rho-1); + u = u*std::max(Scalar(1/3.), 1 - (tmp*tmp*tmp)); + v = 2; + continue; + } + + // Reject the update because either the normal equations failed to solve + // or the local linear model was not good (rho < 0). Instead, increase + // to move closer to gradient descent. + u *= v; + v *= 2; + } + if (results.status == RUNNING) { + results.status = HIT_MAX_ITERATIONS; + } + results.error_magnitude = error.norm(); + results.gradient_magnitude = g.norm(); + results.iterations = i; + return results; + } + + private: + const Function &f_; + Jacobian df_; +}; + +geom::Mat3 RotationAroundAxis(geom::Vec3 axis, Real angle) { + + Real aa, ab, ac, ba, bb, bc, ca, cb, cc, one_m_cos, cos_ang, sin_ang; + + cos_ang = std::cos(angle); + sin_ang = std::sin(angle); + one_m_cos = 1-cos_ang; + + aa = cos_ang+axis[0]*axis[0]*one_m_cos; + ab = axis[0]*axis[1]*one_m_cos-axis[2]*sin_ang; + ac = axis[0]*axis[2]*one_m_cos+axis[1]*sin_ang; + + ba = axis[1]*axis[0]*one_m_cos+axis[2]*sin_ang; + bb = cos_ang+axis[1]*axis[1]*one_m_cos; + bc = axis[1]*axis[2]*one_m_cos-axis[0]*sin_ang; + + ca = axis[2]*axis[0]*one_m_cos-axis[1]*sin_ang; + cb = axis[2]*axis[1]*one_m_cos+axis[0]*sin_ang; + cc = cos_ang+axis[2]*axis[2]*one_m_cos; + + geom::Mat3 result(aa, ab, ac, ba, bb, bc, ca, cb, cc); + return result; +} + +geom::Vec3 RotateAroundAxis(geom::Vec3 point, geom::Vec3 axis, Real angle) { + + geom::Mat3 rot = RotationAroundAxis(axis, angle); + geom::Vec3 result = rot*point; + return result; +} + + +// Levenberg Marquardt specific objects for the membrane finding algorithm + +struct EnergyF { + EnergyF(const std::vector<geom::Vec3>& p, const std::vector<Real>& t_e, + Real l, Real o, const geom::Vec3& ax, const geom::Vec3& tilt_ax): + positions(p), transfer_energies(t_e), axis(ax), + tilt_axis(tilt_ax), lambda(l), offset(o) { } + + typedef Eigen::Matrix<Real, 4, 1> XMatrixType; + typedef Eigen::Matrix<Real, 1, 1> FMatrixType; + + Eigen::Matrix<Real,1,1> operator()(const Eigen::Matrix<Real, 4, 1>& x) const { + + FMatrixType result; + result(0,0) = 0.0; + geom::Vec3 tilted_axis = axis; + + tilted_axis = RotateAroundAxis(tilted_axis, tilt_axis, x(0, 0)); + tilted_axis = RotateAroundAxis(tilted_axis, axis, x(1, 0)); + + Real pos_on_axis; + Real distance_to_center; + Real half_width = Real(0.5) * x(2, 0); + Real exponent; + Real one_over_lambda = Real(1.0) / lambda; + + int n = transfer_energies.size(); + + for(int i = 0; i < n; ++i) { + pos_on_axis = geom::Dot(tilted_axis, positions[i]); + distance_to_center = std::abs(x(3, 0) - pos_on_axis); + exponent = (distance_to_center - half_width) * one_over_lambda; + result(0, 0) += (1.0 / (1.0 + std::exp(exponent))) * transfer_energies[i]; + } + + result(0, 0) += offset; + + return result; + } + + std::vector<geom::Vec3> positions; + std::vector<Real> transfer_energies; + geom::Vec3 axis; + geom::Vec3 tilt_axis; + Real lambda; + + // define an offset parameter... + // The levenberg-marquardt algorithm is designed to minimize an error + // function, aims to converge towards zero. The offset parameter gets added + // at the end of the energy calculation to get a positive result => + // forces algorithm to minimize + Real offset; +}; + + +struct EnergyDF { + + EnergyDF(const EnergyF& f): function(f),d_tilt(0.02),d_angle(0.02), + d_width(0.4),d_pos(0.4) { } + + Eigen::Matrix<Real,1,4> operator()(const Eigen::Matrix<Real, 4, 1>& x) const { + + Eigen::Matrix<Real,1,4> result; + Eigen::Matrix<Real, 4, 1> parameter1 = x; + Eigen::Matrix<Real, 4, 1> parameter2 = x; + + parameter1(0,0)+=d_tilt; + parameter2(0,0)-=d_tilt; + result(0,0) = (function(parameter1)(0, 0) - + function(parameter2)(0, 0)) / (2*d_tilt); + + parameter1=x; + parameter2=x; + parameter1(1,0)+=d_angle; + parameter2(1,0)-=d_angle; + result(0,1) = (function(parameter1)(0,0) - + function(parameter2)(0,0)) / (2*d_angle); + + parameter1=x; + parameter2=x; + parameter1(2,0)+=d_width; + parameter2(2,0)-=d_width; + result(0,2) = (function(parameter1)(0,0) - + function(parameter2)(0,0)) / (2*d_width); + + parameter1=x; + parameter2=x; + parameter1(3,0)+=d_pos; + parameter2(3,0)-=d_pos; + result(0,3) = (function(parameter1)(0,0) - + function(parameter2)(0,0)) / (2*d_pos); + return result; + } + + EnergyF function; + Real d_tilt; + Real d_angle; + Real d_width; + Real d_pos; +}; + + +void GetRanges(const std::vector<geom::Vec3>& atom_positions, + Real& min_x, Real& max_x, Real& min_y, Real& max_y, + Real& min_z, Real& max_z) { + + min_x = std::numeric_limits<Real>::max(); + max_x = -min_x; + + min_y = std::numeric_limits<Real>::max(); + max_y = -min_y; + + min_z = std::numeric_limits<Real>::max(); + max_z = -min_z; + + for(uint i = 0; i < atom_positions.size(); ++i) { + const geom::Vec3& pos = atom_positions[i]; + min_x = std::min(min_x, pos[0]); + max_x = std::max(max_x, pos[0]); + min_y = std::min(min_y, pos[1]); + max_y = std::max(max_y, pos[1]); + min_z = std::min(min_z, pos[2]); + max_z = std::max(max_z, pos[2]); + } +} + + +void FloodLevel(char* data, int x_start, int y_start, + int x_extent, int y_extent, + int orig_value, int dest_value) { + + //http://lodev.org/cgtutor/floodfill.html + if(orig_value != data[x_start*y_extent + y_start]) { + return; + } + + std::vector<std::pair<int,int> > queue; + queue.push_back(std::make_pair(x_start, y_start)); + + int y1,y,x; + bool spanLeft, spanRight; + std::pair<int,int> actual_position; + + while(!queue.empty()){ + + actual_position = queue.back(); + queue.pop_back(); + x = actual_position.first; + y = actual_position.second; + + y1 = y; + + while(y1 >= 0 && data[x*y_extent + y1] == orig_value) { + --y1; + } + + y1++; + spanLeft = spanRight = 0; + + while(y1 < y_extent && + data[x*y_extent + y1] == orig_value ) { + + data[x*y_extent + y1] = dest_value; + + if(!spanLeft && x > 0 && + data[(x-1)*y_extent + y1] == orig_value) { + queue.push_back(std::make_pair(x-1,y1)); + spanLeft = 1; + } + else if(spanLeft && x > 0 && + data[(x-1)*y_extent + y1] != orig_value) { + spanLeft = 0; + } + + if(!spanRight && x < x_extent - 1 && + data[(x+1)*y_extent + y1] == orig_value) { + queue.push_back(std::make_pair(x+1,y1)); + spanRight = 1; + } + else if(spanRight && x < x_extent - 1 && + data[(x+1)*y_extent + y1] != orig_value) { + spanRight = 0; + } + ++y1; + } + } +} + + +void GetExposedAtoms(const std::vector<geom::Vec3>& atom_positions, + const std::vector<Real>& transfer_energies, + std::vector<geom::Vec3>& exposed_atom_positions, + std::vector<Real>& exposed_transfer_energies) { + + // sum of approx. vdw radius of the present heavy atoms (1.8) + // plus 1.4 for water. + Real radius = 3.2; + Real one_over_radius = Real(1.0) / radius; + + // lets setup a grid in which we place the atoms + Real min_x, max_x, min_y, max_y, min_z, max_z; + GetRanges(atom_positions, min_x, max_x, min_y, max_y, min_z, max_z); + + // we guarantee that the thing is properly solvated in the x-y plane and add + // some space around this is also necessary to avoid overflow checks in + // different places + min_x -= Real(2.1) * radius; + min_y -= Real(2.1) * radius; + min_z -= Real(2.1) * radius; + max_x += Real(2.1) * radius; + max_y += Real(2.1) * radius; + max_z += Real(2.1) * radius; + + int num_xbins = std::ceil((max_x - min_x) * one_over_radius); + int num_ybins = std::ceil((max_y - min_y) * one_over_radius); + int num_zbins = std::ceil((max_z - min_z) * one_over_radius); + int num_bins = num_xbins * num_ybins * num_zbins; + char* grid = new char[num_bins]; + memset(grid, 0, num_bins); + + for(uint i = 0; i < atom_positions.size(); ++i) { + + const geom::Vec3& pos = atom_positions[i]; + int x_bin = (pos[0] - min_x) * one_over_radius; + int y_bin = (pos[1] - min_y) * one_over_radius; + int z_bin = (pos[2] - min_z) * one_over_radius; + + // we're really crude here and simply set all 27 cubes with central + // cube defined by x_bin, y_bin and z_bin to one + for(int z = z_bin - 1; z <= z_bin + 1; ++z) { + for(int x = x_bin - 1; x <= x_bin + 1; ++x) { + for(int y = y_bin - 1; y <= y_bin + 1; ++y) { + grid[z*num_xbins*num_ybins + x*num_ybins + y] = 1; + } + } + } + } + + // lets call flood fill for every layer along the z-axis from every + // corner in the x-y plane. + for(int z = 0; z < num_zbins; ++z) { + char* level = &grid[z*num_xbins*num_ybins]; + FloodLevel(level, 0, 0, num_xbins, num_ybins, 0, 2); + FloodLevel(level, 0, num_ybins - 1, num_xbins, num_ybins, 0, 2); + FloodLevel(level, num_xbins - 1, 0, num_xbins, num_ybins, 0, 2); + FloodLevel(level, num_xbins - 1, num_ybins - 1, num_xbins, num_ybins, 0, 2); + } + + // every cube in every layer that has currently value 1 that has a city-block + // distance below 3 to any cube with value 2 is considered to be in contact + // with the outer surface... lets set them to a value of three + for(int z = 0; z < num_zbins; ++z) { + char* level = &grid[z*num_xbins*num_ybins]; + for(int x = 0; x < num_xbins; ++x) { + for(int y = 0; y < num_ybins; ++y) { + if(level[x*num_ybins + y] == 1) { + int x_from = std::max(0, x - 3); + int x_to = std::min(num_xbins-1, x + 3); + int y_from = std::max(0, y - 3); + int y_to = std::min(num_ybins-1, y + 3); + bool exposed = false; + for(int i = x_from; i <= x_to && !exposed; ++i) { + for(int j = y_from; j <= y_to; ++j) { + if(level[i*num_ybins + j] == 2) { + level[x*num_ybins + y] = 3; + exposed = true; + break; + } + } + } + } + } + } + } + + // all positions that lie in a cube with value 3 are considered to be exposed... + exposed_atom_positions.clear(); + exposed_transfer_energies.clear(); + for(uint i = 0; i < atom_positions.size(); ++i) { + const geom::Vec3& pos = atom_positions[i]; + int x_bin = (pos[0] - min_x) * one_over_radius; + int y_bin = (pos[1] - min_y) * one_over_radius; + int z_bin = (pos[2] - min_z) * one_over_radius; + if(grid[z_bin*num_xbins*num_ybins + x_bin*num_ybins + y_bin] == 3) { + exposed_atom_positions.push_back(pos); + exposed_transfer_energies.push_back(transfer_energies[i]); + } + } + + // cleanup + delete [] grid; +} + + +void ScanAxis(const std::vector<geom::Vec3>& atom_positions, + const std::vector<Real>& transfer_energies, + const geom::Vec3& axis, + Real& best_width, Real& best_center, Real& best_energy) { + + int n_pos = atom_positions.size(); + + geom::Vec3 normalized_axis = geom::Normalize(axis); + std::vector<Real> pos_on_axis(n_pos); + + for(int i = 0; i < n_pos; ++i) { + pos_on_axis[i] = geom::Dot(atom_positions[i], normalized_axis); + } + + Real min_pos = pos_on_axis[0]; + Real max_pos = min_pos; + + for(int i = 1; i < n_pos; ++i) { + min_pos = std::min(min_pos, pos_on_axis[i]); + max_pos = std::max(max_pos, pos_on_axis[i]); + } + + min_pos = std::floor(min_pos); + max_pos = std::ceil(max_pos); + + int full_width = int(max_pos - min_pos) + 1; + + //energies representing the energy profile along the axis + std::vector<Real> mapped_energies(full_width, 0.0); + + for(int i = 0; i < n_pos; ++i) { + mapped_energies[int(pos_on_axis[i] - min_pos)] += transfer_energies[i]; + } + + best_width = 0; + best_center = 0; + best_energy = std::numeric_limits<Real>::max(); + + for(int window_width = 10; window_width <= 40; ++window_width) { + + if(window_width > full_width) { + break; + } + + Real energy=0.0; + for(int i = 0; i < window_width; ++i) { + energy += mapped_energies[i]; + } + + if(energy < best_energy) { + best_width = window_width; + best_center = min_pos + Real(0.5) * window_width + Real(0.5); + best_energy = energy; + } + + for(int pos = 1; pos < full_width - window_width; ++pos) { + energy -= mapped_energies[pos-1]; + energy += mapped_energies[pos+window_width-1]; + if(energy < best_energy){ + best_width = window_width; + best_center = min_pos + pos + Real(0.5) * window_width + Real(0.5); + best_energy = energy; + } + } + } +} + + +struct LMInput { + ost::mol::alg::FindMemParam mem_param; + geom::Transform initial_transform; + std::vector<geom::Vec3> exposed_atom_positions; + std::vector<Real> exposed_transfer_energies; +}; + + +void SampleZ(const std::vector<geom::Vec3>& atom_pos, + const std::vector<Real>& transfer_energies, + const geom::Transform& initial_transform, + int n_solutions, std::list<LMInput>& top_solutions) { + + + std::vector<geom::Vec3> transformed_atom_pos(atom_pos.size()); + for(uint at_idx = 0; at_idx < atom_pos.size(); ++at_idx) { + transformed_atom_pos[at_idx] = initial_transform.Apply(atom_pos[at_idx]); + } + + std::vector<geom::Vec3> exposed_atom_positions; + std::vector<Real> exposed_transfer_energies; + GetExposedAtoms(transformed_atom_pos, transfer_energies, + exposed_atom_positions, exposed_transfer_energies); + + std::vector<Real> tilt_angles; + std::vector<Real> rotation_angles; + for(int tilt_deg = 0; tilt_deg <= 45; tilt_deg += 5) { + if(tilt_deg == 0) { + tilt_angles.push_back(0.0); + rotation_angles.push_back(0.0); + } + else { + Real tilt_angle = Real(tilt_deg) / Real(180.) * Real(M_PI); + for(int angle_deg = 0; angle_deg < 360; angle_deg += 5) { + tilt_angles.push_back(tilt_angle); + rotation_angles.push_back(Real(angle_deg) / Real(180.) * Real(M_PI)); + } + } + } + + geom::Vec3 normalized_axis(0.0,0.0,1.0); + geom::Vec3 tilt_axis(1.0,0.0,0.0); + + for(uint i = 0; i < tilt_angles.size(); ++i) { + + Real tilt_angle = tilt_angles[i]; + Real rotation_angle = rotation_angles[i]; + + geom::Vec3 tilted_axis = RotateAroundAxis(normalized_axis, tilt_axis, + tilt_angle); + geom::Vec3 scan_axis = RotateAroundAxis(tilted_axis, normalized_axis, + rotation_angle); + + Real actual_width, actual_center, actual_energy; + ScanAxis(exposed_atom_positions, exposed_transfer_energies, scan_axis, + actual_width, actual_center, actual_energy); + + if(static_cast<int>(top_solutions.size()) >= n_solutions && + actual_energy > top_solutions.back().mem_param.energy) { + continue; + } + + LMInput lm_input; + lm_input.mem_param.axis = normalized_axis; + lm_input.mem_param.tilt_axis = tilt_axis; + lm_input.mem_param.tilt = tilt_angle; + lm_input.mem_param.angle = rotation_angle; + lm_input.mem_param.width = actual_width; + lm_input.mem_param.pos = actual_center; + lm_input.mem_param.energy = actual_energy; + lm_input.initial_transform = initial_transform; + lm_input.exposed_atom_positions = exposed_atom_positions; + lm_input.exposed_transfer_energies = exposed_transfer_energies; + + if(top_solutions.empty()) { + top_solutions.push_back(lm_input); + } + else { + bool added = false; + for(std::list<LMInput>::iterator sol_it = top_solutions.begin(); + sol_it != top_solutions.end(); ++sol_it) { + if(sol_it->mem_param.energy > lm_input.mem_param.energy) { + top_solutions.insert(sol_it, lm_input); + added = true; + break; + } + } + + if(!added) { + top_solutions.push_back(lm_input); + } + + while(static_cast<int>(top_solutions.size()) > n_solutions) { + top_solutions.pop_back(); + } + } + } +} + + +ost::mol::alg::FindMemParam GetFinalSolution(const std::list<LMInput>& top_solutions, + Real lambda) { + + Real best_energy = std::numeric_limits<Real>::max(); + std::list<LMInput>::const_iterator best_sol_it = top_solutions.begin(); + Eigen::Matrix<Real, 4, 1> lm_parameters; + Eigen::Matrix<Real, 4, 1> best_lm_parameters; + LevenbergMarquardt<EnergyF, EnergyDF>::Results lm_result; + + for(std::list<LMInput>::const_iterator sol_it = top_solutions.begin(); + sol_it != top_solutions.end(); ++sol_it) { + + Real offset = std::max(Real(20000.), std::abs(sol_it->mem_param.energy * 2)); + + EnergyF en_f(sol_it->exposed_atom_positions, + sol_it->exposed_transfer_energies, + lambda, offset, + sol_it->mem_param.axis, + sol_it->mem_param.tilt_axis); + + lm_parameters(0,0) = sol_it->mem_param.tilt; + lm_parameters(1,0) = sol_it->mem_param.angle; + lm_parameters(2,0) = sol_it->mem_param.width; + lm_parameters(3,0) = sol_it->mem_param.pos; + + LevenbergMarquardt<EnergyF,EnergyDF> lm(en_f); + lm_result = lm.minimize(&lm_parameters); + + Real minimized_energy = en_f(lm_parameters)(0, 0) - en_f.offset; + + if(minimized_energy < best_energy) { + best_energy = minimized_energy; + best_sol_it = sol_it; + best_lm_parameters = lm_parameters; + } + } + + ost::mol::alg::FindMemParam mem_param = best_sol_it->mem_param; + mem_param.energy = best_energy; + mem_param.tilt = best_lm_parameters(0,0); + mem_param.angle = best_lm_parameters(1,0); + mem_param.width = best_lm_parameters(2,0); + mem_param.pos = best_lm_parameters(3,0); + + // the solution is still relative to the initial transform that has + // been applied when calling the SampleZ funtion! + geom::Transform t = best_sol_it->initial_transform; + mem_param.tilt_axis = t.ApplyInverse(mem_param.tilt_axis); + mem_param.axis = t.ApplyInverse(mem_param.axis); + + return mem_param; +} + + +ost::mol::EntityHandle CreateMembraneRepresentation( + const std::vector<geom::Vec3>& atom_positions, + const ost::mol::alg::FindMemParam& param, + Real membrane_margin = 15, + Real delta = 2.0) { + + // let's first construct two planes defining the membrane + geom::Vec3 membrane_axis = param.GetMembraneAxis(); + geom::Vec3 one = param.pos * membrane_axis + + membrane_axis * param.width / 2; + geom::Vec3 two = param.pos * membrane_axis - + membrane_axis * param.width / 2; + geom::Plane plane_one = geom::Plane(one, membrane_axis); + geom::Plane plane_two = geom::Plane(two, membrane_axis); + + // let's find all positions that are somehow close to those planes + geom::Vec3List close_pos; + geom::Vec3List close_pos_one; + geom::Vec3List close_pos_two; + + for(uint i = 0; i < atom_positions.size(); ++i) { + + Real d1 = geom::Distance(plane_one, atom_positions[i]); + Real d2 = geom::Distance(plane_two, atom_positions[i]); + + if(d1 < Real(3.)) { + close_pos_one.push_back(atom_positions[i]); + } + + if(d2 < Real(3.)) { + close_pos_two.push_back(atom_positions[i]); + } + + if(d1 < Real(3.) || d2 < Real(3.)) { + close_pos.push_back(atom_positions[i]); + } + } + + // the geometric center of the close pos vector in combination with the + // membrane axis define the central "line" of the disks that will represent + // the membrane + geom::Vec3 center_pos = close_pos.GetCenter(); + geom::Line3 center_line = geom::Line3(center_pos, center_pos + membrane_axis); + + // the final radius of the "disks" is based on the maximal distance of any + // position in close_pos to the center_line plus the membrane_margin + + Real max_d_to_center_line = 0; + for(uint i = 0; i < close_pos.size(); ++i) { + Real d = geom::Distance(center_line, close_pos[i]); + max_d_to_center_line = std::max(max_d_to_center_line, d); + } + + Real disk_radius = max_d_to_center_line + membrane_margin; + int num_sampling_points = (Real(2.) * disk_radius) / delta; + + // reassign the top and bottom positions, that have been only arbitrary + // points on the membrane planes + one = geom::IntersectionPoint(center_line, plane_one); + two = geom::IntersectionPoint(center_line, plane_two); + + // find a pair of perpendicular vectors, that are on the plane + geom::Vec3 arbitrary_vec(1.0, 0.0, 0.0); + if(geom::Angle(membrane_axis, arbitrary_vec) < 0.1) { + // parallel is not cool in this case + arbitrary_vec = geom::Vec3(0.0, 1.0, 0.0); + } + geom::Vec3 plane_x = geom::Normalize(geom::Cross(membrane_axis, arbitrary_vec)); + geom::Vec3 plane_y = geom::Normalize(geom::Cross(membrane_axis, plane_x)); + + // final representing positions come in here + std::vector<geom::Vec3> final_pos; + + // do plane one + geom::Vec3 origin = one - delta * num_sampling_points * 0.5 * plane_x - + delta * num_sampling_points * 0.5 * plane_y; + + for(int i = 0; i < num_sampling_points; ++i) { + for(int j = 0; j < num_sampling_points; ++j) { + geom::Vec3 pos = origin + i*delta*plane_x + j*delta*plane_y; + if(geom::Distance(pos, one) < disk_radius) { + bool far_far_away = true; + // this is slow... + for(uint k = 0; k < close_pos_one.size(); ++k) { + if(geom::Length2(pos - close_pos_one[k]) < Real(16.)) { + far_far_away = false; + break; + } + } + if(far_far_away) { + final_pos.push_back(pos); + } + } + } + } + + // do plane two + origin = two - delta * num_sampling_points * 0.5 * plane_x - + delta * num_sampling_points * 0.5 * plane_y; + + for(int i = 0; i < num_sampling_points; ++i) { + for(int j = 0; j < num_sampling_points; ++j) { + geom::Vec3 pos = origin + i*delta*plane_x + j*delta*plane_y; + if(geom::Distance(pos, two) < disk_radius) { + bool far_far_away = true; + // this is slow... + for(uint k = 0; k < close_pos_two.size(); ++k) { + if(geom::Length2(pos - close_pos_two[k]) < Real(16.)) { + far_far_away = false; + break; + } + } + if(far_far_away) { + final_pos.push_back(pos); + } + } + } + } + + // create hacky entity that contains membrane representing positions and + // return + ost::mol::EntityHandle membrane_ent = ost::mol::CreateEntity(); + ost::mol::XCSEditor ed = membrane_ent.EditXCS(); + + ost::mol::ChainHandle chain = ed.InsertChain("M"); + ost::mol::ResidueHandle res = ed.AppendResidue(chain, "MEM"); + String atom_names = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + uint atom_name_idx = 0; + uint atom_name_secondary_idx = 0; + + for(uint i = 0; i < final_pos.size(); ++i) { + + if(atom_name_secondary_idx == atom_names.size()) { + ++atom_name_idx; + atom_name_secondary_idx = 0; + } + if(atom_name_idx == atom_names.size()) { + res = ed.AppendResidue(chain, "MEM"); + atom_name_idx = 0; + atom_name_secondary_idx = 0; + } + + String atom_name = "--"; + atom_name[0] = atom_names[atom_name_idx]; + atom_name[1] = atom_names[atom_name_secondary_idx]; + + ed.InsertAtom(res, atom_name, final_pos[i]); + ++atom_name_secondary_idx; + } + + return membrane_ent; +} + + +} // anon namespace + + +namespace ost{ namespace mol{ namespace alg{ + +geom::Vec3 FindMemParam::GetMembraneAxis() const { + + geom::Vec3 result = RotateAroundAxis(axis,tilt_axis,tilt); + result = RotateAroundAxis(result,axis,angle); + return result; +} + + +FindMemParam FindMembrane(ost::mol::EntityView& ent, + bool assign_membrane_representation, bool fast) { + + ost::mol::EntityView peptide_view = ent.Select("peptide=true and ele!=H"); + Accessibility(peptide_view); + + std::vector<geom::Vec3> atom_pos; + std::vector<Real> transfer_energies; + + atom_pos.reserve(peptide_view.GetAtomCount()); + transfer_energies.reserve(peptide_view.GetAtomCount()); + + ost::mol::AtomViewList atoms = peptide_view.GetAtomList(); + String stupid_string("S_N_O_C"); + + for(ost::mol::AtomViewList::iterator it = atoms.begin(); + it != atoms.end(); ++it) { + + if(!it->HasProp("asaAtom")) { + continue; + } + + String element = it->GetElement(); + if(stupid_string.find(element) == std::string::npos) { + continue; + } + + Real asa = it->GetFloatProp("asaAtom"); + atom_pos.push_back(it->GetPos()); + + if(element == "S") { + transfer_energies.push_back(asa * Real(10.0)); + } + else if(element == "N") { + transfer_energies.push_back(asa * Real(53.0)); + } + else if(element == "O") { + transfer_energies.push_back(asa * Real(57.0)); + } + else if(element == "C") { + // check whether we find a double bond to distinguish between + // hibridization states + bool assigned_energy = false; + ost::mol::BondHandleList bond_list = it->GetBondList(); + for(ost::mol::BondHandleList::iterator bond_it = bond_list.begin(); + bond_it != bond_list.end(); ++bond_it){ + unsigned char bond_order = bond_it->GetBondOrder(); + if(bond_order > '1'){ + transfer_energies.push_back(asa * Real(-19.0)); + assigned_energy = true; + break;; + } + } + if(!assigned_energy) { + transfer_energies.push_back(asa * Real(-22.6)); + } + } + } + + if(atom_pos.size() < 10) { + throw ost::Error("Cannot detect membrane with such a low number " + "of heavy atoms!"); + } + + // we always optimizer along the z-axis. + // We therefore have to transform the positions. We use a rotation + // around the z-axis with subsequent rotation around the x-axis for this task + std::vector<geom::Transform> transformations; + int n_euler_angles = 3; + int n_transformations = n_euler_angles * n_euler_angles * n_euler_angles; + std::vector<Real> euler_angles(n_euler_angles); + euler_angles[0] = 0.0; + euler_angles[1] = M_PI/3; + euler_angles[2] = 2*M_PI/3; + + for(int i = 0; i < n_euler_angles; ++i) { + for(int j = 0; j < n_euler_angles; ++j) { + for(int k = 0; k < n_euler_angles; ++k) { + geom::Mat3 rot_matrix = geom::EulerTransformation(euler_angles[i], + euler_angles[j], + euler_angles[k]); + geom::Transform transform; + transform.SetRot(rot_matrix); + transformations.push_back(transform); + } + } + } + + // lets use the generated transforms to search for initial solutions that can + // then be fed into a final minimization... + std::list<LMInput> top_solutions; + int n_initial_solutions = fast ? 1 : 20; + + for(int transformation_idx = 0; transformation_idx < n_transformations; + ++transformation_idx) { + SampleZ(atom_pos, transfer_energies, transformations[transformation_idx], + n_initial_solutions, top_solutions); + } + + // Perform the final minimization and return the best solution. + // please note, that the returned solution is transformed back in order + // to match the initial atom positions + FindMemParam final_solution = GetFinalSolution(top_solutions, 0.9); + + if(assign_membrane_representation) { + final_solution.membrane_representation = CreateMembraneRepresentation( + atom_pos, + final_solution); + } + + return final_solution; +} + + +FindMemParam FindMembrane(ost::mol::EntityHandle& ent, + bool assign_membrane_representation, + bool fast) { + + ost::mol::EntityView ent_view = ent.CreateFullView(); + return FindMembrane(ent_view, assign_membrane_representation, fast); +} + +}}} // ns diff --git a/modules/mol/alg/src/find_membrane.hh b/modules/mol/alg/src/find_membrane.hh new file mode 100644 index 0000000000000000000000000000000000000000..962ea68fc09680304c2da095f78900eff5483225 --- /dev/null +++ b/modules/mol/alg/src/find_membrane.hh @@ -0,0 +1,31 @@ +#include <ost/mol/mol.hh> + +#include <ost/geom/geom.hh> +#include <ost/io/binary_data_source.hh> +#include <ost/io/binary_data_sink.hh> + +namespace ost { namespace mol{ namespace alg{ + +struct FindMemParam{ + FindMemParam() { } + + geom::Vec3 GetMembraneAxis() const; + geom::Vec3 axis; + geom::Vec3 tilt_axis; + Real tilt; + Real angle; + Real width; + Real pos; + Real energy; + ost::mol::EntityHandle membrane_representation; +}; + +FindMemParam FindMembrane(ost::mol::EntityHandle& ent, + bool assign_membrane_representation, + bool fast); + +FindMemParam FindMembrane(ost::mol::EntityView& ent, + bool assign_membrane_representation, + bool fast); + +}}} // ns diff --git a/modules/mol/base/doc/entity.rst b/modules/mol/base/doc/entity.rst index 41945d534e48b52c1c9771bbeab4de236a70802e..3b0aa5c9fdcd51cdb563922b9fb384f8ec046736 100644 --- a/modules/mol/base/doc/entity.rst +++ b/modules/mol/base/doc/entity.rst @@ -118,7 +118,13 @@ The Handle Classes an enabled ``USE_NUMPY`` flag (see :ref:`here <cmake-flags>` for details). :type: :class:`numpy.array` - + + .. attribute:: valid + + Validity of handle. + + :type: bool + .. method:: GetName() :returns: Name associated to this entity. @@ -327,6 +333,10 @@ The Handle Classes :type radius: float :returns: :class:`AtomHandleList` (list of :class:`AtomHandle`) + + .. method:: IsValid() + + See :attr:`valid` .. class:: ChainHandle @@ -424,6 +434,12 @@ The Handle Classes :meth:`GetCenterOfAtoms`. :type: :class:`~ost.geom.Vec3` + + .. attribute:: valid + + Validity of handle. + + :type: bool .. method:: FindResidue(res_num) @@ -469,6 +485,10 @@ The Handle Classes See :attr:`description` + .. method:: IsValid() + + See :attr:`valid` + .. class:: ResidueHandle The residue is either used to represent complete molecules or building blocks @@ -625,8 +645,6 @@ The Handle Classes Central atom used for rendering traces. For peptides, this is usually the CA atom. For nucleotides, this is usually the P atom. - Also available as :meth:`GetCentralAtom` and :meth:`SetCentralAtom`. - :type: :class:`AtomHandle` .. attribute:: central_normal @@ -635,10 +653,30 @@ The Handle Classes nucleotides if all required atoms available. Otherwise, the (1,0,0) vector is returned. - Read-only. Also available as :meth:`GetCentralNormal`. - :type: :class:`~ost.geom.Vec3` + .. attribute:: valid + + Validity of handle. + + :type: bool + + .. attribute:: next + + Residue after this one in the same chain. Invalid handle returned if there + is no next residue. Residues are ordered as in :attr:`ChainHandle.residues` + independently on whether they are connected or not (see + :func:`InSequence` to check for connected residues). + + :type: :class:`ResidueHandle` + + .. attribute:: prev + + Residue before this one in the same chain. Otherwise same behaviour as + :attr:`next`. + + :type: :class:`ResidueHandle` + .. method:: FindAtom(atom_name) Get atom by atom name. See also :attr:`atoms` @@ -693,8 +731,12 @@ The Handle Classes .. method:: GetCentralNormal() - See :attr:`index` + See :attr:`central_normal` + + .. method:: IsValid() + See :attr:`valid` + .. class:: AtomHandle @@ -811,6 +853,12 @@ The Handle Classes :type: int + .. attribute:: valid + + Validity of handle. + + :type: bool + .. method:: FindBondToAtom(other_atom) Finds and returns the bond formed between this atom and `other_atom`. If no @@ -928,8 +976,7 @@ The Handle Classes .. method:: IsValid() See :attr:`valid` - - :rtype: bool + The View Classes -------------------------------------------------------------------------------- @@ -1010,6 +1057,12 @@ The View Classes :type: :class:`EntityHandle` + .. attribute:: valid + + Validity of view. + + :type: bool + .. method:: GetName() :returns: :func:`~EntityHandle.GetName` of entity :attr:`handle`. @@ -1271,6 +1324,10 @@ The View Classes See :attr:`atom_count` + .. method:: IsValid() + + See :attr:`valid` + .. class:: ChainView A view representation of a :class:`ChainHandle`. Mostly, the same @@ -1380,7 +1437,7 @@ The View Classes .. attribute:: valid - Validity of handle. + Validity of view. :type: bool @@ -1782,6 +1839,28 @@ Other Entity-Related Functions :returns: :class:`EntityHandle` +.. function:: InSequence(res, res_next) + + :return: True, if both *res* and *res_next* are :attr:`~ResidueHandle.valid`, + *res_next* is the residue following *res* (see + :attr:`ResidueHandle.next`), both residues are linking (i.e. + :attr:`~ChemClass.IsPeptideLinking` or + :attr:`~ChemClass.IsNucleotideLinking`) and are connected by an + appropriate bond. + :rtype: :class:`bool` + :param res: First residue to check. + :type res: :class:`ResidueHandle` + :param res_next: Second residue to check. + :type res_next: :class:`ResidueHandle` + +.. function:: BondExists(atom_a, atom_b) + + :return: True, if *atom_a* and *atom_b* are connected by a bond. + :rtype: :class:`bool` + :param atom_a: First atom to check. + :type atom_a: :class:`AtomHandle` + :param atom_b: Second atom to check. + :type atom_b: :class:`AtomHandle` Residue Numbering -------------------------------------------------------------------------------- diff --git a/modules/mol/base/doc/query.rst b/modules/mol/base/doc/query.rst index 54380f93f0f8b952934e31ef8aea865b1b89cbd8..6cf82e2c44df6b821db9bc9b6021c7903ff974b5 100644 --- a/modules/mol/base/doc/query.rst +++ b/modules/mol/base/doc/query.rst @@ -16,7 +16,7 @@ selections in a convenient way. Selections are carried out mainly by calling the .. code-block:: python - arginines=model.Select('rname=ARG') + arginines = model.Select('rname=ARG') A simple selection query (called a predicate) consists of a property (here, `rname`), a comparison operator (here, `=`) and an argument (here, `ARG`). The @@ -24,7 +24,7 @@ return value of a call to the :meth:`EntityHandle.Select` method is always an :class:`EntityView`. The :class:`EntityView` always contains a full hierarchy of elements, never standalone separated elements. In the above example, the :class:`EntityView` called `arginines` will contain all chains from the -structure called 'model' that have at least one arginine. In turn these chains +structure called `model` that have at least one arginine. In turn these chains will contain all residues that have been identified as arginines. The residues themselves will contain references to all of their atoms. Of course, queries are not limited to selecting residues based on their type, it is also possible to @@ -32,9 +32,9 @@ select atom by name: .. code-block:: python - c_betas=model.Select('aname=CB') + c_betas = model.Select('aname=CB') -As before, c`betas is an instance of an :class:`EntityView` object and contains +As before, `c_betas` is an instance of an :class:`EntityView` object and contains a full hierarchy. The main difference to the previous example is that the selected residues do not contain a list of all of their atoms but only the C-beta. These examples clarify why the name 'view' was chosen for this result of @@ -45,8 +45,17 @@ Both the selection statements that have been used so far take strings as their a .. code-block:: python - n_term=model.Select('rnum<=20') - + n_term = model.Select('rnum<=20') + +If you want to supply arguments with special characters they need to be put in +quotation marks. For instance, this is needed to select the chain named "_" or +for any chain name conatining ".", " " or ",". Hence, chain "_" can be selected +with: + +.. code-block:: python + + model.Select('cname="_"') + Combining predicates ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -60,25 +69,25 @@ Compact forms are available for several selection statements. For example, to se .. code-block:: python - arg_and_asn=model.Select('rname=ARG or rname=ASN') + arg_and_asn = model.Select('rname=ARG or rname=ASN') However, this is rather cumbersome as it requires the word `rname` to be typed twice. Since the only difference between the two parts of the selection is the argument that follows the word `rname`, the statement can also be written in an abbreviated form: .. code-block:: python - arg_and_asn=model.Select('rname=ARG,ASN') + arg_and_asn = model.Select('rname=ARG,ASN') Another example: to select residues with numbers in the range 130 to 200, one could use the following statement .. code-block:: python - center=model.Select('rnum>=130 and rnum<=200') + center = model.Select('rnum>=130 and rnum<=200') or alternatively use the much nicer syntax: .. code-block:: python - center=model.Select('rnum=130:200') + center = model.Select('rnum=130:200') This last statement is completely equivalent to the previous one. This syntax can be used when the selection statement requires a range of integer values @@ -91,25 +100,38 @@ The query .. code-block:: python - around_center=model.Select('5 <> {0,0,0}') + around_center = model.Select('5 <> {0,0,0}') -selects all chains, residues and atoms that lie with 5 Å to the origin of the reference system ({0,0,0}). The `<>` operator is called the ‘within’ operator. -Instead of a point, the within statements can also be used to return a view containing all chains, residues and atoms within a radius of another selection statement applied to the same entity. Square brackets are used to delimit the inner query statement. +selects all chains, residues and atoms that lie with 5 Å to the origin of the +reference system ({0,0,0}). The `<>` operator is called the 'within' operator. +Instead of a point, the within statements can also be used to return a view +containing all chains, residues and atoms within a radius of another selection +statement applied to the same entity. Square brackets are used to delimit the +inner query statement. .. code-block:: python - around_hem=model.Select('5 <> [rname=HEM]') + around_hem = model.Select('5 <> [rname=HEM]') model.Select('5 <> [rname=HEM and ele=C] and rname!=HEM') Bonds and Queries ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -When an :class:`EntityView` is generated by a selection, it includes by default only bonds for which both connected atoms satisfy the query statement. This can be changed by passing the parameters `EXCLUSIVE_BONDS` or `NO_BONDS` when calling the Select method. `EXCLUSIVE_BONDS` adds bonds to the :class:`EntityView` when at least one of the two atoms falls within the boundary of the selection. `NO_BONDS` suppresses the bond inclusion step completely. +When an :class:`EntityView` is generated by a selection, it includes by default +only bonds for which both connected atoms satisfy the query statement. This can +be changed by passing the parameters `EXCLUSIVE_BONDS` or `NO_BONDS` when +calling the Select method. `EXCLUSIVE_BONDS` adds bonds to the +:class:`EntityView` when at least one of the two atoms falls within the boundary +of the selection. `NO_BONDS` suppresses the bond inclusion step completely. Whole Residue Queries ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -If the parameter `MATCH_RESIDUES` is passed when the Select method is called, the resulting :class:`EntityView` will include whole residues for which at least one atom satisfies the query. This means that if at least one atom in the residue falls within the boundaries of the selection, all atoms of the residue will be included in the View. +If the parameter `MATCH_RESIDUES` is passed when the Select method is called, +the resulting :class:`EntityView` will include whole residues for which at least +one atom satisfies the query. This means that if at least one atom in the +residue falls within the boundaries of the selection, all atoms of the residue +will be included in the View. More Query Usage ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -141,9 +163,9 @@ they are defined. Therefore, all generic properties start with a `g`, followed b chain_handle.SetIntProp("testpropchain", 10) # query statements - sel_a=e.Select("gatestpropatom<=10.0") - sel_r=e.Select("grtestpropres=1.0") - sel_c=e.Select("gctestpropchain>5") + sel_a = e.Select("gatestpropatom<=10.0") + sel_r = e.Select("grtestpropres=1.0") + sel_c = e.Select("gctestpropchain>5") Since generic properties do not need to be defined for all parts of an entity (e.g. it could be specified for one single :class:`AtomHandle`), the query @@ -154,11 +176,11 @@ statement which can be done using a ':' character: # if one or more atoms have no generic properties - sel=e.Select("gatestprop=5") + sel = e.Select("gatestprop=5") # this will throw an error # you can specify a default value: - sel=e.Select("gatestprop:1.0=5") + sel = e.Select("gatestprop:1.0=5") # this will run through smoothly and use 1.0 as # the default value for all atoms that do not # have the generic property 'testprop' diff --git a/modules/mol/mm/src/simulation.cc b/modules/mol/mm/src/simulation.cc index 2a23ddb9a629a112b3301c35dd6756ad2ba08b92..dea985213bf993a1a49f0ec06c17f7b34d4e36fe 100644 --- a/modules/mol/mm/src/simulation.cc +++ b/modules/mol/mm/src/simulation.cc @@ -27,10 +27,10 @@ Simulation::Simulation(const ost::mol::EntityHandle& handle, //note, that ent_ will be "completed" inside this function! //(hydrogens and shit) - - ent_ = handle.Copy(); - TopologyPtr top = TopologyCreator::Create(ent_,settings); - this->Init(top, settings); + + ost::mol::EntityHandle ent = handle.Copy(); + TopologyPtr top = TopologyCreator::Create(ent,settings); + this->Init(top, ent, settings); } Simulation::Simulation(const TopologyPtr top, @@ -40,21 +40,16 @@ Simulation::Simulation(const TopologyPtr top, if(static_cast<uint>(handle.GetAtomCount()) != top->GetNumParticles()){ throw ost::Error("Number of atoms in entity must be consistent with number of particles in topology!"); } - ent_ = handle.Copy(); - this->Init(top, settings); + ost::mol::EntityHandle ent = handle.Copy(); + this->Init(top, ent, settings); } void Simulation::Save(const String& filename){ + std::ofstream stream(filename.c_str(), std::ios_base::binary); io::BinaryDataSink ds(stream); + ds << *top_; - geom::Vec3List positions = this->GetPositions(false,false); - for(geom::Vec3List::iterator i = positions.begin(); - i != positions.end(); ++i){ - ds & (*i)[0]; - ds & (*i)[1]; - ds & (*i)[2]; - } uint num_chains; uint num_residues; @@ -96,15 +91,11 @@ void Simulation::Save(const String& filename){ k != atom_list.end(); ++k){ atom_name = k->GetName(); atom_element = k->GetElement(); - geom::Vec3 pos = k->GetPos(); bfac = k->GetBFactor(); occ = k->GetOccupancy(); is_hetatm = k->IsHetAtom(); ds & atom_name; ds & atom_element; - ds & pos[0]; - ds & pos[1]; - ds & pos[2]; ds & bfac; ds & occ; ds & is_hetatm; @@ -112,19 +103,18 @@ void Simulation::Save(const String& filename){ } } - ost::mol::AtomHandleList atom_list = ent_.GetAtomList(); ost::mol::AtomHandleList bonded_atoms; std::map<long,int> atom_indices; int actual_index = 0; - for(ost::mol::AtomHandleList::const_iterator i = atom_list.begin(), e = atom_list.end(); - i != e; ++i){ + for(ost::mol::AtomHandleList::const_iterator i = atom_list_.begin(), + e = atom_list_.end(); i != e; ++i){ atom_indices[i->GetHashCode()] = actual_index; ++actual_index; } - for(ost::mol::AtomHandleList::iterator i = atom_list.begin(); - i != atom_list.end(); ++i){ + for(ost::mol::AtomHandleList::iterator i = atom_list_.begin(); + i != atom_list_.end(); ++i){ bonded_atoms = i->GetBondPartners(); num_bonded_atoms = bonded_atoms.size(); ds & num_bonded_atoms; @@ -139,6 +129,7 @@ void Simulation::Save(const String& filename){ } SimulationPtr Simulation::Load(const String& filename, SettingsPtr settings){ + if (!boost::filesystem::exists(filename)) { std::stringstream ss; ss << "Could not open simulation File '" @@ -146,71 +137,17 @@ SimulationPtr Simulation::Load(const String& filename, SettingsPtr settings){ throw ost::io::IOException(ss.str()); } - SimulationPtr sim_ptr(new Simulation); - std::ifstream stream(filename.c_str(), std::ios_base::binary); io::BinaryDataSource ds(stream); - TopologyPtr top_p(new Topology); - ds >> *top_p; - - sim_ptr->top_ = top_p; - - sim_ptr->system_ = SystemCreator::Create(sim_ptr->top_,settings, - sim_ptr->system_force_mapper_); - - sim_ptr->integrator_ = settings->integrator; - - OpenMM::Platform::loadPluginsFromDirectory (settings->openmm_plugin_directory); - OpenMM::Platform::loadPluginsFromDirectory (settings->custom_plugin_directory); - OpenMM::Platform* platform; - - switch(settings->platform){ - case Reference:{ - platform = &OpenMM::Platform::getPlatformByName("Reference"); - break; - } - case OpenCL:{ - platform = &OpenMM::Platform::getPlatformByName("OpenCL"); - break; - } - case CUDA:{ - platform = &OpenMM::Platform::getPlatformByName("CUDA"); - break; - } - case CPU:{ - platform = &OpenMM::Platform::getPlatformByName("CPU"); - break; - } - default:{ - throw ost::Error("Invalid Platform when Loading simulation!"); - } - } - - sim_ptr->context_ = ContextPtr(new OpenMM::Context(*(sim_ptr->system_), - *(sim_ptr->integrator_), - *platform)); - std::vector<OpenMM::Vec3> positions; - OpenMM::Vec3 open_mm_vec; - Real a,b,c; - for(int i = 0; i < sim_ptr->system_->getNumParticles(); ++i){ - ds & a; - ds & b; - ds & c; - open_mm_vec[0] = a; - open_mm_vec[1] = b; - open_mm_vec[2] = c; - positions.push_back(open_mm_vec); - } - sim_ptr->context_->setPositions(positions); + SimulationPtr sim_ptr(new Simulation); + TopologyPtr top(new Topology); + ds >> *top; uint num_chains; uint num_residues; uint num_atoms; uint num_bonded_atoms; - Real x_pos; - Real y_pos; - Real z_pos; Real bfac; Real occ; bool is_hetatm; @@ -239,29 +176,29 @@ SimulationPtr Simulation::Load(const String& filename, SettingsPtr settings){ for(uint k = 0; k < num_atoms; ++k){ ds & atom_name; ds & atom_element; - ds & x_pos; - ds & y_pos; - ds & z_pos; ds & bfac; ds & occ; ds & is_hetatm; - geom::Vec3 pos(x_pos,y_pos,z_pos); - ed.InsertAtom(res,atom_name,pos,atom_element,occ,bfac,is_hetatm); + ed.InsertAtom(res, atom_name, geom::Vec3(0.0,0.0,0.0), + atom_element, occ, bfac, is_hetatm); } } } - ost::mol::AtomHandleList atom_list = ent.GetAtomList(); - for(uint i = 0; i < atom_list.size(); ++i){ + + sim_ptr->Init(top, ent, settings); + + for(uint i = 0; i < sim_ptr->atom_list_.size(); ++i){ ds & num_bonded_atoms; for(uint j = 0; j < num_bonded_atoms; ++j){ ds & atom_index; - ed.Connect(atom_list[i],atom_list[atom_index]); + ed.Connect(sim_ptr->atom_list_[i], sim_ptr->atom_list_[atom_index]); } } - sim_ptr->ent_ = ent; - + // also loads the positions that have been set in the context + // they get mapped over to the attached entity sim_ptr->context_->loadCheckpoint(stream); + sim_ptr->UpdatePositions(); return sim_ptr; } @@ -297,10 +234,12 @@ void Simulation::EnsurePluginsLoaded(const String& plugin_path) { void Simulation::Init(const TopologyPtr top, + const ost::mol::EntityHandle& ent, const SettingsPtr settings){ - top_ = top; + ent_ = ent; + atom_list_ = ent_.GetAtomList(); if(!settings->integrator){ //user did not specify an integrator, so let's just use a standard integrator @@ -358,12 +297,11 @@ void Simulation::Init(const TopologyPtr top, context_ = ContextPtr(new OpenMM::Context(*system_,*integrator_,*platform,context_properties)); - ost::mol::AtomHandleList atom_list = ent_.GetAtomList(); std::vector<OpenMM::Vec3> positions; geom::Vec3 ost_vec; OpenMM::Vec3 open_mm_vec; - for(ost::mol::AtomHandleList::iterator i = atom_list.begin(); - i!=atom_list.end();++i){ + for(ost::mol::AtomHandleList::iterator i = atom_list_.begin(); + i!=atom_list_.end();++i){ ost_vec = i->GetPos(); open_mm_vec[0] = ost_vec[0]/10; open_mm_vec[1] = ost_vec[1]/10; @@ -446,14 +384,12 @@ void Simulation::UpdatePositions(bool enforce_periodic_box){ if(top_->GetNumParticles() != static_cast<uint>(ent_.GetAtomCount())){ throw ost::Error("Num particles in topology and num atoms in entity are not consistent!"); } - geom::Vec3List positions = this->GetPositions(enforce_periodic_box, true); + geom::Vec3List positions; + StateExtractor::ExtractPositions(context_, positions, enforce_periodic_box, + true); ost::mol::XCSEditor ed = ent_.EditXCS(ost::mol::BUFFERED_EDIT); - ost::mol::AtomHandleList atom_list = ent_.GetAtomList(); - ost::mol::AtomHandleList::iterator a = atom_list.begin(); - ost::mol::AtomHandleList::iterator ae = atom_list.end(); - geom::Vec3List::iterator v = positions.begin(); - for(; a != ae; ++a, ++v){ - ed.SetAtomPos(*a,*v); + for(uint i = 0; i < atom_list_.size(); ++i) { + ed.SetAtomPos(atom_list_[i], positions[i]); } } diff --git a/modules/mol/mm/src/simulation.hh b/modules/mol/mm/src/simulation.hh index 89c58b10eb96f8b2b0a5995986d2ea80a47a43b7..7cbf06d1aededa101f55658b14f50f42d73dd6ca 100644 --- a/modules/mol/mm/src/simulation.hh +++ b/modules/mol/mm/src/simulation.hh @@ -143,6 +143,7 @@ private: Simulation() { } //hidden constructor... void Init(const ost::mol::mm::TopologyPtr top, + const ost::mol::EntityHandle& ent, const SettingsPtr settings); int TimeToNextNotification(); @@ -160,6 +161,7 @@ private: std::vector<int> time_to_notify_; std::map<FuncType,uint> system_force_mapper_; ost::mol::EntityHandle ent_; + ost::mol::AtomHandleList atom_list_; }; }}} //ns