diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 0fba6dc01f416b4406cf9be28f015cc3172fb99a..27a7c9bc8406308409525862a7b1d906417d2c36 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -795,6 +795,10 @@ void MMCifReader::ParseCitation(const std::vector<StringRef>& columns) } } if (indices_[BOOK_TITLE] != -1) { + // this is only set in few PDB entries and RCSB overrides it with + // the journal_abbrev for their citations + // -> as of August 1, 2017, 5 entries known: 5b1j, 5b1k, 5fax, 5fbz, 5ffn + // -> all those have journal_abbrev set if ((columns[indices_[BOOK_TITLE]] != StringRef(".", 1)) && (columns[indices_[BOOK_TITLE]][0]!='?')) { cit.SetPublishedIn(columns[indices_[BOOK_TITLE]].str()); @@ -802,15 +806,19 @@ void MMCifReader::ParseCitation(const std::vector<StringRef>& columns) } if (indices_[JOURNAL_ABBREV] != -1) { if (columns[indices_[JOURNAL_ABBREV]] != StringRef(".", 1)) { - if (cit.GetPublishedIn().length() > 0) { - throw IOException(this->FormatDiagnostic(STAR_DIAG_WARNING, - "citation.book_title already occupies the 'published_in' field of this citation, cannot add " + - columns[indices_[JOURNAL_ABBREV]].str() + - ".", - this->GetCurrentLinenum())); - } else { - cit.SetPublishedIn(columns[indices_[JOURNAL_ABBREV]].str()); + const String journal_abbrev = columns[indices_[JOURNAL_ABBREV]].str(); + const String published_in = cit.GetPublishedIn(); + if (published_in.length() > 0 && published_in != journal_abbrev) { + LOG_WARNING(this->FormatDiagnostic(STAR_DIAG_WARNING, + "The 'published_in' field was " + "already set by citation.book_title " + "'" + published_in + "'! " + "This will be overwritten by " + "citation.journal_abbrev '" + + journal_abbrev + "'.", + this->GetCurrentLinenum())); } + cit.SetPublishedIn(journal_abbrev); } } if (indices_[JOURNAL_VOLUME] != -1) { diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc index fb87382d106a3742a4ca3e0eee908da57cd4f394..2cfac7fcef03a65e57d20a6658ad07f90b339ab6 100644 --- a/modules/io/tests/test_mmcif_reader.cc +++ b/modules/io/tests/test_mmcif_reader.cc @@ -587,28 +587,38 @@ BOOST_AUTO_TEST_CASE(mmcif_citation_tests) tmmcif_h.Add(StringRef("journal_abbrev", 14)); tmmcif_p.OnBeginLoop(tmmcif_h); + // ensure that we use book_title if no journal given (no RCSB use of this) columns.push_back(StringRef("Foo", 3)); columns.push_back(StringRef("1979", 4)); columns.push_back(StringRef("The Guide", 9)); columns.push_back(StringRef(".", 1)); BOOST_CHECK_NO_THROW(tmmcif_p.ParseCitation(columns)); + const MMCifInfoCitation& cit = tmmcif_p.GetInfo().GetCitations().back(); + BOOST_CHECK_EQUAL(cit.GetID(), String("Foo")); + BOOST_CHECK_EQUAL(cit.GetYear(), 1979); + BOOST_CHECK_EQUAL(cit.GetPublishedIn(), String("The Guide")); - BOOST_CHECK(tmmcif_p.GetInfo().GetCitations().back().GetYear() == 1979); - + // ensure that we override book_title if not properly given columns.pop_back(); columns.pop_back(); columns.push_back(StringRef(".", 1)); columns.push_back(StringRef("Hitch", 5)); BOOST_CHECK_NO_THROW(tmmcif_p.ParseCitation(columns)); + BOOST_CHECK_EQUAL(tmmcif_p.GetInfo().GetCitations().back().GetPublishedIn(), + String("Hitch")); + // ensure that we override book_title if journal given + // (def. behavior on RCSB webpage) columns.pop_back(); columns.pop_back(); columns.push_back(StringRef("The Guide", 9)); columns.push_back(StringRef("Hitch", 5)); - BOOST_CHECK_THROW(tmmcif_p.ParseCitation(columns), IOException); + BOOST_CHECK_NO_THROW(tmmcif_p.ParseCitation(columns)); + BOOST_CHECK_EQUAL(tmmcif_p.GetInfo().GetCitations().back().GetPublishedIn(), + String("Hitch")); BOOST_TEST_MESSAGE(" done."); } diff --git a/modules/mol/alg/doc/molalg.rst b/modules/mol/alg/doc/molalg.rst index a460828b3b82b976eb60afee71740bb7f4f6d2db..4ef9b3d516c592229e01879f051450a2087f5222 100644 --- a/modules/mol/alg/doc/molalg.rst +++ b/modules/mol/alg/doc/molalg.rst @@ -848,10 +848,12 @@ Algorithms on Structures .. method:: Accessibility(ent, probe_radius=1.4, include_hydrogens=False,\ include_hetatm=False, include_water=False,\ oligo_mode=False, selection="", asa_abs="asaAbs",\ - asa_rel="asaRel", asa_atom="asaAtom") + asa_rel="asaRel", asa_atom="asaAtom", \ + algorithm = NACCESS) - Calculates the accesssible surface area for ever atom in *ent* according to - Lee & Richards by "rolling" a probe with given *probe_radius* over the atoms. + Calculates the accesssible surface area for ever atom in *ent*. The algorithm + mimics the behaviour of the bindings available for the NACCESS and DSSP tools + and has been tested to reproduce the numbers accordingly. :param ent: Entity on which to calculate the surface :type ent: :class:`~ost.mol.EntityView` / @@ -907,18 +909,52 @@ Algorithms on Structures :param asa_rel: Float property name to assign the relative solvent accessibility to a residue. This is the absolute accessibility divided by the maximum solvent - accessibility of that particular residue. Only - residues of the 20 standarad amino acids can be handled. - Every non standard residue gets assigned a value of - -99.9. + accessibility of that particular residue. + This maximum solvent accessibility is dependent on + the chosen :class:`AccessibilityAlgorithm`. + Only residues of the 20 standarad amino acids + can be handled. + + * In case of the **NACCESS** algorithm you can expect + a value in range [0.0, 100.0] and a value of -99.9 + for non standard residues. + + * In case of the **DSSP** algorithm you can expect a + value in range [0.0, 1.0], no float property is assigned + in case of a non standard residue. + :type asa_rel: :class:`str` :param asa_atom: Float property name to assign the solvent accessible area to each atom. - :type asa_atom: :class:`str` + :type asa_atom: :class:`str` + + :param algorithm: Specifies the used algorithm for solvent accessibility + calculations + + :type algorithm: :class:`AccessibilityAlgorithm` + + :return: The summed solvent accessibilty of each atom in *ent*. +.. class:: AccessibilityAlgorithm + + The accessibility algorithm enum specifies the algorithm used by + the respective tools. Available are: + + NACCESS, DSSP + + + +.. method:: AssignSecStruct(ent) + + Assigns secondary structures to all residues based on hydrogen bond patterns + as described by DSSP. + + :param ent: Entity on which to assign secondary structures + :type ent: :class:`~ost.mol.EntityView`/ + :class:`~ost.mol.EntityHandle` .. _traj-analysis: diff --git a/modules/mol/alg/pymod/CMakeLists.txt b/modules/mol/alg/pymod/CMakeLists.txt index 4244ac562e50bad6d9e362ca5d5109e4f134144a..fa2942575835fd70e480563148784918bad58122 100644 --- a/modules/mol/alg/pymod/CMakeLists.txt +++ b/modules/mol/alg/pymod/CMakeLists.txt @@ -6,6 +6,7 @@ set(OST_MOL_ALG_PYMOD_SOURCES export_structure_analysis.cc export_contact_overlap.cc export_accessibility.cc + export_sec_structure.cc ) set(OST_MOL_ALG_PYMOD_MODULES diff --git a/modules/mol/alg/pymod/export_accessibility.cc b/modules/mol/alg/pymod/export_accessibility.cc index 94d2bcabc9e99c7e0b508e30f97169c935f67639..789c8f8da6c8ff2fabdba8ba13b1f7754c0f3347 100644 --- a/modules/mol/alg/pymod/export_accessibility.cc +++ b/modules/mol/alg/pymod/export_accessibility.cc @@ -35,11 +35,12 @@ Real WrapAccessibilityHandle(ost::mol::EntityHandle& ent, const String& selection, const String& asa_abs, const String& asa_rel, - const String& asa_atom) { + const String& asa_atom, + ost::mol::alg::AccessibilityAlgorithm algorithm) { return ost::mol::alg::Accessibility(ent, probe_radius, include_hydrogens, include_hetatm, include_water, oligo_mode, - selection, asa_abs, asa_rel, asa_atom); + selection, asa_abs, asa_rel, asa_atom, algorithm); } Real WrapAccessibilityView(ost::mol::EntityView& ent, @@ -50,11 +51,12 @@ Real WrapAccessibilityView(ost::mol::EntityView& ent, bool oligo_mode, const String& selection, const String& asa_abs, const String& asa_rel, - const String& asa_atom) { + const String& asa_atom, + ost::mol::alg::AccessibilityAlgorithm algorithm) { return ost::mol::alg::Accessibility(ent, probe_radius, include_hydrogens, include_hetatm, include_water, oligo_mode, - selection, asa_abs, asa_rel, asa_atom); + selection, asa_abs, asa_rel, asa_atom, algorithm); } } // ns @@ -64,6 +66,12 @@ Real WrapAccessibilityView(ost::mol::EntityView& ent, void export_accessibility() { + enum_<ost::mol::alg::AccessibilityAlgorithm>("AccessibilityAlgorithm") + .value("NACCESS", ost::mol::alg::NACCESS) + .value("DSSP", ost::mol::alg::DSSP) + .export_values() + ; + def("Accessibility", WrapAccessibilityHandle, (arg("ent"), arg("probe_radius")=1.4, arg("include_hydrogens")=false, @@ -73,7 +81,8 @@ void export_accessibility() { arg("selection")="", arg("asa_abs")="asaAbs", arg("asa_rel")="asaRel", - arg("asa_atom")="asaAtom")); + arg("asa_atom")="asaAtom", + arg("algorithm")=ost::mol::alg::NACCESS)); def("Accessibility", WrapAccessibilityView, (arg("ent"), arg("probe_radius")=1.4, @@ -84,6 +93,7 @@ void export_accessibility() { arg("selection")="", arg("asa_abs")="asaAbs", arg("asa_rel")="asaRel", - arg("asa_atom")="asaAtom")); + arg("asa_atom")="asaAtom", + arg("algorithm")=ost::mol::alg::NACCESS)); } diff --git a/modules/mol/alg/pymod/export_sec_structure.cc b/modules/mol/alg/pymod/export_sec_structure.cc new file mode 100644 index 0000000000000000000000000000000000000000..6191b6921b026c3d6d7d4096fa4bb60b9bc5822a --- /dev/null +++ b/modules/mol/alg/pymod/export_sec_structure.cc @@ -0,0 +1,43 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2017 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + + +#include <boost/python.hpp> + +#include <ost/mol/alg/sec_struct.hh> + +using namespace boost::python; + +namespace{ + +void AssignSecStructView(ost::mol::EntityView& view) { + ost::mol::alg::AssignSecStruct(view); +} + +void AssignSecStructHandle(ost::mol::EntityHandle& handle) { + ost::mol::alg::AssignSecStruct(handle); +} + +} // ns + +void export_sec_struct() { + def("AssignSecStruct", &AssignSecStructView, (arg("ent"))); + def("AssignSecStruct", &AssignSecStructHandle, (arg("ent"))); +} + diff --git a/modules/mol/alg/pymod/wrap_mol_alg.cc b/modules/mol/alg/pymod/wrap_mol_alg.cc index 00eb51cb35657fba1826eea45525f42fbfe7800a..9f6c7f850fcaa869be30b337b1c94c654e5d2927 100644 --- a/modules/mol/alg/pymod/wrap_mol_alg.cc +++ b/modules/mol/alg/pymod/wrap_mol_alg.cc @@ -41,6 +41,7 @@ void export_StructureAnalysis(); void export_Clash(); void export_contact_overlap(); void export_accessibility(); +void export_sec_struct(); #if OST_IMG_ENABLED void export_entity_to_density(); #endif @@ -111,6 +112,7 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) export_Clash(); export_contact_overlap(); export_accessibility(); + export_sec_struct(); #if OST_IMG_ENABLED export_entity_to_density(); #endif diff --git a/modules/mol/alg/src/CMakeLists.txt b/modules/mol/alg/src/CMakeLists.txt index bd74affe8c31c2e1f246a3a1a88f34002f1208da..d473247e656e17639bc903ce900cfafb42cea83c 100644 --- a/modules/mol/alg/src/CMakeLists.txt +++ b/modules/mol/alg/src/CMakeLists.txt @@ -19,6 +19,7 @@ set(OST_MOL_ALG_HEADERS domain_find.hh similarity_matrix.hh accessibility.hh + sec_struct.hh ) set(OST_MOL_ALG_SOURCES @@ -41,6 +42,7 @@ set(OST_MOL_ALG_SOURCES domain_find.cc similarity_matrix.cc accessibility.cc + sec_struct.cc ) set(MOL_ALG_DEPS ost_mol ost_seq) diff --git a/modules/mol/alg/src/accessibility.cc b/modules/mol/alg/src/accessibility.cc index bb6eb6ecd93329342b9443f93a0db35a4f6b2513..a1becdd7f83c3de0b939e22788d6bf4bb3f22926 100644 --- a/modules/mol/alg/src/accessibility.cc +++ b/modules/mol/alg/src/accessibility.cc @@ -180,14 +180,13 @@ struct CubeOccupationGrid{ bool* occupied_cubes; }; - -Real GetAtomAccessibility(Real x_pos, Real y_pos, Real z_pos, - Real radius, - const std::vector<Real>& x, - const std::vector<Real>& y, - const std::vector<Real>& z, - const std::vector<Real>& radii) { - +Real GetAtomAccessibilityNACCESS(Real x_pos, Real y_pos, Real z_pos, + Real radius, + const std::vector<Real>& x, + const std::vector<Real>& y, + const std::vector<Real>& z, + const std::vector<Real>& radii) { + int num_close_atoms = x.size(); if(num_close_atoms == 0) { @@ -329,9 +328,119 @@ Real GetAtomAccessibility(Real x_pos, Real y_pos, Real z_pos, } +struct DSSPHelper{ + + DSSPHelper(Real x, Real y, Real z, Real r, Real d): x_pos(x), y_pos(y), + z_pos(z), + squared_radius(r), + squared_distance(d) { } + + Real x_pos; + Real y_pos; + Real z_pos; + Real squared_radius; + Real squared_distance; + + bool operator<(const DSSPHelper& rhs) const { return squared_distance < + rhs.squared_distance; } +}; + + +Real GetAtomAccessibilityDSSP(Real x_pos, Real y_pos, Real z_pos, + Real radius, + const std::vector<Real>& x, + const std::vector<Real>& y, + const std::vector<Real>& z, + const std::vector<Real>& radii) { + + int num_close_atoms = x.size(); + + if(num_close_atoms == 0) { + // return area of full sphere + return Real(4.0) * M_PI * radius * radius; + } + + std::vector<DSSPHelper> helpers; + helpers.reserve(num_close_atoms); + + Real dx, dy, dz, dist_squared, summed_radii; + for(int i = 0; i < num_close_atoms; ++i) { + dx = x_pos - x[i]; + dy = y_pos - y[i]; + dz = z_pos - z[i]; + dist_squared = dx*dx + dy*dy + dz*dz; + summed_radii = radius + radii[i]; + summed_radii *= summed_radii; + if(dist_squared < summed_radii) { + helpers.push_back(DSSPHelper(x[i] - x_pos, y[i] - y_pos, z[i] - z_pos, + radii[i] * radii[i], dist_squared)); + } + } + + std::sort(helpers.begin(), helpers.end()); + + const ost::mol::alg::DSSPAccessibilityParam& param = + ost::mol::alg::DSSPAccessibilityParam::GetInstance(); + const std::vector<Real>& fibonacci_x = param.GetFibonacciX(); + const std::vector<Real>& fibonacci_y = param.GetFibonacciY(); + const std::vector<Real>& fibonacci_z = param.GetFibonacciZ(); + Real fibo_weight = param.GetPointWeight(); + Real summed_fibo_weight = 0.0; + Real fibo_x, fibo_y, fibo_z; + uint num_helpers = helpers.size(); + + for(uint i = 0; i < fibonacci_x.size(); ++i) { + fibo_x = fibonacci_x[i] * radius; + fibo_y = fibonacci_y[i] * radius; + fibo_z = fibonacci_z[i] * radius; + + bool covered = false; + + for(uint j = 0; j < num_helpers; ++j) { + dx = fibo_x - helpers[j].x_pos; + dy = fibo_y - helpers[j].y_pos; + dz = fibo_z - helpers[j].z_pos; + dist_squared = dx*dx + dy*dy + dz*dz; + if(dist_squared < helpers[j].squared_radius) { + covered = true; + break; + } + } + if(!covered) { + summed_fibo_weight += fibo_weight; + } + } + + return summed_fibo_weight * radius * radius; +} + +Real GetAtomAccessibility(Real x_pos, Real y_pos, Real z_pos, + Real radius, + const std::vector<Real>& x, + const std::vector<Real>& y, + const std::vector<Real>& z, + const std::vector<Real>& radii, + ost::mol::alg::AccessibilityAlgorithm algorithm) { + switch(algorithm){ + case ost::mol::alg::NACCESS: return GetAtomAccessibilityNACCESS(x_pos, y_pos, + z_pos, radius, + x, y, z, radii); + + case ost::mol::alg::DSSP: return GetAtomAccessibilityDSSP(x_pos, y_pos, + z_pos, radius, + x, y, z, radii); + + } + + // silence compiler warning... + return Real(0); +} + + void SolveCube(const CubeGrid& cube_grid, int cube_idx, const std::vector<Real>& x, const std::vector<Real>& y, const std::vector<Real>& z, const std::vector<Real>& radii, + ost::mol::alg::AccessibilityAlgorithm algorithm, std::vector<Real>& asa) { //prepare some stuff @@ -395,7 +504,8 @@ void SolveCube(const CubeGrid& cube_grid, int cube_idx, asa[atom_idx] = GetAtomAccessibility(current_x, current_y, current_z, current_radius, close_atom_x, close_atom_y, - close_atom_z, close_atom_radii); + close_atom_z, close_atom_radii, + algorithm); } } @@ -451,8 +561,9 @@ CubeGrid SetupCubeGrid(const std::vector<Real>& x, void CalculateASAOligo(std::vector<Real>& x, std::vector<Real>& y, std::vector<Real>& z, std::vector<Real>& radii, - std::vector<int> chain_indices, - Real probe_radius, std::vector<Real>& asa, + std::vector<int> chain_indices, Real probe_radius, + ost::mol::alg::AccessibilityAlgorithm algorithm, + std::vector<Real>& asa, std::vector<Real>& asa_single_chain) { int num_atoms = x.size(); @@ -472,7 +583,8 @@ void CalculateASAOligo(std::vector<Real>& x, std::vector<Real>& y, if(cube_grid.IsInitialized(cube_idx)) { // there is at least one atom! - SolveCube(cube_grid, cube_idx, x, y, z, full_radii, asa); + SolveCube(cube_grid, cube_idx, x, y, z, full_radii, + algorithm, asa); } } @@ -524,7 +636,7 @@ void CalculateASAOligo(std::vector<Real>& x, std::vector<Real>& y, // there is at least one atom in the cube AND the environment has // changes compared to the previous asa calculation SolveCube(single_chain_cube_grid, cube_idx, x, y, z, full_radii, - asa_single_chain); + algorithm, asa_single_chain); } } } @@ -536,6 +648,7 @@ void CalculateASA(const std::vector<Real>& x, const std::vector<Real>& z, const std::vector<Real>& radii, Real probe_radius, + ost::mol::alg::AccessibilityAlgorithm algorithm, std::vector<Real>& asa) { int num_atoms = x.size(); @@ -555,33 +668,51 @@ void CalculateASA(const std::vector<Real>& x, if(cube_grid.IsInitialized(cube_idx)) { // there is at least one atom! - SolveCube(cube_grid, cube_idx, x, y, z, full_radii, asa); + SolveCube(cube_grid, cube_idx, x, y, z, full_radii, + algorithm, asa); } } } void ASAParamFromAtomList(const ost::mol::AtomViewList& atom_list, + ost::mol::alg::AccessibilityAlgorithm algorithm, std::vector<Real>& x_pos, std::vector<Real>& y_pos, std::vector<Real>& z_pos, std::vector<Real>& radii) { - const ost::mol::alg::AccessibilityParam& param = - ost::mol::alg::AccessibilityParam::GetInstance(); - String rname, aname, ele; - - for(ost::mol::AtomViewList::const_iterator at_it = atom_list.begin(); - at_it != atom_list.end(); ++at_it) { - rname = at_it->GetResidue().GetName(); - aname = at_it->GetName(); - ele = at_it->GetElement(); - geom::Vec3 at_pos = at_it->GetPos(); - x_pos.push_back(at_pos[0]); - y_pos.push_back(at_pos[1]); - z_pos.push_back(at_pos[2]); - radii.push_back(param.GetVdWRadius(rname, aname, ele)); - } + if(algorithm == ost::mol::alg::NACCESS) { + const ost::mol::alg::NACCESSAccessibilityParam& param = + ost::mol::alg::NACCESSAccessibilityParam::GetInstance(); + String rname, aname, ele; + + for(ost::mol::AtomViewList::const_iterator at_it = atom_list.begin(); + at_it != atom_list.end(); ++at_it) { + rname = at_it->GetResidue().GetName(); + aname = at_it->GetName(); + ele = at_it->GetElement(); + geom::Vec3 at_pos = at_it->GetPos(); + x_pos.push_back(at_pos[0]); + y_pos.push_back(at_pos[1]); + z_pos.push_back(at_pos[2]); + radii.push_back(param.GetVdWRadius(rname, aname, ele)); + } + } else if(algorithm == ost::mol::alg::DSSP) { + const ost::mol::alg::DSSPAccessibilityParam& param = + ost::mol::alg::DSSPAccessibilityParam::GetInstance(); + String aname; + + for(ost::mol::AtomViewList::const_iterator at_it = atom_list.begin(); + at_it != atom_list.end(); ++at_it) { + aname = at_it->GetName(); + geom::Vec3 at_pos = at_it->GetPos(); + x_pos.push_back(at_pos[0]); + y_pos.push_back(at_pos[1]); + z_pos.push_back(at_pos[2]); + radii.push_back(param.GetVdWRadius(aname)); + } + } } void ChainIndicesFromAtomList(const ost::mol::AtomViewList& atom_list, @@ -608,7 +739,14 @@ Real SetAccessibilityProps(ost::mol::EntityView& ent, const std::vector<Real>& asa, const String& asa_atom, const String& asa_abs, - const String& asa_rel){ + const String& asa_rel, + ost::mol::alg::AccessibilityAlgorithm algorithm) { + + // first set accessibilities of all involved residues to 0.0 + for(uint idx = 0; idx < asa.size(); ++idx) { + ost::mol::ResidueView res = atom_list[idx].GetResidue(); + res.SetFloatProp(asa_abs, 0.0); + } // assign absolute accessibilities Real summed_asa = 0.0; @@ -616,27 +754,45 @@ Real SetAccessibilityProps(ost::mol::EntityView& ent, Real val = asa[idx]; atom_list[idx].SetFloatProp(asa_atom, val); ost::mol::ResidueView res = atom_list[idx].GetResidue(); - Real current_asa = res.GetFloatProp(asa_abs, 0.0); + Real current_asa = res.GetFloatProp(asa_abs); res.SetFloatProp(asa_abs, current_asa + val); summed_asa += val; } - // go over residue and assign relative accessibilities String rname; ost::mol::ResidueViewList res_list = ent.GetResidueList(); - for(uint idx = 0; idx < res_list.size(); ++idx) { - rname = res_list[idx].GetName(); - Real tot_acc = - ost::mol::alg::AccessibilityParam::GetInstance().GetResidueAccessibility(rname); - if(tot_acc == Real(-1.0)) { - // no accessibility found... - // let's mimic NACCESS behaviour - res_list[idx].SetFloatProp(asa_rel, Real(-99.9)); + if(algorithm == ost::mol::alg::NACCESS) { + // go over residue and assign relative accessibilities + for(uint idx = 0; idx < res_list.size(); ++idx) { + rname = res_list[idx].GetName(); + Real tot_acc = + ost::mol::alg::NACCESSAccessibilityParam::GetInstance().GetResidueAccessibility(rname); + if(tot_acc == Real(-1.0)) { + // no accessibility found... + // let's mimic NACCESS behaviour + res_list[idx].SetFloatProp(asa_rel, Real(-99.9)); + } + else { + // the fraction gets multiplied by 100 (Thats how NACCESS does it...) + Real val = res_list[idx].GetFloatProp(asa_abs, 0.0) / tot_acc * 100.0; + res_list[idx].SetFloatProp(asa_rel, val); + } } - else { - // the fraction gets multiplied by 100 (Thats how NACCESS does it...) - Real val = res_list[idx].GetFloatProp(asa_abs, 0.0) / tot_acc * 100.0; - res_list[idx].SetFloatProp(asa_rel, val); + } else if(algorithm == ost::mol::alg::DSSP) { + // go over residue and assign relative accessibilities + for(uint idx = 0; idx < res_list.size(); ++idx) { + rname = res_list[idx].GetName(); + Real tot_acc = + ost::mol::alg::DSSPAccessibilityParam::GetInstance().GetResidueAccessibility(rname); + if(tot_acc == Real(-1.0)) { + // no accessibility found... + // let's mimic DSSP binding behaviour + // => do nothing + } + else { + Real val = res_list[idx].GetFloatProp(asa_abs, 0.0) / tot_acc; + res_list[idx].SetFloatProp(asa_rel, val); + } } } @@ -650,7 +806,7 @@ Real SetAccessibilityProps(ost::mol::EntityView& ent, namespace ost { namespace mol { namespace alg { -AccessibilityParam::AccessibilityParam() { +NACCESSAccessibilityParam::NACCESSAccessibilityParam() { std::map<String, Real> ala_map; std::map<String, Real> arg_map; @@ -1095,13 +1251,6 @@ AccessibilityParam::AccessibilityParam() { hoh_map["O"] = 1.40; - - - - - - - vdw_radii_["ALA"] = ala_map; vdw_radii_["ARG"] = arg_map; vdw_radii_["ASP"] = asp_map; @@ -1158,7 +1307,7 @@ AccessibilityParam::AccessibilityParam() { } -Real AccessibilityParam::GuessRadius(const String& ele) const{ +Real NACCESSAccessibilityParam::GuessRadius(const String& ele) const{ if(ele == "C") return 1.80; if(ele == "N") return 1.60; if(ele == "S") return 1.85; @@ -1175,8 +1324,9 @@ Real AccessibilityParam::GuessRadius(const String& ele) const{ } -Real AccessibilityParam::GetVdWRadius(const String& rname, const String& aname, - const String& ele) const{ +Real NACCESSAccessibilityParam::GetVdWRadius(const String& rname, + const String& aname, + const String& ele) const{ std::map<String, std::map<String, Real> >::const_iterator res_map_it = vdw_radii_.find(rname); if(res_map_it != vdw_radii_.end()) { @@ -1188,12 +1338,70 @@ Real AccessibilityParam::GetVdWRadius(const String& rname, const String& aname, } -Real AccessibilityParam::GetResidueAccessibility(const String& rname) const{ +Real NACCESSAccessibilityParam::GetResidueAccessibility(const String& rname) const{ std::map<String, Real>::const_iterator it = accessibilities_.find(rname); if(it != accessibilities_.end()) return it->second; else return Real(-1.0); } +DSSPAccessibilityParam::DSSPAccessibilityParam() { + + // these values are from the dssp binding. nobody knows + // the source... + accessibilities_["ALA"] = Real(118.0); + accessibilities_["ARG"] = Real(317.0); + accessibilities_["ASN"] = Real(238.0); + accessibilities_["ASP"] = Real(243.0); + accessibilities_["CYS"] = Real(183.0); + accessibilities_["GLN"] = Real(262.0); + accessibilities_["GLU"] = Real(286.0); + accessibilities_["GLY"] = Real(154.0); + accessibilities_["HIS"] = Real(258.0); + accessibilities_["ILE"] = Real(228.0); + accessibilities_["LEU"] = Real(243.0); + accessibilities_["LYS"] = Real(278.0); + accessibilities_["MET"] = Real(260.0); + accessibilities_["PHE"] = Real(271.0); + accessibilities_["PRO"] = Real(204.0); + accessibilities_["SER"] = Real(234.0); + accessibilities_["THR"] = Real(206.0); + accessibilities_["TRP"] = Real(300.0); + accessibilities_["TYR"] = Real(303.0); + accessibilities_["VAL"] = Real(216.0); + + + // construct evenly distributed points on a sphere. + // same as in DSSP... + fibonacci_x_.reserve(401); + fibonacci_y_.reserve(401); + fibonacci_z_.reserve(401); + + Real golden_ratio = (Real(1.0) + std::sqrt(5.0)) * Real(0.5); + point_weight_ = (Real(4.0) * Real(M_PI)) / Real(401.0); + + for (int i = -200; i <= 200; ++i) { + Real lat = std::asin((Real(2.0) * Real(i)) / Real(401.0)); + Real lon = std::fmod(Real(i), golden_ratio) * + Real(2.0) * Real(M_PI) / golden_ratio; + fibonacci_x_.push_back(std::sin(lon) * std::cos(lat)); + fibonacci_y_.push_back(std::cos(lon) * std::cos(lat)); + fibonacci_z_.push_back(std::sin(lat)); + } +} + +Real DSSPAccessibilityParam::GetVdWRadius(const String& aname) const{ + if(aname == "N") return Real(1.65); + if(aname == "CA") return Real(1.87); + if(aname == "C") return Real(1.76); + if(aname == "O") return Real(1.4); + return Real(1.8); +} + +Real DSSPAccessibilityParam::GetResidueAccessibility(const String& rname) const{ + std::map<String, Real>::const_iterator it = accessibilities_.find(rname); + if(it != accessibilities_.end()) return it->second; + else return Real(-1.0); +} Real Accessibility(ost::mol::EntityView& ent, Real probe_radius, bool include_hydrogens, @@ -1202,7 +1410,8 @@ Real Accessibility(ost::mol::EntityView& ent, const String& selection, const String& asa_abs, const String& asa_rel, - const String& asa_atom) { + const String& asa_atom, + AccessibilityAlgorithm algorithm) { String internal_selection = selection; @@ -1254,7 +1463,7 @@ Real Accessibility(ost::mol::EntityView& ent, // extract data from ent ost::mol::AtomViewList atom_list = selected_ent.GetAtomList(); - ASAParamFromAtomList(atom_list, x_pos, y_pos, z_pos, radii); + ASAParamFromAtomList(atom_list, algorithm, x_pos, y_pos, z_pos, radii); if(atom_list.size() == 0) return 0.0; @@ -1269,10 +1478,11 @@ Real Accessibility(ost::mol::EntityView& ent, std::vector<Real> asa; std::vector<Real> asa_single_chain; CalculateASAOligo(x_pos, y_pos, z_pos, radii, chain_indices, - probe_radius, asa, asa_single_chain); + probe_radius, algorithm, asa, asa_single_chain); Real summed_asa = SetAccessibilityProps(selected_ent, atom_list, asa, - asa_atom, asa_abs, asa_rel); + asa_atom, asa_abs, asa_rel, + algorithm); // in case of the single chain asa, the property names are // moodified!! @@ -1283,7 +1493,7 @@ Real Accessibility(ost::mol::EntityView& ent, SetAccessibilityProps(selected_ent, atom_list, asa_single_chain, single_chain_asa_atom, single_chain_asa_abs, - single_chain_asa_rel); + single_chain_asa_rel, algorithm); return summed_asa; } @@ -1291,14 +1501,14 @@ Real Accessibility(ost::mol::EntityView& ent, // do it! do it! do it! std::vector<Real> asa; - CalculateASA(x_pos, y_pos, z_pos, radii, probe_radius, asa); + CalculateASA(x_pos, y_pos, z_pos, radii, probe_radius, algorithm, asa); Real summed_asa = SetAccessibilityProps(selected_ent, atom_list, asa, - asa_atom, asa_abs, asa_rel); + asa_atom, asa_abs, asa_rel, + algorithm); return summed_asa; } - } @@ -1309,12 +1519,13 @@ Real Accessibility(ost::mol::EntityHandle& ent, const String& selection, const String& asa_abs, const String& asa_rel, - const String& asa_atom) { + const String& asa_atom, + AccessibilityAlgorithm algorithm) { ost::mol::EntityView entity_view = ent.CreateFullView(); return Accessibility(entity_view, probe_radius, include_hydrogens, include_hetatm, include_water, oligo_mode, - selection, asa_abs, asa_rel, asa_atom); + selection, asa_abs, asa_rel, asa_atom, algorithm); } }}} //ns diff --git a/modules/mol/alg/src/accessibility.hh b/modules/mol/alg/src/accessibility.hh index cd0a57638fa14eeb550d170f1d47c204e7c6c1d2..2ed83cc53493ed39b16ddc6067b5c38c1b75730b 100644 --- a/modules/mol/alg/src/accessibility.hh +++ b/modules/mol/alg/src/accessibility.hh @@ -25,19 +25,19 @@ namespace ost { namespace mol { namespace alg { +typedef enum { +NACCESS, DSSP +} AccessibilityAlgorithm; -class AccessibilityParam { +class NACCESSAccessibilityParam { public: // Singleton access to one constant instance - static const AccessibilityParam& GetInstance() { - static AccessibilityParam instance; + static const NACCESSAccessibilityParam& GetInstance() { + static NACCESSAccessibilityParam instance; return instance; } // Data access - - Real GuessRadius(const String& ele) const; - Real GetVdWRadius(const String& rname, const String& aname, const String& ele) const; @@ -46,12 +46,46 @@ public: private: // construction only inside here - AccessibilityParam(); + NACCESSAccessibilityParam(); + + Real GuessRadius(const String& ele) const; + std::map<String, std::map<String, Real> > vdw_radii_; std::map<String, Real> accessibilities_; }; +class DSSPAccessibilityParam { + + public: + // Singleton access to one constant instance + static const DSSPAccessibilityParam& GetInstance() { + static DSSPAccessibilityParam instance; + return instance; + } + // Data access + Real GetVdWRadius(const String& aname) const; + + Real GetResidueAccessibility(const String& rname) const; + + const std::vector<Real>& GetFibonacciX() const { return fibonacci_x_; } + const std::vector<Real>& GetFibonacciY() const { return fibonacci_y_; } + const std::vector<Real>& GetFibonacciZ() const { return fibonacci_z_; } + Real GetPointWeight() const { return point_weight_; } + + private: + + // construction only inside here + DSSPAccessibilityParam(); + + std::map<String, std::map<String, Real> > vdw_radii_; + std::map<String, Real> accessibilities_; + std::vector<Real> fibonacci_x_; + std::vector<Real> fibonacci_y_; + std::vector<Real> fibonacci_z_; + Real point_weight_; + }; + Real Accessibility(ost::mol::EntityView& ent, Real probe_radius = 1.4, bool include_hydrogens = false, @@ -61,7 +95,8 @@ Real Accessibility(ost::mol::EntityView& ent, const String& selection = "", const String& asa_abs = "asaAbs", const String& asa_rel = "asaRel", - const String& asa_atom = "asaAtom"); + const String& asa_atom = "asaAtom", + AccessibilityAlgorithm algorithm = NACCESS); Real Accessibility(ost::mol::EntityHandle& ent, @@ -73,7 +108,8 @@ Real Accessibility(ost::mol::EntityHandle& ent, const String& selection = "", const String& asa_abs = "asaAbs", const String& asa_rel = "asaRel", - const String& asa_atom = "asaAtom"); + const String& asa_atom = "asaAtom", + AccessibilityAlgorithm algorithm = NACCESS); }}} //ns diff --git a/modules/mol/alg/src/sec_struct.cc b/modules/mol/alg/src/sec_struct.cc new file mode 100644 index 0000000000000000000000000000000000000000..0c1fd6465f1cdcb22b887b985587a4096eadbc5d --- /dev/null +++ b/modules/mol/alg/src/sec_struct.cc @@ -0,0 +1,665 @@ +#include <ost/mol/alg/sec_struct.hh> + +#include <ost/mol/sec_structure.hh> + +#include <limits> +#include <iostream> + +namespace { + +struct ExtendedBridge { + + ExtendedBridge() { + // should be enough for most strands + strand_one.reserve(16); + strand_two.reserve(16); + } + + bool parallel; // antiparallel otherwise + std::vector<int> strand_one; + std::vector<int> strand_two; +}; + +bool Connected(const std::vector<int>& connected_to_next, + int start_idx, int length) { + if(start_idx < 0) return false; + if(start_idx + length > static_cast<int>(connected_to_next.size())) { + return false; + } + for(int i = 0; i < (length - 1); ++i) { + if(connected_to_next[start_idx + i] == 0) return false; + } + return true; +} + +bool AddIfValid(int idx_one, int idx_two, int start_idx, int end_idx, + const std::vector<int>& connected_to_next, + bool parallel, + std::vector<ExtendedBridge>& extended_bridges) { + + // this function assumes idx_one to be sorted in an increasing manner + // when called multiple times! + + // idx_one is expected to be in range [start_idx, end_idx] + // if idx_two is in the same range we can skip cases where + // idx_one > idx_two, since the interactions are symmetric and we would + // see every bridge twice. If idx_two is outside this range + // (e.g. when we only calculate the secondary structure for a stretch + // of a larger structure) we have to add it anyway. + + if(idx_two >= start_idx && idx_two <= end_idx && idx_two < idx_one) { + return false; + } + + // the abs in the following check is still needed for the case when + // idx_two is outside [start_idx, end_idx] + if(std::abs(idx_two - idx_one) >= 3 && + Connected(connected_to_next, idx_one - 1, 3) && + Connected(connected_to_next, idx_two - 1, 3)) { + + // it is a bridge! Let's see whether we can extend + // an existing bridge... + bool added = false; + + for(uint i = 0; i < extended_bridges.size(); ++i) { + + if(extended_bridges[i].parallel == parallel && + idx_one == extended_bridges[i].strand_one.back() + 1) { + + if(parallel && idx_two == extended_bridges[i].strand_two.back() + 1) { + extended_bridges[i].strand_one.push_back(idx_one); + extended_bridges[i].strand_two.push_back(idx_two); + added = true; + break; + } + + if(!parallel && idx_two == extended_bridges[i].strand_two.back() - 1) { + extended_bridges[i].strand_one.push_back(idx_one); + extended_bridges[i].strand_two.push_back(idx_two); + added = true; + break; + } + } + } + + if(!added) { + // haven't seen it... Lets add a new one! + extended_bridges.push_back(ExtendedBridge()); + extended_bridges.back().parallel = parallel; + extended_bridges.back().strand_one.push_back(idx_one); + extended_bridges.back().strand_two.push_back(idx_two); + } + return true; + } + return false; +} + +void ResolveBulges(const std::vector<int>& connected_to_next, + std::vector<ExtendedBridge>& extended_bridges) { + + for(uint i = 0; i < extended_bridges.size(); ++i) { + for(uint j = i + 1; j < extended_bridges.size(); ++j) { + + if(extended_bridges[i].parallel == extended_bridges[j].parallel) { + + int i_strand_one_begin = extended_bridges[i].strand_one[0]; + int i_strand_one_end = extended_bridges[i].strand_one.back(); + int i_strand_two_begin = extended_bridges[i].strand_two[0]; + int i_strand_two_end = extended_bridges[i].strand_two.back(); + int j_strand_one_begin = extended_bridges[j].strand_one[0]; + int j_strand_one_end = extended_bridges[j].strand_one.back(); + int j_strand_two_begin = extended_bridges[j].strand_two[0]; + int j_strand_two_end = extended_bridges[j].strand_two.back(); + + // first strand must not overlap + if(i_strand_one_end >= j_strand_one_begin && + i_strand_one_begin <= j_strand_one_end) { + continue; + } + + // both strands must be connected, the first strand is the + // same for parallel and anti-parallel bridges + int begin = std::min(i_strand_one_begin, j_strand_one_begin); + int end = std::max(i_strand_one_end, j_strand_one_end); + if(!Connected(connected_to_next, begin, end - begin)) continue; + + // the distances are dependent on parallel / anti-parallel + int d_one = -1; + int d_two = -1; + + if(extended_bridges[i].parallel) { + // check connectivity for second strand + begin = std::min(i_strand_two_begin, j_strand_two_begin); + end = std::max(i_strand_two_end, j_strand_two_end); + if(!Connected(connected_to_next, begin, end - begin)) continue; + + d_one = j_strand_one_begin - i_strand_one_end; + d_two = j_strand_two_begin - i_strand_two_end; + } + else { + // check connectivity for second strand + begin = std::min(i_strand_two_end, j_strand_two_end); + end = std::max(i_strand_two_begin, j_strand_two_begin); + if(!Connected(connected_to_next, begin, end - begin)) continue; + + d_one = j_strand_one_begin - i_strand_one_end; + d_two = i_strand_two_end - j_strand_two_begin; + } + + if(d_one < 0 || d_two < 0) continue; + + if((d_one < 6 && d_two < 3) || (d_one < 3 && d_two < 6)) { + + extended_bridges[i].strand_one.insert( + extended_bridges[i].strand_one.end(), + extended_bridges[j].strand_one.begin(), + extended_bridges[j].strand_one.end()); + extended_bridges[i].strand_two.insert( + extended_bridges[i].strand_two.end(), + extended_bridges[j].strand_two.begin(), + extended_bridges[j].strand_two.end()); + extended_bridges.erase(extended_bridges.begin() + j); + --j; + } + } + } + } +} + +void AssignBridges(int start_idx, int size, + const std::vector<int>& donor_for_one, + const std::vector<int>& donor_for_two, + const std::vector<int>& connected_to_next, + String& ss_assignment) { + + int full_size = donor_for_one.size(); + int end_idx = start_idx + size - 1; + + std::vector<ExtendedBridge> extended_bridges; + + // let's search for bridges + + std::vector<int> parallel_partners; + std::vector<int> antiparallel_partners; + + for(int i = std::max(0, start_idx - 5); + i < std::min(start_idx + size + 5, full_size); ++i) { + + for(int j = 0; j < full_size; ++j) { + + // check for parallel bridge + if(i > 0 && i < full_size - 1) { + if((donor_for_one[i+1] == j && donor_for_one[j] == i - 1) || + (donor_for_one[i+1] == j && donor_for_two[j] == i - 1) || + (donor_for_two[i+1] == j && donor_for_one[j] == i - 1) || + (donor_for_two[i+1] == j && donor_for_two[j] == i - 1)) { + if(AddIfValid(i,j,start_idx, end_idx, connected_to_next, + true, extended_bridges)) continue; + } + } + if(j > 0 && j < full_size - 1) { + if((donor_for_one[j+1] == i && donor_for_one[i] == j - 1) || + (donor_for_one[j+1] == i && donor_for_two[i] == j - 1) || + (donor_for_two[j+1] == i && donor_for_one[i] == j - 1) || + (donor_for_two[j+1] == i && donor_for_two[i] == j - 1)) { + if(AddIfValid(i,j,start_idx, end_idx, connected_to_next, + true, extended_bridges)) continue; + } + } + + // check for antiparallel bridge + if(i > 0 && i < full_size - 1 && j > 0 && j < full_size - 1) { + if((donor_for_one[j+1] == i - 1 && donor_for_one[i+1] == j - 1) || + (donor_for_one[j+1] == i - 1 && donor_for_two[i+1] == j - 1) || + (donor_for_two[j+1] == i - 1 && donor_for_one[i+1] == j - 1) || + (donor_for_two[j+1] == i - 1 && donor_for_two[i+1] == j - 1)) { + if(AddIfValid(i,j,start_idx, end_idx, connected_to_next, + false, extended_bridges)) continue; + } + } + if((donor_for_one[i] == j && donor_for_one[j] == i) || + (donor_for_one[i] == j && donor_for_two[j] == i) || + (donor_for_two[i] == j && donor_for_one[j] == i) || + (donor_for_two[i] == j && donor_for_two[j] == i)) { + AddIfValid(i,j,start_idx, end_idx, connected_to_next, + false, extended_bridges); + } + } + } + + ResolveBulges(connected_to_next, extended_bridges); + + for(uint i = 0; i < extended_bridges.size(); ++i) { + if(extended_bridges[i].strand_one.size() > 1) { + for(int j = extended_bridges[i].strand_one.front(); + j <= extended_bridges[i].strand_one.back(); ++j) { + int idx = j - start_idx; + if(idx >= 0 && idx < size) ss_assignment[idx] = 'E'; + } + if(extended_bridges[i].parallel) { + for(int j = extended_bridges[i].strand_two.front(); + j <= extended_bridges[i].strand_two.back(); ++j) { + int idx = j - start_idx; + if(idx >= 0 && idx < size) ss_assignment[idx] = 'E'; + } + } + else { + for(int j = extended_bridges[i].strand_two.back(); + j <= extended_bridges[i].strand_two.front(); ++j) { + int idx = j - start_idx; + if(idx >= 0 && idx < size) ss_assignment[idx] = 'E'; + } + } + } + else { + int idx = extended_bridges[i].strand_one[0] - start_idx; + if(idx >= 0 && idx < size && ss_assignment[idx] != 'E') { + ss_assignment[idx] = 'B'; + } + idx = extended_bridges[i].strand_two[0] - start_idx; + if(idx >= 0 && idx < size && ss_assignment[idx] != 'E') { + ss_assignment[idx] = 'B'; + } + } + } +} + +void AssignTurns(int start_idx, int size, + const std::vector<geom::Vec3>& ca_positions, + const std::vector<int>& donor_for_one, + const std::vector<int>& donor_for_two, + const std::vector<int>& connected_to_next, + String& ss_assignment) { + + int full_size = donor_for_one.size(); + + // in turns we have following numbering scheme + // 1: its the beginning of a turn + // 2: its in the middle of a turn + // 3: its the end of a turn + // 4: its both, start and end + + std::vector<std::vector<int> > turn_arrays(3); + turn_arrays[0].assign(full_size, 0); + turn_arrays[1].assign(full_size, 0); + turn_arrays[2].assign(full_size, 0); + std::vector<int>& turn_3 = turn_arrays[0]; + std::vector<int>& turn_4 = turn_arrays[1]; + std::vector<int>& turn_5 = turn_arrays[2]; + + // search for turns + for(int turn_type = 3; turn_type <= 5; ++turn_type) { + std::vector<int>& turn_array = turn_arrays[turn_type - 3]; + for(int i = std::max(0, start_idx - turn_type); + i < std::min(start_idx + size, full_size - turn_type); ++i) { + + if((donor_for_one[i + turn_type] == i || + donor_for_two[i + turn_type] == i) && + Connected(connected_to_next, i, turn_type)) { + + // last element is end of turn + turn_array[i + turn_type] = 3; + + // set all the stuff in between to 2 (except it already is a start...) + for(int j = i + 1; j < i + turn_type; ++j) { + if(turn_array[j] == 0) turn_array[j] = 2; + } + + // set first element to start (might be start AND end) + if(turn_array[i] == 3) turn_array[i] = 4; + else turn_array[i] = 1; + } + } + } + + // to fill in all the turns in the relevant range + // its easier to work in a separate array and then fill everything + // into ss_assignment in the end to avoid indexing nightmare... + // + // coding: + // 0: C + // 1: T + // 2: S + // 3: G + // 4: H + // 5: I + // 6: other stuff (all kind of bridges) + std::vector<int> turn_array(full_size, 0); + + // first map the "other stuff" from the ss_assignment we already have + for(int i = 0; i < size; ++i) { + if(ss_assignment[i] != 'C') { + turn_array[start_idx + i] = 6; + } + } + + // do H + for(int i = std::max(start_idx - 3, 1); i < std::min(start_idx + size, full_size - 3); ++i) { + if((turn_4[i] == 1 || turn_4[i] == 4) && + (turn_4[i - 1] == 1 || turn_4[i - 1] == 4)) { + turn_array[i] = 4; + turn_array[i+1] = 4; + turn_array[i+2] = 4; + turn_array[i+3] = 4; + } + } + + // do G + for(int i = std::max(start_idx - 2, 1); i < std::min(start_idx + size, full_size - 2); ++i) { + if((turn_3[i] == 1 || turn_3[i] == 4) && + (turn_3[i - 1] == 1 || turn_3[i - 1] == 4)) { + // check whether we can still add the turn + if((turn_array[i] == 0 || turn_array[i] == 3) && + (turn_array[i+1] == 0 || turn_array[i+1] == 3) && + (turn_array[i+2] == 0 || turn_array[i+2] == 3)) { + turn_array[i] = 3; + turn_array[i+1] = 3; + turn_array[i+2] = 3; + } + } + } + + // do I + for(int i = std::max(start_idx - 4, 1); i < std::min(start_idx + size, full_size - 4); ++i) { + if((turn_5[i] == 1 || turn_5[i] == 4) && + (turn_5[i - 1] == 1 || turn_5[i - 1] == 4)) { + + // check whether we can still add the turn + bool add_it = true; + for(int j = 0; j < 5; ++j) { + if(!(turn_array[i+j] == 0 || + turn_array[i+j] == 5 || + turn_array[i+j] == 4)) { // we prefer pi helices!! + // (default in dssp) + add_it = false; + break; + } + } + + if(add_it) { + turn_array[i] = 5; + turn_array[i+1] = 5; + turn_array[i+2] = 5; + turn_array[i+3] = 5; + turn_array[i+4] = 5; + } + } + } + + // do turns(T) + + // do turn_3 + for(int i = start_idx; i < start_idx + size; ++i) { + if(turn_array[i] == 0) { + if((i > 0 && (turn_3[i - 1] == 1 || turn_3[i - 1] == 4)) || + (i > 1 && (turn_3[i - 2] == 1 || turn_3[i - 2] == 4))) { + turn_array[i] = 1; + } + } + } + + // do turn_4 + for(int i = start_idx; i < start_idx + size; ++i) { + if(turn_array[i] == 0) { + if((i > 0 && (turn_4[i - 1] == 1 || turn_4[i - 1] == 4)) || + (i > 1 && (turn_4[i - 2] == 1 || turn_4[i - 2] == 4)) || + (i > 2 && (turn_4[i - 3] == 1 || turn_4[i - 3] == 4))) { + turn_array[i] = 1; + } + } + } + + // do turn_5 + for(int i = start_idx; i < start_idx + size; ++i) { + if(turn_array[i] == 0) { + if((i > 0 && (turn_5[i - 1] == 1 || turn_5[i - 1] == 4)) || + (i > 1 && (turn_5[i - 2] == 1 || turn_5[i - 2] == 4)) || + (i > 2 && (turn_5[i - 3] == 1 || turn_5[i - 3] == 4)) || + (i > 3 && (turn_5[i - 4] == 1 || turn_5[i - 4] == 4))) { + turn_array[i] = 1; + } + } + } + + // do bends (S) + Real angle; + for(int i = std::max(start_idx, 2); + i < std::min(start_idx + size, full_size - 2); ++i){ + if(turn_array[i] == 0) { + angle = geom::Angle(ca_positions[i]-ca_positions[i-2], + ca_positions[i+2]-ca_positions[i]); + if(angle > 1.2217 && + Connected(connected_to_next, i - 2, 5)) { + turn_array[i] = 2; + } + } + } + + for(int i = start_idx; i < start_idx + size; ++i) { + int idx = i - start_idx; + if(turn_array[i] == 1) ss_assignment[idx] = 'T'; + else if(turn_array[i] == 2) ss_assignment[idx] = 'S'; + else if(turn_array[i] == 3) ss_assignment[idx] = 'G'; + else if(turn_array[i] == 4) ss_assignment[idx] = 'H'; + else if(turn_array[i] == 5) ss_assignment[idx] = 'I'; + } +} + +} // anon ns + +namespace ost{ namespace mol{ namespace alg{ + +String RawEstimateSS(const std::vector<geom::Vec3>& ca_positions, + int start_idx, int size, + const std::vector<int>& donor_for_one, + const std::vector<int>& donor_for_two, + const std::vector<int>& connected_to_next) { + + if(size < 1){ + throw ost::Error("Size of input is too small!in dssp calculation"); + } + + int full_size = ca_positions.size(); + + if(start_idx + size > full_size) { + throw ost::Error("Selected stretch is not fully covered by provided data in dssp calculation!"); + } + + if(full_size != static_cast<int>(donor_for_one.size())){ + throw ost::Error("Position and donor data are inconsistent for dssp calculation!"); + } + + if(full_size != static_cast<int>(donor_for_two.size())){ + throw ost::Error("Position and donor data are inconsistent for dssp calculation!"); + } + + String ss_assignment; + ss_assignment.assign(size,'C'); + + AssignBridges(start_idx, size, donor_for_one, donor_for_two, + connected_to_next, ss_assignment); + + AssignTurns(start_idx, size, ca_positions, + donor_for_one, donor_for_two, + connected_to_next, ss_assignment); + + return ss_assignment; +} + +void PrepareSSData(const ost::mol::ResidueViewList& res_list, + std::vector<int>& res_indices, + std::vector<geom::Vec3>& ca_positions, + std::vector<int>& donor_for_one, + std::vector<int>& donor_for_two, + std::vector<int>& connected_to_next) { + + res_indices.clear(); + ca_positions.clear(); + donor_for_one.clear(); + donor_for_two.clear(); + connected_to_next.clear(); + + std::vector<geom::Vec3> n_positions; + std::vector<geom::Vec3> c_positions; + std::vector<geom::Vec3> o_positions; + + int total_num_residues = res_list.size(); + n_positions.reserve(total_num_residues); + ca_positions.reserve(total_num_residues); + c_positions.reserve(total_num_residues); + o_positions.reserve(total_num_residues); + res_indices.reserve(total_num_residues); + connected_to_next.reserve(total_num_residues); + + for(int res_idx = 0; res_idx < total_num_residues; ++res_idx) { + + const ost::mol::ResidueView& res = res_list[res_idx]; + + // add information if all atoms are present + ost::mol::AtomView n = res.FindAtom("N"); + ost::mol::AtomView ca = res.FindAtom("CA"); + ost::mol::AtomView c = res.FindAtom("C"); + ost::mol::AtomView o = res.FindAtom("O"); + + if(n.IsValid() && ca.IsValid() && c.IsValid() && o.IsValid()) { + n_positions.push_back(n.GetPos()); + ca_positions.push_back(ca.GetPos()); + c_positions.push_back(c.GetPos()); + o_positions.push_back(o.GetPos()); + res_indices.push_back(res_idx); + } + } + + int num_residues = res_indices.size(); + + for(int i = 0; i < num_residues - 1; ++i) { + + if(res_list[res_indices[i]].GetChain() == + res_list[res_indices[i+1]].GetChain() && + geom::Length2(n_positions[i+1] - c_positions[i]) <= 6.25) { + connected_to_next.push_back(1); + } + else { + connected_to_next.push_back(0); + } + } + + // the last residue needs to be handled as well... + connected_to_next.push_back(0); + + // estimate the hydrogen positions and also keep track which ones are valid + // (e.g. in case of chain breaks / prolines...) + // please note, that DSSP simply uses the directional vector given by + // carbon and oxygen of the previous residue to determine the hydrogen + // position + std::vector<geom::Vec3> h_positions(num_residues); + std::vector<int> valid_hydrogens(num_residues, 0); + + for(int i = 0; i < num_residues - 1; ++i) { + if(connected_to_next[i] && res_list[res_indices[i+1]].GetName() != "PRO") { + geom::Vec3 dir_vec = geom::Normalize(c_positions[i] - o_positions[i]); + h_positions[i+1] = n_positions[i+1] + dir_vec; + valid_hydrogens[i+1] = 1; + } + } + + std::pair<Real, Real> max_e_pair = + std::make_pair(std::numeric_limits<Real>::max(), + std::numeric_limits<Real>::max()); + + std::vector<std::pair<Real,Real> > donor_energies(num_residues, max_e_pair); + std::vector<std::pair<int,int> > acceptors(num_residues, std::make_pair(-1, -1)); + Real e; + + for(int i = 0; i < num_residues - 1; ++i){ + for(int j = i + 1; j < num_residues; ++j){ + if(geom::Length2(ca_positions[i] - ca_positions[j]) < 81.0){ + + //i as acceptor, j as donor + if(j > i + 1 && valid_hydrogens[j]){ + e = DSSPHBondEnergy(h_positions[j], n_positions[j], + c_positions[i], o_positions[i]); + // stupid rounding to be consistent with dssp + e = std::floor(e * 1000 + 0.5) * 0.001; + + if(e < donor_energies[j].first) { + donor_energies[j].second = donor_energies[j].first; + acceptors[j].second = acceptors[j].first; + donor_energies[j].first = e; + acceptors[j].first = i; + } + else if(e < donor_energies[j].second) { + donor_energies[j].second = e; + acceptors[j].second = i; + } + } + + //i as donor, j as acceptor + if(valid_hydrogens[i]){ + + e = DSSPHBondEnergy(h_positions[i], n_positions[i], + c_positions[j], o_positions[j]); + // stupid rounding to be consistent with dssp + e = std::floor(e * 1000 + 0.5) * 0.001; + + if(e < donor_energies[i].first) { + donor_energies[i].second = donor_energies[i].first; + acceptors[i].second = acceptors[i].first; + donor_energies[i].first = e; + acceptors[i].first = j; + } + else if(e < donor_energies[i].second) { + donor_energies[i].second = e; + acceptors[i].second = j; + } + } + } + } + } + + donor_for_one.assign(num_residues, -1); + donor_for_two.assign(num_residues, -1); + + for(int i = 0; i < num_residues; ++i) { + if(donor_energies[i].first < -0.5) { + donor_for_one[i] = acceptors[i].first; + } + if(donor_energies[i].second < -0.5) { + donor_for_two[i] = acceptors[i].second; + } + } +} + +void AssignSecStruct(ost::mol::EntityView& ent) { + + ost::mol::ResidueViewList res_list = ent.GetResidueList(); + + for(uint res_idx = 0; res_idx < res_list.size(); ++res_idx) { + // set all residues to coil to start with + res_list[res_idx].SetSecStructure(ost::mol::SecStructure('C')); + } + + std::vector<int> res_indices; + std::vector<geom::Vec3> ca_positions; + std::vector<int> donor_for_one; + std::vector<int> donor_for_two; + std::vector<int> connected_to_next; + + PrepareSSData(res_list, res_indices, ca_positions, + donor_for_one, donor_for_two, connected_to_next); + + String ss = RawEstimateSS(ca_positions, 0, ca_positions.size(), + donor_for_one, donor_for_two, + connected_to_next); + + for(uint i = 0; i < ca_positions.size(); ++i) { + res_list[res_indices[i]].SetSecStructure(ost::mol::SecStructure(ss[i])); + } +} + +void AssignSecStruct(ost::mol::EntityHandle& ent) { + ost::mol::EntityView view = ent.CreateFullView(); + AssignSecStruct(view); +} + +}}} //ns diff --git a/modules/mol/alg/src/sec_struct.hh b/modules/mol/alg/src/sec_struct.hh new file mode 100644 index 0000000000000000000000000000000000000000..198efee76d2d5fca91fbbcb70b000b8f6e5ec5b3 --- /dev/null +++ b/modules/mol/alg/src/sec_struct.hh @@ -0,0 +1,81 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2017 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#ifndef OST_SEC_STRUCT_HH +#define OST_SEC_STRUCT_HH + +#include <vector> + +#include <ost/base.hh> +#include <ost/geom/geom.hh> +#include <ost/message.hh> +#include <ost/mol/mol.hh> + +namespace ost { namespace mol{ namespace alg{ + +inline Real DSSPHBondEnergy(const geom::Vec3& h_pos, const geom::Vec3& n_pos, + const geom::Vec3& c_pos, const geom::Vec3& o_pos) { + Real on = 1.0/geom::Distance(o_pos,n_pos); + Real ch = 1.0/geom::Distance(c_pos,h_pos); + Real oh = 1.0/geom::Distance(o_pos,h_pos); + Real cn = 1.0/geom::Distance(c_pos,n_pos); + return 27.888 * (on+ch-oh-cn); +} + +// Raw estimation of secondary structure +// +// This function is not intended for Python export, since the input has to be +// prepared carefully. It basically estimates the secondary structure of a +// stretch of amino acids based on the hydrogen bond pattern as described for +// the dssp tool. +// +// To define the hydrogen bonds you need to provide the vectors donor_for_one +// and donor_for two. The index of an acceptor residue appears in one of the +// two vectors if the corresponding hbond energy is < -0.5 +// (be aware of prolines that can't be donors!). +// There are two of those vectors, because for every residue we store the two +// lowest energy acceptors. If there is no acceptor available, the value +// at this position must be -1. +// The connected_to_next contains zeros and ones that defines, whether there +// is a peptide bond towards the next residue and the ca_positions vector +// is self explaining. +// As an additional feature you can also provide the according data for the +// full structure but only estimate the secondary structure for a small +// stretch, that gets defined by start_idx and size. +// For an example usage have a look at the AssignSecStruct functions. + +String RawEstimateSS(const std::vector<geom::Vec3>& ca_positions, + int start_idx, int size, + const std::vector<int>& donor_for_one, + const std::vector<int>& donor_for_two, + const std::vector<int>& connected_to_next); + +void PrepareSSData(const ost::mol::ResidueViewList& res_list, + std::vector<int>& res_indices, + std::vector<geom::Vec3>& ca_positions, + std::vector<int>& donor_for_one, + std::vector<int>& donor_for_two, + std::vector<int>& connected_to_next); + +void AssignSecStruct(ost::mol::EntityView& ent); + +void AssignSecStruct(ost::mol::EntityHandle& ent); + +}}} //ns + +#endif diff --git a/modules/mol/alg/tests/CMakeLists.txt b/modules/mol/alg/tests/CMakeLists.txt index 10c5fbc2319956ce21cc8d319b393b89a17fb86f..0002d71492e639d21d456a93f94950db59aa1afe 100644 --- a/modules/mol/alg/tests/CMakeLists.txt +++ b/modules/mol/alg/tests/CMakeLists.txt @@ -2,15 +2,16 @@ set(OST_MOL_ALG_UNIT_TESTS test_superposition.cc tests.cc test_consistency_checks.cc + test_partial_sec_struct_assignment.cc test_pdbize.py test_convenient_superpose.py test_hbond.py test_accessibility.py + test_sec_struct.py ) if (COMPOUND_LIB) list(APPEND OST_MOL_ALG_UNIT_TESTS test_qsscoring.py) endif() -ost_unittest(MODULE mol_alg SOURCES "${OST_MOL_ALG_UNIT_TESTS}") - +ost_unittest(MODULE mol_alg SOURCES "${OST_MOL_ALG_UNIT_TESTS}" LINK ost_io) diff --git a/modules/mol/alg/tests/test_accessibility.py b/modules/mol/alg/tests/test_accessibility.py index 748b4d8c1943413d0ac6b8716d5134c5af2d9bc2..3ff4c0fde1431336b798f21c2545527baed62fd1 100644 --- a/modules/mol/alg/tests/test_accessibility.py +++ b/modules/mol/alg/tests/test_accessibility.py @@ -2,6 +2,7 @@ from ost import io, mol, settings import unittest import os from ost.bindings import naccess +from ost.bindings import dssp class AccessibilityContainer: @@ -80,7 +81,7 @@ def Compare(acc_one, acc_two): class TestAccessibility(unittest.TestCase): - def testAcc(self): + def testAccNACCESS(self): # tests oligo mode by comparing the results from doing the # corresponding calculations manually @@ -100,15 +101,54 @@ class TestAccessibility(unittest.TestCase): # naccess results try: naccess_path = settings.Locate("naccess") + ent_three = io.LoadPDB(os.path.join("testfiles", "1a0s.pdb")) + ent_three = ent_three.Select("peptide=true") + acc_naccess = AccessibilitiesRaw(ent_three, use_naccess=True) + self.assertTrue(Compare(acc_classic, acc_naccess)) except: print "Could not find NACCESS, could not compare Accessiblity function..." - return - ent_three = io.LoadPDB(os.path.join("testfiles", "1a0s.pdb")) - ent_three = ent_three.Select("peptide=true") - acc_naccess = AccessibilitiesRaw(ent_three, use_naccess=True) - self.assertTrue(Compare(acc_classic, acc_naccess)) + def testAccDSSP(self): + + dssp_path = None + + try: + dssp_path = settings.Locate("dssp") + except: + try: + dssp_path = settings.locate("mkdssp") + except: + pass + pass + + if dssp_path == None: + print "Could not find DSSP, could not compare Accessibility function..." + + # we assume oligo mode to be working as it is tested in + # testAccNACCESS. So we only test the single residue + # accessibilitities + ent_one = io.LoadPDB(os.path.join("testfiles", "1a0s.pdb")) + ent_two = io.LoadPDB(os.path.join("testfiles", "1a0s.pdb")) + ent_one = ent_one.Select("peptide=true") + ent_two = ent_two.Select("peptide=true") + + dssp.AssignDSSP(ent_one, extract_burial_status=True, dssp_bin = dssp_path) + mol.alg.Accessibility(ent_two, algorithm=mol.alg.AccessibilityAlgorithm.DSSP) + + for a,b in zip(ent_one.residues, ent_two.residues): + + # overall accessibility + if a.HasProp("solvent_accessibility") and b.HasProp("asaAbs"): + diff = abs(a.GetFloatProp("solvent_accessibility") -\ + round(b.GetFloatProp("asaAbs"))) + self.assertTrue(diff < 0.01) + + # relative accessibility + if a.HasProp("relative_solvent_accessibility") and b.HasProp("asaRel"): + diff = abs(a.GetFloatProp("relative_solvent_accessibility") -\ + b.GetFloatProp("asaRel")) + self.assertTrue(diff < 0.01) if __name__ == "__main__": diff --git a/modules/mol/alg/tests/test_partial_sec_struct_assignment.cc b/modules/mol/alg/tests/test_partial_sec_struct_assignment.cc new file mode 100644 index 0000000000000000000000000000000000000000..b6a9915608edd7858af045686a93e8df8b740369 --- /dev/null +++ b/modules/mol/alg/tests/test_partial_sec_struct_assignment.cc @@ -0,0 +1,88 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2017 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + + +#include <ost/mol/alg/sec_struct.hh> +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> +#include <boost/test/auto_unit_test.hpp> +#include <ost/mol/mol.hh> +#include <ost/io/mol/pdb_reader.hh> +#include <ost/mol/builder.hh> +#include <ost/message.hh> + + + +BOOST_AUTO_TEST_SUITE( mol_alg ); + +BOOST_AUTO_TEST_CASE(partial_sec_struct_assignment) { + + // load a test structure + String filepath = "testfiles/1a0s.pdb"; + ost::io::PDBReader reader(filepath, ost::io::IOProfile()); + ost::mol::EntityHandle test_ent = ost::mol::CreateEntity(); + reader.Import(test_ent); + + ost::mol::EntityView view = test_ent.CreateFullView(); + ost::mol::ResidueViewList res_list = view.GetResidueList(); + + std::vector<int> res_indices; + std::vector<geom::Vec3> ca_positions; + std::vector<int> donor_for_one; + std::vector<int> donor_for_two; + std::vector<int> connected_to_next; + + ost::mol::alg::PrepareSSData(res_list, res_indices, ca_positions, + donor_for_one, donor_for_two, + connected_to_next); + + String full_pred = ost::mol::alg::RawEstimateSS(ca_positions, + 0, ca_positions.size(), + donor_for_one, donor_for_two, + connected_to_next); + + // select some start indices to test + int max_num = ca_positions.size(); + std::vector<int> test_locations; + test_locations.push_back(max_num/10); + test_locations.push_back((max_num/10)*2); + test_locations.push_back((max_num/10)*3); + test_locations.push_back((max_num/10)*4); + test_locations.push_back((max_num/10)*5); + test_locations.push_back((max_num/10)*6); + test_locations.push_back((max_num/10)*7); + test_locations.push_back((max_num/10)*8); + test_locations.push_back((max_num/10)*9); + int stretch_length = std::min(12, max_num - test_locations.back()); + + for(uint i = 0; i < test_locations.size(); ++i) { + String expected_ss = full_pred.substr(test_locations[i], stretch_length); + String calculated_ss = ost::mol::alg::RawEstimateSS(ca_positions, + test_locations[i], + stretch_length, + donor_for_one, + donor_for_two, + connected_to_next); + + BOOST_CHECK(expected_ss == calculated_ss); + } + +} + +BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/mol/alg/tests/test_sec_struct.py b/modules/mol/alg/tests/test_sec_struct.py new file mode 100644 index 0000000000000000000000000000000000000000..3c2d11295a39ac866abb286390c356d60a700591 --- /dev/null +++ b/modules/mol/alg/tests/test_sec_struct.py @@ -0,0 +1,35 @@ +from ost import io, mol, settings +import unittest +import os +from ost.bindings import dssp + + +class TestSecStruct(unittest.TestCase): + + def testSecStruct(self): + + # unit test only makes sense, when a dssp binary is around + dssp_path = None + try: + dssp_path = settings.Locate("dssp") + except: + try: + dssp_path = settings.Locate("mkdssp") + except: + print "Could not find dssp, could not compare sec struct assignment..." + return + + dssp_ent = io.LoadPDB(os.path.join("testfiles", "1a0s.pdb")) + ost_ent = io.LoadPDB(os.path.join("testfiles", "1a0s.pdb")) + + dssp.AssignDSSP(dssp_ent, dssp_bin = dssp_path) + mol.alg.AssignSecStruct(ost_ent) + + for a, b in zip(dssp_ent.residues, ost_ent.residues): + self.assertTrue(str(a.GetSecStructure()) == str(b.GetSecStructure())) + + +if __name__ == "__main__": + + from ost import testutils + testutils.RunTests()