diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 9189aecece5345a60ec850332af0fca826e14603..a74f28d4d27440806392edbeba1ee36255b67351 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,3 +1,25 @@ +Changes in Release <RELEASE NUMBER> +-------------------------------------------------------------------------------- + + * nonstandard C++ module was moved from ost.conop to ost.mol.alg. This implies + change in the API. Mapping functions CopyResidue, CopyConserved and + CopyNonConserved that were previousely imported from ost.conop are now + to be imported from ost.mol.alg. + * Added Molck API to the ost.mol.alg module. + * Removed habit of changing secondary structure of entities when loading + from mmCIF PDB files. Before, OST would turn secondary structure 'EEH' + into 'ECH' to make it look nicer in DNG. Now, 'EEH' stays 'EEH'. + +Changes in Release 1.7.1 +-------------------------------------------------------------------------------- + + * Fixed an issue that could cause the star format parser (mmCIF, chemical + components dictionary) to enter an infinite loop + * Chemical components dictionary was extended by new chemical classes + introduced by PDB + * Fixed unit tests + * Improved documentation + Changes in Release 1.7 -------------------------------------------------------------------------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f582069ba52a1b79e281c8866d6f591c5f1942e..2cf4c283a4cbfd59833a4d76837da48948dbe9f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ project(OpenStructure CXX C) set (CMAKE_EXPORT_COMPILE_COMMANDS 1) set (OST_VERSION_MAJOR 1) set (OST_VERSION_MINOR 7) -set (OST_VERSION_PATCH 0) +set (OST_VERSION_PATCH 1) set (OST_VERSION_STRING ${OST_VERSION_MAJOR}.${OST_VERSION_MINOR}.${OST_VERSION_PATCH} ) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/cmake_support) include(OST) diff --git a/modules/conop/doc/aminoacid.rst b/modules/conop/doc/aminoacid.rst index de15eb98a91c3fe53a9008c20d314d4aa8954edc..a33104ca6b91085b63a3a0df02b100ebd1d9c2d7 100644 --- a/modules/conop/doc/aminoacid.rst +++ b/modules/conop/doc/aminoacid.rst @@ -79,63 +79,4 @@ Converter functions .. method:: Empty() - Whether the set is empty, i.e. doesn't contain any amino acids. - - -Mapping functions --------------------------------------------------------------------------------- - -The following functions help to convert one residue into another by reusing as -much as possible from the present atoms. They are mainly meant to map from -standard amino acid to other standard amino acids or from modified amino acids -to standard amino acids. - -.. function:: CopyResidue(src_res, dst_res, editor) - - Copies the atoms of ``src_res`` to ``dst_res`` using the residue names - as guide to decide which of the atoms should be copied. If ``src_res`` and - ``dst_res`` have the same name, or ``src_res`` is a modified version of - ``dst_res`` (i.e. have the same single letter code), CopyConserved will be - called, otherwise CopyNonConserved will be called. - - :param src_res: The source residue - :type src_res: :class:`~ost.mol.ResidueHandle` - :param dst_res: The destination residue - :type dst_res: :class:`~ost.mol.ResidueHandle` - - :returns: true if the residue could be copied, false if not. - -.. function:: CopyConserved(src_res, dst_res, editor) - - Copies the atoms of ``src_res`` to ``dst_res`` assuming that the parent - amino acid of ``src_res`` (or ``src_res`` itself) are identical to ``dst_res``. - - If ``src_res`` and ``dst_res`` are identical, all heavy atoms are copied - to ``dst_res``. If ``src_res`` is a modified version of ``dst_res`` and the - modification is a pure addition (e.g. the phosphate group of phosphoserine), - the modification is stripped off and all other heavy atoms are copied to - ``dst_res``. If the modification is not a pure addition, only the backbone - heavy atoms are copied to ``dst_res``. - - Additionally, the selenium atom of ``MSE`` is converted to sulphur. - - :param src_res: The source residue - :type src_res: :class:`~ost.mol.ResidueHandle` - :param dst_res: The destination residue - :type dst_res: :class:`~ost.mol.ResidueHandle` - - :returns: a tuple of bools stating whether the residue could be copied and - whether the Cbeta atom was inserted into the ``dst_res``. - -.. function:: CopyNonConserved(src_res, dst_res, editor) - - Copies the heavy backbone atoms and Cbeta (except for ``GLY``) of ``src_res`` - to ``dst_res``. - - :param src_res: The source residue - :type src_res: :class:`~ost.mol.ResidueHandle` - :param dst_res: The destination residue - :type dst_res: :class:`~ost.mol.ResidueHandle` - - :returns: a tuple of bools stating whether the residue could be copied and - whether the Cbeta atom was inserted into the ``dst_res``. + Whether the set is empty, i.e. doesn't contain any amino acids. \ No newline at end of file diff --git a/modules/conop/pymod/CMakeLists.txt b/modules/conop/pymod/CMakeLists.txt index 9c09906222fb97e1c935590ec22e81b536c94891..23dff26a0022fa9c312c9bfd1acda033d1c304b7 100644 --- a/modules/conop/pymod/CMakeLists.txt +++ b/modules/conop/pymod/CMakeLists.txt @@ -8,7 +8,6 @@ set(OST_CONOP_PYMOD_SOURCES export_conop.cc export_diag.cc export_rule_based.cc - export_non_standard.cc export_ring_finder.cc ) diff --git a/modules/conop/pymod/wrap_conop.cc b/modules/conop/pymod/wrap_conop.cc index 119c86c325f3ce6fc48cf88298838f441cb04a36..04bcfce147b3ebf986cee454c2594e242ade7059 100644 --- a/modules/conop/pymod/wrap_conop.cc +++ b/modules/conop/pymod/wrap_conop.cc @@ -25,7 +25,6 @@ void export_Sanitizer(); void export_Conop(); void export_RingFinder(); void export_AminoAcids(); -void export_NonStandard(); void export_processor(); void export_rule_based(); void export_heuristic(); @@ -41,6 +40,5 @@ BOOST_PYTHON_MODULE(_ost_conop) export_Compound(); export_RingFinder(); export_AminoAcids(); - export_NonStandard(); export_diag(); } diff --git a/modules/conop/src/CMakeLists.txt b/modules/conop/src/CMakeLists.txt index 0185618ea4a9ebb02d917393670ce3f401ed494c..c2f1f4c6d0148c62461be5852bd33872ad39f903 100644 --- a/modules/conop/src/CMakeLists.txt +++ b/modules/conop/src/CMakeLists.txt @@ -9,7 +9,6 @@ compound.hh compound_lib.hh module_config.hh rule_based.hh -nonstandard.hh minimal_compound_lib.hh compound_lib_base.hh ring_finder.hh @@ -27,12 +26,11 @@ rule_based.cc model_check.cc compound.cc compound_lib.cc -nonstandard.cc ring_finder.cc ) module(NAME conop SOURCES ${OST_CONOP_SOURCES} - HEADERS ${OST_CONOP_HEADERS} DEPENDS_ON ost_mol ost_mol_alg ost_geom ost_db) + HEADERS ${OST_CONOP_HEADERS} DEPENDS_ON ost_mol ost_geom ost_db) if (WIN32) diff --git a/modules/conop/tests/CMakeLists.txt b/modules/conop/tests/CMakeLists.txt index 96400650977655f3efed4108de874bf9e1168bee..44fd58acfc7036926e3e5b31ebe51db7129291c8 100644 --- a/modules/conop/tests/CMakeLists.txt +++ b/modules/conop/tests/CMakeLists.txt @@ -9,8 +9,7 @@ set(OST_CONOP_UNIT_TESTS if (COMPOUND_LIB) list(APPEND OST_CONOP_UNIT_TESTS test_compound.py - test_cleanup.py - test_nonstandard.py) + test_cleanup.py) endif() ost_unittest(MODULE conop diff --git a/modules/index.rst b/modules/index.rst index 4521313c8592ca3d57a7a91dcac7813c7a524316..2e8c37b274c8c40308f7aedb259fd51e1dd8888e 100644 --- a/modules/index.rst +++ b/modules/index.rst @@ -30,6 +30,7 @@ OpenStructure documentation contributing table mol/alg/lddt + mol/alg/molck For Starters -------------------------------------------------------------------------------- @@ -101,7 +102,9 @@ Varia **Users**: :doc:`Reporting a problem <users>` -**lDDT**: :doc:`lDDT command line executable<mol/alg/lddt>` +**lDDT**: :doc:`lDDT command line executable and Python API<mol/alg/lddt>` + +**Molck**: :doc:`Molecular Checker<mol/alg/molck>` Extending OpenStructure -------------------------------------------------------------------------------- diff --git a/modules/io/doc/io.rst b/modules/io/doc/io.rst index f8f5302e060d360d13b2851d42dce12d3e9d2c66..aa3e15991ba46ea68d9ffeb68c95d6a3aed6a1c5 100644 --- a/modules/io/doc/io.rst +++ b/modules/io/doc/io.rst @@ -343,3 +343,45 @@ Saving Density Maps 12 +Stereochemical Parameters +-------------------------------------------------------------------------------- + +In order to check the structure for some stereo-chemical and steric clashes +before computing the lDDT scores it is required to pass parameter file based on +Engh and Huber parameters, and on the atomic radii as defined in the Cambridge +Structural Database. OpenStructure ships with default file called +`stereo_chemical_props.txt` located in `$OST_ROOT/share/openstructure` +directory. A class :class:`~ost.io.StereoChemicalParamsReader` is used to read +this file. + +.. class:: StereoChemicalParamsReader(filename="") + + Object that holds and reads stereochemical parameters. + + :param filename: Sets :attr:`filename`. + + .. attribute:: bond_table + + The table containing bond information of type :class:`~ost.mol.alg.StereoChemicalParams`. + + .. attribute:: angle_table + + The table containing angle information of type :class:`~ost.mol.alg.StereoChemicalParams`. + + .. attribute:: nonbonded_table + + The table containing clashes of type :class:`~ost.mol.alg.ClashingDistances`. + + .. attribute:: filename + + The path to the parameter file that will be used. If set to "", it reads the + default file shipped with OpenStructure. + + :type: :class:`str` + + .. method:: Read(check=False) + + Read the file. + + :param check: Raise an error when any of the resulting tables are empty. + :type check: :class:`bool` diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index 730ca1b8e97ed554287158184c9d9eb20283da37..23dc32f461a08cf1633362e60bb3b9c54ecb09af 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -638,7 +638,7 @@ of the annotation available. .. method:: GetChainIntervalList() - See :attr:`chainintervalls` + See :attr:`chainintervals` .. method:: GetOperations() diff --git a/modules/io/pymod/wrap_io.cc b/modules/io/pymod/wrap_io.cc index 302720245b63d534d88aceb2bba9c4d64bba0464..8704a4657d5a7f152f96d339164976fe5bbeb0ce 100644 --- a/modules/io/pymod/wrap_io.cc +++ b/modules/io/pymod/wrap_io.cc @@ -33,6 +33,7 @@ using namespace boost::python; #include <ost/io/mol/entity_io_sdf_handler.hh> #include <ost/io/mol/pdb_reader.hh> #include <ost/io/mol/dcd_io.hh> +#include <ost/io/stereochemical_params_reader.hh> using namespace ost; using namespace ost::io; @@ -120,6 +121,15 @@ BOOST_PYTHON_MODULE(_ost_io) def("LoadMAE", &LoadMAE); def("LoadPQR", &LoadPQR); + class_<io::StereoChemicalParamsReader>("StereoChemicalParamsReader", + init<String>(arg("filename"))) + .def(init<>()) + .def("Read", &io::StereoChemicalParamsReader::Read, (arg("check")=false)) + .def_readwrite("filename", &io::StereoChemicalParamsReader::filename) + .def_readwrite("bond_table", &io::StereoChemicalParamsReader::bond_table) + .def_readwrite("angle_table", &io::StereoChemicalParamsReader::angle_table) + .def_readwrite("nonbonded_table", &io::StereoChemicalParamsReader::nonbonded_table); + export_pdb_io(); export_mmcif_io(); #if OST_IMG_ENABLED diff --git a/modules/io/src/CMakeLists.txt b/modules/io/src/CMakeLists.txt index 13d6e3c6adef7c8adbe30aa0494d2668ce7242d7..41ac18cc63766a9674afc7e61fd4acc7542ea99d 100644 --- a/modules/io/src/CMakeLists.txt +++ b/modules/io/src/CMakeLists.txt @@ -65,7 +65,7 @@ if (ENABLE_IMG) endif() #################################### -set(OST_IO_DEPENDENCIES ost_base;ost_conop;ost_seq) +set(OST_IO_DEPENDENCIES ost_base;ost_conop;ost_seq;ost_mol_alg) if (ENABLE_IMG) set(OST_IO_DEPENDENCIES ${OST_IO_DEPENDENCIES};ost_img;ost_img_alg) endif() diff --git a/modules/io/src/mol/CMakeLists.txt b/modules/io/src/mol/CMakeLists.txt index e0792e93ee64ec6038451cad5a283a5c5c12144c..c88d99db8690af8efafc44a896f1cd017d03ff19 100644 --- a/modules/io/src/mol/CMakeLists.txt +++ b/modules/io/src/mol/CMakeLists.txt @@ -20,6 +20,7 @@ star_parser.cc mmcif_reader.cc mmcif_info.cc pdb_str.cc +stereochemical_params_reader.cc PARENT_SCOPE ) @@ -47,5 +48,6 @@ surface_io_handler.hh load_surface.hh surface_io_msms_handler.hh pdb_str.hh +stereochemical_params_reader.hh PARENT_SCOPE ) diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 27a7c9bc8406308409525862a7b1d906417d2c36..905e28442bab8bd68300426bdeb99df8c6d4cb0e 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -1543,17 +1543,7 @@ void MMCifReader::AssignSecStructure(mol::EntityHandle ent) continue; } mol::SecStructure alpha(mol::SecStructure::ALPHA_HELIX); - // some PDB files contain helix/strand entries that are adjacent to each - // other. To avoid visual artifacts, we effectively shorten the first of - // the two secondary structure segments to insert one residue of coil - // conformation. - mol::ResNum start = i->start, end = i->end; - if (helix_list_.end() != i+1 && // unit test - (*(i+1)).start.GetNum() <= end.GetNum()+1 && - (*(i+1)).end.GetNum() > end.GetNum()) { - end = mol::ResNum((*(i+1)).start.GetNum()-2); - } - chain.AssignSecondaryStructure(alpha, start, end); + chain.AssignSecondaryStructure(alpha, i->start, i->end); } for (MMCifHSVector::const_iterator i=strand_list_.begin(), @@ -1565,14 +1555,7 @@ void MMCifReader::AssignSecStructure(mol::EntityHandle ent) continue; } mol::SecStructure extended(mol::SecStructure::EXTENDED); - mol::ResNum start = i->start, end = i->end; - // see comment for helix assignment - if (strand_list_.end() != i+1 && // unit test - (*(i+1)).start.GetNum() <= end.GetNum()+1 && - (*(i+1)).end.GetNum() > end.GetNum()) { - end=mol::ResNum((*(i+1)).start.GetNum()-2); - } - chain.AssignSecondaryStructure(extended, start, end); + chain.AssignSecondaryStructure(extended, i->start, i->end); } } diff --git a/modules/io/src/mol/pdb_reader.cc b/modules/io/src/mol/pdb_reader.cc index a203234ecc275f82bceb87b75781ad1e626fb100..ffa04e6ecc0144a342bd477c84ab35b570f78e5a 100644 --- a/modules/io/src/mol/pdb_reader.cc +++ b/modules/io/src/mol/pdb_reader.cc @@ -520,16 +520,7 @@ void PDBReader::AssignSecStructure(mol::EntityHandle ent) continue; } mol::SecStructure alpha(mol::SecStructure::ALPHA_HELIX); - // some PDB files contain helix/strand entries that are adjacent to each - // other. To avoid visual artifacts, we effectively shorten the first of the - // two secondary structure segments to insert one residue of coil - // conformation. - mol::ResNum start=i->start, end=i->end; - if (helix_list_.end()!=i+1 && (*(i+1)).start.GetNum()<=end.GetNum()+1 && - (*(i+1)).end.GetNum()>end.GetNum()) { - end=mol::ResNum((*(i+1)).start.GetNum()-2); - } - chain.AssignSecondaryStructure(alpha, start, end); + chain.AssignSecondaryStructure(alpha, i->start, i->end); } for (HSList::const_iterator i=strand_list_.begin(), e=strand_list_.end(); @@ -540,13 +531,7 @@ void PDBReader::AssignSecStructure(mol::EntityHandle ent) continue; } mol::SecStructure extended(mol::SecStructure::EXTENDED); - mol::ResNum start=i->start, end=i->end; - // see comment for helix assignment - if (strand_list_.end()!=i+1 && (*(i+1)).start.GetNum()<=end.GetNum()+1 && - (*(i+1)).end.GetNum()>end.GetNum()) { - end=mol::ResNum((*(i+1)).start.GetNum()-2); - } - chain.AssignSecondaryStructure(extended, start, end); + chain.AssignSecondaryStructure(extended, i->start, i->end); } } diff --git a/modules/io/src/mol/star_parser.cc b/modules/io/src/mol/star_parser.cc index 80043259b4df5fdf34ee81de6bf471696d03bc5a..214eb626c9f743a1cbbb218bede0ad7f795b2f73 100644 --- a/modules/io/src/mol/star_parser.cc +++ b/modules/io/src/mol/star_parser.cc @@ -550,11 +550,15 @@ void StarParser::Parse() case 'd': if (tline.length()>=5 && StringRef("data_", 5)==tline.substr(0, 5)) { this->ParseData(); + } else { + throw IOException("Missing 'data_' control structure"); } break; case 'g': if (tline.length()>=7 && StringRef("global_", 7)==tline.substr(0, 7)) { this->ParseGlobal(); + } else { + throw IOException("Missing 'global_' control structure"); } break; case '#': diff --git a/modules/io/src/mol/stereochemical_params_reader.cc b/modules/io/src/mol/stereochemical_params_reader.cc new file mode 100644 index 0000000000000000000000000000000000000000..f0317a3ea19ae8dd78b675a4c08576555e5e659a --- /dev/null +++ b/modules/io/src/mol/stereochemical_params_reader.cc @@ -0,0 +1,64 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#include <boost/filesystem/fstream.hpp> +#include <ost/platform.hh> +#include <ost/mol/alg/filter_clashes.hh> +#include <ost/io/stereochemical_params_reader.hh> + + +namespace { + std::vector<String> ReadStereoChemicalFile(const String& filename){ + boost::filesystem::path loc(filename); + boost::filesystem::ifstream infile(loc); + if (!infile) { + std::stringstream serr; + serr << "Could not find " << filename; + throw ost::Error(serr.str()); + } + std::vector<String> stereo_chemical_props; + String line; + while (std::getline(infile, line)) + { + std::stringstream line_stream(line); + stereo_chemical_props.push_back(line); + } + + return stereo_chemical_props; + } +} + +namespace ost { namespace io { + +StereoChemicalParamsReader::StereoChemicalParamsReader() { + filename = ost::GetSharedDataPath() + "/stereo_chemical_props.txt"; +} + +StereoChemicalParamsReader::StereoChemicalParamsReader(const String& init_filename): filename(init_filename) {} + +void StereoChemicalParamsReader::Read(bool check) { + std::vector<String> stereo_chemical_props = ReadStereoChemicalFile(filename); + // Bonds + bond_table = ost::mol::alg::FillStereoChemicalParams("Bond", stereo_chemical_props, check); + // Angles + angle_table = ost::mol::alg::FillStereoChemicalParams("Angle", stereo_chemical_props, check); + // Not bonded + nonbonded_table = ost::mol::alg::FillClashingDistances(stereo_chemical_props, check); +} +}} // ns + diff --git a/modules/io/src/mol/stereochemical_params_reader.hh b/modules/io/src/mol/stereochemical_params_reader.hh new file mode 100644 index 0000000000000000000000000000000000000000..f8990f55268de150d6f17a28160f53e695781bfa --- /dev/null +++ b/modules/io/src/mol/stereochemical_params_reader.hh @@ -0,0 +1,40 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#ifndef OST_IO_STEREOCHEMICAL_PARAMS_READER_H +#define OST_IO_STEREOCHEMICAL_PARAMS_READER_H + +#include <ost/io/module_config.hh> +#include <ost/mol/alg/filter_clashes.hh> + +namespace ost { namespace io { + +struct StereoChemicalParamsReader { + String filename; + ost::mol::alg::StereoChemicalParams bond_table; + ost::mol::alg::StereoChemicalParams angle_table; + ost::mol::alg::ClashingDistances nonbonded_table; + + StereoChemicalParamsReader(); + StereoChemicalParamsReader(const String& filename); + void Read(bool check=false); +}; + +}} // ns + +#endif \ No newline at end of file diff --git a/modules/io/tests/test_star_parser.cc b/modules/io/tests/test_star_parser.cc index 6ff217b748c123770f789a2168d540be3f1ef94d..c4e40fdbda137d74293282a592884fb5aa7e8305 100644 --- a/modules/io/tests/test_star_parser.cc +++ b/modules/io/tests/test_star_parser.cc @@ -389,6 +389,20 @@ BOOST_AUTO_TEST_CASE(star_items_as_row) BOOST_TEST_MESSAGE(" done."); } +BOOST_AUTO_TEST_CASE(star_broken_data) +{ + std::ifstream s("testfiles/broken_data.cif"); + LoopTestParser star_p(s); + BOOST_CHECK_THROW(star_p.Parse(), IOException); +} + +BOOST_AUTO_TEST_CASE(star_broken_global) +{ + std::ifstream s("testfiles/broken_global.cif"); + LoopTestParser star_p(s); + BOOST_CHECK_THROW(star_p.Parse(), IOException); +} + BOOST_AUTO_TEST_CASE(star_missing_data) { std::ifstream s("testfiles/missing_data.cif"); diff --git a/modules/io/tests/testfiles/broken_data.cif b/modules/io/tests/testfiles/broken_data.cif new file mode 100644 index 0000000000000000000000000000000000000000..2ae242255c2bd8c6e952ffee6d1815ee01f6335a --- /dev/null +++ b/modules/io/tests/testfiles/broken_data.cif @@ -0,0 +1 @@ +d lines starting with a 'd' kicked the parser into an infinite loop. diff --git a/modules/io/tests/testfiles/broken_global.cif b/modules/io/tests/testfiles/broken_global.cif new file mode 100644 index 0000000000000000000000000000000000000000..0f340e0befe0077229b53b121979181ec3c890fc --- /dev/null +++ b/modules/io/tests/testfiles/broken_global.cif @@ -0,0 +1 @@ +g lines starting with a 'g' kicked the parser into an infinite loop. diff --git a/modules/mol/alg/doc/lddt.rst b/modules/mol/alg/doc/lddt.rst index 3d3e862c3fbc61682eb880fe3e738365b8cb1501..6529c47758e29d8fa292c475d7a212ce25cc9e56 100644 --- a/modules/mol/alg/doc/lddt.rst +++ b/modules/mol/alg/doc/lddt.rst @@ -1,6 +1,6 @@ -====== +==== lDDT -====== +==== ------------------------------------- Where can I find the lDDT executable? @@ -180,3 +180,69 @@ For example: WARNING: Verbosity levels 1 and 2 can generate a large amount of output text, especially with large structures and multiple models being evaluated. + +=============== +lDDT Python API +=============== + +One can replicate the binary using simple python script: + +.. code-block:: python + + #! /bin/env python + """Run lDDT from within script.""" + from ost.io import LoadPDB + from ost.mol.alg import (CleanlDDTReferences, + PreparelDDTGlobalRDMap, + lDDTSettings, + CheckStructure, + LocalDistDiffTest, + GetlDDTPerResidueStats, + PrintlDDTPerResidueStats) + from ost.io import StereoChemicalParamsReader + + model_path = "Path to your model pdb file" + reference_path = "Path to your reference pdb file" + # + # Load model and prepare its view + model = LoadPDB(model_path) + model_view = model.GetChainList()[0].Select("peptide=true") + # + # Prepare references - it should be alist of EntityView(s) + references = [LoadPDB(reference_path).CreateFullView()] + # + # Initialize settings with default parameters and print them + settings = lDDTSettings() + settings.PrintParameters() + + # + # Clean up references + CleanlDDTReferences(references) + # + # Prepare residue map from references + rdmap = PreparelDDTGlobalRDMap(references, settings) + # + # This part is optional and it depends on our settings parameter + if settings.structural_checks: + stereochemical_parameters = StereoChemicalParamsReader( + settings.parameter_file_path) + stereochemical_parameters.Read() + CheckStructure(ent=model_view, + bond_table=stereochemical_parameters.bond_table, + angle_table=stereochemical_parameters.angle_table, + nonbonded_table=stereochemical_parameters.nonbonded_table, + bond_tolerance=settings.bond_tolerance, + angle_tolerance=settings.angle_tolerance) + # + # Calculate lDDT + LocalDistDiffTest(model_view, references, rdmap, settings) + # + # Get the local scores + local_scores = GetlDDTPerResidueStats(model, rdmap, settings) + # + # Pring local scores + PrintlDDTPerResidueStats(local_scores, settings) + +This can be useful when we already have an models and references already read +in the memory and we do not want run the binary. +Please refere to specific function documentation for more details. diff --git a/modules/mol/alg/doc/molalg.rst b/modules/mol/alg/doc/molalg.rst index 1641b6b495143c263417018e8d86224bfadca955..d4b71376c848fc14e9e0dcb1abecb7421e8debc1 100644 --- a/modules/mol/alg/doc/molalg.rst +++ b/modules/mol/alg/doc/molalg.rst @@ -59,6 +59,24 @@ Local Distance Test scores (lDDT, DRMSD) :returns: a tuple containing the counts of the conserved distances in the model and of all the checked distances +.. function:: LocalDistDiffTest(model, reference_list, distance_list, settings) + + Wrapper around :func:`LocalDistDiffTest` above. + + :param model: the model structure + :type model: :class:`~ost.mol.EntityView` + :param reference_list: the list of reference structures from which distances were derived + :type reference_list: :class:`list` of :class:`~ost.mol.EntityView` + :param distance_list: A residue distance map prepared with :func:`PreparelDDTGlobalRDMap` + with *reference_list* and *settings* as parameters. + :type distance_list: :class:`~ost.mol.alg.GlobalRDMap` + :param settings: lDDT settings + :type settings: :class:`~ost.mol.alg.lDDTSettings` + + :returns: the Local Distance Difference Test score (conserved distances + divided by all the checked distances) + :rtype: :class:`float` + .. function:: LocalDistDiffTest(model, target, cutoff, max_dist, \ local_lddt_property_string="") @@ -287,22 +305,283 @@ Local Distance Test scores (lDDT, DRMSD) :returns: :class:`~ost.mol.alg.GlobalRDMap` - -.. class:: UniqueAtomIdentifier - Object containing enough information to uniquely identify an atom in a structure +.. function:: PreparelDDTGlobalRDMap(reference_list, settings) - .. method:: UniqueAtomIdentifier(chain,residue_number,residue_name,atom_name) - - Creates an UniqueAtomIdentifier object starting from relevant atom information + A wrapper around :func:`CreateDistanceList` and + :func:`CreateDistanceListFromMultipleReferences`. Depending on the length of + the ``reference_list`` it calls one or the other. + + :param reference_list: a list of reference structures from which distances are + derived + :type reference_list: list of :class:`~ost.mol.EntityView` + :param settings: lDDT settings + :type settings: :class:`~ost.mol.alg.lDDTSettings` + + :returns: :class:`~ost.mol.alg.GlobalRDMap` + + +.. function:: CleanlDDTReferences(reference_list) + + Prepares references to be used in lDDT calculation. It checks if all references + has the same chain name and selects this chain for for further calculations. + + .. warning:: + + This function modifies the passed *reference_list* list. + + :param reference_list: A list of reference structures from which distances are + derived + :type reference_list: :class:`list` of :class:`~ost.mol.EntityView` + + +.. function:: CheckStructure(ent, \ + bond_table, \ + angle_table, \ + nonbonded_table, \ + bond_tolerance, \ + angle_tolerance) + + Perform structural checks and filters the structure. + + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityView` + :param bond_table: List of bond stereo chemical parameters obtained from + :class:`~ost.io.StereoChemicalParamsReader` or :func:`FillStereoChemicalParams` + :type bond_table: :class:`~ost.mol.alg.StereoChemicalParams` + :param angle_table: List of angle stereo chemical parameters obtained from + :class:`~ost.io.StereoChemicalParamsReader` or :func:`FillStereoChemicalParams` + :type angle_table: :class:`~ost.mol.alg.StereoChemicalParams` + :param nonbonded_table: Information about the clashing distances obtained from + :class:`~ost.io.StereoChemicalParamsReader` or :func:`FillClashingDistances` + :type nonbonded_table: :class:`~ost.mol.alg.ClashingDistances` + :param bond_tolerance: Tolerance in stddev for bonds + :type bond_tolerance: float + :param angle_tolerance: Tolerance in stddev for angles + :type angle_tolerance: float + + +.. function:: GetlDDTPerResidueStats(model, distance_list, settings) + + Get the per-residue statistics from the lDDT calculation. + + :param model: The model structure + :type model: :class:`~ost.mol.EntityHandle` + :param distance_list: The list of distances to check for conservation + :type distance_list: :class:`~ost.mol.alg.GlobalRDMap` + :param settings: lDDT settings + :type settings: :class:`~ost.mol.alg.lDDTSettings` + :returns: Per-residue local lDDT scores + :rtype: :class:`list` of :class:`~ost.mol.alg.lDDTLocalScore` + + +.. function:: PrintlDDTPerResidueStats(scores, settings) + + Print per-residue statistics from lDDT calculation. + + :param scores: Local lDDT scores + :type scores: :class:`list` of :class:`~ost.mol.alg.lDDTLocalScore` + :param settings: lDDT settings + :type settings: :class:`~ost.mol.alg.lDDTSettings` + + +.. class:: lDDTLocalScore(cname, rname, rnum, is_assessed, quality_problems, \ + local_lddt, conserved_dist, total_dist) + + Object containing per-residue information about calculated lDDT. + + :param cname: Sets :attr:`cname` + :param rname: Sets :attr:`rname` + :param rnum: Sets :attr:`rnum` + :param is_assessed: Sets :attr:`is_assessed` + :param quality_problems: Sets :attr:`quality_problems` + :param local_lddt: Sets :attr:`local_lddt` + :param conserved_dist: Sets :attr:`conserved_dist` + :param total_dist: Sets :attr:`total_dist` + + .. attribute:: cname + + Chain name. + + :type: :class:`str` + + .. attribute:: rname + + Residue name. + + :type: :class:`str` + + .. attribute:: rnum + + Residue number. + + :type: :class:`int` + + .. attribute:: is_assessed + + Is the residue taken into account? Yes or No. + + :type: :class:`str` + + .. attribute:: quality_problems + + Does the residue have quality problems? + No if there are no problems, NA if the problems were not assessed, Yes if + there are sidechain problems and Yes+ if there are backbone problems. + + :type: :class:`str` + + .. attribute:: local_lddt + + Local lDDT score for residue. + + :type: :class:`float` + + .. attribute:: conserved_dist + + Number of conserved distances. + + :type: :class:`int` + + .. attribute:: total_dist + + Total number of conserved distances. + + :type: :class:`int` + + .. method:: ToString(structural_checks) + + :return: String representation of the lDDTLocalScore object. + :rtype: :class:`str` - :param chain: a string containing the name of the chain to which the atom - belongs - :param residue_number: the number of the residue to which the atom belongs - :type residue_number: :class:`~ost.mol.ResNum` - :param residue_name: a string containing the name of the residue to which - the atom belongs - :param atom_name: a string containing the name of the atom + :param structural_checks: Where structural checks applied during calculations? + :type structural_checks: bool + + .. method:: GetHeader(structural_checks, cutoffs_length) + + Get the names of the fields as printed by ToString method. + + :param structural_checks: Where structural checks applied during calculations? + :type structural_checks: bool + :param cutoffs_length: Length of the cutoffs list used for calculations + :type cutoffs_length: int + + +.. class:: lDDTSettings(bond_tolerance=12, \ + angle_tolerance=12, \ + radius=15, \ + sequence_separation=0, \ + sel="", \ + parameter_file_path="", \ + structural_checks=True, \ + consistency_checks=True, \ + cutoffs=(0.5, 1.0, 2.0, 4.0), \ + label="locallddt") + + Object containing the settings used for lDDT calculations. + + :param bond_tolerance: Sets :attr:`bond_tolerance`. + :param angle_tolerance: Sets :attr:`angle_tolerance`. + :param radius: Sets :attr:`radius`. + :param sequence_separation: Sets :attr:`sequence_separation`. + :param sel: Sets :attr:`sel`. + :param parameter_file_path: Sets :attr:`parameter_file_path`. + :param structural_checks: Sets :attr:`structural_checks`. + :param consistency_checks: Sets :attr:`consistency_checks`. + :param cutoffs: Sets :attr:`cutoffs`. + :param label: Sets :attr:`label`. + + .. attribute:: bond_tolerance + + Tolerance in stddevs for bonds. + + :type: :class:`float` + + .. attribute:: angle_tolerance + + Tolerance in stddevs for angles. + + :type: :class:`float` + + .. attribute:: radius + + Distance inclusion radius. + + :type: :class:`float` + + .. attribute:: sequence_separation + + Sequence separation. + + :type: :class:`int` + + .. attribute:: sel + + Selection performed on reference(s). + + :type: :class:`str` + + .. attribute:: parameter_file_path + + Path to the stereochemical parameter file. If set to "", it the default file + shipped with OpenStructure is used (see + :class:`~ost.io.StereoChemicalParamsReader`). + + :type: :class:`str` + + .. attribute:: structural_checks + + Are structural checks and filter input data on? + + :type: :class:`bool` + + .. attribute:: consistency_checks + + Are consistency checks on? + + :type: :class:`bool` + + .. attribute:: cutoffs + + List of thresholds used to determine distance conservation. + + :type: :class:`list` of :class:`float` + + .. attribute:: label + + The base name for the ResidueHandle properties that store the local scores. + + :type: :class:`str` + + .. method:: SetStereoChemicalParamsPath(path) + + Set the path to the stereochemical parameter file. + + :param path: Path to stereochemical parameter file + :type path: str + + .. method:: PrintParameters() + + Print settings. + + .. method:: ToString() + + :return: String representation of the lDDTSettings object. + :rtype: :class:`str` + + +.. class:: UniqueAtomIdentifier(chain, residue_number, residue_name, atom_name) + + Object containing enough information to uniquely identify an atom in a + structure. + + :param chain: A string containing the name of the chain to which the atom + belongs + :param residue_number: The number of the residue to which the atom belongs + :type residue_number: :class:`~ost.mol.ResNum` + :param residue_name: A string containing the name of the residue to which + the atom belongs + :param atom_name: A string containing the name of the atom .. method:: GetChainName() @@ -957,6 +1236,102 @@ Algorithms on Structures :class:`~ost.mol.EntityHandle` + +.. class:: FindMemParam + + Result object for the membrane detection algorithm described below + + .. attribute:: axis + + initial search axis from which optimal membrane slab could be found + + .. attribute:: tilt_axis + + Axis around which we tilt the membrane starting from the initial axis + + .. attribute:: tilt + + Angle to tilt around tilt axis + + .. attribute:: angle + + After the tilt operation we perform a rotation around the initial axis + with this angle to get the final membrane axis + + .. attribute:: membrane_axis + + The result of applying the tilt and rotation procedure described above. + The membrane_axis is orthogonal to the membrane plane and has unit length. + + .. attribute:: pos + + Real number that describes the membrane center point. To get the actual + position you can do: pos * membrane_axis + + .. attribute:: width + + Total width of the membrane in A + + .. attribute:: energy + + Pseudo energy of the implicit solvation model + + .. attribute:: membrane_representation + + Dummy atoms that represent the membrane. This entity is only valid if + the according flag has been set to True when calling FindMembrane. + + +.. method:: FindMembrane(ent, assign_membrane_representation=True, fast=False) + + Estimates the optimal membrane position of a protein by using an implicit + solvation model. The original algorithm and the used energy function are + described in: Lomize AL, Pogozheva ID, Lomize MA, Mosberg HI (2006) + Positioning of proteins in membranes: A computational approach. + + There are some modifications in this implementation and the procedure is + as follows: + + * Initial axis are constructed that build the starting point for initial + parameter grid searches. + + * For every axis, the protein is rotated so that the axis builds the z-axis + + * In order to exclude internal hydrophilic pores, only the outermost atoms + with respect the the z-axis enter an initial grid search + * The width and position of the membrane is optimized for different + combinations of tilt and rotation angles (further described in + :class:`FindMemParam`). The top 20 parametrizations + (only top parametrization if *fast* is True) are stored for further + processing. + + * The 20 best membrane parametrizations from the initial grid search + (only the best if *fast* is set to True) enter a final + minimization step using a Levenberg-Marquardt minimizer. + + + :param ent: Entity of a transmembrane protein, you'll get weird + results if this is not the case. The energy term + of the result is typically a good indicator whether + *ent* is an actual transmembrane protein. + :type ent: :class:`ost.mol.EntityHandle` / :class:`ost.mol.EntityView` + + :param assign_membrane_representation: Whether to construct a membrane + representation using dummy atoms + + :type assign_membrane_representation: :class:`bool` + + :param fast: If set to false, the 20 best results of the initial grid + search undergo a Levenberg-Marquardt minimization and + the parametrization with optimal minimized energy is + returned. + If set to yes, only the best result of the initial grid + search is selected and returned after + Levenberg-Marquardt minimization. + + :returns: The results object + :rtype: :class:`ost.mol.alg.FindMemParam` + .. _traj-analysis: Trajectory Analysis @@ -1187,3 +1562,282 @@ used to skip frames in the analysis. .. automodule:: ost.mol.alg.structure_analysis :members: + +.. _mapping-functions: + +Mapping functions +-------------------------------------------------------------------------------- + +.. currentmodule:: ost.mol.alg + +The following functions help to convert one residue into another by reusing as +much as possible from the present atoms. They are mainly meant to map from +standard amino acid to other standard amino acids or from modified amino acids +to standard amino acids. + +.. function:: CopyResidue(src_res, dst_res, editor) + + Copies the atoms of ``src_res`` to ``dst_res`` using the residue names + as guide to decide which of the atoms should be copied. If ``src_res`` and + ``dst_res`` have the same name, or ``src_res`` is a modified version of + ``dst_res`` (i.e. have the same single letter code), CopyConserved will be + called, otherwise CopyNonConserved will be called. + + :param src_res: The source residue + :type src_res: :class:`~ost.mol.ResidueHandle` + :param dst_res: The destination residue + :type dst_res: :class:`~ost.mol.ResidueHandle` + + :returns: true if the residue could be copied, false if not. + +.. function:: CopyConserved(src_res, dst_res, editor) + + Copies the atoms of ``src_res`` to ``dst_res`` assuming that the parent + amino acid of ``src_res`` (or ``src_res`` itself) are identical to ``dst_res``. + + If ``src_res`` and ``dst_res`` are identical, all heavy atoms are copied + to ``dst_res``. If ``src_res`` is a modified version of ``dst_res`` and the + modification is a pure addition (e.g. the phosphate group of phosphoserine), + the modification is stripped off and all other heavy atoms are copied to + ``dst_res``. If the modification is not a pure addition, only the backbone + heavy atoms are copied to ``dst_res``. + + Additionally, the selenium atom of ``MSE`` is converted to sulphur. + + :param src_res: The source residue + :type src_res: :class:`~ost.mol.ResidueHandle` + :param dst_res: The destination residue + :type dst_res: :class:`~ost.mol.ResidueHandle` + + :returns: a tuple of bools stating whether the residue could be copied and + whether the Cbeta atom was inserted into the ``dst_res``. + +.. function:: CopyNonConserved(src_res, dst_res, editor) + + Copies the heavy backbone atoms and Cbeta (except for ``GLY``) of ``src_res`` + to ``dst_res``. + + :param src_res: The source residue + :type src_res: :class:`~ost.mol.ResidueHandle` + :param dst_res: The destination residue + :type dst_res: :class:`~ost.mol.ResidueHandle` + + :returns: a tuple of bools stating whether the residue could be copied and + whether the Cbeta atom was inserted into the ``dst_res``. + + +Molecular Checker (Molck) +-------------------------------------------------------------------------------- + +Programmatic usage +################## + +Molecular Checker (Molck) could be called directly from the code using Molck +function: + +.. code-block:: python + + #! /bin/env python + + """Run Molck with Python API. + + + This is an exemplary procedure on how to run Molck using Python API which is + equivalent to the command line: + + molck <PDB PATH> --rm=hyd,oxt,nonstd,unk \ + --fix-ele --out=<OUTPUT PATH> \ + --complib=<PATH TO compounds.chemlib> + """ + + from ost.io import LoadPDB, SavePDB + from ost.mol.alg import MolckSettings, Molck + + from ost.conop import CompoundLib + + + pdbid = "<PDB PATH>" + lib = CompoundLib.Load("<PATH TO compounds.chemlib>") + + # Using Molck function + ent = LoadPDB(pdbid) + ms = MolckSettings(rm_unk_atoms=True, + rm_non_std=True, + rm_hyd_atoms=True, + rm_oxt_atoms=True, + rm_zero_occ_atoms=False, + colored=False, + map_nonstd_res=False, + assign_elem=True) + Molck(ent, lib, ms) + SavePDB(ent, "<OUTPUT PATH>") + +It can also be split into subsequent commands for greater controll: + +.. code-block:: python + + #! /bin/env python + + """Run Molck with Python API. + + + This is an exemplary procedure on how to run Molck using Python API which is + equivalent to the command line: + + molck <PDB PATH> --rm=hyd,oxt,nonstd,unk \ + --fix-ele --out=<OUTPUT PATH> \ + --complib=<PATH TO compounds.chemlib> + """ + + from ost.io import LoadPDB, SavePDB + from ost.mol.alg import (RemoveAtoms, MapNonStandardResidues, + CleanUpElementColumn) + from ost.conop import CompoundLib + + + pdbid = "<PDB PATH>" + lib = CompoundLib.Load("<PATH TO compounds.chemlib>") + map_nonstd = False + + # Using function chain + ent = LoadPDB(pdbid) + if map_nonstd: + MapNonStandardResidues(lib=lib, ent=ent) + + RemoveAtoms(lib=lib, + ent=ent, + rm_unk_atoms=True, + rm_non_std=True, + rm_hyd_atoms=True, + rm_oxt_atoms=True, + rm_zero_occ_atoms=False, + colored=False) + + CleanUpElementColumn(lib=lib, ent=ent) + SavePDB(ent, "<OUTPUT PATH>") + +API +### + +.. class:: MolckSettings(rm_unk_atoms=False, rm_non_std=False, \ + rm_hyd_atoms=True, rm_oxt_atoms=False, \ + rm_zero_occ_atoms=False, colored=False, \ + map_nonstd_res=True, assign_elem=True) + + Stores settings used for Molecular Checker. + + :param rm_unk_atoms: Sets :attr:`rm_unk_atoms`. + :param rm_non_std: Sets :attr:`rm_non_std`. + :param rm_hyd_atoms: Sets :attr:`rm_hyd_atoms`. + :param rm_oxt_atoms: Sets :attr:`rm_oxt_atoms`. + :param rm_zero_occ_atoms: Sets :attr:`rm_zero_occ_atoms`. + :param colored: Sets :attr:`colored`. + :param map_nonstd_res: Sets :attr:`map_nonstd_res`. + :param assign_elem: Sets :attr:`assign_elem`. + + .. attribute:: rm_unk_atoms + + Remove unknown and atoms not following the nomenclature. + + :type: :class:`bool` + + .. attribute:: rm_non_std + + Remove all residues not one of the 20 standard amino acids + + :type: :class:`bool` + + .. attribute:: rm_hyd_atoms + + Remove hydrogen atoms + + :type: :class:`bool` + + .. attribute:: rm_oxt_atoms + + Remove terminal oxygens + + :type: :class:`bool` + + .. attribute:: rm_zero_occ_atoms + + Remove atoms with zero occupancy + + :type: :class:`bool` + + .. attribute:: colored + + Whether output should be colored + + :type: :class:`bool` + + .. attribute:: map_nonstd_res + + Maps modified residues back to the parent amino acid, for example + MSE -> MET, SEP -> SER + + :type: :class:`bool` + + .. attribute:: assign_elem + + Clean up element column + + :type: :class:`bool` + + + .. method:: ToString() + + :return: String representation of the MolckSettings. + :rtype: :class:`str` + +.. warning:: + + The API here is set such that the functions modify the passed structure *ent* + in-place. If this is not ok, please work on a copy of the structure. + +.. function:: Molck(ent, lib, settings) + + Runs Molck on provided entity. + + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityHandle` + :param lib: Compound library + :type lib: :class:`~ost.conop.CompoundLib` + :param settings: Molck settings + :type settings: :class:`MolckSettings` + + +.. function:: MapNonStandardResidues(ent, lib) + + Maps modified residues back to the parent amino acid, for example MSE -> MET. + + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityHandle` + :param lib: Compound library + :type lib: :class:`~ost.conop.CompoundLib` + +.. function:: RemoveAtoms(ent, lib, rm_unk_atoms=False, rm_non_std=False, \ + rm_hyd_atoms=True, rm_oxt_atoms=False, \ + rm_zero_occ_atoms=False, colored=False) + + Removes atoms and residues according to some criteria. + + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityHandle` + :param lib: Compound library + :type lib: :class:`~ost.conop.CompoundLib` + :param rm_unk_atoms: See :attr:`MolckSettings.rm_unk_atoms` + :param rm_non_std: See :attr:`MolckSettings.rm_non_std` + :param rm_hyd_atoms: See :attr:`MolckSettings.rm_hyd_atoms` + :param rm_oxt_atoms: See :attr:`MolckSettings.rm_oxt_atoms` + :param rm_zero_occ_atoms: See :attr:`MolckSettings.rm_zero_occ_atoms` + :param colored: See :attr:`MolckSettings.colored` + +.. function:: CleanUpElementColumn(ent, lib) + + Clean up element column. + + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityHandle` + :param lib: Compound library + :type lib: :class:`~ost.conop.CompoundLib` \ No newline at end of file diff --git a/modules/mol/alg/doc/molck.rst b/modules/mol/alg/doc/molck.rst new file mode 100644 index 0000000000000000000000000000000000000000..5936b0584c5b1c3fb8697bb8151859a0a28b017d --- /dev/null +++ b/modules/mol/alg/doc/molck.rst @@ -0,0 +1,66 @@ +========================= +Molecular Checker (Molck) +========================= + +-------------------------------------- +Where can I find the Molck executable? +-------------------------------------- + +The Molck executable can be found at <YOUR-OST-STAGE-DIR>/bin + +----------- +Basic Usage +----------- + +To check one PDB file (struc1.pdb) with Molck, use the following command: + +.. code-block:: bash + + molck --complib <PATH TO COMPOUND LIB> struc1.pdb + +The checked and cleaned file will be saved by default ad struc1-molck.pdb. + +Similarly it is possible to check a list of PDB files: + +.. code-block:: bash + + molck --complib <PATH TO COMPOUND LIB> struc1.pdb struc2.pdb struc3.pdb + + +----------- +All Options +----------- + +The molck executable supports several other command line options, +please find them following: + +.. code-block:: bash + + usage: molck [options] file1.pdb [file2.pdb [...]] + options + --complib=path location of the compound library file. If not provided, the + following locations are searched in this order: + 1. Working directory, + 2. OpenStructure standard library location (if the + executable is part of a standard OpenStructure installation) + --rm=<a>,<b> remove atoms and residues matching some criteria: + - zeroocc - Remove atoms with zero occupancy + - hyd - Remove hydrogen atoms + - oxt - Remove terminal oxygens + - nonstd - Remove all residues not one of the 20 standard amino acids + - unk - Remove unknown and atoms not following the nomenclature + --fix-ele clean up element column + --stdout write cleaned file(s) to stdout + --out=filename write cleaned file(s) to disk. % characters in the filename are + replaced with the basename of the input file without extension. + Default: %-molcked.pdb + --color=auto|on|off whether output should be colored + --map-nonstd maps modified residues back to the parent amino acid, for example + MSE -> MET, SEP -> SER. + +================ +Molck Python API +================ + +Within OST, one can also call the :func:`~ost.mol.alg.Molck` function directly +on entities to get the same effect as with the binary. diff --git a/modules/mol/alg/pymod/CMakeLists.txt b/modules/mol/alg/pymod/CMakeLists.txt index fa2942575835fd70e480563148784918bad58122..f4d1fdb71b68519b303d5df13dea34a2d529c230 100644 --- a/modules/mol/alg/pymod/CMakeLists.txt +++ b/modules/mol/alg/pymod/CMakeLists.txt @@ -7,6 +7,9 @@ set(OST_MOL_ALG_PYMOD_SOURCES export_contact_overlap.cc export_accessibility.cc export_sec_structure.cc + export_non_standard.cc + export_molck.cc + export_membrane.cc ) set(OST_MOL_ALG_PYMOD_MODULES diff --git a/modules/mol/alg/pymod/export_membrane.cc b/modules/mol/alg/pymod/export_membrane.cc new file mode 100644 index 0000000000000000000000000000000000000000..fe8ebe52de9e43c2a98499718f1951015794453f --- /dev/null +++ b/modules/mol/alg/pymod/export_membrane.cc @@ -0,0 +1,59 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2017 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + + +#include <boost/python.hpp> + +#include <ost/mol/alg/find_membrane.hh> + +using namespace boost::python; + +namespace{ + ost::mol::alg::FindMemParam FindMembraneView(ost::mol::EntityView& v, + bool assign_membrane_representation, + bool fast) { + return ost::mol::alg::FindMembrane(v, assign_membrane_representation, fast); + } + + ost::mol::alg::FindMemParam FindMembraneHandle(ost::mol::EntityHandle& h, + bool assign_membrane_representation, + bool fast) { + return ost::mol::alg::FindMembrane(h, assign_membrane_representation, fast); + } +} + +void export_find_membrane() { + + class_<ost::mol::alg::FindMemParam>("FindMemParam", no_init) + .def_readonly("tilt", &ost::mol::alg::FindMemParam::tilt) + .def_readonly("angle", &ost::mol::alg::FindMemParam::angle) + .def_readonly("width", &ost::mol::alg::FindMemParam::width) + .def_readonly("pos", &ost::mol::alg::FindMemParam::pos) + .def_readonly("energy", &ost::mol::alg::FindMemParam::energy) + .def_readonly("axis", &ost::mol::alg::FindMemParam::axis) + .def_readonly("tilt_axis", &ost::mol::alg::FindMemParam::tilt_axis) + .def_readonly("membrane_axis", &ost::mol::alg::FindMemParam::GetMembraneAxis) + .def_readonly("membrane_representation", &ost::mol::alg::FindMemParam::membrane_representation) + ; + + def("FindMembrane", FindMembraneView, (arg("ent"), arg("assign_membrane_representation")=true, + arg("fast")=false)); + def("FindMembrane", FindMembraneHandle, (arg("ent"), arg("assign_membrane_representation")=true, + arg("fast")=false)); +} diff --git a/modules/mol/alg/pymod/export_molck.cc b/modules/mol/alg/pymod/export_molck.cc new file mode 100644 index 0000000000000000000000000000000000000000..e5e27dac593ebaf321c1b2fd5558ff53f209f543 --- /dev/null +++ b/modules/mol/alg/pymod/export_molck.cc @@ -0,0 +1,137 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#include <stdexcept> +#include <boost/python.hpp> +#include <boost/python/raw_function.hpp> + +using namespace boost::python; + +#include <ost/mol/alg/molck.hh> + +using namespace ost::mol::alg; + +namespace { + +object MolckSettingsInitWrapper(tuple args, dict kwargs){ + + object self = args[0]; + args = tuple(args.slice(1,_)); + + bool rm_unk_atoms = false; + if(kwargs.contains("rm_unk_atoms")){ + rm_unk_atoms = extract<bool>(kwargs["rm_unk_atoms"]); + kwargs["rm_unk_atoms"].del(); + } + + bool rm_non_std = false; + if(kwargs.contains("rm_non_std")){ + rm_non_std = extract<bool>(kwargs["rm_non_std"]); + kwargs["rm_non_std"].del(); + } + + bool rm_hyd_atoms = true; + if(kwargs.contains("rm_hyd_atoms")){ + rm_hyd_atoms = extract<bool>(kwargs["rm_hyd_atoms"]); + kwargs["rm_hyd_atoms"].del(); + } + + bool rm_oxt_atoms = false; + if(kwargs.contains("rm_oxt_atoms")){ + rm_oxt_atoms = extract<bool>(kwargs["rm_oxt_atoms"]); + kwargs["rm_oxt_atoms"].del(); + } + + bool rm_zero_occ_atoms = false; + if(kwargs.contains("rm_zero_occ_atoms")){ + rm_zero_occ_atoms = extract<bool>(kwargs["rm_zero_occ_atoms"]); + kwargs["rm_zero_occ_atoms"].del(); + } + + bool colored = false; + if(kwargs.contains("colored")){ + colored = extract<bool>(kwargs["colored"]); + kwargs["colored"].del(); + } + + bool map_nonstd_res = true; + if(kwargs.contains("map_nonstd_res")){ + map_nonstd_res = extract<bool>(kwargs["map_nonstd_res"]); + kwargs["map_nonstd_res"].del(); + } + + bool assign_elem = true; + if(kwargs.contains("assign_elem")){ + assign_elem = extract<bool>(kwargs["assign_elem"]); + kwargs["assign_elem"].del(); + } + + if(len(kwargs) > 0){ + std::stringstream ss; + ss << "Invalid keywords observed when setting up MolckSettings! "; + ss << "Or did you pass the same keyword twice? "; + ss << "Valid keywords are: rm_unk_atoms, rm_non_std, rm_hyd_atoms, "; + ss << "rm_oxt_atoms, rm_zero_occ_atoms, colored, map_nonstd_res, "; + ss << "assign_elem!"; + throw std::invalid_argument(ss.str()); + } + + + return self.attr("__init__")(rm_unk_atoms, + rm_non_std, + rm_hyd_atoms, + rm_oxt_atoms, + rm_zero_occ_atoms, + colored, + map_nonstd_res, + assign_elem); +}} + +void export_Molck() +{ + class_<MolckSettings>("MolckSettings", no_init) + .def("__init__", raw_function(MolckSettingsInitWrapper)) + .def(init<bool , bool, bool, bool, bool, bool, bool, bool>()) + .def("ToString", &MolckSettings::ToString) + .def("__repr__", &MolckSettings::ToString) + .def("__str__", &MolckSettings::ToString) + .def_readwrite("rm_unk_atoms", &MolckSettings::rm_unk_atoms) + .def_readwrite("rm_non_std", &MolckSettings::rm_non_std) + .def_readwrite("rm_hyd_atoms", &MolckSettings::rm_hyd_atoms) + .def_readwrite("rm_oxt_atoms", &MolckSettings::rm_oxt_atoms) + .def_readwrite("rm_zero_occ_atoms", &MolckSettings::rm_zero_occ_atoms) + .def_readwrite("colored", &MolckSettings::colored) + .def_readwrite("map_nonstd_res", &MolckSettings::map_nonstd_res) + .def_readwrite("assign_elem", &MolckSettings::assign_elem); + + def("MapNonStandardResidues", &MapNonStandardResidues, (arg("ent"), + arg("lib"))); + + def("RemoveAtoms", &RemoveAtoms, (arg("ent"), + arg("lib"), + arg("rm_unk_atoms")=false, + arg("rm_non_std")=false, + arg("rm_hyd_atoms")=true, + arg("rm_oxt_atoms")=false, + arg("rm_zero_occ_atoms")=false, + arg("colored")=false)); + + def("CleanUpElementColumn", &CleanUpElementColumn, (arg("ent"), arg("lib"))); + + def("Molck", &Molck, (arg("ent"), arg("lib"), arg("settings"))); +} diff --git a/modules/conop/pymod/export_non_standard.cc b/modules/mol/alg/pymod/export_non_standard.cc similarity index 96% rename from modules/conop/pymod/export_non_standard.cc rename to modules/mol/alg/pymod/export_non_standard.cc index 50b1fd2d6e169e31ff5e192e03b19b98541bf89d..8d485b169ecbd290483daf5f0ab5ca3e06e8da80 100644 --- a/modules/conop/pymod/export_non_standard.cc +++ b/modules/mol/alg/pymod/export_non_standard.cc @@ -18,11 +18,11 @@ //------------------------------------------------------------------------------ #include <boost/python.hpp> #include <ost/mol/mol.hh> -#include <ost/conop/nonstandard.hh> +#include <ost/mol/alg/nonstandard.hh> using namespace boost::python; -using namespace ost::conop; +using namespace ost::mol::alg; using namespace ost::mol; object copy_conserved_handle(ResidueHandle src_res, ResidueHandle dst_res, diff --git a/modules/mol/alg/pymod/wrap_mol_alg.cc b/modules/mol/alg/pymod/wrap_mol_alg.cc index 9f6c7f850fcaa869be30b337b1c94c654e5d2927..913b157748b3febce98b159a742b64afe41054fb 100644 --- a/modules/mol/alg/pymod/wrap_mol_alg.cc +++ b/modules/mol/alg/pymod/wrap_mol_alg.cc @@ -18,6 +18,7 @@ //------------------------------------------------------------------------------ #include <boost/python.hpp> +#include <boost/python/raw_function.hpp> #include <boost/python/suite/indexing/map_indexing_suite.hpp> #include <ost/config.hh> #include <ost/mol/alg/local_dist_diff_test.hh> @@ -39,9 +40,12 @@ void export_svdSuperPose(); void export_TrajectoryAnalysis(); void export_StructureAnalysis(); void export_Clash(); +void export_NonStandard(); +void export_Molck(); void export_contact_overlap(); void export_accessibility(); void export_sec_struct(); +void export_find_membrane(); #if OST_IMG_ENABLED void export_entity_to_density(); #endif @@ -50,6 +54,7 @@ namespace { std::pair<long int,long int> (*lddt_a)(const mol::EntityView&, const mol::alg::GlobalRDMap& , std::vector<Real>, int, const String&)=&mol::alg::LocalDistDiffTest; Real (*lddt_c)(const mol::EntityView&, const mol::EntityView& , Real, Real, const String&)=&mol::alg::LocalDistDiffTest; +// Real (*lddt_d)(const mol::EntityView&, std::vector<mol::EntityView>&, const mol::alg::GlobalRDMap&, mol::alg::lDDTSettings&)=&mol::alg::LocalDistDiffTest; Real (*lddt_b)(const seq::AlignmentHandle&,Real, Real, int, int)=&mol::alg::LocalDistDiffTest; std::pair<mol::EntityView,mol::alg::ClashingInfo> (*fc_a)(const mol::EntityView&, const mol::alg::ClashingDistances&,bool)=&mol::alg::FilterClashes; std::pair<mol::EntityView,mol::alg::ClashingInfo> (*fc_b)(const mol::EntityHandle&, const mol::alg::ClashingDistances&, bool)=&mol::alg::FilterClashes; @@ -58,6 +63,18 @@ std::pair<mol::EntityView,mol::alg::StereoChemistryInfo> (*csc_b)(const mol::Ent mol::CoordGroupHandle (*superpose_frames1)(mol::CoordGroupHandle&, mol::EntityView&, int, int, int)=&mol::alg::SuperposeFrames; mol::CoordGroupHandle (*superpose_frames2)(mol::CoordGroupHandle&, mol::EntityView&, mol::EntityView&, int, int)=&mol::alg::SuperposeFrames; + +Real lddt_d(const mol::EntityView& model, list& reference_list, const mol::alg::GlobalRDMap& distance_list, mol::alg::lDDTSettings& settings){ + int reference_list_length = boost::python::extract<int>(reference_list.attr("__len__")()); + std::vector<ost::mol::EntityView> reference_list_vector(reference_list_length); + + for (int i=0; i<reference_list_length; i++) { + reference_list_vector[i] = boost::python::extract<ost::mol::EntityView>(reference_list[i]); + } + + return LocalDistDiffTest(model, reference_list_vector, distance_list, settings); +} + ost::mol::alg::StereoChemicalParams fill_stereochemical_params_wrapper (const String& header, const list& stereo_chemical_props_file) { int stereo_chemical_props_file_length = boost::python::extract<int>(stereo_chemical_props_file.attr("__len__")()); @@ -82,13 +99,13 @@ ost::mol::alg::ClashingDistances fill_clashing_distances_wrapper (const list& st return ost::mol::alg::FillClashingDistances(stereo_chemical_props_file_vector); } -ost::mol::alg::GlobalRDMap create_distance_list_from_multiple_references(const list& ref_list, const list& cutoff_list, int sequence_separation, Real max_dist) +ost::mol::alg::GlobalRDMap create_distance_list_from_multiple_references(const list& reference_list, const list& cutoff_list, int sequence_separation, Real max_dist) { - int ref_list_length = boost::python::extract<int>(ref_list.attr("__len__")()); - std::vector<ost::mol::EntityView> ref_list_vector(ref_list_length); + int reference_list_length = boost::python::extract<int>(reference_list.attr("__len__")()); + std::vector<ost::mol::EntityView> reference_list_vector(reference_list_length); - for (int i=0; i<ref_list_length; i++) { - ref_list_vector[i] = boost::python::extract<ost::mol::EntityView>(ref_list[i]); + for (int i=0; i<reference_list_length; i++) { + reference_list_vector[i] = boost::python::extract<ost::mol::EntityView>(reference_list[i]); } int cutoff_list_length = boost::python::extract<int>(cutoff_list.attr("__len__")()); @@ -97,10 +114,152 @@ ost::mol::alg::GlobalRDMap create_distance_list_from_multiple_references(const l for (int i=0; i<cutoff_list_length; i++) { cutoff_list_vector[i] = boost::python::extract<Real>(cutoff_list[i]); } - return ost::mol::alg::CreateDistanceListFromMultipleReferences(ref_list_vector, cutoff_list_vector, sequence_separation, max_dist); + return ost::mol::alg::CreateDistanceListFromMultipleReferences(reference_list_vector, cutoff_list_vector, sequence_separation, max_dist); } +object lDDTSettingsInitWrapper(tuple args, dict kwargs){ + + object self = args[0]; + args = tuple(args.slice(1,_)); + + Real bond_tolerance = 12.0; + if(kwargs.contains("bond_tolerance")){ + bond_tolerance = extract<Real>(kwargs["bond_tolerance"]); + kwargs["bond_tolerance"].del(); + } + + Real angle_tolerance = 12.0; + if(kwargs.contains("angle_tolerance")){ + angle_tolerance = extract<Real>(kwargs["angle_tolerance"]); + kwargs["angle_tolerance"].del(); + } + + Real radius = 15.0; + if(kwargs.contains("radius")){ + radius = extract<Real>(kwargs["radius"]); + kwargs["radius"].del(); + } + + int sequence_separation = 0; + if(kwargs.contains("sequence_separation")){ + sequence_separation = extract<int>(kwargs["sequence_separation"]); + kwargs["sequence_separation"].del(); + } + + String sel = ""; + if(kwargs.contains("sel")){ + sel = extract<String>(kwargs["sel"]); + kwargs["sel"].del(); + } + + String parameter_file_path = ""; + if(kwargs.contains("parameter_file_path")){ + parameter_file_path = extract<String>(kwargs["parameter_file_path"]); + kwargs["parameter_file_path"].del(); + } + + bool structural_checks = true; + if(kwargs.contains("structural_checks")){ + structural_checks = extract<bool>(kwargs["structural_checks"]); + kwargs["structural_checks"].del(); + } + bool consistency_checks = true; + if(kwargs.contains("consistency_checks")){ + consistency_checks = extract<bool>(kwargs["consistency_checks"]); + kwargs["consistency_checks"].del(); + } + + std::vector<Real> cutoffs; + if(kwargs.contains("cutoffs")){ + list cutoff_list = extract<list>(kwargs["cutoffs"]); + int cutoff_list_length = boost::python::extract<int>(cutoff_list.attr("__len__")()); + for (int i=0; i<cutoff_list_length; i++) { + cutoffs.push_back(boost::python::extract<Real>(cutoff_list[i])); + } + kwargs["cutoffs"].del(); + } else { + cutoffs.push_back(0.5); + cutoffs.push_back(1.0); + cutoffs.push_back(2.0); + cutoffs.push_back(4.0); + } + + String label = "locallddt"; + if(kwargs.contains("label")){ + label = extract<String>(kwargs["label"]); + kwargs["label"].del(); + } + + if(len(kwargs) > 0){ + std::stringstream ss; + ss << "Invalid keywords observed when setting up MolckSettings! "; + ss << "Or did you pass the same keyword twice? "; + ss << "Valid keywords are: bond_tolerance, angle_tolerance, radius, "; + ss << "sequence_separation, sel, parameter_file_path, structural_checks, "; + ss << "consistency_checks, cutoffs, label!"; + throw std::invalid_argument(ss.str()); + } + + return self.attr("__init__")(bond_tolerance, + angle_tolerance, + radius, + sequence_separation, + sel, + parameter_file_path, + structural_checks, + consistency_checks, + cutoffs, + label); +} + + +void clean_lddt_references_wrapper(const list& reference_list) +{ + int reference_list_length = boost::python::extract<int>(reference_list.attr("__len__")()); + std::vector<ost::mol::EntityView> reference_list_vector(reference_list_length); + + for (int i=0; i<reference_list_length; i++) { + reference_list_vector[i] = boost::python::extract<ost::mol::EntityView>(reference_list[i]); + } + + return ost::mol::alg::CleanlDDTReferences(reference_list_vector); +} + +ost::mol::alg::GlobalRDMap prepare_lddt_global_rdmap_wrapper(const list& reference_list, mol::alg::lDDTSettings settings) +{ + int reference_list_length = boost::python::extract<int>(reference_list.attr("__len__")()); + std::vector<ost::mol::EntityView> reference_list_vector(reference_list_length); + + for (int i=0; i<reference_list_length; i++) { + reference_list_vector[i] = boost::python::extract<ost::mol::EntityView>(reference_list[i]); + } + + return mol::alg::PreparelDDTGlobalRDMap(reference_list_vector, settings); +} + +list get_lddt_per_residue_stats_wrapper(mol::EntityHandle& model, + ost::mol::alg::GlobalRDMap& distance_list, + ost::mol::alg::lDDTSettings& settings) { + std::vector<mol::alg::lDDTLocalScore> scores = GetlDDTPerResidueStats(model, distance_list, settings); + list local_scores_list; + for (std::vector<mol::alg::lDDTLocalScore>::const_iterator sit = scores.begin(); sit != scores.end(); ++sit) { + local_scores_list.append(*sit); + } + return local_scores_list; +} + +void print_lddt_per_residue_stats_wrapper(list& scores, mol::alg::lDDTSettings settings){ + int scores_length = boost::python::extract<int>(scores.attr("__len__")()); + std::vector<mol::alg::lDDTLocalScore> scores_vector(scores_length); + + for (int i=0; i<scores_length; i++) { + scores_vector[i] = boost::python::extract<mol::alg::lDDTLocalScore>(scores[i]); + } + + return mol::alg::PrintlDDTPerResidueStats(scores_vector, settings); +} + } @@ -110,9 +269,12 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) export_TrajectoryAnalysis(); export_StructureAnalysis(); export_Clash(); + export_NonStandard(); + export_Molck(); export_contact_overlap(); export_accessibility(); export_sec_struct(); + export_find_membrane(); #if OST_IMG_ENABLED export_entity_to_density(); #endif @@ -120,6 +282,7 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) def("LocalDistDiffTest", lddt_a, (arg("sequence_separation")=0,arg("local_lddt_property_string")="")); def("LocalDistDiffTest", lddt_c, (arg("local_lddt_property_string")="")); def("LocalDistDiffTest", lddt_b, (arg("ref_index")=0, arg("mdl_index")=1)); + def("LocalDistDiffTest", &lddt_d, (arg("model"), arg("reference_list"), ("distance_list"), arg("settings"))); def("FilterClashes", fc_a, (arg("ent"), arg("clashing_distances"), arg("always_remove_bb")=false)); def("FilterClashes", fc_b, (arg("ent"), arg("clashing_distances"), arg("always_remove_bb")=false)); def("CheckStereoChemistry", csc_a, (arg("ent"), arg("bonds"), arg("angles"), arg("bond_tolerance"), arg("angle_tolerance"), arg("always_remove_bb")=false)); @@ -161,13 +324,59 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) .def("GetResidueName",&mol::alg::UniqueAtomIdentifier::GetResidueName) .def("GetAtomName",&mol::alg::UniqueAtomIdentifier::GetAtomName) .def("GetQualifiedAtomName",&mol::alg::UniqueAtomIdentifier::GetQualifiedAtomName) - ; + ; + + class_<mol::alg::lDDTSettings>("lDDTSettings", no_init) + .def("__init__", raw_function(lDDTSettingsInitWrapper)) + .def(init<Real, Real, Real, int, String, String, bool, bool, std::vector<Real>&, String>()) + .def("ToString", &mol::alg::lDDTSettings::ToString) + .def("SetStereoChemicalParamsPath", &mol::alg::lDDTSettings::SetStereoChemicalParamsPath) + .def("PrintParameters", &mol::alg::lDDTSettings::PrintParameters) + .def("__repr__", &mol::alg::lDDTSettings::ToString) + .def("__str__", &mol::alg::lDDTSettings::ToString) + .def_readwrite("bond_tolerance", &mol::alg::lDDTSettings::bond_tolerance) + .def_readwrite("angle_tolerance", &mol::alg::lDDTSettings::angle_tolerance) + .def_readwrite("radius", &mol::alg::lDDTSettings::radius) + .def_readwrite("sequence_separation", &mol::alg::lDDTSettings::sequence_separation) + .def_readwrite("sel", &mol::alg::lDDTSettings::sel) + .def_readwrite("parameter_file_path", &mol::alg::lDDTSettings::parameter_file_path) + .def_readwrite("structural_checks", &mol::alg::lDDTSettings::structural_checks) + .def_readwrite("consistency_checks", &mol::alg::lDDTSettings::consistency_checks) + .def_readwrite("cutoffs", &mol::alg::lDDTSettings::cutoffs) + .def_readwrite("label", &mol::alg::lDDTSettings::label); + + class_<mol::alg::lDDTLocalScore>("lDDTLocalScore", init<String, String, int, String, String, Real, int, int>()) + .def("ToString", &mol::alg::lDDTLocalScore::ToString) + .def("GetHeader", &mol::alg::lDDTLocalScore::GetHeader) + .def_readwrite("cname", &mol::alg::lDDTLocalScore::cname) + .def_readwrite("rname", &mol::alg::lDDTLocalScore::rname) + .def_readwrite("rnum", &mol::alg::lDDTLocalScore::rnum) + .def_readwrite("is_assessed", &mol::alg::lDDTLocalScore::is_assessed) + .def_readwrite("quality_problems", &mol::alg::lDDTLocalScore::quality_problems) + .def_readwrite("local_lddt", &mol::alg::lDDTLocalScore::local_lddt) + .def_readwrite("conserved_dist", &mol::alg::lDDTLocalScore::conserved_dist) + .def_readwrite("total_dist", &mol::alg::lDDTLocalScore::total_dist); def("FillClashingDistances",&fill_clashing_distances_wrapper); def("FillStereoChemicalParams",&fill_stereochemical_params_wrapper); def("IsStandardResidue",&mol::alg::IsStandardResidue); def("PrintGlobalRDMap",&mol::alg::PrintGlobalRDMap); def("PrintResidueRDMap",&mol::alg::PrintResidueRDMap); + def("CleanlDDTReferences", &clean_lddt_references_wrapper); + def("PreparelDDTGlobalRDMap", + &prepare_lddt_global_rdmap_wrapper, + (arg("reference_list"), arg("settings"))); + def("CheckStructure", + &mol::alg::CheckStructure, + (arg("ent"), arg("bond_table"), arg("angle_table"), arg("nonbonded_table"), + arg("bond_tolerance"), arg("angle_tolerance"))); + def("GetlDDTPerResidueStats", + &get_lddt_per_residue_stats_wrapper, + (arg("model"), arg("distance_list"), arg("settings"))); + def("PrintlDDTPerResidueStats", + &print_lddt_per_residue_stats_wrapper, + (arg("scores"), arg("settings"))); + class_<mol::alg::PDBize>("PDBize", init<int>(arg("min_polymer_size")=10)) diff --git a/modules/mol/alg/src/CMakeLists.txt b/modules/mol/alg/src/CMakeLists.txt index d473247e656e17639bc903ce900cfafb42cea83c..42c571d79444add71b47a8da9916a106a2e1caf1 100644 --- a/modules/mol/alg/src/CMakeLists.txt +++ b/modules/mol/alg/src/CMakeLists.txt @@ -20,6 +20,9 @@ set(OST_MOL_ALG_HEADERS similarity_matrix.hh accessibility.hh sec_struct.hh + nonstandard.hh + molck.hh + find_membrane.hh ) set(OST_MOL_ALG_SOURCES @@ -43,6 +46,9 @@ set(OST_MOL_ALG_SOURCES similarity_matrix.cc accessibility.cc sec_struct.cc + nonstandard.cc + molck.cc + find_membrane.cc ) set(MOL_ALG_DEPS ost_mol ost_seq) @@ -58,7 +64,7 @@ if (ENABLE_IMG) entity_to_density.cc ) - set(MOL_ALG_DEPS ${MOL_ALG_DEPS} ost_img ost_img_alg ost_seq_alg) + set(MOL_ALG_DEPS ${MOL_ALG_DEPS} ost_img ost_img_alg ost_seq_alg ost_conop) endif() executable(NAME lddt SOURCES lddt.cc diff --git a/modules/mol/alg/src/accessibility.cc b/modules/mol/alg/src/accessibility.cc index a1becdd7f83c3de0b939e22788d6bf4bb3f22926..276b53ba2256802e9c9afc52c2f4b49e308e796f 100644 --- a/modules/mol/alg/src/accessibility.cc +++ b/modules/mol/alg/src/accessibility.cc @@ -203,6 +203,11 @@ Real GetAtomAccessibilityNACCESS(Real x_pos, Real y_pos, Real z_pos, Real a = x_pos - x[i]; Real b = y_pos - y[i]; Real c = a*a + b*b; + + if(c == Real(0.0)) { + return 0.0; + } + dx[i] = a; dy[i] = b; dsqr[i] = c; diff --git a/modules/mol/alg/src/filter_clashes.cc b/modules/mol/alg/src/filter_clashes.cc index 690a10aff11a238a45663186ac6b98c3ace29db5..455d1e7dbd8114aeb7ef972cc50691de2e9a0e8c 100644 --- a/modules/mol/alg/src/filter_clashes.cc +++ b/modules/mol/alg/src/filter_clashes.cc @@ -201,7 +201,7 @@ bool StereoChemicalParams::IsEmpty() const return false; } -StereoChemicalParams FillStereoChemicalParams(const String& header, std::vector<String>& stereo_chemical_props_file) +StereoChemicalParams FillStereoChemicalParams(const String& header, std::vector<String>& stereo_chemical_props_file, bool check) { StereoChemicalParams table; bool found=false; @@ -276,11 +276,18 @@ StereoChemicalParams FillStereoChemicalParams(const String& header, std::vector< if (found==false) { std::cout << "Could not find the relevant section in the stereo-chemical parameter file" << std::endl; return StereoChemicalParams(); - }; + }; + if (check) { + if (table.IsEmpty()) { + std::stringstream serr; + serr << "Error reading the " << header << " section of the stereo-chemical parameter file."; + throw ost::Error(serr.str()); + } + } return table; }; -ClashingDistances FillClashingDistances(std::vector<String>& stereo_chemical_props_file) +ClashingDistances FillClashingDistances(std::vector<String>& stereo_chemical_props_file, bool check) { ClashingDistances table; bool found=false; @@ -340,7 +347,14 @@ ClashingDistances FillClashingDistances(std::vector<String>& stereo_chemical_pro if (found==false) { std::cout << "Could not find the relevant section in the stereo-chemical parameter file" << std::endl; return ClashingDistances(); - } + } + if (check) { + if (table.IsEmpty()) { + std::stringstream serr; + serr << "Error reading the Clashing section of the stereo-chemical parameter file."; + throw ost::Error(serr.str()); + } + } return table; } diff --git a/modules/mol/alg/src/filter_clashes.hh b/modules/mol/alg/src/filter_clashes.hh index f53f123dc09922937382a5bb67502b0add7603c2..fe6d12c75a7303ec637e194d341831c1200ac4b0 100644 --- a/modules/mol/alg/src/filter_clashes.hh +++ b/modules/mol/alg/src/filter_clashes.hh @@ -244,13 +244,13 @@ private: /// \brief Fills a list of reference clashing distances from the content of a parameter file /// /// Requires a list of strings holding the contents of a parameter file, one line per string -ClashingDistances DLLEXPORT_OST_MOL_ALG FillClashingDistances(std::vector<String>& stereo_chemical_props_file); +ClashingDistances DLLEXPORT_OST_MOL_ALG FillClashingDistances(std::vector<String>& stereo_chemical_props_file, bool check=false); /// \brief Fills a list of stereo-chemical statistics from the content of a parameter file /// /// Requires a list of strings holding the contents of a parameter file, one line per string /// The header can be 'Bonds' to read bond statistics or 'Angles' to read angle statistics -StereoChemicalParams DLLEXPORT_OST_MOL_ALG FillStereoChemicalParams(const String& header, std::vector<String>& stereo_chemical_props_file); +StereoChemicalParams DLLEXPORT_OST_MOL_ALG FillStereoChemicalParams(const String& header, std::vector<String>& stereo_chemical_props_file, bool check=false); /// \brief Filters a structure based on detected clashes between non bonded atoms. Entity version /// diff --git a/modules/mol/alg/src/find_membrane.cc b/modules/mol/alg/src/find_membrane.cc new file mode 100644 index 0000000000000000000000000000000000000000..f48fb1149815f65e162e2f39a2c961dff494a0a1 --- /dev/null +++ b/modules/mol/alg/src/find_membrane.cc @@ -0,0 +1,1018 @@ +#include <ost/mol/alg/find_membrane.hh> +#include <ost/mol/alg/accessibility.hh> +#include <ost/geom/vecmat3_op.hh> +#include <ost/message.hh> + +#include <limits> +#include <exception> +#include <list> +#include <cmath> +#include <Eigen/Core> +#include <Eigen/Eigenvalues> + +namespace{ + +// Copyright notice of the Levenberg Marquardt minimizer we use... + +// Copyright (c) 2007, 2008, 2009 libmv authors. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. +// +// A simple implementation of levenberg marquardt. +// +// [1] K. Madsen, H. Nielsen, O. Tingleoff. Methods for Non-linear Least +// Squares Problems. +// http://www2.imm.dtu.dk/pubdb/views/edoc_download.php/3215/pdf/imm3215.pdf +// +// TODO(keir): Cite the Lourakis' dogleg paper. + +template<typename Function, + typename Jacobian, + typename Solver = Eigen::JacobiSVD< + Eigen::Matrix<typename Function::FMatrixType::RealScalar, + Function::XMatrixType::RowsAtCompileTime, + Function::XMatrixType::RowsAtCompileTime> > > +class LevenbergMarquardt { + public: + typedef typename Function::XMatrixType::RealScalar Scalar; + typedef typename Function::FMatrixType FVec; + typedef typename Function::XMatrixType Parameters; + typedef Eigen::Matrix<typename Function::FMatrixType::RealScalar, + Function::FMatrixType::RowsAtCompileTime, + Function::XMatrixType::RowsAtCompileTime> JMatrixType; + typedef Eigen::Matrix<typename JMatrixType::RealScalar, + JMatrixType::ColsAtCompileTime, + JMatrixType::ColsAtCompileTime> AMatrixType; + + // TODO(keir): Some of these knobs can be derived from each other and + // removed, instead of requiring the user to set them. + enum Status { + RUNNING, + GRADIENT_TOO_SMALL, // eps > max(J'*f(x)) + RELATIVE_STEP_SIZE_TOO_SMALL, // eps > ||dx|| / ||x|| + ERROR_TOO_SMALL, // eps > ||f(x)|| + HIT_MAX_ITERATIONS, + }; + + LevenbergMarquardt(const Function &f) + : f_(f), df_(f) {} + + struct SolverParameters { + SolverParameters() + : gradient_threshold(1e-20), + relative_step_threshold(1e-20), + error_threshold(1e-16), + initial_scale_factor(1e-3), + max_iterations(200) {} + Scalar gradient_threshold; // eps > max(J'*f(x)) + Scalar relative_step_threshold; // eps > ||dx|| / ||x|| + Scalar error_threshold; // eps > ||f(x)|| + Scalar initial_scale_factor; // Initial u for solving normal equations. + int max_iterations; // Maximum number of solver iterations. + }; + + struct Results { + Scalar error_magnitude; // ||f(x)|| + Scalar gradient_magnitude; // ||J'f(x)|| + int iterations; + Status status; + }; + + Status Update(const Parameters &x, const SolverParameters ¶ms, + JMatrixType *J, AMatrixType *A, FVec *error, Parameters *g) { + *J = df_(x); + *A = (*J).transpose() * (*J); + *error = -f_(x); + *g = (*J).transpose() * *error; + if (g->array().abs().maxCoeff() < params.gradient_threshold) { + return GRADIENT_TOO_SMALL; + } else if (error->norm() < params.error_threshold) { + return ERROR_TOO_SMALL; + } + return RUNNING; + } + + Results minimize(Parameters *x_and_min) { + SolverParameters params; + return minimize(params, x_and_min); + } + + Results minimize(const SolverParameters ¶ms, Parameters *x_and_min) { + Parameters &x = *x_and_min; + JMatrixType J; + AMatrixType A; + FVec error; + Parameters g; + + Results results; + results.status = Update(x, params, &J, &A, &error, &g); + + Scalar u = Scalar(params.initial_scale_factor*A.diagonal().maxCoeff()); + Scalar v = 2; + + Parameters dx, x_new; + int i; + for (i = 0; results.status == RUNNING && i < params.max_iterations; ++i) { +// LOG(INFO) << "iteration: " << i; +// LOG(INFO) << "||f(x)||: " << f_(x).norm(); +// LOG(INFO) << "max(g): " << g.array().abs().maxCoeff(); +// LOG(INFO) << "u: " << u; +// LOG(INFO) << "v: " << v; + AMatrixType A_augmented = A + u*AMatrixType::Identity(J.cols(), J.cols()); + Solver solver(A_augmented, Eigen::ComputeThinU | Eigen::ComputeThinV); + dx = solver.solve(g); + if (dx.norm() <= params.relative_step_threshold * x.norm()) { + results.status = RELATIVE_STEP_SIZE_TOO_SMALL; + break; + } + + x_new = x + dx; + // Rho is the ratio of the actual reduction in error to the reduction + // in error that would be obtained if the problem was linear. + // See [1] for details. + Scalar rho((error.squaredNorm() - f_(x_new).squaredNorm()) + / dx.dot(u*dx + g)); + if (rho > 0) { + // Accept the Gauss-Newton step because the linear model fits well. + x = x_new; + results.status = Update(x, params, &J, &A, &error, &g); + Scalar tmp = Scalar(2*rho-1); + u = u*std::max(Scalar(1/3.), 1 - (tmp*tmp*tmp)); + v = 2; + continue; + } + + // Reject the update because either the normal equations failed to solve + // or the local linear model was not good (rho < 0). Instead, increase + // to move closer to gradient descent. + u *= v; + v *= 2; + } + if (results.status == RUNNING) { + results.status = HIT_MAX_ITERATIONS; + } + results.error_magnitude = error.norm(); + results.gradient_magnitude = g.norm(); + results.iterations = i; + return results; + } + + private: + const Function &f_; + Jacobian df_; +}; + +geom::Mat3 RotationAroundAxis(geom::Vec3 axis, Real angle) { + + Real aa, ab, ac, ba, bb, bc, ca, cb, cc, one_m_cos, cos_ang, sin_ang; + + cos_ang = std::cos(angle); + sin_ang = std::sin(angle); + one_m_cos = 1-cos_ang; + + aa = cos_ang+axis[0]*axis[0]*one_m_cos; + ab = axis[0]*axis[1]*one_m_cos-axis[2]*sin_ang; + ac = axis[0]*axis[2]*one_m_cos+axis[1]*sin_ang; + + ba = axis[1]*axis[0]*one_m_cos+axis[2]*sin_ang; + bb = cos_ang+axis[1]*axis[1]*one_m_cos; + bc = axis[1]*axis[2]*one_m_cos-axis[0]*sin_ang; + + ca = axis[2]*axis[0]*one_m_cos-axis[1]*sin_ang; + cb = axis[2]*axis[1]*one_m_cos+axis[0]*sin_ang; + cc = cos_ang+axis[2]*axis[2]*one_m_cos; + + geom::Mat3 result(aa, ab, ac, ba, bb, bc, ca, cb, cc); + return result; +} + +geom::Vec3 RotateAroundAxis(geom::Vec3 point, geom::Vec3 axis, Real angle) { + + geom::Mat3 rot = RotationAroundAxis(axis, angle); + geom::Vec3 result = rot*point; + return result; +} + + +// Levenberg Marquardt specific objects for the membrane finding algorithm + +struct EnergyF { + EnergyF(const std::vector<geom::Vec3>& p, const std::vector<Real>& t_e, + Real l, Real o, const geom::Vec3& ax, const geom::Vec3& tilt_ax): + positions(p), transfer_energies(t_e), axis(ax), + tilt_axis(tilt_ax), lambda(l), offset(o) { } + + typedef Eigen::Matrix<Real, 4, 1> XMatrixType; + typedef Eigen::Matrix<Real, 1, 1> FMatrixType; + + Eigen::Matrix<Real,1,1> operator()(const Eigen::Matrix<Real, 4, 1>& x) const { + + FMatrixType result; + result(0,0) = 0.0; + geom::Vec3 tilted_axis = axis; + + tilted_axis = RotateAroundAxis(tilted_axis, tilt_axis, x(0, 0)); + tilted_axis = RotateAroundAxis(tilted_axis, axis, x(1, 0)); + + Real pos_on_axis; + Real distance_to_center; + Real half_width = Real(0.5) * x(2, 0); + Real exponent; + Real one_over_lambda = Real(1.0) / lambda; + + int n = transfer_energies.size(); + + for(int i = 0; i < n; ++i) { + pos_on_axis = geom::Dot(tilted_axis, positions[i]); + distance_to_center = std::abs(x(3, 0) - pos_on_axis); + exponent = (distance_to_center - half_width) * one_over_lambda; + result(0, 0) += (1.0 / (1.0 + std::exp(exponent))) * transfer_energies[i]; + } + + result(0, 0) += offset; + + return result; + } + + std::vector<geom::Vec3> positions; + std::vector<Real> transfer_energies; + geom::Vec3 axis; + geom::Vec3 tilt_axis; + Real lambda; + + // define an offset parameter... + // The levenberg-marquardt algorithm is designed to minimize an error + // function, aims to converge towards zero. The offset parameter gets added + // at the end of the energy calculation to get a positive result => + // forces algorithm to minimize + Real offset; +}; + + +struct EnergyDF { + + EnergyDF(const EnergyF& f): function(f),d_tilt(0.02),d_angle(0.02), + d_width(0.4),d_pos(0.4) { } + + Eigen::Matrix<Real,1,4> operator()(const Eigen::Matrix<Real, 4, 1>& x) const { + + Eigen::Matrix<Real,1,4> result; + Eigen::Matrix<Real, 4, 1> parameter1 = x; + Eigen::Matrix<Real, 4, 1> parameter2 = x; + + parameter1(0,0)+=d_tilt; + parameter2(0,0)-=d_tilt; + result(0,0) = (function(parameter1)(0, 0) - + function(parameter2)(0, 0)) / (2*d_tilt); + + parameter1=x; + parameter2=x; + parameter1(1,0)+=d_angle; + parameter2(1,0)-=d_angle; + result(0,1) = (function(parameter1)(0,0) - + function(parameter2)(0,0)) / (2*d_angle); + + parameter1=x; + parameter2=x; + parameter1(2,0)+=d_width; + parameter2(2,0)-=d_width; + result(0,2) = (function(parameter1)(0,0) - + function(parameter2)(0,0)) / (2*d_width); + + parameter1=x; + parameter2=x; + parameter1(3,0)+=d_pos; + parameter2(3,0)-=d_pos; + result(0,3) = (function(parameter1)(0,0) - + function(parameter2)(0,0)) / (2*d_pos); + return result; + } + + EnergyF function; + Real d_tilt; + Real d_angle; + Real d_width; + Real d_pos; +}; + + +void GetRanges(const std::vector<geom::Vec3>& atom_positions, + Real& min_x, Real& max_x, Real& min_y, Real& max_y, + Real& min_z, Real& max_z) { + + min_x = std::numeric_limits<Real>::max(); + max_x = -min_x; + + min_y = std::numeric_limits<Real>::max(); + max_y = -min_y; + + min_z = std::numeric_limits<Real>::max(); + max_z = -min_z; + + for(uint i = 0; i < atom_positions.size(); ++i) { + const geom::Vec3& pos = atom_positions[i]; + min_x = std::min(min_x, pos[0]); + max_x = std::max(max_x, pos[0]); + min_y = std::min(min_y, pos[1]); + max_y = std::max(max_y, pos[1]); + min_z = std::min(min_z, pos[2]); + max_z = std::max(max_z, pos[2]); + } +} + + +void FloodLevel(char* data, int x_start, int y_start, + int x_extent, int y_extent, + int orig_value, int dest_value) { + + //http://lodev.org/cgtutor/floodfill.html + if(orig_value != data[x_start*y_extent + y_start]) { + return; + } + + std::vector<std::pair<int,int> > queue; + queue.push_back(std::make_pair(x_start, y_start)); + + int y1,y,x; + bool spanLeft, spanRight; + std::pair<int,int> actual_position; + + while(!queue.empty()){ + + actual_position = queue.back(); + queue.pop_back(); + x = actual_position.first; + y = actual_position.second; + + y1 = y; + + while(y1 >= 0 && data[x*y_extent + y1] == orig_value) { + --y1; + } + + y1++; + spanLeft = spanRight = 0; + + while(y1 < y_extent && + data[x*y_extent + y1] == orig_value ) { + + data[x*y_extent + y1] = dest_value; + + if(!spanLeft && x > 0 && + data[(x-1)*y_extent + y1] == orig_value) { + queue.push_back(std::make_pair(x-1,y1)); + spanLeft = 1; + } + else if(spanLeft && x > 0 && + data[(x-1)*y_extent + y1] != orig_value) { + spanLeft = 0; + } + + if(!spanRight && x < x_extent - 1 && + data[(x+1)*y_extent + y1] == orig_value) { + queue.push_back(std::make_pair(x+1,y1)); + spanRight = 1; + } + else if(spanRight && x < x_extent - 1 && + data[(x+1)*y_extent + y1] != orig_value) { + spanRight = 0; + } + ++y1; + } + } +} + + +void GetExposedAtoms(const std::vector<geom::Vec3>& atom_positions, + const std::vector<Real>& transfer_energies, + std::vector<geom::Vec3>& exposed_atom_positions, + std::vector<Real>& exposed_transfer_energies) { + + // sum of approx. vdw radius of the present heavy atoms (1.8) + // plus 1.4 for water. + Real radius = 3.2; + Real one_over_radius = Real(1.0) / radius; + + // lets setup a grid in which we place the atoms + Real min_x, max_x, min_y, max_y, min_z, max_z; + GetRanges(atom_positions, min_x, max_x, min_y, max_y, min_z, max_z); + + // we guarantee that the thing is properly solvated in the x-y plane and add + // some space around this is also necessary to avoid overflow checks in + // different places + min_x -= Real(2.1) * radius; + min_y -= Real(2.1) * radius; + min_z -= Real(2.1) * radius; + max_x += Real(2.1) * radius; + max_y += Real(2.1) * radius; + max_z += Real(2.1) * radius; + + int num_xbins = std::ceil((max_x - min_x) * one_over_radius); + int num_ybins = std::ceil((max_y - min_y) * one_over_radius); + int num_zbins = std::ceil((max_z - min_z) * one_over_radius); + int num_bins = num_xbins * num_ybins * num_zbins; + char* grid = new char[num_bins]; + memset(grid, 0, num_bins); + + for(uint i = 0; i < atom_positions.size(); ++i) { + + const geom::Vec3& pos = atom_positions[i]; + int x_bin = (pos[0] - min_x) * one_over_radius; + int y_bin = (pos[1] - min_y) * one_over_radius; + int z_bin = (pos[2] - min_z) * one_over_radius; + + // we're really crude here and simply set all 27 cubes with central + // cube defined by x_bin, y_bin and z_bin to one + for(int z = z_bin - 1; z <= z_bin + 1; ++z) { + for(int x = x_bin - 1; x <= x_bin + 1; ++x) { + for(int y = y_bin - 1; y <= y_bin + 1; ++y) { + grid[z*num_xbins*num_ybins + x*num_ybins + y] = 1; + } + } + } + } + + // lets call flood fill for every layer along the z-axis from every + // corner in the x-y plane. + for(int z = 0; z < num_zbins; ++z) { + char* level = &grid[z*num_xbins*num_ybins]; + FloodLevel(level, 0, 0, num_xbins, num_ybins, 0, 2); + FloodLevel(level, 0, num_ybins - 1, num_xbins, num_ybins, 0, 2); + FloodLevel(level, num_xbins - 1, 0, num_xbins, num_ybins, 0, 2); + FloodLevel(level, num_xbins - 1, num_ybins - 1, num_xbins, num_ybins, 0, 2); + } + + // every cube in every layer that has currently value 1 that has a city-block + // distance below 3 to any cube with value 2 is considered to be in contact + // with the outer surface... lets set them to a value of three + for(int z = 0; z < num_zbins; ++z) { + char* level = &grid[z*num_xbins*num_ybins]; + for(int x = 0; x < num_xbins; ++x) { + for(int y = 0; y < num_ybins; ++y) { + if(level[x*num_ybins + y] == 1) { + int x_from = std::max(0, x - 3); + int x_to = std::min(num_xbins-1, x + 3); + int y_from = std::max(0, y - 3); + int y_to = std::min(num_ybins-1, y + 3); + bool exposed = false; + for(int i = x_from; i <= x_to && !exposed; ++i) { + for(int j = y_from; j <= y_to; ++j) { + if(level[i*num_ybins + j] == 2) { + level[x*num_ybins + y] = 3; + exposed = true; + break; + } + } + } + } + } + } + } + + // all positions that lie in a cube with value 3 are considered to be exposed... + exposed_atom_positions.clear(); + exposed_transfer_energies.clear(); + for(uint i = 0; i < atom_positions.size(); ++i) { + const geom::Vec3& pos = atom_positions[i]; + int x_bin = (pos[0] - min_x) * one_over_radius; + int y_bin = (pos[1] - min_y) * one_over_radius; + int z_bin = (pos[2] - min_z) * one_over_radius; + if(grid[z_bin*num_xbins*num_ybins + x_bin*num_ybins + y_bin] == 3) { + exposed_atom_positions.push_back(pos); + exposed_transfer_energies.push_back(transfer_energies[i]); + } + } + + // cleanup + delete [] grid; +} + + +void ScanAxis(const std::vector<geom::Vec3>& atom_positions, + const std::vector<Real>& transfer_energies, + const geom::Vec3& axis, + Real& best_width, Real& best_center, Real& best_energy) { + + int n_pos = atom_positions.size(); + + geom::Vec3 normalized_axis = geom::Normalize(axis); + std::vector<Real> pos_on_axis(n_pos); + + for(int i = 0; i < n_pos; ++i) { + pos_on_axis[i] = geom::Dot(atom_positions[i], normalized_axis); + } + + Real min_pos = pos_on_axis[0]; + Real max_pos = min_pos; + + for(int i = 1; i < n_pos; ++i) { + min_pos = std::min(min_pos, pos_on_axis[i]); + max_pos = std::max(max_pos, pos_on_axis[i]); + } + + min_pos = std::floor(min_pos); + max_pos = std::ceil(max_pos); + + int full_width = int(max_pos - min_pos) + 1; + + //energies representing the energy profile along the axis + std::vector<Real> mapped_energies(full_width, 0.0); + + for(int i = 0; i < n_pos; ++i) { + mapped_energies[int(pos_on_axis[i] - min_pos)] += transfer_energies[i]; + } + + best_width = 0; + best_center = 0; + best_energy = std::numeric_limits<Real>::max(); + + for(int window_width = 10; window_width <= 40; ++window_width) { + + if(window_width > full_width) { + break; + } + + Real energy=0.0; + for(int i = 0; i < window_width; ++i) { + energy += mapped_energies[i]; + } + + if(energy < best_energy) { + best_width = window_width; + best_center = min_pos + Real(0.5) * window_width + Real(0.5); + best_energy = energy; + } + + for(int pos = 1; pos < full_width - window_width; ++pos) { + energy -= mapped_energies[pos-1]; + energy += mapped_energies[pos+window_width-1]; + if(energy < best_energy){ + best_width = window_width; + best_center = min_pos + pos + Real(0.5) * window_width + Real(0.5); + best_energy = energy; + } + } + } +} + + +struct LMInput { + ost::mol::alg::FindMemParam mem_param; + geom::Transform initial_transform; + std::vector<geom::Vec3> exposed_atom_positions; + std::vector<Real> exposed_transfer_energies; +}; + + +void SampleZ(const std::vector<geom::Vec3>& atom_pos, + const std::vector<Real>& transfer_energies, + const geom::Transform& initial_transform, + int n_solutions, std::list<LMInput>& top_solutions) { + + + std::vector<geom::Vec3> transformed_atom_pos(atom_pos.size()); + for(uint at_idx = 0; at_idx < atom_pos.size(); ++at_idx) { + transformed_atom_pos[at_idx] = initial_transform.Apply(atom_pos[at_idx]); + } + + std::vector<geom::Vec3> exposed_atom_positions; + std::vector<Real> exposed_transfer_energies; + GetExposedAtoms(transformed_atom_pos, transfer_energies, + exposed_atom_positions, exposed_transfer_energies); + + std::vector<Real> tilt_angles; + std::vector<Real> rotation_angles; + for(int tilt_deg = 0; tilt_deg <= 45; tilt_deg += 5) { + if(tilt_deg == 0) { + tilt_angles.push_back(0.0); + rotation_angles.push_back(0.0); + } + else { + Real tilt_angle = Real(tilt_deg) / Real(180.) * Real(M_PI); + for(int angle_deg = 0; angle_deg < 360; angle_deg += 5) { + tilt_angles.push_back(tilt_angle); + rotation_angles.push_back(Real(angle_deg) / Real(180.) * Real(M_PI)); + } + } + } + + geom::Vec3 normalized_axis(0.0,0.0,1.0); + geom::Vec3 tilt_axis(1.0,0.0,0.0); + + for(uint i = 0; i < tilt_angles.size(); ++i) { + + Real tilt_angle = tilt_angles[i]; + Real rotation_angle = rotation_angles[i]; + + geom::Vec3 tilted_axis = RotateAroundAxis(normalized_axis, tilt_axis, + tilt_angle); + geom::Vec3 scan_axis = RotateAroundAxis(tilted_axis, normalized_axis, + rotation_angle); + + Real actual_width, actual_center, actual_energy; + ScanAxis(exposed_atom_positions, exposed_transfer_energies, scan_axis, + actual_width, actual_center, actual_energy); + + if(static_cast<int>(top_solutions.size()) >= n_solutions && + actual_energy > top_solutions.back().mem_param.energy) { + continue; + } + + LMInput lm_input; + lm_input.mem_param.axis = normalized_axis; + lm_input.mem_param.tilt_axis = tilt_axis; + lm_input.mem_param.tilt = tilt_angle; + lm_input.mem_param.angle = rotation_angle; + lm_input.mem_param.width = actual_width; + lm_input.mem_param.pos = actual_center; + lm_input.mem_param.energy = actual_energy; + lm_input.initial_transform = initial_transform; + lm_input.exposed_atom_positions = exposed_atom_positions; + lm_input.exposed_transfer_energies = exposed_transfer_energies; + + if(top_solutions.empty()) { + top_solutions.push_back(lm_input); + } + else { + bool added = false; + for(std::list<LMInput>::iterator sol_it = top_solutions.begin(); + sol_it != top_solutions.end(); ++sol_it) { + if(sol_it->mem_param.energy > lm_input.mem_param.energy) { + top_solutions.insert(sol_it, lm_input); + added = true; + break; + } + } + + if(!added) { + top_solutions.push_back(lm_input); + } + + while(static_cast<int>(top_solutions.size()) > n_solutions) { + top_solutions.pop_back(); + } + } + } +} + + +ost::mol::alg::FindMemParam GetFinalSolution(const std::list<LMInput>& top_solutions, + Real lambda) { + + Real best_energy = std::numeric_limits<Real>::max(); + std::list<LMInput>::const_iterator best_sol_it = top_solutions.begin(); + Eigen::Matrix<Real, 4, 1> lm_parameters; + Eigen::Matrix<Real, 4, 1> best_lm_parameters; + LevenbergMarquardt<EnergyF, EnergyDF>::Results lm_result; + + for(std::list<LMInput>::const_iterator sol_it = top_solutions.begin(); + sol_it != top_solutions.end(); ++sol_it) { + + Real offset = std::max(Real(20000.), std::abs(sol_it->mem_param.energy * 2)); + + EnergyF en_f(sol_it->exposed_atom_positions, + sol_it->exposed_transfer_energies, + lambda, offset, + sol_it->mem_param.axis, + sol_it->mem_param.tilt_axis); + + lm_parameters(0,0) = sol_it->mem_param.tilt; + lm_parameters(1,0) = sol_it->mem_param.angle; + lm_parameters(2,0) = sol_it->mem_param.width; + lm_parameters(3,0) = sol_it->mem_param.pos; + + LevenbergMarquardt<EnergyF,EnergyDF> lm(en_f); + lm_result = lm.minimize(&lm_parameters); + + Real minimized_energy = en_f(lm_parameters)(0, 0) - en_f.offset; + + if(minimized_energy < best_energy) { + best_energy = minimized_energy; + best_sol_it = sol_it; + best_lm_parameters = lm_parameters; + } + } + + ost::mol::alg::FindMemParam mem_param = best_sol_it->mem_param; + mem_param.energy = best_energy; + mem_param.tilt = best_lm_parameters(0,0); + mem_param.angle = best_lm_parameters(1,0); + mem_param.width = best_lm_parameters(2,0); + mem_param.pos = best_lm_parameters(3,0); + + // the solution is still relative to the initial transform that has + // been applied when calling the SampleZ funtion! + geom::Transform t = best_sol_it->initial_transform; + mem_param.tilt_axis = t.ApplyInverse(mem_param.tilt_axis); + mem_param.axis = t.ApplyInverse(mem_param.axis); + + return mem_param; +} + + +ost::mol::EntityHandle CreateMembraneRepresentation( + const std::vector<geom::Vec3>& atom_positions, + const ost::mol::alg::FindMemParam& param, + Real membrane_margin = 15, + Real delta = 2.0) { + + // let's first construct two planes defining the membrane + geom::Vec3 membrane_axis = param.GetMembraneAxis(); + geom::Vec3 one = param.pos * membrane_axis + + membrane_axis * param.width / 2; + geom::Vec3 two = param.pos * membrane_axis - + membrane_axis * param.width / 2; + geom::Plane plane_one = geom::Plane(one, membrane_axis); + geom::Plane plane_two = geom::Plane(two, membrane_axis); + + // let's find all positions that are somehow close to those planes + geom::Vec3List close_pos; + geom::Vec3List close_pos_one; + geom::Vec3List close_pos_two; + + for(uint i = 0; i < atom_positions.size(); ++i) { + + Real d1 = geom::Distance(plane_one, atom_positions[i]); + Real d2 = geom::Distance(plane_two, atom_positions[i]); + + if(d1 < Real(3.)) { + close_pos_one.push_back(atom_positions[i]); + } + + if(d2 < Real(3.)) { + close_pos_two.push_back(atom_positions[i]); + } + + if(d1 < Real(3.) || d2 < Real(3.)) { + close_pos.push_back(atom_positions[i]); + } + } + + // the geometric center of the close pos vector in combination with the + // membrane axis define the central "line" of the disks that will represent + // the membrane + geom::Vec3 center_pos = close_pos.GetCenter(); + geom::Line3 center_line = geom::Line3(center_pos, center_pos + membrane_axis); + + // the final radius of the "disks" is based on the maximal distance of any + // position in close_pos to the center_line plus the membrane_margin + + Real max_d_to_center_line = 0; + for(uint i = 0; i < close_pos.size(); ++i) { + Real d = geom::Distance(center_line, close_pos[i]); + max_d_to_center_line = std::max(max_d_to_center_line, d); + } + + Real disk_radius = max_d_to_center_line + membrane_margin; + int num_sampling_points = (Real(2.) * disk_radius) / delta; + + // reassign the top and bottom positions, that have been only arbitrary + // points on the membrane planes + one = geom::IntersectionPoint(center_line, plane_one); + two = geom::IntersectionPoint(center_line, plane_two); + + // find a pair of perpendicular vectors, that are on the plane + geom::Vec3 arbitrary_vec(1.0, 0.0, 0.0); + if(geom::Angle(membrane_axis, arbitrary_vec) < 0.1) { + // parallel is not cool in this case + arbitrary_vec = geom::Vec3(0.0, 1.0, 0.0); + } + geom::Vec3 plane_x = geom::Normalize(geom::Cross(membrane_axis, arbitrary_vec)); + geom::Vec3 plane_y = geom::Normalize(geom::Cross(membrane_axis, plane_x)); + + // final representing positions come in here + std::vector<geom::Vec3> final_pos; + + // do plane one + geom::Vec3 origin = one - delta * num_sampling_points * 0.5 * plane_x - + delta * num_sampling_points * 0.5 * plane_y; + + for(int i = 0; i < num_sampling_points; ++i) { + for(int j = 0; j < num_sampling_points; ++j) { + geom::Vec3 pos = origin + i*delta*plane_x + j*delta*plane_y; + if(geom::Distance(pos, one) < disk_radius) { + bool far_far_away = true; + // this is slow... + for(uint k = 0; k < close_pos_one.size(); ++k) { + if(geom::Length2(pos - close_pos_one[k]) < Real(16.)) { + far_far_away = false; + break; + } + } + if(far_far_away) { + final_pos.push_back(pos); + } + } + } + } + + // do plane two + origin = two - delta * num_sampling_points * 0.5 * plane_x - + delta * num_sampling_points * 0.5 * plane_y; + + for(int i = 0; i < num_sampling_points; ++i) { + for(int j = 0; j < num_sampling_points; ++j) { + geom::Vec3 pos = origin + i*delta*plane_x + j*delta*plane_y; + if(geom::Distance(pos, two) < disk_radius) { + bool far_far_away = true; + // this is slow... + for(uint k = 0; k < close_pos_two.size(); ++k) { + if(geom::Length2(pos - close_pos_two[k]) < Real(16.)) { + far_far_away = false; + break; + } + } + if(far_far_away) { + final_pos.push_back(pos); + } + } + } + } + + // create hacky entity that contains membrane representing positions and + // return + ost::mol::EntityHandle membrane_ent = ost::mol::CreateEntity(); + ost::mol::XCSEditor ed = membrane_ent.EditXCS(); + + ost::mol::ChainHandle chain = ed.InsertChain("M"); + ost::mol::ResidueHandle res = ed.AppendResidue(chain, "MEM"); + String atom_names = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + uint atom_name_idx = 0; + uint atom_name_secondary_idx = 0; + + for(uint i = 0; i < final_pos.size(); ++i) { + + if(atom_name_secondary_idx == atom_names.size()) { + ++atom_name_idx; + atom_name_secondary_idx = 0; + } + if(atom_name_idx == atom_names.size()) { + res = ed.AppendResidue(chain, "MEM"); + atom_name_idx = 0; + atom_name_secondary_idx = 0; + } + + String atom_name = "--"; + atom_name[0] = atom_names[atom_name_idx]; + atom_name[1] = atom_names[atom_name_secondary_idx]; + + ed.InsertAtom(res, atom_name, final_pos[i]); + ++atom_name_secondary_idx; + } + + return membrane_ent; +} + + +} // anon namespace + + +namespace ost{ namespace mol{ namespace alg{ + +geom::Vec3 FindMemParam::GetMembraneAxis() const { + + geom::Vec3 result = RotateAroundAxis(axis,tilt_axis,tilt); + result = RotateAroundAxis(result,axis,angle); + return result; +} + + +FindMemParam FindMembrane(ost::mol::EntityView& ent, + bool assign_membrane_representation, bool fast) { + + ost::mol::EntityView peptide_view = ent.Select("peptide=true and ele!=H"); + Accessibility(peptide_view); + + std::vector<geom::Vec3> atom_pos; + std::vector<Real> transfer_energies; + + atom_pos.reserve(peptide_view.GetAtomCount()); + transfer_energies.reserve(peptide_view.GetAtomCount()); + + ost::mol::AtomViewList atoms = peptide_view.GetAtomList(); + String stupid_string("S_N_O_C"); + + for(ost::mol::AtomViewList::iterator it = atoms.begin(); + it != atoms.end(); ++it) { + + if(!it->HasProp("asaAtom")) { + continue; + } + + String element = it->GetElement(); + if(stupid_string.find(element) == std::string::npos) { + continue; + } + + Real asa = it->GetFloatProp("asaAtom"); + atom_pos.push_back(it->GetPos()); + + if(element == "S") { + transfer_energies.push_back(asa * Real(10.0)); + } + else if(element == "N") { + transfer_energies.push_back(asa * Real(53.0)); + } + else if(element == "O") { + transfer_energies.push_back(asa * Real(57.0)); + } + else if(element == "C") { + // check whether we find a double bond to distinguish between + // hibridization states + bool assigned_energy = false; + ost::mol::BondHandleList bond_list = it->GetBondList(); + for(ost::mol::BondHandleList::iterator bond_it = bond_list.begin(); + bond_it != bond_list.end(); ++bond_it){ + unsigned char bond_order = bond_it->GetBondOrder(); + if(bond_order > '1'){ + transfer_energies.push_back(asa * Real(-19.0)); + assigned_energy = true; + break;; + } + } + if(!assigned_energy) { + transfer_energies.push_back(asa * Real(-22.6)); + } + } + } + + if(atom_pos.size() < 10) { + throw ost::Error("Cannot detect membrane with such a low number " + "of heavy atoms!"); + } + + // we always optimizer along the z-axis. + // We therefore have to transform the positions. We use a rotation + // around the z-axis with subsequent rotation around the x-axis for this task + std::vector<geom::Transform> transformations; + int n_euler_angles = 3; + int n_transformations = n_euler_angles * n_euler_angles * n_euler_angles; + std::vector<Real> euler_angles(n_euler_angles); + euler_angles[0] = 0.0; + euler_angles[1] = M_PI/3; + euler_angles[2] = 2*M_PI/3; + + for(int i = 0; i < n_euler_angles; ++i) { + for(int j = 0; j < n_euler_angles; ++j) { + for(int k = 0; k < n_euler_angles; ++k) { + geom::Mat3 rot_matrix = geom::EulerTransformation(euler_angles[i], + euler_angles[j], + euler_angles[k]); + geom::Transform transform; + transform.SetRot(rot_matrix); + transformations.push_back(transform); + } + } + } + + // lets use the generated transforms to search for initial solutions that can + // then be fed into a final minimization... + std::list<LMInput> top_solutions; + int n_initial_solutions = fast ? 1 : 20; + + for(int transformation_idx = 0; transformation_idx < n_transformations; + ++transformation_idx) { + SampleZ(atom_pos, transfer_energies, transformations[transformation_idx], + n_initial_solutions, top_solutions); + } + + // Perform the final minimization and return the best solution. + // please note, that the returned solution is transformed back in order + // to match the initial atom positions + FindMemParam final_solution = GetFinalSolution(top_solutions, 0.9); + + if(assign_membrane_representation) { + final_solution.membrane_representation = CreateMembraneRepresentation( + atom_pos, + final_solution); + } + + return final_solution; +} + + +FindMemParam FindMembrane(ost::mol::EntityHandle& ent, + bool assign_membrane_representation, + bool fast) { + + ost::mol::EntityView ent_view = ent.CreateFullView(); + return FindMembrane(ent_view, assign_membrane_representation, fast); +} + +}}} // ns diff --git a/modules/mol/alg/src/find_membrane.hh b/modules/mol/alg/src/find_membrane.hh new file mode 100644 index 0000000000000000000000000000000000000000..962ea68fc09680304c2da095f78900eff5483225 --- /dev/null +++ b/modules/mol/alg/src/find_membrane.hh @@ -0,0 +1,31 @@ +#include <ost/mol/mol.hh> + +#include <ost/geom/geom.hh> +#include <ost/io/binary_data_source.hh> +#include <ost/io/binary_data_sink.hh> + +namespace ost { namespace mol{ namespace alg{ + +struct FindMemParam{ + FindMemParam() { } + + geom::Vec3 GetMembraneAxis() const; + geom::Vec3 axis; + geom::Vec3 tilt_axis; + Real tilt; + Real angle; + Real width; + Real pos; + Real energy; + ost::mol::EntityHandle membrane_representation; +}; + +FindMemParam FindMembrane(ost::mol::EntityHandle& ent, + bool assign_membrane_representation, + bool fast); + +FindMemParam FindMembrane(ost::mol::EntityView& ent, + bool assign_membrane_representation, + bool fast); + +}}} // ns diff --git a/modules/mol/alg/src/lddt.cc b/modules/mol/alg/src/lddt.cc index 11e328e4deb40c00117dd666337af25a57112fc3..60156b86b4515c773994cd3031252915525c07fe 100644 --- a/modules/mol/alg/src/lddt.cc +++ b/modules/mol/alg/src/lddt.cc @@ -32,6 +32,7 @@ #include <ost/mol/alg/filter_clashes.hh> #include <ost/io/mol/pdb_reader.hh> #include <ost/io/io_exception.hh> +#include <ost/io/stereochemical_params_reader.hh> #include <ost/conop/conop.hh> #include <ost/conop/compound_lib.hh> #include <ost/string_ref.hh> @@ -101,30 +102,6 @@ void usage() } -// computes coverage -std::pair<int,int> compute_coverage (const EntityView& v,const GlobalRDMap& glob_dist_list) -{ - int second=0; - int first=0; - if (v.GetResidueList().size()==0) { - if (glob_dist_list.size()==0) { - return std::make_pair(0,-1); - } else { - return std::make_pair(0,glob_dist_list.size()); - } - } - ChainView vchain=v.GetChainList()[0]; - for (GlobalRDMap::const_iterator i=glob_dist_list.begin();i!=glob_dist_list.end();++i) - { - ResNum rnum = (*i).first; - second++; - if (vchain.FindResidue(rnum)) { - first++; - } - } - return std::make_pair(first,second); -} - CompoundLibPtr load_compound_lib(const String& custom_path) { if (custom_path!="") { @@ -174,37 +151,21 @@ CompoundLibPtr load_compound_lib(const String& custom_path) } return CompoundLibPtr(); } -bool is_resnum_in_globalrdmap(const ResNum& resnum, const GlobalRDMap& glob_dist_list) -{ - for (GlobalRDMap::const_iterator i=glob_dist_list.begin(), e=glob_dist_list.end(); i!=e; ++i) { - ResNum rn = i->first; - if (rn==resnum) { - return true; - } - } - return false; -} int main (int argc, char **argv) { // sets some default values for parameters String version = OST_VERSION_STRING; - Real bond_tolerance = 12.0; - Real angle_tolerance = 12.0; - Real radius=15.0; - int sequence_separation = 0; - + lDDTSettings settings; + settings.structural_checks=false; // creates the required loading profile IOProfile profile; // parses options - String sel; String custom_path; - bool structural_checks=false; - bool consistency_checks=true; po::options_description desc("Options"); desc.add_options() ("calpha,c", "consider only calpha atoms") - ("sel,s", po::value<String>(&sel)->default_value(""), "selection performed on reference structure") + ("sel,s", po::value<String>(&settings.sel)->default_value(""), "selection performed on reference structure") ("tolerant,t", "fault tolerant mode") ("structural-checks,f", "perform stereo-chemical and clash checks") ("ignore-consistency-checks,x", "ignore residue name consistency checks") @@ -253,18 +214,18 @@ int main (int argc, char **argv) profile.calpha_only=true; } if (vm.count("structural-checks")) { - structural_checks=true; + settings.structural_checks=true; } if (vm.count("ignore-consistency-checks")) { - consistency_checks=false; + settings.consistency_checks=false; } if (vm.count("tolerant")) { profile.fault_tolerant=true; } - String parameter_filename; + if (vm.count("parameter-file")) { - parameter_filename=vm["parameter-file"].as<String>(); - } else if (structural_checks==true) { + settings.parameter_file_path=vm["parameter-file"].as<String>(); + } else if (settings.structural_checks==true) { std::cout << "Please specify a stereo-chemical parameter file" << std::endl; exit(-1); } @@ -287,23 +248,17 @@ int main (int argc, char **argv) Logger::Instance().PushVerbosityLevel(ost_verbosity_level); if (vm.count("bond_tolerance")) { - bond_tolerance=vm["bond_tolerance"].as<Real>(); + settings.bond_tolerance=vm["bond_tolerance"].as<Real>(); } if (vm.count("angle_tolerance")) { - angle_tolerance=vm["angle_tolerance"].as<Real>(); + settings.angle_tolerance=vm["angle_tolerance"].as<Real>(); } if (vm.count("inclusion_radius")) { - radius=vm["inclusion_radius"].as<Real>(); + settings.radius=vm["inclusion_radius"].as<Real>(); } if (vm.count("sequence_separation")) { - sequence_separation=vm["sequence_separation"].as<int>(); + settings.sequence_separation=vm["sequence_separation"].as<int>(); } - - std::vector<Real> cutoffs; - cutoffs.push_back(0.5); - cutoffs.push_back(1.0); - cutoffs.push_back(2.0); - cutoffs.push_back(4.0); std::vector<EntityView> ref_list; @@ -311,55 +266,37 @@ int main (int argc, char **argv) // if the reference file is a comma-separated list of files, switches to multi- // reference mode GlobalRDMap glob_dist_list; - String ref_file=files.back(); + String ref_file=files.back(); ost::StringRef ref_file_sr(ref_file.c_str(),ref_file.length()); std::vector<StringRef> ref_file_split_sr=ref_file_sr.split(','); - if (ref_file_split_sr.size()==1) { - std::cout << "Multi-reference mode: Off" << std::endl; - String ref_filename = ref_file_split_sr[0].str(); + for (std::vector<StringRef>::const_iterator ref_file_split_sr_it = ref_file_split_sr.begin(); + ref_file_split_sr_it != ref_file_split_sr.end();++ref_file_split_sr_it) { + String ref_filename = ref_file_split_sr_it->str(); EntityHandle ref=load(ref_filename, profile); if (!ref) { exit(-1); - } - EntityView refview=ref.GetChainList()[0].Select("peptide=true"); - ref_list.push_back(refview); - glob_dist_list = CreateDistanceList(refview,radius); - } else { - std::cout << "Multi-reference mode: On" << std::endl; - for (std::vector<StringRef>::const_iterator ref_file_split_sr_it = ref_file_split_sr.begin(); - ref_file_split_sr_it != ref_file_split_sr.end();++ref_file_split_sr_it) { - String ref_filename = ref_file_split_sr_it->str(); - EntityHandle ref=load(ref_filename, profile); - if (!ref) { + } + if (settings.sel != ""){ + std::cout << "Performing \"" << settings.sel << "\" selection on reference " << ref_filename << std::endl; + try { + ref_list.push_back(ref.Select(settings.sel)); + } catch (const ost::mol::QueryError& e) { + std::cerr << "Provided selection argument failed." << std::endl << e.GetFormattedMessage() << std::endl; exit(-1); } - if (! ref_list.empty()) { - if (ref_list[0].GetChainList()[0].GetName()!=ref.GetChainList()[0].GetName()) { - std::cout << "ERROR: First chains in the reference structures have different names" << std::endl; - exit(-1); - } - } - EntityView refview=ref.GetChainList()[0].Select("peptide=true"); + } + else { ref_list.push_back(ref.CreateFullView()); - } - glob_dist_list = CreateDistanceListFromMultipleReferences (ref_list,cutoffs,sequence_separation,radius); - } + } + } + CleanlDDTReferences(ref_list); + glob_dist_list = PreparelDDTGlobalRDMap(ref_list, settings); files.pop_back(); // prints out parameters used in the lddt calculation std::cout << "Verbosity level: " << verbosity_level << std::endl; - if (structural_checks) { - std::cout << "Stereo-chemical and steric clash checks: On " << std::endl; - } else { - std::cout << "Stereo-chemical and steric clash checks: Off " << std::endl; - } - std::cout << "Inclusion Radius: " << radius << std::endl; - - std::cout << "Sequence separation: " << sequence_separation << std::endl; - if (structural_checks) { - std::cout << "Parameter filename: " << parameter_filename << std::endl; - std::cout << "Tolerance in stddevs for bonds: " << bond_tolerance << std::endl; - std::cout << "Tolerance in stddevs for angles: " << angle_tolerance << std::endl; + settings.PrintParameters(); + if (settings.structural_checks) { LOG_INFO("Log entries format:"); LOG_INFO("BOND INFO FORMAT: Chain Residue ResNum Bond Min Max Observed Z-score Status"); LOG_INFO("ANGLE INFO FORMAT: Chain Residue ResNum Angle Min Max Observed Z-score Status"); @@ -382,183 +319,41 @@ int main (int argc, char **argv) } continue; } - EntityView v=model.GetChainList()[0].Select("peptide=true"); - EntityView outv=model.GetChainList()[0].Select("peptide=true"); - for (std::vector<EntityView>::const_iterator ref_list_it = ref_list.begin(); - ref_list_it != ref_list.end(); ++ref_list_it) { - bool cons_check = ResidueNamesMatch(v,*ref_list_it,consistency_checks); - if (cons_check==false) { - if (consistency_checks==true) { - LOG_ERROR("Residue names in model: " << files[i] << " and in reference structure(s) are inconsistent."); - exit(-1); - } else { - LOG_WARNING("Residue names in model: " << files[i] << " and in reference structure(s) are inconsistent."); - } - } - } + EntityView model_view = model.GetChainList()[0].Select("peptide=true"); boost::filesystem::path pathstring(files[i]); - String filestring=BFPathToString(pathstring); - std::cout << "File: " << files[i] << std::endl; - std::pair<int,int> cov = compute_coverage(v,glob_dist_list); - if (cov.second == -1) { - std::cout << "Coverage: 0 (0 out of 0 residues)" << std::endl; - } else { - std::cout << "Coverage: " << (float(cov.first)/float(cov.second)) << " (" << cov.first << " out of " << cov.second << " residues)" << std::endl; - } + std::cout << "File: " << files[i] << std::endl; - if (structural_checks) { - // reads in parameter files - boost::filesystem::path loc(parameter_filename); - boost::filesystem::ifstream infile(loc); - if (!infile) { - std::cout << "Could not find " << parameter_filename << std::endl; - exit(-1); - } - std::vector<String> stereo_chemical_props; - String line; - while (std::getline(infile, line)) - { - std::stringstream line_stream(line); - stereo_chemical_props.push_back(line); - } - StereoChemicalParams bond_table = FillStereoChemicalParams("Bond",stereo_chemical_props); - if (bond_table.IsEmpty()) { - std::cout << "Error reading the Bond section of the stereo-chemical parameter file." << std::endl; - exit(-1); - } - StereoChemicalParams angle_table = FillStereoChemicalParams("Angle",stereo_chemical_props); - if (angle_table.IsEmpty()) { - std::cout << "Error reading the Angles section of the stereo-chemical parameter file." << std::endl; - exit(-1); - } - - ClashingDistances nonbonded_table = FillClashingDistances(stereo_chemical_props); - - if (nonbonded_table.IsEmpty()) { - std::cout << "Error reading the Clashing section of the stereo-chemical parameter file." << std::endl; - exit(-1); - } - // performs structural checks and filters the structure - StereoChemistryInfo stereo_chemistry_info; - try { - std::pair<EntityView,StereoChemistryInfo> csc_result = alg::CheckStereoChemistry(v,bond_table,angle_table,bond_tolerance,angle_tolerance); - v = csc_result.first; - stereo_chemistry_info = csc_result.second; - } catch (std::exception& e) { - std::cout << "An error occurred during the structure quality checks, stage 1:" << std::endl; + if (settings.structural_checks) { + StereoChemicalParamsReader stereochemical_params(settings.parameter_file_path); + try { + stereochemical_params.Read(true); + } catch (std::exception& e) { std::cout << e.what() << std::endl; exit(-1); } - std::cout << "Average Z-Score for bond lengths: " << std::fixed << std::setprecision(5) << stereo_chemistry_info.GetAvgZscoreBonds() << std::endl; - std::cout << "Bonds outside of tolerance range: " << stereo_chemistry_info.GetBadBondCount() << " out of " << stereo_chemistry_info.GetBondCount() << std::endl; - std::cout << "Bond\tAvg Length\tAvg zscore\tNum Bonds" << std::endl; - std::map<String,BondLengthInfo> avg_bond_length_info = stereo_chemistry_info.GetAvgBondLengthInfo(); - for (std::map<String,BondLengthInfo>::const_iterator abli_it=avg_bond_length_info.begin();abli_it!=avg_bond_length_info.end();++abli_it) { - String key = (*abli_it).first; - BondLengthInfo bond_length_info = (*abli_it).second; - std::cout << key << "\t" << std::fixed << std::setprecision(5) << std::left << std::setw(10) << - bond_length_info.GetAvgLength() << "\t" << std::left << std::setw(10) << bond_length_info.GetAvgZscore() << "\t" << bond_length_info.GetCount() << std::endl; - } - std::cout << "Average Z-Score angle widths: " << std::fixed << std::setprecision(5) << stereo_chemistry_info.GetAvgZscoreAngles() << std::endl; - std::cout << "Angles outside of tolerance range: " << stereo_chemistry_info.GetBadAngleCount() << " out of " << stereo_chemistry_info.GetAngleCount() << std::endl; - ClashingInfo clash_info; + try { - std::pair<EntityView,ClashingInfo> fc_result = alg::FilterClashes(v,nonbonded_table); - v = fc_result.first; - clash_info = fc_result.second; - } catch (std::exception& e) { - std::cout << "An error occurred during the structure quality checks, stage 2:" << std::endl; + CheckStructure(model_view, + stereochemical_params.bond_table, + stereochemical_params.angle_table, + stereochemical_params.nonbonded_table, + settings.bond_tolerance, + settings.angle_tolerance); + } catch (std::exception& e) { std::cout << e.what() << std::endl; exit(-1); } - std::cout << clash_info.GetClashCount() << " non-bonded short-range distances shorter than tolerance distance" << std::endl; - std::cout << "Distances shorter than tolerance are on average shorter by: " << std::fixed << std::setprecision(5) << clash_info.GetAverageOffset() << std::endl; - - } - if (cov.first==0) { - std::cout << "Global LDDT score: 0.0" << std::endl; - return 0; } // computes the lddt score - String label="localldt"; - std::pair<int,int> total_ov=alg::LocalDistDiffTest(v, glob_dist_list, cutoffs, sequence_separation, label); - Real lddt = static_cast<Real>(total_ov.first)/(static_cast<Real>(total_ov.second) ? static_cast<Real>(total_ov.second) : 1); - std::cout << "Global LDDT score: " << std::setprecision(4) << lddt << std::endl; - std::cout << "(" << std::fixed << total_ov.first << " conserved distances out of " << total_ov.second - << " checked, over " << cutoffs.size() << " thresholds)" << std::endl; + Real lddt = LocalDistDiffTest(model_view, ref_list, glob_dist_list, settings); - // prints the residue-by-residue statistics - if (structural_checks) { - std::cout << "Local LDDT Scores:" << std::endl; - std::cout << "(A 'Yes' in the 'Quality Problems' column stands for problems" << std::endl; - std::cout << "in the side-chain of a residue, while a 'Yes+' for problems" << std::endl; - std::cout << "in the backbone)" << std::endl; - } else { - std::cout << "Local LDDT Scores:" << std::endl; - } - if (structural_checks) { - std::cout << "Chain\tResName\tResNum\tAsses.\tQ.Prob.\tScore\t(Conserved/Total, over " << cutoffs.size() << " thresholds)" << std::endl; - } else { - std::cout << "Chain\tResName\tResNum\tAsses.\tScore\t(Conserved/Total, over " << cutoffs.size() << " thresholds)" << std::endl; - } - for (ChainViewList::const_iterator ci = outv.GetChainList().begin(), - ce = outv.GetChainList().end(); ci != ce; ++ci) { - for (ResidueViewList::const_iterator rit = ci->GetResidueList().begin(), - re = ci->GetResidueList().end(); rit != re; ++rit) { - - ResidueView ritv=*rit; - ResNum rnum = ritv.GetNumber(); - bool assessed = false; - String assessed_string="No"; - String quality_problems_string="No"; - Real lddt_local = -1; - String lddt_local_string="-"; - int conserved_dist = -1; - int total_dist = -1; - String dist_string = "-"; - if (is_resnum_in_globalrdmap(rnum,glob_dist_list)) { - assessed = true; - assessed_string="Yes"; - } - if (ritv.HasProp("stereo_chemical_violation_sidechain") || - ritv.HasProp("steric_clash_sidechain")) { - quality_problems_string="Yes"; - } - if (ritv.HasProp("stereo_chemical_violation_backbone") || - ritv.HasProp("steric_clash_backbone")) { - quality_problems_string="Yes+"; - } - - if (assessed==true) { - if (ritv.HasProp(label)) { - lddt_local=ritv.GetFloatProp(label); - std::stringstream stkeylddt; - stkeylddt << std::fixed << std::setprecision(4) << lddt_local; - lddt_local_string=stkeylddt.str(); - conserved_dist=ritv.GetIntProp(label+"_conserved"); - total_dist=ritv.GetIntProp(label+"_total"); - std::stringstream stkeydist; - stkeydist << "("<< conserved_dist << "/" << total_dist << ")"; - dist_string=stkeydist.str(); - } else { - lddt_local = 0; - lddt_local_string="0.0000"; - conserved_dist = 0; - total_dist = 0; - dist_string="(0/0)"; - } - } - if (structural_checks) { - std::cout << ritv.GetChain() << "\t" << ritv.GetName() << "\t" << ritv.GetNumber() << '\t' << assessed_string << '\t' << quality_problems_string << '\t' << lddt_local_string << "\t" << dist_string << std::endl; - } else { - std::cout << ritv.GetChain() << "\t" << ritv.GetName() << "\t" << ritv.GetNumber() << '\t' << assessed_string << '\t' << lddt_local_string << "\t" << dist_string << std::endl; - } - } - } - std::cout << std::endl; + // prints the residue-by-residue statistics + std::vector<lDDTLocalScore> local_scores; + local_scores = GetlDDTPerResidueStats(model, glob_dist_list, settings); + PrintlDDTPerResidueStats(local_scores, settings); } return 0; } diff --git a/modules/mol/alg/src/local_dist_diff_test.cc b/modules/mol/alg/src/local_dist_diff_test.cc index aad66e7a639e265256fe5511a628203f2e57df53..9c988d1005c1ca2614a48fbf74e7258a07caf135 100644 --- a/modules/mol/alg/src/local_dist_diff_test.cc +++ b/modules/mol/alg/src/local_dist_diff_test.cc @@ -1,12 +1,29 @@ +#include <iomanip> +#include <sstream> #include <ost/log.hh> #include <ost/mol/mol.hh> +#include <ost/platform.hh> #include "local_dist_diff_test.hh" #include <boost/concept_check.hpp> +#include <boost/filesystem/convenience.hpp> +#include <ost/mol/alg/consistency_checks.hh> namespace ost { namespace mol { namespace alg { namespace { +// helper_function +String vector_to_string(std::vector<Real> vec) { + std::ostringstream str; + for (unsigned int i = 0; i < vec.size(); ++i) { + str << vec[i]; + if (i+1 != vec.size()) { + str << ", "; + } + } + return str.str(); +} + // helper function bool within_tolerance(Real mdl_dist, const std::pair<Real,Real>& values, Real tol) { @@ -307,6 +324,42 @@ void merge_distance_lists(GlobalRDMap& ref_dist_map, const GlobalRDMap& new_dist } +// Computes coverage +std::pair<int,int> ComputeCoverage(const EntityView& v,const GlobalRDMap& glob_dist_list) +{ + int second=0; + int first=0; + if (v.GetResidueList().size()==0) { + if (glob_dist_list.size()==0) { + return std::make_pair(0,-1); + } else { + return std::make_pair(0,glob_dist_list.size()); + } + } + ChainView vchain=v.GetChainList()[0]; + for (GlobalRDMap::const_iterator i=glob_dist_list.begin();i!=glob_dist_list.end();++i) + { + ResNum rnum = (*i).first; + second++; + if (vchain.FindResidue(rnum)) { + first++; + } + } + return std::make_pair(first,second); +} + +bool IsResnumInGlobalRDMap(const ResNum& resnum, const GlobalRDMap& glob_dist_list) +{ + for (GlobalRDMap::const_iterator i=glob_dist_list.begin(), e=glob_dist_list.end(); i!=e; ++i) { + ResNum rn = i->first; + if (rn==resnum) { + return true; + } + } + return false; +} + + // helper function bool IsStandardResidue(String rn) { @@ -337,6 +390,142 @@ bool IsStandardResidue(String rn) return false; } +lDDTSettings::lDDTSettings(): bond_tolerance(12.0), + angle_tolerance(12.0), + radius(15.0), + sequence_separation(0), + sel(""), + structural_checks(true), + consistency_checks(true), + label("localldt") { + cutoffs.push_back(0.5); + cutoffs.push_back(1.0); + cutoffs.push_back(2.0); + cutoffs.push_back(4.0); + SetStereoChemicalParamsPath(); + } + +lDDTSettings::lDDTSettings(Real init_bond_tolerance, + Real init_angle_tolerance, + Real init_radius, + int init_sequence_separation, + String init_sel, + String init_parameter_file_path, + bool init_structural_checks, + bool init_consistency_checks, + std::vector<Real>& init_cutoffs, + String init_label): + bond_tolerance(init_bond_tolerance), + angle_tolerance(init_angle_tolerance), + radius(init_radius), + sequence_separation(init_sequence_separation), + sel(init_sel), + structural_checks(init_structural_checks), + consistency_checks(init_consistency_checks), + cutoffs(init_cutoffs), + label(init_label) { + SetStereoChemicalParamsPath(init_parameter_file_path); +} + +void lDDTSettings::SetStereoChemicalParamsPath(const String& path) { + if (path == ""){ + try { + std::cerr << "Setting default stereochemical parameters file path." << std::endl; + parameter_file_path = ost::GetSharedDataPath() + "/stereo_chemical_props.txt"; + if (! structural_checks) { + std::cerr << "WARNING: Stereochemical parameters file is set but structural checks are disabled" << std::endl; + } + } catch (std::runtime_error& e) { + std::cerr << "WARNING: " << e.what() << std::endl; + std::cerr << "WARNING: Parameter file setting failed - structural check will be disabled" << std::endl; + structural_checks = false; + parameter_file_path = ""; + } + } else if (! boost::filesystem::exists(path)) { + std::cerr << "WARNING: Parameter file does not exist - structural check will be disabled" << std::endl; + structural_checks = false; + parameter_file_path = ""; + } else { + parameter_file_path = path; + structural_checks = true; + } +} + +std::string lDDTSettings::ToString() { + std::ostringstream rep; + if (structural_checks) { + rep << "Stereo-chemical and steric clash checks: On \n"; + } else { + rep << "Stereo-chemical and steric clash checks: Off \n"; + } + + rep << "Inclusion Radius: " << radius << "\n"; + rep << "Sequence separation: " << sequence_separation << "\n"; + rep << "Cutoffs: " << vector_to_string(cutoffs) << "\n"; + + if (structural_checks) { + rep << "Parameter filename: " + parameter_file_path + "\n"; + rep << "Tolerance in stddevs for bonds: " << bond_tolerance << "\n"; + rep << "Tolerance in stddevs for angles: " << angle_tolerance << "\n"; + } + rep << "Residue properties label: " << label << "\n"; + + return rep.str(); +} + +void lDDTSettings::PrintParameters() { + std::cout << ToString(); +} + +// Default constructor is neccessary for boost exposure +lDDTLocalScore::lDDTLocalScore(): cname(""), + rname(""), + rnum(-1), + is_assessed(""), + quality_problems(""), + local_lddt(-1.0), + conserved_dist(-1), + total_dist(-1) {} + +lDDTLocalScore::lDDTLocalScore(String init_cname, + String init_rname, + int init_rnum, + String init_is_assessed, + String init_quality_problems, + Real init_local_lddt, + int init_conserved_dist, + int init_total_dist): + cname(init_cname), + rname(init_rname), + rnum(init_rnum), + is_assessed(init_is_assessed), + quality_problems(init_quality_problems), + local_lddt(init_local_lddt), + conserved_dist(init_conserved_dist), + total_dist(init_total_dist) {} + +String lDDTLocalScore::ToString(bool structural_checks) const { + std::stringstream stkeylddt; + std::stringstream outstr; + stkeylddt << "(" << conserved_dist << "/" << total_dist << ")"; + String dist_string = stkeylddt.str(); + if (structural_checks) { + outstr << cname << "\t" << rname << "\t" << rnum << '\t' << is_assessed << '\t' << quality_problems << '\t' << local_lddt << "\t" << dist_string; + } else { + outstr << cname << "\t" << rname << "\t" << rnum << '\t' << is_assessed << '\t' << local_lddt << "\t" << dist_string; + } + return outstr.str(); +} + +String lDDTLocalScore::GetHeader(bool structural_checks, int cutoffs_length) { + std::stringstream outstr; + if (structural_checks) { + outstr << "Chain\tResName\tResNum\tAsses.\tQ.Prob.\tScore\t(Conserved/Total, over " << cutoffs_length << " thresholds)"; + } else { + outstr << "Chain\tResName\tResNum\tAsses.\tScore\t(Conserved/Total, over " << cutoffs_length << " thresholds)"; + } + return outstr.str(); +} GlobalRDMap CreateDistanceList(const EntityView& ref,Real max_dist) { @@ -473,6 +662,41 @@ Real LocalDistDiffTest(const EntityView& mdl, const EntityView& target, Real cut return static_cast<Real>(total_ov.first)/(static_cast<Real>(total_ov.second) ? static_cast<Real>(total_ov.second) : 1); } +Real LocalDistDiffTest(const EntityView& v, + std::vector<EntityView>& ref_list, + const GlobalRDMap& glob_dist_list, + lDDTSettings& settings) { + for (std::vector<EntityView>::const_iterator ref_list_it = ref_list.begin(); + ref_list_it != ref_list.end(); ++ref_list_it) { + bool cons_check = ResidueNamesMatch(v,*ref_list_it,settings.consistency_checks); + if (cons_check==false) { + if (settings.consistency_checks==true) { + throw std::runtime_error("Residue names in model and in reference structure(s) are inconsistent."); + } else { + LOG_WARNING("Residue names in model and in reference structure(s) are inconsistent."); + } + } + } + + std::pair<int,int> cov = ComputeCoverage(v,glob_dist_list); + if (cov.second == -1) { + std::cout << "Coverage: 0 (0 out of 0 residues)" << std::endl; + } else { + std::cout << "Coverage: " << (float(cov.first)/float(cov.second)) << " (" << cov.first << " out of " << cov.second << " residues)" << std::endl; + } + + if (cov.first==0) { + std::cout << "Global LDDT score: 0.0" << std::endl; + return 0.0; + } + + std::pair<int,int> total_ov=alg::LocalDistDiffTest(v, glob_dist_list, settings.cutoffs, settings.sequence_separation, settings.label); + Real lddt = static_cast<Real>(total_ov.first)/(static_cast<Real>(total_ov.second) ? static_cast<Real>(total_ov.second) : 1); + std::cout << "Global LDDT score: " << std::setprecision(4) << lddt << std::endl; + std::cout << "(" << std::fixed << total_ov.first << " conserved distances out of " << total_ov.second + << " checked, over " << settings.cutoffs.size() << " thresholds)" << std::endl; + return lddt; +} Real LocalDistDiffTest(const ost::seq::AlignmentHandle& aln, Real cutoff, Real max_dist, int ref_index, int mdl_index) @@ -536,6 +760,160 @@ Real LDDTHA(EntityView& v, const GlobalRDMap& global_dist_list, int sequence_sep return static_cast<Real>(total_ov.first)/(static_cast<Real>(total_ov.second) ? static_cast<Real>(total_ov.second) : 1); } + +void CleanlDDTReferences(std::vector<EntityView>& ref_list){ + for (unsigned int i=0;i<ref_list.size();i++) { + if (ref_list[0].GetChainList()[0].GetName()!=ref_list[i].GetChainList()[0].GetName()) { + std::cout << "ERROR: First chains in the reference structures have different names" << std::endl; + exit(-1); + } + ref_list[i] = ref_list[i].GetChainList()[0].Select("peptide=true"); + } +} + +GlobalRDMap PreparelDDTGlobalRDMap(const std::vector<EntityView>& ref_list, + lDDTSettings& settings){ + GlobalRDMap glob_dist_list; + if (ref_list.size()==1) { + std::cout << "Multi-reference mode: Off" << std::endl; + glob_dist_list = CreateDistanceList(ref_list[0], settings.radius); + } else { + std::cout << "Multi-reference mode: On" << std::endl; + glob_dist_list = CreateDistanceListFromMultipleReferences(ref_list, + settings.cutoffs, + settings.sequence_separation, + settings.radius); + } + + return glob_dist_list; +} + +void CheckStructure(EntityView& ent, + StereoChemicalParams& bond_table, + StereoChemicalParams& angle_table, + ClashingDistances& nonbonded_table, + Real bond_tolerance, + Real angle_tolerance){ + // performs structural checks and filters the structure + StereoChemistryInfo stereo_chemistry_info; + try { + std::pair<EntityView,StereoChemistryInfo> csc_result = alg::CheckStereoChemistry(ent,bond_table,angle_table,bond_tolerance,angle_tolerance); + ent = csc_result.first; + stereo_chemistry_info = csc_result.second; + } catch (std::exception& e) { + std::cout << "An error occurred during the structure quality checks, stage 1:" << std::endl; + std::cout << e.what() << std::endl; + exit(-1); + } + std::cout << "Average Z-Score for bond lengths: " << std::fixed << std::setprecision(5) << stereo_chemistry_info.GetAvgZscoreBonds() << std::endl; + std::cout << "Bonds outside of tolerance range: " << stereo_chemistry_info.GetBadBondCount() << " out of " << stereo_chemistry_info.GetBondCount() << std::endl; + std::cout << "Bond\tAvg Length\tAvg zscore\tNum Bonds" << std::endl; + std::map<String,BondLengthInfo> avg_bond_length_info = stereo_chemistry_info.GetAvgBondLengthInfo(); + for (std::map<String,BondLengthInfo>::const_iterator abli_it=avg_bond_length_info.begin();abli_it!=avg_bond_length_info.end();++abli_it) { + String key = (*abli_it).first; + BondLengthInfo bond_length_info = (*abli_it).second; + std::cout << key << "\t" << std::fixed << std::setprecision(5) << std::left << std::setw(10) << + bond_length_info.GetAvgLength() << "\t" << std::left << std::setw(10) << bond_length_info.GetAvgZscore() << "\t" << bond_length_info.GetCount() << std::endl; + } + std::cout << "Average Z-Score angle widths: " << std::fixed << std::setprecision(5) << stereo_chemistry_info.GetAvgZscoreAngles() << std::endl; + std::cout << "Angles outside of tolerance range: " << stereo_chemistry_info.GetBadAngleCount() << " out of " << stereo_chemistry_info.GetAngleCount() << std::endl; + ClashingInfo clash_info; + try { + std::pair<EntityView,ClashingInfo> fc_result = alg::FilterClashes(ent,nonbonded_table); + ent = fc_result.first; + clash_info = fc_result.second; + } catch (std::exception& e) { + std::stringstream serr; + serr << "An error occurred during the structure quality checks, stage 2: " << e.what(); + throw ost::Error(serr.str()); + } + std::cout << clash_info.GetClashCount() << " non-bonded short-range distances shorter than tolerance distance" << std::endl; + std::cout << "Distances shorter than tolerance are on average shorter by: " << std::fixed << std::setprecision(5) << clash_info.GetAverageOffset() << std::endl; +} + +std::vector<lDDTLocalScore> GetlDDTPerResidueStats(EntityHandle& model, GlobalRDMap& glob_dist_list, lDDTSettings& settings){ + std::vector<lDDTLocalScore> scores; + EntityView outv = model.GetChainList()[0].Select("peptide=true"); + for (ChainViewList::const_iterator ci = outv.GetChainList().begin(), + ce = outv.GetChainList().end(); ci != ce; ++ci) { + for (ResidueViewList::const_iterator rit = ci->GetResidueList().begin(), + re = ci->GetResidueList().end(); rit != re; ++rit) { + + ResidueView ritv=*rit; + ResNum rnum = ritv.GetNumber(); + bool assessed = false; + String assessed_string="No"; + String quality_problems_string; + if (settings.structural_checks) { + quality_problems_string="No"; + } else { + quality_problems_string="NA"; + } + Real lddt_local = -1; + String lddt_local_string="-"; + int conserved_dist = -1; + int total_dist = -1; + if (IsResnumInGlobalRDMap(rnum,glob_dist_list)) { + assessed = true; + assessed_string="Yes"; + } + if (ritv.HasProp("stereo_chemical_violation_sidechain") || + ritv.HasProp("steric_clash_sidechain")) { + quality_problems_string="Yes"; + } + if (ritv.HasProp("stereo_chemical_violation_backbone") || + ritv.HasProp("steric_clash_backbone")) { + quality_problems_string="Yes+"; + } + + if (assessed==true) { + if (ritv.HasProp(settings.label)) { + lddt_local=ritv.GetFloatProp(settings.label); + std::stringstream stkeylddt; + stkeylddt << std::fixed << std::setprecision(4) << lddt_local; + lddt_local_string=stkeylddt.str(); + conserved_dist=ritv.GetIntProp(settings.label+"_conserved"); + total_dist=ritv.GetIntProp(settings.label+"_total"); + } else { + std::cout << settings.label << std::endl; + lddt_local = 0; + lddt_local_string="0.0000"; + conserved_dist = 0; + total_dist = 0; + } + } + // std::tuple<String, String, int, String, String, Real, int, int> + lDDTLocalScore row(ritv.GetChain().GetName(), + ritv.GetName(), + ritv.GetNumber().GetNum(), + assessed_string, + quality_problems_string, + lddt_local, + conserved_dist, + total_dist); + scores.push_back(row); + } + } + + return scores; +} + +void PrintlDDTPerResidueStats(std::vector<lDDTLocalScore>& scores, lDDTSettings& settings){ + if (settings.structural_checks) { + std::cout << "Local LDDT Scores:" << std::endl; + std::cout << "(A 'Yes' in the 'Quality Problems' column stands for problems" << std::endl; + std::cout << "in the side-chain of a residue, while a 'Yes+' for problems" << std::endl; + std::cout << "in the backbone)" << std::endl; + } else { + std::cout << "Local LDDT Scores:" << std::endl; + } + std::cout << lDDTLocalScore::GetHeader(settings.structural_checks, settings.cutoffs.size()) << std::endl; + for (std::vector<lDDTLocalScore>::const_iterator sit = scores.begin(); sit != scores.end(); ++sit) { + std::cout << sit->ToString(settings.structural_checks) << std::endl; + } + std::cout << std::endl; +} + // debugging code /* Real OldStyleLDDTHA(EntityView& v, const GlobalRDMap& global_dist_list) diff --git a/modules/mol/alg/src/local_dist_diff_test.hh b/modules/mol/alg/src/local_dist_diff_test.hh index 9dd2f230617ac1b0b795ec6a7e467a80c1485655..21aee5a1d7a035ae21450b05c84212bc103a1b9d 100644 --- a/modules/mol/alg/src/local_dist_diff_test.hh +++ b/modules/mol/alg/src/local_dist_diff_test.hh @@ -22,8 +22,67 @@ #include <ost/mol/alg/module_config.hh> #include <ost/seq/alignment_handle.hh> #include <ost/mol/alg/distance_test_common.hh> +#include <ost/mol/alg/filter_clashes.hh> namespace ost { namespace mol { namespace alg { + +struct lDDTSettings { + Real bond_tolerance; + Real angle_tolerance; + Real radius; + int sequence_separation; + String sel; + String parameter_file_path; + bool structural_checks; + bool consistency_checks; + std::vector<Real> cutoffs; + String label; + + lDDTSettings(); + lDDTSettings(Real init_bond_tolerance, + Real init_angle_tolerance, + Real init_radius, + int init_sequence_separation, + String init_sel, + String init_parameter_file_path, + bool init_structural_checks, + bool init_consistency_checks, + std::vector<Real>& init_cutoffs, + String init_label); + void SetStereoChemicalParamsPath(const String& path=""); + void PrintParameters(); + std::string ToString(); +}; + +struct lDDTLocalScore { + String cname; + String rname; + int rnum; + String is_assessed; + String quality_problems; + Real local_lddt; + int conserved_dist; + int total_dist; + + lDDTLocalScore(); + + lDDTLocalScore(String init_cname, + String init_rname, + int init_rnum, + String init_is_assessed, + String init_quality_problems, + Real init_local_lddt, + int init_conserved_dist, + int init_total_dist); + + String ToString(bool structural_checks) const; + + static String GetHeader(bool structural_checks, int cutoffs_length); +}; + +std::pair<int,int> DLLEXPORT_OST_MOL_ALG ComputeCoverage(const EntityView& v,const GlobalRDMap& glob_dist_list); + +bool DLLEXPORT_OST_MOL_ALG IsResnumInGlobalRDMap(const ResNum& resnum, const GlobalRDMap& glob_dist_list); /// \brief Calculates number of distances conserved in a model, given a list of distances to check and a model /// @@ -73,6 +132,12 @@ Real DLLEXPORT_OST_MOL_ALG LocalDistDiffTest(const EntityView& mdl, Real cutoff, Real max_dist, const String& local_ldt_property_string=""); +/// TODO document me +Real DLLEXPORT_OST_MOL_ALG LocalDistDiffTest(const EntityView& v, + std::vector<EntityView>& ref_list, + const GlobalRDMap& glob_dist_list, + lDDTSettings& settings); + /// \brief Calculates the Local Distance Difference Test score for a given model starting from an alignment between a reference structure and the model. /// /// Calculates the Local Distance Difference Test score given an alignment between a model and a taget structure. @@ -135,6 +200,27 @@ void DLLEXPORT_OST_MOL_ALG PrintResidueRDMap(const ResidueRDMap& res_dist_list); // circular dependencies bool DLLEXPORT_OST_MOL_ALG IsStandardResidue(String rn); +void DLLEXPORT_OST_MOL_ALG CleanlDDTReferences(std::vector<EntityView>& ref_list); + +// Prepare GlobalRDMap from reference list +GlobalRDMap DLLEXPORT_OST_MOL_ALG PreparelDDTGlobalRDMap( + const std::vector<EntityView>& ref_list, + lDDTSettings& settings); + +void DLLEXPORT_OST_MOL_ALG CheckStructure(EntityView& ent, + StereoChemicalParams& bond_table, + StereoChemicalParams& angle_table, + ClashingDistances& nonbonded_table, + Real bond_tolerance, + Real angle_tolerance); + +std::vector<lDDTLocalScore> DLLEXPORT_OST_MOL_ALG GetlDDTPerResidueStats(EntityHandle& model, + GlobalRDMap& glob_dist_list, + lDDTSettings& settings); + +void DLLEXPORT_OST_MOL_ALG PrintlDDTPerResidueStats(std::vector<lDDTLocalScore>& scores, + lDDTSettings& settings); + }}} #endif diff --git a/modules/mol/alg/src/molck.cc b/modules/mol/alg/src/molck.cc new file mode 100644 index 0000000000000000000000000000000000000000..e1c35083fca0a424e1b05a7eba5973e8d5e7ead3 --- /dev/null +++ b/modules/mol/alg/src/molck.cc @@ -0,0 +1,164 @@ +#include <ost/mol/xcs_editor.hh> +#include <ost/mol/alg/nonstandard.hh> +#include <ost/conop/model_check.hh> +#include <ost/conop/amino_acids.hh> +#include <ost/mol/alg/molck.hh> + +using namespace ost::conop; +using namespace ost::mol; + + +void ost::mol::alg::MapNonStandardResidues(EntityHandle& ent, CompoundLibPtr lib) { + // TODO: Maybe it is possible to make it in-place operation + EntityHandle new_ent=CreateEntity(); + ChainHandleList chains=ent.GetChainList(); + XCSEditor new_edi=new_ent.EditXCS(); + for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { + ChainHandle new_chain = new_edi.InsertChain(c->GetName()); + ResidueHandleList residues = c->GetResidueList(); + for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { + AminoAcid aa = ResidueToAminoAcid(*r); + if (aa!=XXX) { + ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); + AtomHandleList atoms = r->GetAtomList(); + for (AtomHandleList::const_iterator a=atoms.begin();a!=atoms.end();++a) { + new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); + } + continue; + } else { + CompoundPtr compound=lib->FindCompound(r->GetName(),Compound::PDB); + if (!compound || !compound->IsPeptideLinking() || compound->GetChemClass()==ChemClass::D_PEPTIDE_LINKING || + OneLetterCodeToAminoAcid(compound->GetOneLetterCode())==XXX) { + ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); + AtomHandleList atoms = r->GetAtomList(); + for (AtomHandleList::const_iterator a=atoms.begin();a!=atoms.end();++a) { + new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); + } + continue; + } + ResidueHandle dest_res = new_edi.AppendResidue(new_chain,OneLetterCodeToResidueName(compound->GetOneLetterCode()),r->GetNumber()); + ost::mol::alg::CopyResidue(*r,dest_res,new_edi,lib); + } + } + } + ent = new_ent; +} + +void ost::mol::alg::RemoveAtoms( + EntityHandle& ent, + CompoundLibPtr lib, + bool rm_unk_atoms, + bool rm_non_std, + bool rm_hyd_atoms, + bool rm_oxt_atoms, + bool rm_zero_occ_atoms, + bool colored /*=true*/){ + XCSEditor edi=ent.EditXCS(); + Diagnostics diags; + Checker checker(lib, ent, diags); + if (rm_zero_occ_atoms) { + std::cerr << "removing atoms with zero occupancy" << std::endl; + int zremoved=0; + AtomHandleList zero_atoms=checker.GetZeroOccupancy(); + for (AtomHandleList::const_iterator i=zero_atoms.begin(), e=zero_atoms.end(); i!=e; ++i) { + edi.DeleteAtom(*i); + zremoved++; + } + std::cerr << " --> removed " << zremoved << " hydrogen atoms" << std::endl; + } + + if (rm_hyd_atoms) { + std::cerr << "removing hydrogen atoms" << std::endl; + int hremoved=0; + AtomHandleList hyd_atoms=checker.GetHydrogens(); + for (AtomHandleList::const_iterator i=hyd_atoms.begin(), e=hyd_atoms.end(); i!=e; ++i) { + edi.DeleteAtom(*i); + hremoved++; + } + std::cerr << " --> removed " << hremoved << " hydrogen atoms" << std::endl; + } + + if (rm_oxt_atoms) { + std::cerr << "removing OXT atoms" << std::endl; + int oremoved=0; + AtomHandleList atoms=ent.GetAtomList(); + for (AtomHandleList::const_iterator i=atoms.begin(), e=atoms.end(); i!=e; ++i) { + if (i->GetName()=="OXT") { + edi.DeleteAtom(*i); + oremoved++; + } + } + std::cerr << " --> removed " << oremoved << " OXT atoms" << std::endl; + } + + checker.CheckForCompleteness(); + checker.CheckForUnknownAtoms(); + checker.CheckForNonStandard(); + for (Diagnostics::const_diag_iterator + j = diags.diags_begin(), e = diags.diags_end(); j != e; ++j) { + const Diag* diag=*j; + std::cerr << diag->Format(colored); + switch (diag->GetType()) { + case DIAG_UNK_ATOM: + if (rm_unk_atoms) { + edi.DeleteAtom(diag->GetAtom(0)); + std::cerr << " --> removed "; + } + break; + case DIAG_NONSTD_RESIDUE: + if (rm_non_std) { + edi.DeleteResidue(diag->GetResidue(0)); + std::cerr << " --> removed "; + } + break; + default: + break; + } + std::cerr << std::endl; + } +} + +void ost::mol::alg::CleanUpElementColumn(EntityHandle& ent, CompoundLibPtr lib){ + ChainHandleList chains=ent.GetChainList(); + for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { + ResidueHandleList residues = c->GetResidueList(); + for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { + CompoundPtr compound=lib->FindCompound(r->GetName(),Compound::PDB); + AtomHandleList atoms=r->GetAtomList(); + if (!compound) { + for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { + j->SetElement(""); + } + continue; + } + for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { + int specindx=compound->GetAtomSpecIndex(j->GetName()); + if (specindx!=-1) { + j->SetElement(compound->GetAtomSpecs()[specindx].element); + } else { + j->SetElement(""); + } + } + } + } +} + +void ost::mol::alg::Molck( + ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib, + const ost::mol::alg::MolckSettings& settings=ost::mol::alg::MolckSettings()){ + if (settings.map_nonstd_res) { + ost::mol::alg::MapNonStandardResidues(ent, lib); + } + ost::mol::alg::RemoveAtoms(ent, + lib, + settings.rm_unk_atoms, + settings.rm_non_std, + settings.rm_hyd_atoms, + settings.rm_oxt_atoms, + settings.rm_zero_occ_atoms, + settings.colored); + if (settings.assign_elem) { + ost::mol::alg::CleanUpElementColumn(ent, lib); + } +} \ No newline at end of file diff --git a/modules/mol/alg/src/molck.hh b/modules/mol/alg/src/molck.hh new file mode 100644 index 0000000000000000000000000000000000000000..d61e0218678bd07fba0e828fdf0f901dd69977e5 --- /dev/null +++ b/modules/mol/alg/src/molck.hh @@ -0,0 +1,104 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#ifndef OST_MOL_ALG_MOLCK_HH +#define OST_MOL_ALG_MOLCK_HH + +#include <string> +#include <ost/mol/entity_handle.hh> +#include <ost/conop/compound_lib.hh> + +namespace { + inline std::string BoolToString(bool b) + { + return b ? "True" : "False"; + } +} + +namespace ost { namespace mol{ namespace alg { + +struct MolckSettings; + +struct MolckSettings{ + + bool rm_unk_atoms; + bool rm_non_std; + bool rm_hyd_atoms; + bool rm_oxt_atoms; + bool rm_zero_occ_atoms; + bool colored; + bool map_nonstd_res; + bool assign_elem; + + MolckSettings(bool init_rm_unk_atoms=false, + bool init_rm_non_std=false, + bool init_rm_hyd_atoms=true, + bool init_rm_oxt_atoms=false, + bool init_rm_zero_occ_atoms=false, + bool init_colored=false, + bool init_map_nonstd_res=true, + bool init_assign_elem=true): + rm_unk_atoms(init_rm_unk_atoms), // Remove unknown and atoms not following the nomenclature + rm_non_std(init_rm_non_std), // Remove all residues not one of the 20 standard amino acids + rm_hyd_atoms(init_rm_hyd_atoms), // Remove hydrogen atoms + rm_oxt_atoms(init_rm_oxt_atoms), // Remove terminal oxygens + rm_zero_occ_atoms(init_rm_zero_occ_atoms), // Remove atoms with zero occupancy + colored(init_colored), // Whether the output should be colored + map_nonstd_res(init_map_nonstd_res), // Map non standard residues back to standard ones (e.g.: MSE->MET,SEP->SER,etc.) + assign_elem(init_assign_elem){} // Clean up element column + + public: + std::string ToString(){ + std::string rep = "MolckSettings(rm_unk_atoms=" + BoolToString(rm_unk_atoms) + + ", rm_unk_atoms=" + BoolToString(rm_unk_atoms) + + ", rm_non_std=" + BoolToString(rm_non_std) + + ", rm_hyd_atoms=" + BoolToString(rm_hyd_atoms) + + ", rm_oxt_atoms=" + BoolToString(rm_oxt_atoms) + + ", rm_zero_occ_atoms=" + BoolToString(rm_zero_occ_atoms) + + ", colored=" + BoolToString(colored) + + ", map_nonstd_res=" + BoolToString(map_nonstd_res) + + ", assign_elem=" + BoolToString(assign_elem) + + ")"; + return rep; + } + +}; + +void MapNonStandardResidues(ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib); + +void RemoveAtoms(ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib, + bool rm_unk_atoms, + bool rm_non_std, + bool rm_hyd_atoms, + bool rm_oxt_atoms, + bool rm_zero_occ_atoms, + bool colored=true); + +void CleanUpElementColumn(ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib); + +void Molck(ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib, + const MolckSettings& settings); + + +}}} // namespace + +#endif diff --git a/modules/conop/src/nonstandard.cc b/modules/mol/alg/src/nonstandard.cc similarity index 99% rename from modules/conop/src/nonstandard.cc rename to modules/mol/alg/src/nonstandard.cc index eaf06680d087c05080f0e578828c7bad3fd68c66..b51c582adcdd81fb5b02cff834c18c218b7b317d 100644 --- a/modules/conop/src/nonstandard.cc +++ b/modules/mol/alg/src/nonstandard.cc @@ -32,7 +32,7 @@ using namespace ost::mol; using namespace ost; using namespace ost::conop; -namespace ost { namespace conop { +namespace ost { namespace mol { namespace alg { bool CopyResidue(ResidueHandle src_res, ResidueHandle dst_res, XCSEditor& edi) @@ -234,4 +234,4 @@ bool CopyNonConserved(ResidueHandle src_res, ResidueHandle dst_res, -}} +}}} diff --git a/modules/conop/src/nonstandard.hh b/modules/mol/alg/src/nonstandard.hh similarity index 78% rename from modules/conop/src/nonstandard.hh rename to modules/mol/alg/src/nonstandard.hh index a57022def8ef7436889491c7ae6714c6e9a69d6e..1dce74fd4c2cf102f985f06d22c2723e93053075 100644 --- a/modules/conop/src/nonstandard.hh +++ b/modules/mol/alg/src/nonstandard.hh @@ -17,30 +17,31 @@ // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA //------------------------------------------------------------------------------ -#ifndef OST_CONOP_NONSTANDARD_HH -#define OST_CONOP_NONSTANDARD_HH +#ifndef OST_MOL_ALG_NONSTANDARD_HH +#define OST_MOL_ALG_NONSTANDARD_HH /* Author: Marco Biasini, Juergen Haas */ #include "module_config.hh" -#include "compound_lib.hh" +#include <ost/conop/compound_lib.hh> -namespace ost { namespace conop { +namespace ost { namespace mol { namespace alg { /// \brief copies all atom of src_res to dst_res, gets compound lib from builder -bool DLLEXPORT_OST_CONOP CopyResidue(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyResidue(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi); /// \brief copies all atom of src_res to dst_res, requires a compound lib -bool DLLEXPORT_OST_CONOP CopyResidue(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyResidue(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, - ost::mol::XCSEditor& edi, CompoundLibPtr lib); + ost::mol::XCSEditor& edi, + ost::conop::CompoundLibPtr lib); /// \brief copies all atom of src_res to dst_res @@ -49,7 +50,7 @@ bool DLLEXPORT_OST_CONOP CopyResidue(ost::mol::ResidueHandle src_res, /// \param edi /// \param has_cbeta will be set to true if the src_res has a cbeta and the /// dst_residue is not a glycine -bool DLLEXPORT_OST_CONOP CopyIdentical(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyIdentical(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, bool& has_cbeta); @@ -63,10 +64,11 @@ bool DLLEXPORT_OST_CONOP CopyIdentical(ost::mol::ResidueHandle src_res, -bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyConserved(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, - bool& has_cbeta, CompoundLibPtr lib); + bool& has_cbeta, + ost::conop::CompoundLibPtr lib); /// \brief copies atoms of src_res to dst_res, requires compound lib /// @@ -77,7 +79,7 @@ bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, -bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyConserved(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, bool& has_cbeta); @@ -89,14 +91,14 @@ bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, /// only copied if dst_res is not equal to glycine. -bool DLLEXPORT_OST_CONOP CopyNonConserved(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyNonConserved(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, bool& has_cbeta); /// \brief construct dst_res from src_res when src_res is an MSE -bool DLLEXPORT_OST_CONOP CopyMSE(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyMSE(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, bool& has_cbeta); @@ -104,13 +106,14 @@ bool DLLEXPORT_OST_CONOP CopyMSE(ost::mol::ResidueHandle src_res, /// \brief construct a dst_res with only atoms matching the standard aminoacid /// from src_res when src_res is an is modified -bool DLLEXPORT_OST_CONOP CopyModified(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyModified(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, - bool& has_cbeta, CompoundLibPtr lib); + bool& has_cbeta, + ost::conop::CompoundLibPtr lib); -}} +}}} #endif diff --git a/modules/mol/alg/tests/CMakeLists.txt b/modules/mol/alg/tests/CMakeLists.txt index 0002d71492e639d21d456a93f94950db59aa1afe..0715910b1b9f9afac4de6e94169456ba21f80a3c 100644 --- a/modules/mol/alg/tests/CMakeLists.txt +++ b/modules/mol/alg/tests/CMakeLists.txt @@ -11,7 +11,8 @@ set(OST_MOL_ALG_UNIT_TESTS ) if (COMPOUND_LIB) - list(APPEND OST_MOL_ALG_UNIT_TESTS test_qsscoring.py) + list(APPEND OST_MOL_ALG_UNIT_TESTS test_qsscoring.py + test_nonstandard.py) endif() ost_unittest(MODULE mol_alg SOURCES "${OST_MOL_ALG_UNIT_TESTS}" LINK ost_io) diff --git a/modules/conop/tests/test_nonstandard.py b/modules/mol/alg/tests/test_nonstandard.py similarity index 78% rename from modules/conop/tests/test_nonstandard.py rename to modules/mol/alg/tests/test_nonstandard.py index 566db9a2b1902339828abaec67e7e35713fdf47a..c81c92479a4150c76aa89ff92409a4dc7656e25f 100644 --- a/modules/conop/tests/test_nonstandard.py +++ b/modules/mol/alg/tests/test_nonstandard.py @@ -1,5 +1,5 @@ import unittest -from ost import conop, io, mol +from ost import io, mol class TestNonStandard(unittest.TestCase): @@ -12,7 +12,7 @@ class TestNonStandard(unittest.TestCase): c=ed.InsertChain('A') ed.AppendResidue(c, 'SER') - err, has_cbeta=conop.CopyConserved(tpl.residues[0], new_hdl.residues[0], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[0], new_hdl.residues[0], ed) self.assertTrue(err) self.assertTrue(has_cbeta) residues=new_hdl.residues @@ -36,16 +36,16 @@ class TestNonStandard(unittest.TestCase): ed.AppendResidue(c, 'GLY') ed.AppendResidue(c, 'GLY') ed.AppendResidue(c, 'HIS') - err, has_cbeta=conop.CopyConserved(tpl.residues[0], new_hdl.residues[0], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[0], new_hdl.residues[0], ed) self.assertTrue(has_cbeta) self.assertTrue(err) - err, has_cbeta=conop.CopyConserved(tpl.residues[1], new_hdl.residues[1], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[1], new_hdl.residues[1], ed) self.assertFalse(has_cbeta) self.assertTrue(err) - err, has_cbeta=conop.CopyConserved(tpl.residues[2], new_hdl.residues[2], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[2], new_hdl.residues[2], ed) self.assertFalse(has_cbeta) self.assertTrue(err) - err, has_cbeta=conop.CopyConserved(tpl.residues[3], new_hdl.residues[3], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[3], new_hdl.residues[3], ed) self.assertTrue(has_cbeta) self.assertTrue(err) @@ -72,28 +72,28 @@ class TestNonStandard(unittest.TestCase): ed.AppendResidue(c, 'MET') # MET to MET - err =conop.CopyResidue(tpl.residues[0], new_hdl.residues[0], ed) + err =mol.alg.CopyResidue(tpl.residues[0], new_hdl.residues[0], ed) self.assertTrue(err) #GLY to GLY - err =conop.CopyResidue(tpl.residues[1], new_hdl.residues[1], ed) + err =mol.alg.CopyResidue(tpl.residues[1], new_hdl.residues[1], ed) self.assertTrue(err) # GLY to GLY - err =conop.CopyResidue(tpl.residues[2], new_hdl.residues[2], ed) + err =mol.alg.CopyResidue(tpl.residues[2], new_hdl.residues[2], ed) self.assertTrue(err) #now we copy a HIS to a HIS - err =conop.CopyResidue(tpl.residues[3], new_hdl.residues[3], ed) + err =mol.alg.CopyResidue(tpl.residues[3], new_hdl.residues[3], ed) self.assertTrue(err) # copy a GLY to a HIS - err, has_cbeta=conop.CopyNonConserved(tpl.residues[1], new_hdl.residues[4], ed) + err, has_cbeta=mol.alg.CopyNonConserved(tpl.residues[1], new_hdl.residues[4], ed) self.assertFalse(has_cbeta) # copy a MET to a GLY - err =conop.CopyResidue(tpl.residues[0], new_hdl.residues[5], ed) + err =mol.alg.CopyResidue(tpl.residues[0], new_hdl.residues[5], ed) self.assertFalse(err) # copy a MET to a HIS - err =conop.CopyResidue(tpl.residues[0], new_hdl.residues[6], ed) + err =mol.alg.CopyResidue(tpl.residues[0], new_hdl.residues[6], ed) self.assertFalse(err) # copy a GLY to a MET with adding CB - err=conop.CopyResidue(tpl.residues[1], new_hdl.residues[7], ed) + err=mol.alg.CopyResidue(tpl.residues[1], new_hdl.residues[7], ed) self.assertFalse(err) residues=new_hdl.residues diff --git a/modules/conop/tests/testfiles/cbeta.pdb b/modules/mol/alg/tests/testfiles/cbeta.pdb similarity index 100% rename from modules/conop/tests/testfiles/cbeta.pdb rename to modules/mol/alg/tests/testfiles/cbeta.pdb diff --git a/modules/conop/tests/testfiles/sep.pdb b/modules/mol/alg/tests/testfiles/sep.pdb similarity index 100% rename from modules/conop/tests/testfiles/sep.pdb rename to modules/mol/alg/tests/testfiles/sep.pdb diff --git a/modules/mol/base/doc/entity.rst b/modules/mol/base/doc/entity.rst index 653625756a1b38c696a50da10840078fb4d74c58..3b0aa5c9fdcd51cdb563922b9fb384f8ec046736 100644 --- a/modules/mol/base/doc/entity.rst +++ b/modules/mol/base/doc/entity.rst @@ -118,7 +118,13 @@ The Handle Classes an enabled ``USE_NUMPY`` flag (see :ref:`here <cmake-flags>` for details). :type: :class:`numpy.array` - + + .. attribute:: valid + + Validity of handle. + + :type: bool + .. method:: GetName() :returns: Name associated to this entity. @@ -327,6 +333,10 @@ The Handle Classes :type radius: float :returns: :class:`AtomHandleList` (list of :class:`AtomHandle`) + + .. method:: IsValid() + + See :attr:`valid` .. class:: ChainHandle @@ -424,6 +434,12 @@ The Handle Classes :meth:`GetCenterOfAtoms`. :type: :class:`~ost.geom.Vec3` + + .. attribute:: valid + + Validity of handle. + + :type: bool .. method:: FindResidue(res_num) @@ -469,6 +485,10 @@ The Handle Classes See :attr:`description` + .. method:: IsValid() + + See :attr:`valid` + .. class:: ResidueHandle The residue is either used to represent complete molecules or building blocks @@ -620,6 +640,43 @@ The Handle Classes Residue index (starting at 0) within chain. + .. attribute:: central_atom + + Central atom used for rendering traces. For peptides, this is usually + the CA atom. For nucleotides, this is usually the P atom. + + :type: :class:`AtomHandle` + + .. attribute:: central_normal + + Normal computed for :attr:`central_atom`. Only defined for peptides and + nucleotides if all required atoms available. Otherwise, the (1,0,0) vector + is returned. + + :type: :class:`~ost.geom.Vec3` + + .. attribute:: valid + + Validity of handle. + + :type: bool + + .. attribute:: next + + Residue after this one in the same chain. Invalid handle returned if there + is no next residue. Residues are ordered as in :attr:`ChainHandle.residues` + independently on whether they are connected or not (see + :func:`InSequence` to check for connected residues). + + :type: :class:`ResidueHandle` + + .. attribute:: prev + + Residue before this one in the same chain. Otherwise same behaviour as + :attr:`next`. + + :type: :class:`ResidueHandle` + .. method:: FindAtom(atom_name) Get atom by atom name. See also :attr:`atoms` @@ -666,7 +723,20 @@ The Handle Classes .. method:: GetIndex() See :attr:`index` + + .. method:: GetCentralAtom() + SetCentralAtom() + + See :attr:`central_atom` + + .. method:: GetCentralNormal() + + See :attr:`central_normal` + + .. method:: IsValid() + See :attr:`valid` + .. class:: AtomHandle @@ -732,6 +802,7 @@ The Handle Classes The atom's occupancy in the range 0 to 1. Read/write. Also available as :meth:`GetOccupancy`, :meth:`SetOccupancy`. + :type: float .. attribute:: b_factor @@ -782,6 +853,12 @@ The Handle Classes :type: int + .. attribute:: valid + + Validity of handle. + + :type: bool + .. method:: FindBondToAtom(other_atom) Finds and returns the bond formed between this atom and `other_atom`. If no @@ -899,8 +976,7 @@ The Handle Classes .. method:: IsValid() See :attr:`valid` - - :rtype: bool + The View Classes -------------------------------------------------------------------------------- @@ -981,6 +1057,12 @@ The View Classes :type: :class:`EntityHandle` + .. attribute:: valid + + Validity of view. + + :type: bool + .. method:: GetName() :returns: :func:`~EntityHandle.GetName` of entity :attr:`handle`. @@ -1199,7 +1281,8 @@ The View Classes .. method:: GetBondCount() Get number of bonds - :rtype: int + + :rtype: :class:`int` .. method:: GetBondList() @@ -1241,6 +1324,10 @@ The View Classes See :attr:`atom_count` + .. method:: IsValid() + + See :attr:`valid` + .. class:: ChainView A view representation of a :class:`ChainHandle`. Mostly, the same @@ -1350,7 +1437,7 @@ The View Classes .. attribute:: valid - Validity of handle. + Validity of view. :type: bool @@ -1752,6 +1839,28 @@ Other Entity-Related Functions :returns: :class:`EntityHandle` +.. function:: InSequence(res, res_next) + + :return: True, if both *res* and *res_next* are :attr:`~ResidueHandle.valid`, + *res_next* is the residue following *res* (see + :attr:`ResidueHandle.next`), both residues are linking (i.e. + :attr:`~ChemClass.IsPeptideLinking` or + :attr:`~ChemClass.IsNucleotideLinking`) and are connected by an + appropriate bond. + :rtype: :class:`bool` + :param res: First residue to check. + :type res: :class:`ResidueHandle` + :param res_next: Second residue to check. + :type res_next: :class:`ResidueHandle` + +.. function:: BondExists(atom_a, atom_b) + + :return: True, if *atom_a* and *atom_b* are connected by a bond. + :rtype: :class:`bool` + :param atom_a: First atom to check. + :type atom_a: :class:`AtomHandle` + :param atom_b: Second atom to check. + :type atom_b: :class:`AtomHandle` Residue Numbering -------------------------------------------------------------------------------- diff --git a/modules/mol/base/src/impl/residue_impl.cc b/modules/mol/base/src/impl/residue_impl.cc index fe1e7f60b0448d79e661fabc2b4cbe2630cd58ae..014f4069f5cae65e808d25151db5cee4ef10f213 100644 --- a/modules/mol/base/src/impl/residue_impl.cc +++ b/modules/mol/base/src/impl/residue_impl.cc @@ -212,12 +212,12 @@ AtomImplPtr ResidueImpl::GetCentralAtom() const for (AtomImplList::const_iterator it=atom_list_.begin(); it!=atom_list_.end();++it) { if((*it)->Name()=="P") return *it; - } + } } else if (chem_class_.IsPeptideLinking()) { for (AtomImplList::const_iterator it=atom_list_.begin(); it!=atom_list_.end();++it) { if((*it)->Name()=="CA") return *it; - } + } } return AtomImplPtr(); @@ -266,18 +266,21 @@ geom::Vec3 ResidueImpl::GetCentralNormal() const geom::Vec3 nrvo(1,0,0); if (chem_class_.IsPeptideLinking()) { AtomImplPtr a1 = FindAtom("C"); - AtomImplPtr a2 = FindAtom("O"); + AtomImplPtr a2 = FindAtom("O"); if(a1 && a2) { nrvo = geom::Normalize(a2->TransformedPos()-a1->TransformedPos()); } else { a1 = FindAtom("CB"); - a2 = FindAtom("CA"); + a2 = FindAtom("CA"); if(a1 && a2) { nrvo = geom::Normalize(a2->TransformedPos()-a1->TransformedPos()); } else { - geom::Vec3 v0=GetCentralAtom()->TransformedPos(); - nrvo=geom::Cross(geom::Normalize(v0), - geom::Normalize(geom::Vec3(-v0[2],v0[0],v0[1]))); + AtomImplPtr a0 = GetCentralAtom(); + if (a0) { + geom::Vec3 v0 = a0->TransformedPos(); + nrvo = geom::Cross(geom::Normalize(v0), + geom::Normalize(geom::Vec3(-v0[2], v0[0], v0[1]))); + } LOG_VERBOSE("warning: could not find atoms for proper central normal calculation"); } } @@ -288,9 +291,12 @@ geom::Vec3 ResidueImpl::GetCentralNormal() const if(a1 && a2 && a3) { nrvo = geom::Normalize(a1->TransformedPos()-(a2->TransformedPos()+a3->TransformedPos())*.5); } else { - geom::Vec3 v0=GetCentralAtom()->TransformedPos(); - nrvo=geom::Cross(geom::Normalize(v0), - geom::Normalize(geom::Vec3(-v0[2],v0[0],v0[1]))); + AtomImplPtr a0 = GetCentralAtom(); + if (a0) { + geom::Vec3 v0 = a0->TransformedPos(); + nrvo = geom::Cross(geom::Normalize(v0), + geom::Normalize(geom::Vec3(-v0[2], v0[0], v0[1]))); + } LOG_VERBOSE("warning: could not find atoms for proper central normal calculation"); } } diff --git a/modules/mol/base/tests/test_residue.cc b/modules/mol/base/tests/test_residue.cc index 89e8da33914110dc1d50150fb9dc19e2ab3dd50e..d828544b7eba1ef47d5c995b4341b438e68e8c9d 100644 --- a/modules/mol/base/tests/test_residue.cc +++ b/modules/mol/base/tests/test_residue.cc @@ -125,4 +125,92 @@ BOOST_AUTO_TEST_CASE(rename_res) BOOST_CHECK_EQUAL(rA2B.GetName(), "B"); } +BOOST_AUTO_TEST_CASE(test_centralatom) +{ + // COOK UP ENTITY FOR TEST + EntityHandle eh = CreateEntity(); + XCSEditor e = eh.EditXCS(); + ChainHandle ch = e.InsertChain("A"); + // decent peptide with all entries + ResidueHandle rp1 = e.AppendResidue(ch, "A"); + e.InsertAtom(rp1, "CA", geom::Vec3(2, 0, 0)); + e.InsertAtom(rp1, "CB", geom::Vec3(1, 0, 0)); + e.InsertAtom(rp1, "C", geom::Vec3(0, 0, 0)); + e.InsertAtom(rp1, "O", geom::Vec3(0, 1, 0)); + rp1.SetChemClass(ChemClass(ChemClass::PEPTIDE_LINKING)); + // weird peptide with only CA and CB + ResidueHandle rp2 = e.AppendResidue(ch, "B"); + e.InsertAtom(rp2, "CA", geom::Vec3(3, 0, 0)); + e.InsertAtom(rp2, "CB", geom::Vec3(3, 1, 0)); + rp2.SetChemClass(ChemClass(ChemClass::PEPTIDE_LINKING)); + // CA-only peptide + ResidueHandle rp3 = e.AppendResidue(ch, "C"); + e.InsertAtom(rp3, "CA", geom::Vec3(4, 0, 0)); + rp3.SetChemClass(ChemClass(ChemClass::PEPTIDE_LINKING)); + // peptide with custom atoms + ResidueHandle rp4 = e.AppendResidue(ch, "D"); + AtomHandle rp4_ax = e.InsertAtom(rp4, "XX", geom::Vec3(5, 0, 0)); + rp4.SetChemClass(ChemClass(ChemClass::PEPTIDE_LINKING)); + // nucleotide with all needed entries + ResidueHandle rn1 = e.AppendResidue(ch, "E"); + e.InsertAtom(rn1, "P", geom::Vec3(6, 0, 0)); + e.InsertAtom(rn1, "OP1", geom::Vec3(6, 0.5, 0)); + e.InsertAtom(rn1, "OP2", geom::Vec3(6, 1.5, 0)); + rn1.SetChemClass(ChemClass(ChemClass::DNA_LINKING)); + // nucleotide with only P + ResidueHandle rn2 = e.AppendResidue(ch, "F"); + e.InsertAtom(rn2, "P", geom::Vec3(7, 0, 0)); + rn2.SetChemClass(ChemClass(ChemClass::DNA_LINKING)); + // nucleotide with custom atoms + ResidueHandle rn3 = e.AppendResidue(ch, "G"); + AtomHandle rn3_ax = e.InsertAtom(rn3, "XX", geom::Vec3(8, 0, 0)); + rn3.SetChemClass(ChemClass(ChemClass::DNA_LINKING)); + // unknown chem class + ResidueHandle ru = e.AppendResidue(ch, "H"); + e.InsertAtom(ru, "P", geom::Vec3(9, 0, 0)); + e.InsertAtom(ru, "CA", geom::Vec3(9, 1, 0)); + AtomHandle ru_ax = e.InsertAtom(ru, "XX", geom::Vec3(9, 2, 0)); + ru.SetChemClass(ChemClass(ChemClass::UNKNOWN)); + + // CHECK CENTRAL ATOMS + BOOST_CHECK(rp1.GetCentralAtom().IsValid()); + BOOST_CHECK_EQUAL(rp1.GetCentralAtom().GetQualifiedName(), "A.A1.CA"); + BOOST_CHECK(rp2.GetCentralAtom().IsValid()); + BOOST_CHECK_EQUAL(rp2.GetCentralAtom().GetQualifiedName(), "A.B2.CA"); + BOOST_CHECK(rp3.GetCentralAtom().IsValid()); + BOOST_CHECK_EQUAL(rp3.GetCentralAtom().GetQualifiedName(), "A.C3.CA"); + BOOST_CHECK(!rp4.GetCentralAtom().IsValid()); + BOOST_CHECK(rn1.GetCentralAtom().IsValid()); + BOOST_CHECK_EQUAL(rn1.GetCentralAtom().GetQualifiedName(), "A.E5.P"); + BOOST_CHECK(rn2.GetCentralAtom().IsValid()); + BOOST_CHECK_EQUAL(rn2.GetCentralAtom().GetQualifiedName(), "A.F6.P"); + BOOST_CHECK(!rn3.GetCentralAtom().IsValid()); + BOOST_CHECK(!ru.GetCentralAtom().IsValid()); + + // CHECK NORMALS + BOOST_CHECK_EQUAL(rp1.GetCentralNormal(), geom::Vec3(0, 1, 0)); + BOOST_CHECK_EQUAL(rp2.GetCentralNormal(), geom::Vec3(0, -1, 0)); + BOOST_CHECK_EQUAL(rp3.GetCentralNormal(), geom::Vec3(0, 0, 1)); + BOOST_CHECK_EQUAL(rp4.GetCentralNormal(), geom::Vec3(1, 0, 0)); + BOOST_CHECK_EQUAL(rn1.GetCentralNormal(), geom::Vec3(0, -1, 0)); + BOOST_CHECK_EQUAL(rn2.GetCentralNormal(), geom::Vec3(0, 0, 1)); + BOOST_CHECK_EQUAL(rn3.GetCentralNormal(), geom::Vec3(1, 0, 0)); + BOOST_CHECK_EQUAL(ru.GetCentralNormal(), geom::Vec3(1, 0, 0)); + + // CHECK SETTING CENTRAL ATOMS + rp4.SetCentralAtom(rp4_ax); + BOOST_CHECK(rp4.GetCentralAtom().IsValid()); + BOOST_CHECK_EQUAL(rp4.GetCentralAtom().GetQualifiedName(), "A.D4.XX"); + BOOST_CHECK_EQUAL(rp4.GetCentralNormal(), geom::Vec3(0, 0, 1)); + rn3.SetCentralAtom(rn3_ax); + BOOST_CHECK(rn3.GetCentralAtom().IsValid()); + BOOST_CHECK_EQUAL(rn3.GetCentralAtom().GetQualifiedName(), "A.G7.XX"); + BOOST_CHECK_EQUAL(rn3.GetCentralNormal(), geom::Vec3(0, 0, 1)); + ru.SetCentralAtom(ru_ax); + BOOST_CHECK(ru.GetCentralAtom().IsValid()); + BOOST_CHECK_EQUAL(ru.GetCentralAtom().GetQualifiedName(), "A.H8.XX"); + // no normal for unknown residues + BOOST_CHECK_EQUAL(ru.GetCentralNormal(), geom::Vec3(1, 0, 0)); +} + BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/mol/mm/src/simulation.cc b/modules/mol/mm/src/simulation.cc index 2a23ddb9a629a112b3301c35dd6756ad2ba08b92..dea985213bf993a1a49f0ec06c17f7b34d4e36fe 100644 --- a/modules/mol/mm/src/simulation.cc +++ b/modules/mol/mm/src/simulation.cc @@ -27,10 +27,10 @@ Simulation::Simulation(const ost::mol::EntityHandle& handle, //note, that ent_ will be "completed" inside this function! //(hydrogens and shit) - - ent_ = handle.Copy(); - TopologyPtr top = TopologyCreator::Create(ent_,settings); - this->Init(top, settings); + + ost::mol::EntityHandle ent = handle.Copy(); + TopologyPtr top = TopologyCreator::Create(ent,settings); + this->Init(top, ent, settings); } Simulation::Simulation(const TopologyPtr top, @@ -40,21 +40,16 @@ Simulation::Simulation(const TopologyPtr top, if(static_cast<uint>(handle.GetAtomCount()) != top->GetNumParticles()){ throw ost::Error("Number of atoms in entity must be consistent with number of particles in topology!"); } - ent_ = handle.Copy(); - this->Init(top, settings); + ost::mol::EntityHandle ent = handle.Copy(); + this->Init(top, ent, settings); } void Simulation::Save(const String& filename){ + std::ofstream stream(filename.c_str(), std::ios_base::binary); io::BinaryDataSink ds(stream); + ds << *top_; - geom::Vec3List positions = this->GetPositions(false,false); - for(geom::Vec3List::iterator i = positions.begin(); - i != positions.end(); ++i){ - ds & (*i)[0]; - ds & (*i)[1]; - ds & (*i)[2]; - } uint num_chains; uint num_residues; @@ -96,15 +91,11 @@ void Simulation::Save(const String& filename){ k != atom_list.end(); ++k){ atom_name = k->GetName(); atom_element = k->GetElement(); - geom::Vec3 pos = k->GetPos(); bfac = k->GetBFactor(); occ = k->GetOccupancy(); is_hetatm = k->IsHetAtom(); ds & atom_name; ds & atom_element; - ds & pos[0]; - ds & pos[1]; - ds & pos[2]; ds & bfac; ds & occ; ds & is_hetatm; @@ -112,19 +103,18 @@ void Simulation::Save(const String& filename){ } } - ost::mol::AtomHandleList atom_list = ent_.GetAtomList(); ost::mol::AtomHandleList bonded_atoms; std::map<long,int> atom_indices; int actual_index = 0; - for(ost::mol::AtomHandleList::const_iterator i = atom_list.begin(), e = atom_list.end(); - i != e; ++i){ + for(ost::mol::AtomHandleList::const_iterator i = atom_list_.begin(), + e = atom_list_.end(); i != e; ++i){ atom_indices[i->GetHashCode()] = actual_index; ++actual_index; } - for(ost::mol::AtomHandleList::iterator i = atom_list.begin(); - i != atom_list.end(); ++i){ + for(ost::mol::AtomHandleList::iterator i = atom_list_.begin(); + i != atom_list_.end(); ++i){ bonded_atoms = i->GetBondPartners(); num_bonded_atoms = bonded_atoms.size(); ds & num_bonded_atoms; @@ -139,6 +129,7 @@ void Simulation::Save(const String& filename){ } SimulationPtr Simulation::Load(const String& filename, SettingsPtr settings){ + if (!boost::filesystem::exists(filename)) { std::stringstream ss; ss << "Could not open simulation File '" @@ -146,71 +137,17 @@ SimulationPtr Simulation::Load(const String& filename, SettingsPtr settings){ throw ost::io::IOException(ss.str()); } - SimulationPtr sim_ptr(new Simulation); - std::ifstream stream(filename.c_str(), std::ios_base::binary); io::BinaryDataSource ds(stream); - TopologyPtr top_p(new Topology); - ds >> *top_p; - - sim_ptr->top_ = top_p; - - sim_ptr->system_ = SystemCreator::Create(sim_ptr->top_,settings, - sim_ptr->system_force_mapper_); - - sim_ptr->integrator_ = settings->integrator; - - OpenMM::Platform::loadPluginsFromDirectory (settings->openmm_plugin_directory); - OpenMM::Platform::loadPluginsFromDirectory (settings->custom_plugin_directory); - OpenMM::Platform* platform; - - switch(settings->platform){ - case Reference:{ - platform = &OpenMM::Platform::getPlatformByName("Reference"); - break; - } - case OpenCL:{ - platform = &OpenMM::Platform::getPlatformByName("OpenCL"); - break; - } - case CUDA:{ - platform = &OpenMM::Platform::getPlatformByName("CUDA"); - break; - } - case CPU:{ - platform = &OpenMM::Platform::getPlatformByName("CPU"); - break; - } - default:{ - throw ost::Error("Invalid Platform when Loading simulation!"); - } - } - - sim_ptr->context_ = ContextPtr(new OpenMM::Context(*(sim_ptr->system_), - *(sim_ptr->integrator_), - *platform)); - std::vector<OpenMM::Vec3> positions; - OpenMM::Vec3 open_mm_vec; - Real a,b,c; - for(int i = 0; i < sim_ptr->system_->getNumParticles(); ++i){ - ds & a; - ds & b; - ds & c; - open_mm_vec[0] = a; - open_mm_vec[1] = b; - open_mm_vec[2] = c; - positions.push_back(open_mm_vec); - } - sim_ptr->context_->setPositions(positions); + SimulationPtr sim_ptr(new Simulation); + TopologyPtr top(new Topology); + ds >> *top; uint num_chains; uint num_residues; uint num_atoms; uint num_bonded_atoms; - Real x_pos; - Real y_pos; - Real z_pos; Real bfac; Real occ; bool is_hetatm; @@ -239,29 +176,29 @@ SimulationPtr Simulation::Load(const String& filename, SettingsPtr settings){ for(uint k = 0; k < num_atoms; ++k){ ds & atom_name; ds & atom_element; - ds & x_pos; - ds & y_pos; - ds & z_pos; ds & bfac; ds & occ; ds & is_hetatm; - geom::Vec3 pos(x_pos,y_pos,z_pos); - ed.InsertAtom(res,atom_name,pos,atom_element,occ,bfac,is_hetatm); + ed.InsertAtom(res, atom_name, geom::Vec3(0.0,0.0,0.0), + atom_element, occ, bfac, is_hetatm); } } } - ost::mol::AtomHandleList atom_list = ent.GetAtomList(); - for(uint i = 0; i < atom_list.size(); ++i){ + + sim_ptr->Init(top, ent, settings); + + for(uint i = 0; i < sim_ptr->atom_list_.size(); ++i){ ds & num_bonded_atoms; for(uint j = 0; j < num_bonded_atoms; ++j){ ds & atom_index; - ed.Connect(atom_list[i],atom_list[atom_index]); + ed.Connect(sim_ptr->atom_list_[i], sim_ptr->atom_list_[atom_index]); } } - sim_ptr->ent_ = ent; - + // also loads the positions that have been set in the context + // they get mapped over to the attached entity sim_ptr->context_->loadCheckpoint(stream); + sim_ptr->UpdatePositions(); return sim_ptr; } @@ -297,10 +234,12 @@ void Simulation::EnsurePluginsLoaded(const String& plugin_path) { void Simulation::Init(const TopologyPtr top, + const ost::mol::EntityHandle& ent, const SettingsPtr settings){ - top_ = top; + ent_ = ent; + atom_list_ = ent_.GetAtomList(); if(!settings->integrator){ //user did not specify an integrator, so let's just use a standard integrator @@ -358,12 +297,11 @@ void Simulation::Init(const TopologyPtr top, context_ = ContextPtr(new OpenMM::Context(*system_,*integrator_,*platform,context_properties)); - ost::mol::AtomHandleList atom_list = ent_.GetAtomList(); std::vector<OpenMM::Vec3> positions; geom::Vec3 ost_vec; OpenMM::Vec3 open_mm_vec; - for(ost::mol::AtomHandleList::iterator i = atom_list.begin(); - i!=atom_list.end();++i){ + for(ost::mol::AtomHandleList::iterator i = atom_list_.begin(); + i!=atom_list_.end();++i){ ost_vec = i->GetPos(); open_mm_vec[0] = ost_vec[0]/10; open_mm_vec[1] = ost_vec[1]/10; @@ -446,14 +384,12 @@ void Simulation::UpdatePositions(bool enforce_periodic_box){ if(top_->GetNumParticles() != static_cast<uint>(ent_.GetAtomCount())){ throw ost::Error("Num particles in topology and num atoms in entity are not consistent!"); } - geom::Vec3List positions = this->GetPositions(enforce_periodic_box, true); + geom::Vec3List positions; + StateExtractor::ExtractPositions(context_, positions, enforce_periodic_box, + true); ost::mol::XCSEditor ed = ent_.EditXCS(ost::mol::BUFFERED_EDIT); - ost::mol::AtomHandleList atom_list = ent_.GetAtomList(); - ost::mol::AtomHandleList::iterator a = atom_list.begin(); - ost::mol::AtomHandleList::iterator ae = atom_list.end(); - geom::Vec3List::iterator v = positions.begin(); - for(; a != ae; ++a, ++v){ - ed.SetAtomPos(*a,*v); + for(uint i = 0; i < atom_list_.size(); ++i) { + ed.SetAtomPos(atom_list_[i], positions[i]); } } diff --git a/modules/mol/mm/src/simulation.hh b/modules/mol/mm/src/simulation.hh index 89c58b10eb96f8b2b0a5995986d2ea80a47a43b7..7cbf06d1aededa101f55658b14f50f42d73dd6ca 100644 --- a/modules/mol/mm/src/simulation.hh +++ b/modules/mol/mm/src/simulation.hh @@ -143,6 +143,7 @@ private: Simulation() { } //hidden constructor... void Init(const ost::mol::mm::TopologyPtr top, + const ost::mol::EntityHandle& ent, const SettingsPtr settings); int TimeToNextNotification(); @@ -160,6 +161,7 @@ private: std::vector<int> time_to_notify_; std::map<FuncType,uint> system_force_mapper_; ost::mol::EntityHandle ent_; + ost::mol::AtomHandleList atom_list_; }; }}} //ns diff --git a/tools/molck/CMakeLists.txt b/tools/molck/CMakeLists.txt index 4e1dc2fb79b73e90022dd2f546a5011c98eca118..c76f1fd71b4532bf32b2cfcfa5dc34a0b7fb00ff 100644 --- a/tools/molck/CMakeLists.txt +++ b/tools/molck/CMakeLists.txt @@ -1,4 +1,4 @@ if (NOT WIN32) executable(NAME molck SOURCES main.cc - DEPENDS_ON ost_io STATIC) + DEPENDS_ON ost_io ost_mol_alg STATIC) endif(NOT WIN32) diff --git a/tools/molck/main.cc b/tools/molck/main.cc index 2a445620212e97165e53b7f9bf3592ecd5a21387..ef2d77c207213957a20d2d2d460da74fa0280b73 100644 --- a/tools/molck/main.cc +++ b/tools/molck/main.cc @@ -5,25 +5,52 @@ #include <ost/base.hh> #include <ost/boost_filesystem_helper.hh> #include <ost/platform.hh> -#include <ost/conop/model_check.hh> #include <ost/conop/conop.hh> -#include <ost/conop/amino_acids.hh> -#include <ost/io/mol/pdb_reader.hh> #include <ost/io/mol/pdb_writer.hh> +#include <ost/io/mol/pdb_reader.hh> #include <ost/io/mol/mmcif_reader.hh> #include <ost/io/io_exception.hh> -#include <ost/conop/nonstandard.hh> +#include <ost/mol/alg/molck.hh> #if defined(__APPLE__) #include <mach-o/dyld.h> #endif using namespace ost; -using namespace ost::conop; using namespace ost::mol; using namespace ost::io; namespace po=boost::program_options; namespace fs=boost::filesystem; +const char* USAGE= +"this is molck - the molecule checker\n" +"usage: molck [options] file1.pdb [file2.pdb [...]]\n" +"options \n" +" --complib=path location of the compound library file. If not provided, the \n" +" following locations are searched in this order: \n" +" 1. Working directory, 2. OpenStructure standard library location (if the \n" +" executable is part of a standard OpenStructure installation) \n" +" --rm=<a>,<b> remove atoms and residues matching some criteria \n" +" zeroocc - Remove atoms with zero occupancy \n" +" hyd - Remove hydrogen atoms \n" +" oxt - Remove terminal oxygens \n" +" nonstd - Remove all residues not one of the 20 standard amino acids \n" +" unk - Remove unknown and atoms not following the nomenclature\n" +" --fix-ele clean up element column\n" +" --stdout write cleaned file(s) to stdout \n" +" --out=filename write cleaned file(s) to disk. % characters in the filename are \n" +" replaced with the basename of the input file without extension. \n" +" Default: %-molcked.pdb \n" +" --color=auto|on|off \n" +" whether output should be colored\n" +" --map-nonstd maps modified residues back to the parent amino acid, for example\n" +" MSE -> MET, SEP -> SER.\n"; + +void usage() +{ + std::cerr << USAGE << std::endl; + exit(0); +} + EntityHandle load_x(const String& file, const IOProfile& profile) { try { @@ -53,20 +80,20 @@ EntityHandle load_x(const String& file, const IOProfile& profile) } // load compound library, exiting if it could not be found... -CompoundLibPtr load_compound_lib(const String& custom_path) +ost::conop::CompoundLibPtr load_compound_lib(const String& custom_path) { if (custom_path!="") { if (fs::exists(custom_path)) { - return CompoundLib::Load(custom_path); + return ost::conop::CompoundLib::Load(custom_path); } else { std::cerr << "Could not find compounds.chemlib at the provided location, trying other options" << std::endl; } } if (fs::exists("compounds.chemlib")) { - return CompoundLib::Load("compounds.chemlib"); + return ost::conop::CompoundLib::Load("compounds.chemlib"); } char result[ 1024 ]; - CompoundLibPtr lib; + ost::conop::CompoundLibPtr lib; String exe_path; #if defined(__APPLE__) uint32_t size=1023; @@ -89,45 +116,16 @@ CompoundLibPtr load_compound_lib(const String& custom_path) String share_path_string=BFPathToString(share_path); if (fs::exists(share_path_string)) { - return CompoundLib::Load(share_path_string); + return ost::conop::CompoundLib::Load(share_path_string); } } if (!lib) { std::cerr << "Could not load compounds.chemlib" << std::endl; exit(-1); } - return CompoundLibPtr(); + return ost::conop::CompoundLibPtr(); } -const char* USAGE= -"this is molck - the molecule checker\n" -"usage: molck [options] file1.pdb [file2.pdb [...]]\n" -"options \n" -" --complib=path location of the compound library file. If not provided, the \n" -" following locations are searched in this order: \n" -" 1. Working directory, 2. OpenStructure standard library location (if the \n" -" executable is part of a standard OpenStructure installation) \n" -" --rm=<a>,<b> remove atoms and residues matching some criteria \n" -" zeroocc - Remove atoms with zero occupancy \n" -" hyd - Remove hydrogen atoms \n" -" oxt - Remove terminal oxygens \n" -" nonstd - Remove all residues not one of the 20 standard amino acids \n" -" unk - Remove unknown and atoms not following the nomenclature\n" -" --fix-ele clean up element column\n" -" --stdout write cleaned file(s) to stdout \n" -" --out=filename write cleaned file(s) to disk. % characters in the filename are \n" -" replaced with the basename of the input file without extension. \n" -" Default: %-molcked.pdb \n" -" --color=auto|on|off \n" -" whether output should be colored\n" -" --map-nonstd maps modified residues back to the parent amino acid, for example\n" -" MSE -> MET, SEP -> SER.\n"; - -void usage() -{ - std::cerr << USAGE << std::endl; - exit(0); -} int main(int argc, char *argv[]) { @@ -136,19 +134,12 @@ int main(int argc, char *argv[]) } IOProfile prof; prof.fault_tolerant=true; + ost::mol::alg::MolckSettings settings; String rm; String color; - bool colored = false; - bool rm_unk_atoms=false; - bool rm_hyd_atoms=false; - bool rm_non_std=false; - bool rm_oxt_atoms=false; - bool rm_zero_occ_atoms=false; bool write_to_stdout = false; bool write_to_file = false; - bool map_nonstd_res = false; - bool assign_elem = false; String output_blueprint_string; String custom_path=""; @@ -196,24 +187,24 @@ int main(int argc, char *argv[]) output_blueprint_string = vm["out"].as<String>(); } if (vm.count("map-nonstd")) { - map_nonstd_res = true; + settings.map_nonstd_res = true; } if (vm.count("fix-ele")) { - assign_elem = true; + settings.assign_elem = true; } std::vector<StringRef> rms=StringRef(rm.c_str(), rm.size()).split(','); for (size_t i=0; i<rms.size(); ++i) { if (rms[i] == StringRef("unk", 3)) { - rm_unk_atoms = true; + settings.rm_unk_atoms = true; } else if (rms[i] == StringRef("nonstd", 6)) { - rm_non_std = true; + settings.rm_non_std = true; } else if (rms[i] == StringRef("hyd", 3)) { - rm_hyd_atoms = true; + settings.rm_hyd_atoms = true; } else if (rms[i] == StringRef("oxt", 3)) { - rm_oxt_atoms = true; + settings.rm_oxt_atoms = true; } else if (rms[i] == StringRef("zeroocc", 7)) { - rm_zero_occ_atoms = true; + settings.rm_zero_occ_atoms = true; } else { std::cerr << "unknown value to remove '" << rms[i] << "'" << std::endl; usage(); @@ -221,144 +212,23 @@ int main(int argc, char *argv[]) } } if (color=="auto") { - colored = isatty(STDERR_FILENO); + settings.colored = isatty(STDERR_FILENO); } else if (color == "on" || color == "1" || color == "yes") { - colored = true; + settings.colored = true; } else if (color == "off" || color == "0" || color == "no") { - colored = false; + settings.colored = false; } else { usage(); exit(-1); } - CompoundLibPtr lib=load_compound_lib(custom_path); + ost::conop::CompoundLibPtr lib=load_compound_lib(custom_path); for (unsigned int i = 0; i < files.size(); ++i) { EntityHandle ent=load_x(files[i], prof); if (!ent.IsValid()) { continue; } - if (map_nonstd_res) { - EntityHandle new_ent=CreateEntity(); - ChainHandleList chains=ent.GetChainList(); - XCSEditor new_edi=new_ent.EditXCS(); - for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { - ChainHandle new_chain = new_edi.InsertChain(c->GetName()); - ResidueHandleList residues = c->GetResidueList(); - for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { - AminoAcid aa = ResidueToAminoAcid(*r); - if (aa!=XXX) { - ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); - AtomHandleList atoms = r->GetAtomList(); - for (AtomHandleList::const_iterator a=atoms.begin();a!=atoms.end();++a) { - new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); - } - continue; - } else { - CompoundPtr compound=lib->FindCompound(r->GetName(),Compound::PDB); - if (!compound || !compound->IsPeptideLinking() || compound->GetChemClass()==ChemClass::D_PEPTIDE_LINKING || - OneLetterCodeToAminoAcid(compound->GetOneLetterCode())==XXX) { - ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); - AtomHandleList atoms = r->GetAtomList(); - for (AtomHandleList::const_iterator a=atoms.begin();a!=atoms.end();++a) { - new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); - } - continue; - } - ResidueHandle dest_res = new_edi.AppendResidue(new_chain,OneLetterCodeToResidueName(compound->GetOneLetterCode()),r->GetNumber()); - CopyResidue(*r,dest_res,new_edi,lib); - } - } - } - ent=new_ent; - } - - XCSEditor edi=ent.EditXCS(); - Diagnostics diags; - Checker checker(lib, ent, diags); - if (rm_zero_occ_atoms) { - std::cerr << "removing atoms with zero occupancy" << std::endl; - int zremoved=0; - AtomHandleList zero_atoms=checker.GetZeroOccupancy(); - for (AtomHandleList::const_iterator i=zero_atoms.begin(), e=zero_atoms.end(); i!=e; ++i) { - edi.DeleteAtom(*i); - zremoved++; - } - std::cerr << " --> removed " << zremoved << " hydrogen atoms" << std::endl; - } - - if (rm_hyd_atoms) { - std::cerr << "removing hydrogen atoms" << std::endl; - int hremoved=0; - AtomHandleList hyd_atoms=checker.GetHydrogens(); - for (AtomHandleList::const_iterator i=hyd_atoms.begin(), e=hyd_atoms.end(); i!=e; ++i) { - edi.DeleteAtom(*i); - hremoved++; - } - std::cerr << " --> removed " << hremoved << " hydrogen atoms" << std::endl; - } - if (rm_oxt_atoms) { - std::cerr << "removing OXT atoms" << std::endl; - int oremoved=0; - AtomHandleList atoms=ent.GetAtomList(); - for (AtomHandleList::const_iterator i=atoms.begin(), e=atoms.end(); i!=e; ++i) { - if (i->GetName()=="OXT") { - edi.DeleteAtom(*i); - oremoved++; - } - } - std::cerr << " --> removed " << oremoved << " OXT atoms" << std::endl; - } - - checker.CheckForCompleteness(); - checker.CheckForUnknownAtoms(); - checker.CheckForNonStandard(); - for (Diagnostics::const_diag_iterator - j = diags.diags_begin(), e = diags.diags_end(); j != e; ++j) { - const Diag* diag=*j; - std::cerr << diag->Format(colored); - switch (diag->GetType()) { - case DIAG_UNK_ATOM: - if (rm_unk_atoms) { - edi.DeleteAtom(diag->GetAtom(0)); - std::cerr << " --> removed "; - } - break; - case DIAG_NONSTD_RESIDUE: - if (rm_non_std) { - edi.DeleteResidue(diag->GetResidue(0)); - std::cerr << " --> removed "; - } - break; - default: - break; - } - std::cerr << std::endl; - } - - if (assign_elem) { - ChainHandleList chains=ent.GetChainList(); - for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { - ResidueHandleList residues = c->GetResidueList(); - for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { - CompoundPtr compound=lib->FindCompound(r->GetName(),Compound::PDB); - AtomHandleList atoms=r->GetAtomList(); - if (!compound) { - for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { - j->SetElement(""); - } - continue; - } - for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { - int specindx=compound->GetAtomSpecIndex(j->GetName()); - if (specindx!=-1) { - j->SetElement(compound->GetAtomSpecs()[specindx].element); - } else { - j->SetElement(""); - } - } - } - } - } + ost::mol::alg::Molck(ent, lib, settings); if (write_to_stdout) { PDBWriter writer(std::cout, prof);