diff --git a/modules/index.rst b/modules/index.rst index 4521313c8592ca3d57a7a91dcac7813c7a524316..2e8c37b274c8c40308f7aedb259fd51e1dd8888e 100644 --- a/modules/index.rst +++ b/modules/index.rst @@ -30,6 +30,7 @@ OpenStructure documentation contributing table mol/alg/lddt + mol/alg/molck For Starters -------------------------------------------------------------------------------- @@ -101,7 +102,9 @@ Varia **Users**: :doc:`Reporting a problem <users>` -**lDDT**: :doc:`lDDT command line executable<mol/alg/lddt>` +**lDDT**: :doc:`lDDT command line executable and Python API<mol/alg/lddt>` + +**Molck**: :doc:`Molecular Checker<mol/alg/molck>` Extending OpenStructure -------------------------------------------------------------------------------- diff --git a/modules/io/doc/io.rst b/modules/io/doc/io.rst index f8f5302e060d360d13b2851d42dce12d3e9d2c66..aa3e15991ba46ea68d9ffeb68c95d6a3aed6a1c5 100644 --- a/modules/io/doc/io.rst +++ b/modules/io/doc/io.rst @@ -343,3 +343,45 @@ Saving Density Maps 12 +Stereochemical Parameters +-------------------------------------------------------------------------------- + +In order to check the structure for some stereo-chemical and steric clashes +before computing the lDDT scores it is required to pass parameter file based on +Engh and Huber parameters, and on the atomic radii as defined in the Cambridge +Structural Database. OpenStructure ships with default file called +`stereo_chemical_props.txt` located in `$OST_ROOT/share/openstructure` +directory. A class :class:`~ost.io.StereoChemicalParamsReader` is used to read +this file. + +.. class:: StereoChemicalParamsReader(filename="") + + Object that holds and reads stereochemical parameters. + + :param filename: Sets :attr:`filename`. + + .. attribute:: bond_table + + The table containing bond information of type :class:`~ost.mol.alg.StereoChemicalParams`. + + .. attribute:: angle_table + + The table containing angle information of type :class:`~ost.mol.alg.StereoChemicalParams`. + + .. attribute:: nonbonded_table + + The table containing clashes of type :class:`~ost.mol.alg.ClashingDistances`. + + .. attribute:: filename + + The path to the parameter file that will be used. If set to "", it reads the + default file shipped with OpenStructure. + + :type: :class:`str` + + .. method:: Read(check=False) + + Read the file. + + :param check: Raise an error when any of the resulting tables are empty. + :type check: :class:`bool` diff --git a/modules/io/pymod/wrap_io.cc b/modules/io/pymod/wrap_io.cc index 302720245b63d534d88aceb2bba9c4d64bba0464..8704a4657d5a7f152f96d339164976fe5bbeb0ce 100644 --- a/modules/io/pymod/wrap_io.cc +++ b/modules/io/pymod/wrap_io.cc @@ -33,6 +33,7 @@ using namespace boost::python; #include <ost/io/mol/entity_io_sdf_handler.hh> #include <ost/io/mol/pdb_reader.hh> #include <ost/io/mol/dcd_io.hh> +#include <ost/io/stereochemical_params_reader.hh> using namespace ost; using namespace ost::io; @@ -120,6 +121,15 @@ BOOST_PYTHON_MODULE(_ost_io) def("LoadMAE", &LoadMAE); def("LoadPQR", &LoadPQR); + class_<io::StereoChemicalParamsReader>("StereoChemicalParamsReader", + init<String>(arg("filename"))) + .def(init<>()) + .def("Read", &io::StereoChemicalParamsReader::Read, (arg("check")=false)) + .def_readwrite("filename", &io::StereoChemicalParamsReader::filename) + .def_readwrite("bond_table", &io::StereoChemicalParamsReader::bond_table) + .def_readwrite("angle_table", &io::StereoChemicalParamsReader::angle_table) + .def_readwrite("nonbonded_table", &io::StereoChemicalParamsReader::nonbonded_table); + export_pdb_io(); export_mmcif_io(); #if OST_IMG_ENABLED diff --git a/modules/io/src/CMakeLists.txt b/modules/io/src/CMakeLists.txt index 13d6e3c6adef7c8adbe30aa0494d2668ce7242d7..41ac18cc63766a9674afc7e61fd4acc7542ea99d 100644 --- a/modules/io/src/CMakeLists.txt +++ b/modules/io/src/CMakeLists.txt @@ -65,7 +65,7 @@ if (ENABLE_IMG) endif() #################################### -set(OST_IO_DEPENDENCIES ost_base;ost_conop;ost_seq) +set(OST_IO_DEPENDENCIES ost_base;ost_conop;ost_seq;ost_mol_alg) if (ENABLE_IMG) set(OST_IO_DEPENDENCIES ${OST_IO_DEPENDENCIES};ost_img;ost_img_alg) endif() diff --git a/modules/io/src/mol/CMakeLists.txt b/modules/io/src/mol/CMakeLists.txt index e0792e93ee64ec6038451cad5a283a5c5c12144c..c88d99db8690af8efafc44a896f1cd017d03ff19 100644 --- a/modules/io/src/mol/CMakeLists.txt +++ b/modules/io/src/mol/CMakeLists.txt @@ -20,6 +20,7 @@ star_parser.cc mmcif_reader.cc mmcif_info.cc pdb_str.cc +stereochemical_params_reader.cc PARENT_SCOPE ) @@ -47,5 +48,6 @@ surface_io_handler.hh load_surface.hh surface_io_msms_handler.hh pdb_str.hh +stereochemical_params_reader.hh PARENT_SCOPE ) diff --git a/modules/io/src/mol/stereochemical_params_reader.cc b/modules/io/src/mol/stereochemical_params_reader.cc new file mode 100644 index 0000000000000000000000000000000000000000..f0317a3ea19ae8dd78b675a4c08576555e5e659a --- /dev/null +++ b/modules/io/src/mol/stereochemical_params_reader.cc @@ -0,0 +1,64 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#include <boost/filesystem/fstream.hpp> +#include <ost/platform.hh> +#include <ost/mol/alg/filter_clashes.hh> +#include <ost/io/stereochemical_params_reader.hh> + + +namespace { + std::vector<String> ReadStereoChemicalFile(const String& filename){ + boost::filesystem::path loc(filename); + boost::filesystem::ifstream infile(loc); + if (!infile) { + std::stringstream serr; + serr << "Could not find " << filename; + throw ost::Error(serr.str()); + } + std::vector<String> stereo_chemical_props; + String line; + while (std::getline(infile, line)) + { + std::stringstream line_stream(line); + stereo_chemical_props.push_back(line); + } + + return stereo_chemical_props; + } +} + +namespace ost { namespace io { + +StereoChemicalParamsReader::StereoChemicalParamsReader() { + filename = ost::GetSharedDataPath() + "/stereo_chemical_props.txt"; +} + +StereoChemicalParamsReader::StereoChemicalParamsReader(const String& init_filename): filename(init_filename) {} + +void StereoChemicalParamsReader::Read(bool check) { + std::vector<String> stereo_chemical_props = ReadStereoChemicalFile(filename); + // Bonds + bond_table = ost::mol::alg::FillStereoChemicalParams("Bond", stereo_chemical_props, check); + // Angles + angle_table = ost::mol::alg::FillStereoChemicalParams("Angle", stereo_chemical_props, check); + // Not bonded + nonbonded_table = ost::mol::alg::FillClashingDistances(stereo_chemical_props, check); +} +}} // ns + diff --git a/modules/io/src/mol/stereochemical_params_reader.hh b/modules/io/src/mol/stereochemical_params_reader.hh new file mode 100644 index 0000000000000000000000000000000000000000..f8990f55268de150d6f17a28160f53e695781bfa --- /dev/null +++ b/modules/io/src/mol/stereochemical_params_reader.hh @@ -0,0 +1,40 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#ifndef OST_IO_STEREOCHEMICAL_PARAMS_READER_H +#define OST_IO_STEREOCHEMICAL_PARAMS_READER_H + +#include <ost/io/module_config.hh> +#include <ost/mol/alg/filter_clashes.hh> + +namespace ost { namespace io { + +struct StereoChemicalParamsReader { + String filename; + ost::mol::alg::StereoChemicalParams bond_table; + ost::mol::alg::StereoChemicalParams angle_table; + ost::mol::alg::ClashingDistances nonbonded_table; + + StereoChemicalParamsReader(); + StereoChemicalParamsReader(const String& filename); + void Read(bool check=false); +}; + +}} // ns + +#endif \ No newline at end of file diff --git a/modules/mol/alg/doc/lddt.rst b/modules/mol/alg/doc/lddt.rst index 3d3e862c3fbc61682eb880fe3e738365b8cb1501..6529c47758e29d8fa292c475d7a212ce25cc9e56 100644 --- a/modules/mol/alg/doc/lddt.rst +++ b/modules/mol/alg/doc/lddt.rst @@ -1,6 +1,6 @@ -====== +==== lDDT -====== +==== ------------------------------------- Where can I find the lDDT executable? @@ -180,3 +180,69 @@ For example: WARNING: Verbosity levels 1 and 2 can generate a large amount of output text, especially with large structures and multiple models being evaluated. + +=============== +lDDT Python API +=============== + +One can replicate the binary using simple python script: + +.. code-block:: python + + #! /bin/env python + """Run lDDT from within script.""" + from ost.io import LoadPDB + from ost.mol.alg import (CleanlDDTReferences, + PreparelDDTGlobalRDMap, + lDDTSettings, + CheckStructure, + LocalDistDiffTest, + GetlDDTPerResidueStats, + PrintlDDTPerResidueStats) + from ost.io import StereoChemicalParamsReader + + model_path = "Path to your model pdb file" + reference_path = "Path to your reference pdb file" + # + # Load model and prepare its view + model = LoadPDB(model_path) + model_view = model.GetChainList()[0].Select("peptide=true") + # + # Prepare references - it should be alist of EntityView(s) + references = [LoadPDB(reference_path).CreateFullView()] + # + # Initialize settings with default parameters and print them + settings = lDDTSettings() + settings.PrintParameters() + + # + # Clean up references + CleanlDDTReferences(references) + # + # Prepare residue map from references + rdmap = PreparelDDTGlobalRDMap(references, settings) + # + # This part is optional and it depends on our settings parameter + if settings.structural_checks: + stereochemical_parameters = StereoChemicalParamsReader( + settings.parameter_file_path) + stereochemical_parameters.Read() + CheckStructure(ent=model_view, + bond_table=stereochemical_parameters.bond_table, + angle_table=stereochemical_parameters.angle_table, + nonbonded_table=stereochemical_parameters.nonbonded_table, + bond_tolerance=settings.bond_tolerance, + angle_tolerance=settings.angle_tolerance) + # + # Calculate lDDT + LocalDistDiffTest(model_view, references, rdmap, settings) + # + # Get the local scores + local_scores = GetlDDTPerResidueStats(model, rdmap, settings) + # + # Pring local scores + PrintlDDTPerResidueStats(local_scores, settings) + +This can be useful when we already have an models and references already read +in the memory and we do not want run the binary. +Please refere to specific function documentation for more details. diff --git a/modules/mol/alg/doc/molalg.rst b/modules/mol/alg/doc/molalg.rst index d9ba6a6a60588da14ff02de9793aa46d899bd69a..d4b71376c848fc14e9e0dcb1abecb7421e8debc1 100644 --- a/modules/mol/alg/doc/molalg.rst +++ b/modules/mol/alg/doc/molalg.rst @@ -59,6 +59,24 @@ Local Distance Test scores (lDDT, DRMSD) :returns: a tuple containing the counts of the conserved distances in the model and of all the checked distances +.. function:: LocalDistDiffTest(model, reference_list, distance_list, settings) + + Wrapper around :func:`LocalDistDiffTest` above. + + :param model: the model structure + :type model: :class:`~ost.mol.EntityView` + :param reference_list: the list of reference structures from which distances were derived + :type reference_list: :class:`list` of :class:`~ost.mol.EntityView` + :param distance_list: A residue distance map prepared with :func:`PreparelDDTGlobalRDMap` + with *reference_list* and *settings* as parameters. + :type distance_list: :class:`~ost.mol.alg.GlobalRDMap` + :param settings: lDDT settings + :type settings: :class:`~ost.mol.alg.lDDTSettings` + + :returns: the Local Distance Difference Test score (conserved distances + divided by all the checked distances) + :rtype: :class:`float` + .. function:: LocalDistDiffTest(model, target, cutoff, max_dist, \ local_lddt_property_string="") @@ -287,22 +305,283 @@ Local Distance Test scores (lDDT, DRMSD) :returns: :class:`~ost.mol.alg.GlobalRDMap` - -.. class:: UniqueAtomIdentifier - Object containing enough information to uniquely identify an atom in a structure +.. function:: PreparelDDTGlobalRDMap(reference_list, settings) - .. method:: UniqueAtomIdentifier(chain,residue_number,residue_name,atom_name) - - Creates an UniqueAtomIdentifier object starting from relevant atom information + A wrapper around :func:`CreateDistanceList` and + :func:`CreateDistanceListFromMultipleReferences`. Depending on the length of + the ``reference_list`` it calls one or the other. + + :param reference_list: a list of reference structures from which distances are + derived + :type reference_list: list of :class:`~ost.mol.EntityView` + :param settings: lDDT settings + :type settings: :class:`~ost.mol.alg.lDDTSettings` + + :returns: :class:`~ost.mol.alg.GlobalRDMap` + + +.. function:: CleanlDDTReferences(reference_list) + + Prepares references to be used in lDDT calculation. It checks if all references + has the same chain name and selects this chain for for further calculations. + + .. warning:: + + This function modifies the passed *reference_list* list. + + :param reference_list: A list of reference structures from which distances are + derived + :type reference_list: :class:`list` of :class:`~ost.mol.EntityView` + + +.. function:: CheckStructure(ent, \ + bond_table, \ + angle_table, \ + nonbonded_table, \ + bond_tolerance, \ + angle_tolerance) + + Perform structural checks and filters the structure. + + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityView` + :param bond_table: List of bond stereo chemical parameters obtained from + :class:`~ost.io.StereoChemicalParamsReader` or :func:`FillStereoChemicalParams` + :type bond_table: :class:`~ost.mol.alg.StereoChemicalParams` + :param angle_table: List of angle stereo chemical parameters obtained from + :class:`~ost.io.StereoChemicalParamsReader` or :func:`FillStereoChemicalParams` + :type angle_table: :class:`~ost.mol.alg.StereoChemicalParams` + :param nonbonded_table: Information about the clashing distances obtained from + :class:`~ost.io.StereoChemicalParamsReader` or :func:`FillClashingDistances` + :type nonbonded_table: :class:`~ost.mol.alg.ClashingDistances` + :param bond_tolerance: Tolerance in stddev for bonds + :type bond_tolerance: float + :param angle_tolerance: Tolerance in stddev for angles + :type angle_tolerance: float + + +.. function:: GetlDDTPerResidueStats(model, distance_list, settings) + + Get the per-residue statistics from the lDDT calculation. + + :param model: The model structure + :type model: :class:`~ost.mol.EntityHandle` + :param distance_list: The list of distances to check for conservation + :type distance_list: :class:`~ost.mol.alg.GlobalRDMap` + :param settings: lDDT settings + :type settings: :class:`~ost.mol.alg.lDDTSettings` + :returns: Per-residue local lDDT scores + :rtype: :class:`list` of :class:`~ost.mol.alg.lDDTLocalScore` + + +.. function:: PrintlDDTPerResidueStats(scores, settings) + + Print per-residue statistics from lDDT calculation. + + :param scores: Local lDDT scores + :type scores: :class:`list` of :class:`~ost.mol.alg.lDDTLocalScore` + :param settings: lDDT settings + :type settings: :class:`~ost.mol.alg.lDDTSettings` + + +.. class:: lDDTLocalScore(cname, rname, rnum, is_assessed, quality_problems, \ + local_lddt, conserved_dist, total_dist) + + Object containing per-residue information about calculated lDDT. + + :param cname: Sets :attr:`cname` + :param rname: Sets :attr:`rname` + :param rnum: Sets :attr:`rnum` + :param is_assessed: Sets :attr:`is_assessed` + :param quality_problems: Sets :attr:`quality_problems` + :param local_lddt: Sets :attr:`local_lddt` + :param conserved_dist: Sets :attr:`conserved_dist` + :param total_dist: Sets :attr:`total_dist` + + .. attribute:: cname + + Chain name. + + :type: :class:`str` + + .. attribute:: rname + + Residue name. + + :type: :class:`str` + + .. attribute:: rnum + + Residue number. + + :type: :class:`int` + + .. attribute:: is_assessed + + Is the residue taken into account? Yes or No. + + :type: :class:`str` + + .. attribute:: quality_problems + + Does the residue have quality problems? + No if there are no problems, NA if the problems were not assessed, Yes if + there are sidechain problems and Yes+ if there are backbone problems. + + :type: :class:`str` + + .. attribute:: local_lddt + + Local lDDT score for residue. + + :type: :class:`float` + + .. attribute:: conserved_dist + + Number of conserved distances. + + :type: :class:`int` + + .. attribute:: total_dist + + Total number of conserved distances. + + :type: :class:`int` + + .. method:: ToString(structural_checks) + + :return: String representation of the lDDTLocalScore object. + :rtype: :class:`str` + + :param structural_checks: Where structural checks applied during calculations? + :type structural_checks: bool + + .. method:: GetHeader(structural_checks, cutoffs_length) + + Get the names of the fields as printed by ToString method. + + :param structural_checks: Where structural checks applied during calculations? + :type structural_checks: bool + :param cutoffs_length: Length of the cutoffs list used for calculations + :type cutoffs_length: int + + +.. class:: lDDTSettings(bond_tolerance=12, \ + angle_tolerance=12, \ + radius=15, \ + sequence_separation=0, \ + sel="", \ + parameter_file_path="", \ + structural_checks=True, \ + consistency_checks=True, \ + cutoffs=(0.5, 1.0, 2.0, 4.0), \ + label="locallddt") + + Object containing the settings used for lDDT calculations. - :param chain: a string containing the name of the chain to which the atom - belongs - :param residue_number: the number of the residue to which the atom belongs - :type residue_number: :class:`~ost.mol.ResNum` - :param residue_name: a string containing the name of the residue to which - the atom belongs - :param atom_name: a string containing the name of the atom + :param bond_tolerance: Sets :attr:`bond_tolerance`. + :param angle_tolerance: Sets :attr:`angle_tolerance`. + :param radius: Sets :attr:`radius`. + :param sequence_separation: Sets :attr:`sequence_separation`. + :param sel: Sets :attr:`sel`. + :param parameter_file_path: Sets :attr:`parameter_file_path`. + :param structural_checks: Sets :attr:`structural_checks`. + :param consistency_checks: Sets :attr:`consistency_checks`. + :param cutoffs: Sets :attr:`cutoffs`. + :param label: Sets :attr:`label`. + + .. attribute:: bond_tolerance + + Tolerance in stddevs for bonds. + + :type: :class:`float` + + .. attribute:: angle_tolerance + + Tolerance in stddevs for angles. + + :type: :class:`float` + + .. attribute:: radius + + Distance inclusion radius. + + :type: :class:`float` + + .. attribute:: sequence_separation + + Sequence separation. + + :type: :class:`int` + + .. attribute:: sel + + Selection performed on reference(s). + + :type: :class:`str` + + .. attribute:: parameter_file_path + + Path to the stereochemical parameter file. If set to "", it the default file + shipped with OpenStructure is used (see + :class:`~ost.io.StereoChemicalParamsReader`). + + :type: :class:`str` + + .. attribute:: structural_checks + + Are structural checks and filter input data on? + + :type: :class:`bool` + + .. attribute:: consistency_checks + + Are consistency checks on? + + :type: :class:`bool` + + .. attribute:: cutoffs + + List of thresholds used to determine distance conservation. + + :type: :class:`list` of :class:`float` + + .. attribute:: label + + The base name for the ResidueHandle properties that store the local scores. + + :type: :class:`str` + + .. method:: SetStereoChemicalParamsPath(path) + + Set the path to the stereochemical parameter file. + + :param path: Path to stereochemical parameter file + :type path: str + + .. method:: PrintParameters() + + Print settings. + + .. method:: ToString() + + :return: String representation of the lDDTSettings object. + :rtype: :class:`str` + + +.. class:: UniqueAtomIdentifier(chain, residue_number, residue_name, atom_name) + + Object containing enough information to uniquely identify an atom in a + structure. + + :param chain: A string containing the name of the chain to which the atom + belongs + :param residue_number: The number of the residue to which the atom belongs + :type residue_number: :class:`~ost.mol.ResNum` + :param residue_name: A string containing the name of the residue to which + the atom belongs + :param atom_name: A string containing the name of the atom .. method:: GetChainName() @@ -1289,6 +1568,8 @@ used to skip frames in the analysis. Mapping functions -------------------------------------------------------------------------------- +.. currentmodule:: ost.mol.alg + The following functions help to convert one residue into another by reusing as much as possible from the present atoms. They are mainly meant to map from standard amino acid to other standard amino acids or from modified amino acids @@ -1389,7 +1670,7 @@ function: map_nonstd_res=False, assign_elem=True) Molck(ent, lib, ms) - SavePDB(ent, "<OUTPUT PATH>", profile="SLOPPY") + SavePDB(ent, "<OUTPUT PATH>") It can also be split into subsequent commands for greater controll: @@ -1433,42 +1714,86 @@ It can also be split into subsequent commands for greater controll: colored=False) CleanUpElementColumn(lib=lib, ent=ent) - SavePDB(ent, "<OUTPUT PATH>", profile="SLOPPY") + SavePDB(ent, "<OUTPUT PATH>") API ### - -.. class:: MolckSettings +.. class:: MolckSettings(rm_unk_atoms=False, rm_non_std=False, \ + rm_hyd_atoms=True, rm_oxt_atoms=False, \ + rm_zero_occ_atoms=False, colored=False, \ + map_nonstd_res=True, assign_elem=True) Stores settings used for Molecular Checker. - .. method:: __init__(rm_unk_atoms=False,rm_non_std=False,rm_hyd_atoms=True,rm_oxt_atoms=False, rm_zero_occ_atoms=False,colored=False,map_nonstd_res=True, assign_elem=True) + :param rm_unk_atoms: Sets :attr:`rm_unk_atoms`. + :param rm_non_std: Sets :attr:`rm_non_std`. + :param rm_hyd_atoms: Sets :attr:`rm_hyd_atoms`. + :param rm_oxt_atoms: Sets :attr:`rm_oxt_atoms`. + :param rm_zero_occ_atoms: Sets :attr:`rm_zero_occ_atoms`. + :param colored: Sets :attr:`colored`. + :param map_nonstd_res: Sets :attr:`map_nonstd_res`. + :param assign_elem: Sets :attr:`assign_elem`. + + .. attribute:: rm_unk_atoms + + Remove unknown and atoms not following the nomenclature. + + :type: :class:`bool` + + .. attribute:: rm_non_std + + Remove all residues not one of the 20 standard amino acids + + :type: :class:`bool` + + .. attribute:: rm_hyd_atoms + + Remove hydrogen atoms - Initializes MolckSettings. - - :param rm_unk_atoms: Remove unknown and atoms not following the nomenclature - :type rm_unk_atoms: :class:`bool` - :param rm_non_std: Remove all residues not one of the 20 standard amino acids - :type rm_non_std: :class:`bool` - :param rm_hyd_atoms: Remove hydrogen atoms - :type rm_hyd_atoms: :class:`bool` - :param rm_oxt_atoms: Remove terminal oxygens - :type rm_oxt_atoms: :class:`bool` - :param rm_zero_occ_atoms: Remove atoms with zero occupancy - :type rm_zero_occ_atoms: :class:`bool` - :param colored: Whether output should be colored - :type colored: :class:`bool` - :param map_nonstd_res: Maps modified residues back to the parent amino acid, for example - MSE -> MET, SEP -> SER - :type map_nonstd_res: :class:`bool` - :param assign_elem: Clean up element column - :type assign_elem: :class:`bool` + :type: :class:`bool` + + .. attribute:: rm_oxt_atoms + + Remove terminal oxygens + + :type: :class:`bool` + + .. attribute:: rm_zero_occ_atoms + + Remove atoms with zero occupancy + + :type: :class:`bool` + + .. attribute:: colored + + Whether output should be colored + + :type: :class:`bool` + + .. attribute:: map_nonstd_res + + Maps modified residues back to the parent amino acid, for example + MSE -> MET, SEP -> SER + + :type: :class:`bool` + + .. attribute:: assign_elem + + Clean up element column + + :type: :class:`bool` + .. method:: ToString() - String representation of the MolckSettings. + :return: String representation of the MolckSettings. + :rtype: :class:`str` + +.. warning:: + The API here is set such that the functions modify the passed structure *ent* + in-place. If this is not ok, please work on a copy of the structure. .. function:: Molck(ent, lib, settings) @@ -1491,7 +1816,9 @@ API :param lib: Compound library :type lib: :class:`~ost.conop.CompoundLib` -.. function:: RemoveAtoms(ent,lib,rm_unk_atoms=False,rm_non_std=False,rm_hyd_atoms=True,rm_oxt_atoms=False,rm_zero_occ_atoms=False,colored=False) +.. function:: RemoveAtoms(ent, lib, rm_unk_atoms=False, rm_non_std=False, \ + rm_hyd_atoms=True, rm_oxt_atoms=False, \ + rm_zero_occ_atoms=False, colored=False) Removes atoms and residues according to some criteria. @@ -1499,18 +1826,12 @@ API :type ent: :class:`~ost.mol.EntityHandle` :param lib: Compound library :type lib: :class:`~ost.conop.CompoundLib` - :param rm_unk_atoms: Remove unknown and atoms not following the nomenclature - :type rm_unk_atoms: :class:`bool` - :param rm_non_std: Remove all residues not one of the 20 standard amino acids - :type rm_non_std: :class:`bool` - :param rm_hyd_atoms: Remove hydrogen atoms - :type rm_hyd_atoms: :class:`bool` - :param rm_oxt_atoms: Remove terminal oxygens - :type rm_oxt_atoms: :class:`bool` - :param rm_zero_occ_atoms: Remove atoms with zero occupancy - :type rm_zero_occ_atoms: :class:`bool` - :param colored: Whether output should be colored - :type colored: :class:`bool` + :param rm_unk_atoms: See :attr:`MolckSettings.rm_unk_atoms` + :param rm_non_std: See :attr:`MolckSettings.rm_non_std` + :param rm_hyd_atoms: See :attr:`MolckSettings.rm_hyd_atoms` + :param rm_oxt_atoms: See :attr:`MolckSettings.rm_oxt_atoms` + :param rm_zero_occ_atoms: See :attr:`MolckSettings.rm_zero_occ_atoms` + :param colored: See :attr:`MolckSettings.colored` .. function:: CleanUpElementColumn(ent, lib) diff --git a/modules/mol/alg/doc/molck.rst b/modules/mol/alg/doc/molck.rst index 8dd1d1f39ed9ae3ebb1c8bba5126701c58f85476..5936b0584c5b1c3fb8697bb8151859a0a28b017d 100644 --- a/modules/mol/alg/doc/molck.rst +++ b/modules/mol/alg/doc/molck.rst @@ -56,4 +56,11 @@ please find them following: Default: %-molcked.pdb --color=auto|on|off whether output should be colored --map-nonstd maps modified residues back to the parent amino acid, for example - MSE -> MET, SEP -> SER. \ No newline at end of file + MSE -> MET, SEP -> SER. + +================ +Molck Python API +================ + +Within OST, one can also call the :func:`~ost.mol.alg.Molck` function directly +on entities to get the same effect as with the binary. diff --git a/modules/mol/alg/pymod/wrap_mol_alg.cc b/modules/mol/alg/pymod/wrap_mol_alg.cc index 1642c2cafe124f4eb189d2ddb8d4d308458e0309..913b157748b3febce98b159a742b64afe41054fb 100644 --- a/modules/mol/alg/pymod/wrap_mol_alg.cc +++ b/modules/mol/alg/pymod/wrap_mol_alg.cc @@ -18,6 +18,7 @@ //------------------------------------------------------------------------------ #include <boost/python.hpp> +#include <boost/python/raw_function.hpp> #include <boost/python/suite/indexing/map_indexing_suite.hpp> #include <ost/config.hh> #include <ost/mol/alg/local_dist_diff_test.hh> @@ -53,6 +54,7 @@ namespace { std::pair<long int,long int> (*lddt_a)(const mol::EntityView&, const mol::alg::GlobalRDMap& , std::vector<Real>, int, const String&)=&mol::alg::LocalDistDiffTest; Real (*lddt_c)(const mol::EntityView&, const mol::EntityView& , Real, Real, const String&)=&mol::alg::LocalDistDiffTest; +// Real (*lddt_d)(const mol::EntityView&, std::vector<mol::EntityView>&, const mol::alg::GlobalRDMap&, mol::alg::lDDTSettings&)=&mol::alg::LocalDistDiffTest; Real (*lddt_b)(const seq::AlignmentHandle&,Real, Real, int, int)=&mol::alg::LocalDistDiffTest; std::pair<mol::EntityView,mol::alg::ClashingInfo> (*fc_a)(const mol::EntityView&, const mol::alg::ClashingDistances&,bool)=&mol::alg::FilterClashes; std::pair<mol::EntityView,mol::alg::ClashingInfo> (*fc_b)(const mol::EntityHandle&, const mol::alg::ClashingDistances&, bool)=&mol::alg::FilterClashes; @@ -61,6 +63,18 @@ std::pair<mol::EntityView,mol::alg::StereoChemistryInfo> (*csc_b)(const mol::Ent mol::CoordGroupHandle (*superpose_frames1)(mol::CoordGroupHandle&, mol::EntityView&, int, int, int)=&mol::alg::SuperposeFrames; mol::CoordGroupHandle (*superpose_frames2)(mol::CoordGroupHandle&, mol::EntityView&, mol::EntityView&, int, int)=&mol::alg::SuperposeFrames; + +Real lddt_d(const mol::EntityView& model, list& reference_list, const mol::alg::GlobalRDMap& distance_list, mol::alg::lDDTSettings& settings){ + int reference_list_length = boost::python::extract<int>(reference_list.attr("__len__")()); + std::vector<ost::mol::EntityView> reference_list_vector(reference_list_length); + + for (int i=0; i<reference_list_length; i++) { + reference_list_vector[i] = boost::python::extract<ost::mol::EntityView>(reference_list[i]); + } + + return LocalDistDiffTest(model, reference_list_vector, distance_list, settings); +} + ost::mol::alg::StereoChemicalParams fill_stereochemical_params_wrapper (const String& header, const list& stereo_chemical_props_file) { int stereo_chemical_props_file_length = boost::python::extract<int>(stereo_chemical_props_file.attr("__len__")()); @@ -85,13 +99,13 @@ ost::mol::alg::ClashingDistances fill_clashing_distances_wrapper (const list& st return ost::mol::alg::FillClashingDistances(stereo_chemical_props_file_vector); } -ost::mol::alg::GlobalRDMap create_distance_list_from_multiple_references(const list& ref_list, const list& cutoff_list, int sequence_separation, Real max_dist) +ost::mol::alg::GlobalRDMap create_distance_list_from_multiple_references(const list& reference_list, const list& cutoff_list, int sequence_separation, Real max_dist) { - int ref_list_length = boost::python::extract<int>(ref_list.attr("__len__")()); - std::vector<ost::mol::EntityView> ref_list_vector(ref_list_length); + int reference_list_length = boost::python::extract<int>(reference_list.attr("__len__")()); + std::vector<ost::mol::EntityView> reference_list_vector(reference_list_length); - for (int i=0; i<ref_list_length; i++) { - ref_list_vector[i] = boost::python::extract<ost::mol::EntityView>(ref_list[i]); + for (int i=0; i<reference_list_length; i++) { + reference_list_vector[i] = boost::python::extract<ost::mol::EntityView>(reference_list[i]); } int cutoff_list_length = boost::python::extract<int>(cutoff_list.attr("__len__")()); @@ -100,10 +114,152 @@ ost::mol::alg::GlobalRDMap create_distance_list_from_multiple_references(const l for (int i=0; i<cutoff_list_length; i++) { cutoff_list_vector[i] = boost::python::extract<Real>(cutoff_list[i]); } - return ost::mol::alg::CreateDistanceListFromMultipleReferences(ref_list_vector, cutoff_list_vector, sequence_separation, max_dist); + return ost::mol::alg::CreateDistanceListFromMultipleReferences(reference_list_vector, cutoff_list_vector, sequence_separation, max_dist); } +object lDDTSettingsInitWrapper(tuple args, dict kwargs){ + + object self = args[0]; + args = tuple(args.slice(1,_)); + + Real bond_tolerance = 12.0; + if(kwargs.contains("bond_tolerance")){ + bond_tolerance = extract<Real>(kwargs["bond_tolerance"]); + kwargs["bond_tolerance"].del(); + } + + Real angle_tolerance = 12.0; + if(kwargs.contains("angle_tolerance")){ + angle_tolerance = extract<Real>(kwargs["angle_tolerance"]); + kwargs["angle_tolerance"].del(); + } + + Real radius = 15.0; + if(kwargs.contains("radius")){ + radius = extract<Real>(kwargs["radius"]); + kwargs["radius"].del(); + } + + int sequence_separation = 0; + if(kwargs.contains("sequence_separation")){ + sequence_separation = extract<int>(kwargs["sequence_separation"]); + kwargs["sequence_separation"].del(); + } + + String sel = ""; + if(kwargs.contains("sel")){ + sel = extract<String>(kwargs["sel"]); + kwargs["sel"].del(); + } + + String parameter_file_path = ""; + if(kwargs.contains("parameter_file_path")){ + parameter_file_path = extract<String>(kwargs["parameter_file_path"]); + kwargs["parameter_file_path"].del(); + } + + bool structural_checks = true; + if(kwargs.contains("structural_checks")){ + structural_checks = extract<bool>(kwargs["structural_checks"]); + kwargs["structural_checks"].del(); + } + bool consistency_checks = true; + if(kwargs.contains("consistency_checks")){ + consistency_checks = extract<bool>(kwargs["consistency_checks"]); + kwargs["consistency_checks"].del(); + } + + std::vector<Real> cutoffs; + if(kwargs.contains("cutoffs")){ + list cutoff_list = extract<list>(kwargs["cutoffs"]); + int cutoff_list_length = boost::python::extract<int>(cutoff_list.attr("__len__")()); + for (int i=0; i<cutoff_list_length; i++) { + cutoffs.push_back(boost::python::extract<Real>(cutoff_list[i])); + } + kwargs["cutoffs"].del(); + } else { + cutoffs.push_back(0.5); + cutoffs.push_back(1.0); + cutoffs.push_back(2.0); + cutoffs.push_back(4.0); + } + + String label = "locallddt"; + if(kwargs.contains("label")){ + label = extract<String>(kwargs["label"]); + kwargs["label"].del(); + } + + if(len(kwargs) > 0){ + std::stringstream ss; + ss << "Invalid keywords observed when setting up MolckSettings! "; + ss << "Or did you pass the same keyword twice? "; + ss << "Valid keywords are: bond_tolerance, angle_tolerance, radius, "; + ss << "sequence_separation, sel, parameter_file_path, structural_checks, "; + ss << "consistency_checks, cutoffs, label!"; + throw std::invalid_argument(ss.str()); + } + + return self.attr("__init__")(bond_tolerance, + angle_tolerance, + radius, + sequence_separation, + sel, + parameter_file_path, + structural_checks, + consistency_checks, + cutoffs, + label); +} + + +void clean_lddt_references_wrapper(const list& reference_list) +{ + int reference_list_length = boost::python::extract<int>(reference_list.attr("__len__")()); + std::vector<ost::mol::EntityView> reference_list_vector(reference_list_length); + + for (int i=0; i<reference_list_length; i++) { + reference_list_vector[i] = boost::python::extract<ost::mol::EntityView>(reference_list[i]); + } + + return ost::mol::alg::CleanlDDTReferences(reference_list_vector); +} + +ost::mol::alg::GlobalRDMap prepare_lddt_global_rdmap_wrapper(const list& reference_list, mol::alg::lDDTSettings settings) +{ + int reference_list_length = boost::python::extract<int>(reference_list.attr("__len__")()); + std::vector<ost::mol::EntityView> reference_list_vector(reference_list_length); + + for (int i=0; i<reference_list_length; i++) { + reference_list_vector[i] = boost::python::extract<ost::mol::EntityView>(reference_list[i]); + } + + return mol::alg::PreparelDDTGlobalRDMap(reference_list_vector, settings); +} + +list get_lddt_per_residue_stats_wrapper(mol::EntityHandle& model, + ost::mol::alg::GlobalRDMap& distance_list, + ost::mol::alg::lDDTSettings& settings) { + std::vector<mol::alg::lDDTLocalScore> scores = GetlDDTPerResidueStats(model, distance_list, settings); + list local_scores_list; + for (std::vector<mol::alg::lDDTLocalScore>::const_iterator sit = scores.begin(); sit != scores.end(); ++sit) { + local_scores_list.append(*sit); + } + return local_scores_list; +} + +void print_lddt_per_residue_stats_wrapper(list& scores, mol::alg::lDDTSettings settings){ + int scores_length = boost::python::extract<int>(scores.attr("__len__")()); + std::vector<mol::alg::lDDTLocalScore> scores_vector(scores_length); + + for (int i=0; i<scores_length; i++) { + scores_vector[i] = boost::python::extract<mol::alg::lDDTLocalScore>(scores[i]); + } + + return mol::alg::PrintlDDTPerResidueStats(scores_vector, settings); +} + } @@ -126,6 +282,7 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) def("LocalDistDiffTest", lddt_a, (arg("sequence_separation")=0,arg("local_lddt_property_string")="")); def("LocalDistDiffTest", lddt_c, (arg("local_lddt_property_string")="")); def("LocalDistDiffTest", lddt_b, (arg("ref_index")=0, arg("mdl_index")=1)); + def("LocalDistDiffTest", &lddt_d, (arg("model"), arg("reference_list"), ("distance_list"), arg("settings"))); def("FilterClashes", fc_a, (arg("ent"), arg("clashing_distances"), arg("always_remove_bb")=false)); def("FilterClashes", fc_b, (arg("ent"), arg("clashing_distances"), arg("always_remove_bb")=false)); def("CheckStereoChemistry", csc_a, (arg("ent"), arg("bonds"), arg("angles"), arg("bond_tolerance"), arg("angle_tolerance"), arg("always_remove_bb")=false)); @@ -167,13 +324,59 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) .def("GetResidueName",&mol::alg::UniqueAtomIdentifier::GetResidueName) .def("GetAtomName",&mol::alg::UniqueAtomIdentifier::GetAtomName) .def("GetQualifiedAtomName",&mol::alg::UniqueAtomIdentifier::GetQualifiedAtomName) - ; + ; + + class_<mol::alg::lDDTSettings>("lDDTSettings", no_init) + .def("__init__", raw_function(lDDTSettingsInitWrapper)) + .def(init<Real, Real, Real, int, String, String, bool, bool, std::vector<Real>&, String>()) + .def("ToString", &mol::alg::lDDTSettings::ToString) + .def("SetStereoChemicalParamsPath", &mol::alg::lDDTSettings::SetStereoChemicalParamsPath) + .def("PrintParameters", &mol::alg::lDDTSettings::PrintParameters) + .def("__repr__", &mol::alg::lDDTSettings::ToString) + .def("__str__", &mol::alg::lDDTSettings::ToString) + .def_readwrite("bond_tolerance", &mol::alg::lDDTSettings::bond_tolerance) + .def_readwrite("angle_tolerance", &mol::alg::lDDTSettings::angle_tolerance) + .def_readwrite("radius", &mol::alg::lDDTSettings::radius) + .def_readwrite("sequence_separation", &mol::alg::lDDTSettings::sequence_separation) + .def_readwrite("sel", &mol::alg::lDDTSettings::sel) + .def_readwrite("parameter_file_path", &mol::alg::lDDTSettings::parameter_file_path) + .def_readwrite("structural_checks", &mol::alg::lDDTSettings::structural_checks) + .def_readwrite("consistency_checks", &mol::alg::lDDTSettings::consistency_checks) + .def_readwrite("cutoffs", &mol::alg::lDDTSettings::cutoffs) + .def_readwrite("label", &mol::alg::lDDTSettings::label); + + class_<mol::alg::lDDTLocalScore>("lDDTLocalScore", init<String, String, int, String, String, Real, int, int>()) + .def("ToString", &mol::alg::lDDTLocalScore::ToString) + .def("GetHeader", &mol::alg::lDDTLocalScore::GetHeader) + .def_readwrite("cname", &mol::alg::lDDTLocalScore::cname) + .def_readwrite("rname", &mol::alg::lDDTLocalScore::rname) + .def_readwrite("rnum", &mol::alg::lDDTLocalScore::rnum) + .def_readwrite("is_assessed", &mol::alg::lDDTLocalScore::is_assessed) + .def_readwrite("quality_problems", &mol::alg::lDDTLocalScore::quality_problems) + .def_readwrite("local_lddt", &mol::alg::lDDTLocalScore::local_lddt) + .def_readwrite("conserved_dist", &mol::alg::lDDTLocalScore::conserved_dist) + .def_readwrite("total_dist", &mol::alg::lDDTLocalScore::total_dist); def("FillClashingDistances",&fill_clashing_distances_wrapper); def("FillStereoChemicalParams",&fill_stereochemical_params_wrapper); def("IsStandardResidue",&mol::alg::IsStandardResidue); def("PrintGlobalRDMap",&mol::alg::PrintGlobalRDMap); def("PrintResidueRDMap",&mol::alg::PrintResidueRDMap); + def("CleanlDDTReferences", &clean_lddt_references_wrapper); + def("PreparelDDTGlobalRDMap", + &prepare_lddt_global_rdmap_wrapper, + (arg("reference_list"), arg("settings"))); + def("CheckStructure", + &mol::alg::CheckStructure, + (arg("ent"), arg("bond_table"), arg("angle_table"), arg("nonbonded_table"), + arg("bond_tolerance"), arg("angle_tolerance"))); + def("GetlDDTPerResidueStats", + &get_lddt_per_residue_stats_wrapper, + (arg("model"), arg("distance_list"), arg("settings"))); + def("PrintlDDTPerResidueStats", + &print_lddt_per_residue_stats_wrapper, + (arg("scores"), arg("settings"))); + class_<mol::alg::PDBize>("PDBize", init<int>(arg("min_polymer_size")=10)) diff --git a/modules/mol/alg/src/filter_clashes.cc b/modules/mol/alg/src/filter_clashes.cc index 690a10aff11a238a45663186ac6b98c3ace29db5..455d1e7dbd8114aeb7ef972cc50691de2e9a0e8c 100644 --- a/modules/mol/alg/src/filter_clashes.cc +++ b/modules/mol/alg/src/filter_clashes.cc @@ -201,7 +201,7 @@ bool StereoChemicalParams::IsEmpty() const return false; } -StereoChemicalParams FillStereoChemicalParams(const String& header, std::vector<String>& stereo_chemical_props_file) +StereoChemicalParams FillStereoChemicalParams(const String& header, std::vector<String>& stereo_chemical_props_file, bool check) { StereoChemicalParams table; bool found=false; @@ -276,11 +276,18 @@ StereoChemicalParams FillStereoChemicalParams(const String& header, std::vector< if (found==false) { std::cout << "Could not find the relevant section in the stereo-chemical parameter file" << std::endl; return StereoChemicalParams(); - }; + }; + if (check) { + if (table.IsEmpty()) { + std::stringstream serr; + serr << "Error reading the " << header << " section of the stereo-chemical parameter file."; + throw ost::Error(serr.str()); + } + } return table; }; -ClashingDistances FillClashingDistances(std::vector<String>& stereo_chemical_props_file) +ClashingDistances FillClashingDistances(std::vector<String>& stereo_chemical_props_file, bool check) { ClashingDistances table; bool found=false; @@ -340,7 +347,14 @@ ClashingDistances FillClashingDistances(std::vector<String>& stereo_chemical_pro if (found==false) { std::cout << "Could not find the relevant section in the stereo-chemical parameter file" << std::endl; return ClashingDistances(); - } + } + if (check) { + if (table.IsEmpty()) { + std::stringstream serr; + serr << "Error reading the Clashing section of the stereo-chemical parameter file."; + throw ost::Error(serr.str()); + } + } return table; } diff --git a/modules/mol/alg/src/filter_clashes.hh b/modules/mol/alg/src/filter_clashes.hh index f53f123dc09922937382a5bb67502b0add7603c2..fe6d12c75a7303ec637e194d341831c1200ac4b0 100644 --- a/modules/mol/alg/src/filter_clashes.hh +++ b/modules/mol/alg/src/filter_clashes.hh @@ -244,13 +244,13 @@ private: /// \brief Fills a list of reference clashing distances from the content of a parameter file /// /// Requires a list of strings holding the contents of a parameter file, one line per string -ClashingDistances DLLEXPORT_OST_MOL_ALG FillClashingDistances(std::vector<String>& stereo_chemical_props_file); +ClashingDistances DLLEXPORT_OST_MOL_ALG FillClashingDistances(std::vector<String>& stereo_chemical_props_file, bool check=false); /// \brief Fills a list of stereo-chemical statistics from the content of a parameter file /// /// Requires a list of strings holding the contents of a parameter file, one line per string /// The header can be 'Bonds' to read bond statistics or 'Angles' to read angle statistics -StereoChemicalParams DLLEXPORT_OST_MOL_ALG FillStereoChemicalParams(const String& header, std::vector<String>& stereo_chemical_props_file); +StereoChemicalParams DLLEXPORT_OST_MOL_ALG FillStereoChemicalParams(const String& header, std::vector<String>& stereo_chemical_props_file, bool check=false); /// \brief Filters a structure based on detected clashes between non bonded atoms. Entity version /// diff --git a/modules/mol/alg/src/lddt.cc b/modules/mol/alg/src/lddt.cc index 11e328e4deb40c00117dd666337af25a57112fc3..60156b86b4515c773994cd3031252915525c07fe 100644 --- a/modules/mol/alg/src/lddt.cc +++ b/modules/mol/alg/src/lddt.cc @@ -32,6 +32,7 @@ #include <ost/mol/alg/filter_clashes.hh> #include <ost/io/mol/pdb_reader.hh> #include <ost/io/io_exception.hh> +#include <ost/io/stereochemical_params_reader.hh> #include <ost/conop/conop.hh> #include <ost/conop/compound_lib.hh> #include <ost/string_ref.hh> @@ -101,30 +102,6 @@ void usage() } -// computes coverage -std::pair<int,int> compute_coverage (const EntityView& v,const GlobalRDMap& glob_dist_list) -{ - int second=0; - int first=0; - if (v.GetResidueList().size()==0) { - if (glob_dist_list.size()==0) { - return std::make_pair(0,-1); - } else { - return std::make_pair(0,glob_dist_list.size()); - } - } - ChainView vchain=v.GetChainList()[0]; - for (GlobalRDMap::const_iterator i=glob_dist_list.begin();i!=glob_dist_list.end();++i) - { - ResNum rnum = (*i).first; - second++; - if (vchain.FindResidue(rnum)) { - first++; - } - } - return std::make_pair(first,second); -} - CompoundLibPtr load_compound_lib(const String& custom_path) { if (custom_path!="") { @@ -174,37 +151,21 @@ CompoundLibPtr load_compound_lib(const String& custom_path) } return CompoundLibPtr(); } -bool is_resnum_in_globalrdmap(const ResNum& resnum, const GlobalRDMap& glob_dist_list) -{ - for (GlobalRDMap::const_iterator i=glob_dist_list.begin(), e=glob_dist_list.end(); i!=e; ++i) { - ResNum rn = i->first; - if (rn==resnum) { - return true; - } - } - return false; -} int main (int argc, char **argv) { // sets some default values for parameters String version = OST_VERSION_STRING; - Real bond_tolerance = 12.0; - Real angle_tolerance = 12.0; - Real radius=15.0; - int sequence_separation = 0; - + lDDTSettings settings; + settings.structural_checks=false; // creates the required loading profile IOProfile profile; // parses options - String sel; String custom_path; - bool structural_checks=false; - bool consistency_checks=true; po::options_description desc("Options"); desc.add_options() ("calpha,c", "consider only calpha atoms") - ("sel,s", po::value<String>(&sel)->default_value(""), "selection performed on reference structure") + ("sel,s", po::value<String>(&settings.sel)->default_value(""), "selection performed on reference structure") ("tolerant,t", "fault tolerant mode") ("structural-checks,f", "perform stereo-chemical and clash checks") ("ignore-consistency-checks,x", "ignore residue name consistency checks") @@ -253,18 +214,18 @@ int main (int argc, char **argv) profile.calpha_only=true; } if (vm.count("structural-checks")) { - structural_checks=true; + settings.structural_checks=true; } if (vm.count("ignore-consistency-checks")) { - consistency_checks=false; + settings.consistency_checks=false; } if (vm.count("tolerant")) { profile.fault_tolerant=true; } - String parameter_filename; + if (vm.count("parameter-file")) { - parameter_filename=vm["parameter-file"].as<String>(); - } else if (structural_checks==true) { + settings.parameter_file_path=vm["parameter-file"].as<String>(); + } else if (settings.structural_checks==true) { std::cout << "Please specify a stereo-chemical parameter file" << std::endl; exit(-1); } @@ -287,23 +248,17 @@ int main (int argc, char **argv) Logger::Instance().PushVerbosityLevel(ost_verbosity_level); if (vm.count("bond_tolerance")) { - bond_tolerance=vm["bond_tolerance"].as<Real>(); + settings.bond_tolerance=vm["bond_tolerance"].as<Real>(); } if (vm.count("angle_tolerance")) { - angle_tolerance=vm["angle_tolerance"].as<Real>(); + settings.angle_tolerance=vm["angle_tolerance"].as<Real>(); } if (vm.count("inclusion_radius")) { - radius=vm["inclusion_radius"].as<Real>(); + settings.radius=vm["inclusion_radius"].as<Real>(); } if (vm.count("sequence_separation")) { - sequence_separation=vm["sequence_separation"].as<int>(); + settings.sequence_separation=vm["sequence_separation"].as<int>(); } - - std::vector<Real> cutoffs; - cutoffs.push_back(0.5); - cutoffs.push_back(1.0); - cutoffs.push_back(2.0); - cutoffs.push_back(4.0); std::vector<EntityView> ref_list; @@ -311,55 +266,37 @@ int main (int argc, char **argv) // if the reference file is a comma-separated list of files, switches to multi- // reference mode GlobalRDMap glob_dist_list; - String ref_file=files.back(); + String ref_file=files.back(); ost::StringRef ref_file_sr(ref_file.c_str(),ref_file.length()); std::vector<StringRef> ref_file_split_sr=ref_file_sr.split(','); - if (ref_file_split_sr.size()==1) { - std::cout << "Multi-reference mode: Off" << std::endl; - String ref_filename = ref_file_split_sr[0].str(); + for (std::vector<StringRef>::const_iterator ref_file_split_sr_it = ref_file_split_sr.begin(); + ref_file_split_sr_it != ref_file_split_sr.end();++ref_file_split_sr_it) { + String ref_filename = ref_file_split_sr_it->str(); EntityHandle ref=load(ref_filename, profile); if (!ref) { exit(-1); - } - EntityView refview=ref.GetChainList()[0].Select("peptide=true"); - ref_list.push_back(refview); - glob_dist_list = CreateDistanceList(refview,radius); - } else { - std::cout << "Multi-reference mode: On" << std::endl; - for (std::vector<StringRef>::const_iterator ref_file_split_sr_it = ref_file_split_sr.begin(); - ref_file_split_sr_it != ref_file_split_sr.end();++ref_file_split_sr_it) { - String ref_filename = ref_file_split_sr_it->str(); - EntityHandle ref=load(ref_filename, profile); - if (!ref) { + } + if (settings.sel != ""){ + std::cout << "Performing \"" << settings.sel << "\" selection on reference " << ref_filename << std::endl; + try { + ref_list.push_back(ref.Select(settings.sel)); + } catch (const ost::mol::QueryError& e) { + std::cerr << "Provided selection argument failed." << std::endl << e.GetFormattedMessage() << std::endl; exit(-1); } - if (! ref_list.empty()) { - if (ref_list[0].GetChainList()[0].GetName()!=ref.GetChainList()[0].GetName()) { - std::cout << "ERROR: First chains in the reference structures have different names" << std::endl; - exit(-1); - } - } - EntityView refview=ref.GetChainList()[0].Select("peptide=true"); + } + else { ref_list.push_back(ref.CreateFullView()); - } - glob_dist_list = CreateDistanceListFromMultipleReferences (ref_list,cutoffs,sequence_separation,radius); - } + } + } + CleanlDDTReferences(ref_list); + glob_dist_list = PreparelDDTGlobalRDMap(ref_list, settings); files.pop_back(); // prints out parameters used in the lddt calculation std::cout << "Verbosity level: " << verbosity_level << std::endl; - if (structural_checks) { - std::cout << "Stereo-chemical and steric clash checks: On " << std::endl; - } else { - std::cout << "Stereo-chemical and steric clash checks: Off " << std::endl; - } - std::cout << "Inclusion Radius: " << radius << std::endl; - - std::cout << "Sequence separation: " << sequence_separation << std::endl; - if (structural_checks) { - std::cout << "Parameter filename: " << parameter_filename << std::endl; - std::cout << "Tolerance in stddevs for bonds: " << bond_tolerance << std::endl; - std::cout << "Tolerance in stddevs for angles: " << angle_tolerance << std::endl; + settings.PrintParameters(); + if (settings.structural_checks) { LOG_INFO("Log entries format:"); LOG_INFO("BOND INFO FORMAT: Chain Residue ResNum Bond Min Max Observed Z-score Status"); LOG_INFO("ANGLE INFO FORMAT: Chain Residue ResNum Angle Min Max Observed Z-score Status"); @@ -382,183 +319,41 @@ int main (int argc, char **argv) } continue; } - EntityView v=model.GetChainList()[0].Select("peptide=true"); - EntityView outv=model.GetChainList()[0].Select("peptide=true"); - for (std::vector<EntityView>::const_iterator ref_list_it = ref_list.begin(); - ref_list_it != ref_list.end(); ++ref_list_it) { - bool cons_check = ResidueNamesMatch(v,*ref_list_it,consistency_checks); - if (cons_check==false) { - if (consistency_checks==true) { - LOG_ERROR("Residue names in model: " << files[i] << " and in reference structure(s) are inconsistent."); - exit(-1); - } else { - LOG_WARNING("Residue names in model: " << files[i] << " and in reference structure(s) are inconsistent."); - } - } - } + EntityView model_view = model.GetChainList()[0].Select("peptide=true"); boost::filesystem::path pathstring(files[i]); - String filestring=BFPathToString(pathstring); - std::cout << "File: " << files[i] << std::endl; - std::pair<int,int> cov = compute_coverage(v,glob_dist_list); - if (cov.second == -1) { - std::cout << "Coverage: 0 (0 out of 0 residues)" << std::endl; - } else { - std::cout << "Coverage: " << (float(cov.first)/float(cov.second)) << " (" << cov.first << " out of " << cov.second << " residues)" << std::endl; - } + std::cout << "File: " << files[i] << std::endl; - if (structural_checks) { - // reads in parameter files - boost::filesystem::path loc(parameter_filename); - boost::filesystem::ifstream infile(loc); - if (!infile) { - std::cout << "Could not find " << parameter_filename << std::endl; - exit(-1); - } - std::vector<String> stereo_chemical_props; - String line; - while (std::getline(infile, line)) - { - std::stringstream line_stream(line); - stereo_chemical_props.push_back(line); - } - StereoChemicalParams bond_table = FillStereoChemicalParams("Bond",stereo_chemical_props); - if (bond_table.IsEmpty()) { - std::cout << "Error reading the Bond section of the stereo-chemical parameter file." << std::endl; - exit(-1); - } - StereoChemicalParams angle_table = FillStereoChemicalParams("Angle",stereo_chemical_props); - if (angle_table.IsEmpty()) { - std::cout << "Error reading the Angles section of the stereo-chemical parameter file." << std::endl; - exit(-1); - } - - ClashingDistances nonbonded_table = FillClashingDistances(stereo_chemical_props); - - if (nonbonded_table.IsEmpty()) { - std::cout << "Error reading the Clashing section of the stereo-chemical parameter file." << std::endl; - exit(-1); - } - // performs structural checks and filters the structure - StereoChemistryInfo stereo_chemistry_info; - try { - std::pair<EntityView,StereoChemistryInfo> csc_result = alg::CheckStereoChemistry(v,bond_table,angle_table,bond_tolerance,angle_tolerance); - v = csc_result.first; - stereo_chemistry_info = csc_result.second; - } catch (std::exception& e) { - std::cout << "An error occurred during the structure quality checks, stage 1:" << std::endl; + if (settings.structural_checks) { + StereoChemicalParamsReader stereochemical_params(settings.parameter_file_path); + try { + stereochemical_params.Read(true); + } catch (std::exception& e) { std::cout << e.what() << std::endl; exit(-1); } - std::cout << "Average Z-Score for bond lengths: " << std::fixed << std::setprecision(5) << stereo_chemistry_info.GetAvgZscoreBonds() << std::endl; - std::cout << "Bonds outside of tolerance range: " << stereo_chemistry_info.GetBadBondCount() << " out of " << stereo_chemistry_info.GetBondCount() << std::endl; - std::cout << "Bond\tAvg Length\tAvg zscore\tNum Bonds" << std::endl; - std::map<String,BondLengthInfo> avg_bond_length_info = stereo_chemistry_info.GetAvgBondLengthInfo(); - for (std::map<String,BondLengthInfo>::const_iterator abli_it=avg_bond_length_info.begin();abli_it!=avg_bond_length_info.end();++abli_it) { - String key = (*abli_it).first; - BondLengthInfo bond_length_info = (*abli_it).second; - std::cout << key << "\t" << std::fixed << std::setprecision(5) << std::left << std::setw(10) << - bond_length_info.GetAvgLength() << "\t" << std::left << std::setw(10) << bond_length_info.GetAvgZscore() << "\t" << bond_length_info.GetCount() << std::endl; - } - std::cout << "Average Z-Score angle widths: " << std::fixed << std::setprecision(5) << stereo_chemistry_info.GetAvgZscoreAngles() << std::endl; - std::cout << "Angles outside of tolerance range: " << stereo_chemistry_info.GetBadAngleCount() << " out of " << stereo_chemistry_info.GetAngleCount() << std::endl; - ClashingInfo clash_info; + try { - std::pair<EntityView,ClashingInfo> fc_result = alg::FilterClashes(v,nonbonded_table); - v = fc_result.first; - clash_info = fc_result.second; - } catch (std::exception& e) { - std::cout << "An error occurred during the structure quality checks, stage 2:" << std::endl; + CheckStructure(model_view, + stereochemical_params.bond_table, + stereochemical_params.angle_table, + stereochemical_params.nonbonded_table, + settings.bond_tolerance, + settings.angle_tolerance); + } catch (std::exception& e) { std::cout << e.what() << std::endl; exit(-1); } - std::cout << clash_info.GetClashCount() << " non-bonded short-range distances shorter than tolerance distance" << std::endl; - std::cout << "Distances shorter than tolerance are on average shorter by: " << std::fixed << std::setprecision(5) << clash_info.GetAverageOffset() << std::endl; - - } - if (cov.first==0) { - std::cout << "Global LDDT score: 0.0" << std::endl; - return 0; } // computes the lddt score - String label="localldt"; - std::pair<int,int> total_ov=alg::LocalDistDiffTest(v, glob_dist_list, cutoffs, sequence_separation, label); - Real lddt = static_cast<Real>(total_ov.first)/(static_cast<Real>(total_ov.second) ? static_cast<Real>(total_ov.second) : 1); - std::cout << "Global LDDT score: " << std::setprecision(4) << lddt << std::endl; - std::cout << "(" << std::fixed << total_ov.first << " conserved distances out of " << total_ov.second - << " checked, over " << cutoffs.size() << " thresholds)" << std::endl; + Real lddt = LocalDistDiffTest(model_view, ref_list, glob_dist_list, settings); - // prints the residue-by-residue statistics - if (structural_checks) { - std::cout << "Local LDDT Scores:" << std::endl; - std::cout << "(A 'Yes' in the 'Quality Problems' column stands for problems" << std::endl; - std::cout << "in the side-chain of a residue, while a 'Yes+' for problems" << std::endl; - std::cout << "in the backbone)" << std::endl; - } else { - std::cout << "Local LDDT Scores:" << std::endl; - } - if (structural_checks) { - std::cout << "Chain\tResName\tResNum\tAsses.\tQ.Prob.\tScore\t(Conserved/Total, over " << cutoffs.size() << " thresholds)" << std::endl; - } else { - std::cout << "Chain\tResName\tResNum\tAsses.\tScore\t(Conserved/Total, over " << cutoffs.size() << " thresholds)" << std::endl; - } - for (ChainViewList::const_iterator ci = outv.GetChainList().begin(), - ce = outv.GetChainList().end(); ci != ce; ++ci) { - for (ResidueViewList::const_iterator rit = ci->GetResidueList().begin(), - re = ci->GetResidueList().end(); rit != re; ++rit) { - - ResidueView ritv=*rit; - ResNum rnum = ritv.GetNumber(); - bool assessed = false; - String assessed_string="No"; - String quality_problems_string="No"; - Real lddt_local = -1; - String lddt_local_string="-"; - int conserved_dist = -1; - int total_dist = -1; - String dist_string = "-"; - if (is_resnum_in_globalrdmap(rnum,glob_dist_list)) { - assessed = true; - assessed_string="Yes"; - } - if (ritv.HasProp("stereo_chemical_violation_sidechain") || - ritv.HasProp("steric_clash_sidechain")) { - quality_problems_string="Yes"; - } - if (ritv.HasProp("stereo_chemical_violation_backbone") || - ritv.HasProp("steric_clash_backbone")) { - quality_problems_string="Yes+"; - } - - if (assessed==true) { - if (ritv.HasProp(label)) { - lddt_local=ritv.GetFloatProp(label); - std::stringstream stkeylddt; - stkeylddt << std::fixed << std::setprecision(4) << lddt_local; - lddt_local_string=stkeylddt.str(); - conserved_dist=ritv.GetIntProp(label+"_conserved"); - total_dist=ritv.GetIntProp(label+"_total"); - std::stringstream stkeydist; - stkeydist << "("<< conserved_dist << "/" << total_dist << ")"; - dist_string=stkeydist.str(); - } else { - lddt_local = 0; - lddt_local_string="0.0000"; - conserved_dist = 0; - total_dist = 0; - dist_string="(0/0)"; - } - } - if (structural_checks) { - std::cout << ritv.GetChain() << "\t" << ritv.GetName() << "\t" << ritv.GetNumber() << '\t' << assessed_string << '\t' << quality_problems_string << '\t' << lddt_local_string << "\t" << dist_string << std::endl; - } else { - std::cout << ritv.GetChain() << "\t" << ritv.GetName() << "\t" << ritv.GetNumber() << '\t' << assessed_string << '\t' << lddt_local_string << "\t" << dist_string << std::endl; - } - } - } - std::cout << std::endl; + // prints the residue-by-residue statistics + std::vector<lDDTLocalScore> local_scores; + local_scores = GetlDDTPerResidueStats(model, glob_dist_list, settings); + PrintlDDTPerResidueStats(local_scores, settings); } return 0; } diff --git a/modules/mol/alg/src/local_dist_diff_test.cc b/modules/mol/alg/src/local_dist_diff_test.cc index aad66e7a639e265256fe5511a628203f2e57df53..9c988d1005c1ca2614a48fbf74e7258a07caf135 100644 --- a/modules/mol/alg/src/local_dist_diff_test.cc +++ b/modules/mol/alg/src/local_dist_diff_test.cc @@ -1,12 +1,29 @@ +#include <iomanip> +#include <sstream> #include <ost/log.hh> #include <ost/mol/mol.hh> +#include <ost/platform.hh> #include "local_dist_diff_test.hh" #include <boost/concept_check.hpp> +#include <boost/filesystem/convenience.hpp> +#include <ost/mol/alg/consistency_checks.hh> namespace ost { namespace mol { namespace alg { namespace { +// helper_function +String vector_to_string(std::vector<Real> vec) { + std::ostringstream str; + for (unsigned int i = 0; i < vec.size(); ++i) { + str << vec[i]; + if (i+1 != vec.size()) { + str << ", "; + } + } + return str.str(); +} + // helper function bool within_tolerance(Real mdl_dist, const std::pair<Real,Real>& values, Real tol) { @@ -307,6 +324,42 @@ void merge_distance_lists(GlobalRDMap& ref_dist_map, const GlobalRDMap& new_dist } +// Computes coverage +std::pair<int,int> ComputeCoverage(const EntityView& v,const GlobalRDMap& glob_dist_list) +{ + int second=0; + int first=0; + if (v.GetResidueList().size()==0) { + if (glob_dist_list.size()==0) { + return std::make_pair(0,-1); + } else { + return std::make_pair(0,glob_dist_list.size()); + } + } + ChainView vchain=v.GetChainList()[0]; + for (GlobalRDMap::const_iterator i=glob_dist_list.begin();i!=glob_dist_list.end();++i) + { + ResNum rnum = (*i).first; + second++; + if (vchain.FindResidue(rnum)) { + first++; + } + } + return std::make_pair(first,second); +} + +bool IsResnumInGlobalRDMap(const ResNum& resnum, const GlobalRDMap& glob_dist_list) +{ + for (GlobalRDMap::const_iterator i=glob_dist_list.begin(), e=glob_dist_list.end(); i!=e; ++i) { + ResNum rn = i->first; + if (rn==resnum) { + return true; + } + } + return false; +} + + // helper function bool IsStandardResidue(String rn) { @@ -337,6 +390,142 @@ bool IsStandardResidue(String rn) return false; } +lDDTSettings::lDDTSettings(): bond_tolerance(12.0), + angle_tolerance(12.0), + radius(15.0), + sequence_separation(0), + sel(""), + structural_checks(true), + consistency_checks(true), + label("localldt") { + cutoffs.push_back(0.5); + cutoffs.push_back(1.0); + cutoffs.push_back(2.0); + cutoffs.push_back(4.0); + SetStereoChemicalParamsPath(); + } + +lDDTSettings::lDDTSettings(Real init_bond_tolerance, + Real init_angle_tolerance, + Real init_radius, + int init_sequence_separation, + String init_sel, + String init_parameter_file_path, + bool init_structural_checks, + bool init_consistency_checks, + std::vector<Real>& init_cutoffs, + String init_label): + bond_tolerance(init_bond_tolerance), + angle_tolerance(init_angle_tolerance), + radius(init_radius), + sequence_separation(init_sequence_separation), + sel(init_sel), + structural_checks(init_structural_checks), + consistency_checks(init_consistency_checks), + cutoffs(init_cutoffs), + label(init_label) { + SetStereoChemicalParamsPath(init_parameter_file_path); +} + +void lDDTSettings::SetStereoChemicalParamsPath(const String& path) { + if (path == ""){ + try { + std::cerr << "Setting default stereochemical parameters file path." << std::endl; + parameter_file_path = ost::GetSharedDataPath() + "/stereo_chemical_props.txt"; + if (! structural_checks) { + std::cerr << "WARNING: Stereochemical parameters file is set but structural checks are disabled" << std::endl; + } + } catch (std::runtime_error& e) { + std::cerr << "WARNING: " << e.what() << std::endl; + std::cerr << "WARNING: Parameter file setting failed - structural check will be disabled" << std::endl; + structural_checks = false; + parameter_file_path = ""; + } + } else if (! boost::filesystem::exists(path)) { + std::cerr << "WARNING: Parameter file does not exist - structural check will be disabled" << std::endl; + structural_checks = false; + parameter_file_path = ""; + } else { + parameter_file_path = path; + structural_checks = true; + } +} + +std::string lDDTSettings::ToString() { + std::ostringstream rep; + if (structural_checks) { + rep << "Stereo-chemical and steric clash checks: On \n"; + } else { + rep << "Stereo-chemical and steric clash checks: Off \n"; + } + + rep << "Inclusion Radius: " << radius << "\n"; + rep << "Sequence separation: " << sequence_separation << "\n"; + rep << "Cutoffs: " << vector_to_string(cutoffs) << "\n"; + + if (structural_checks) { + rep << "Parameter filename: " + parameter_file_path + "\n"; + rep << "Tolerance in stddevs for bonds: " << bond_tolerance << "\n"; + rep << "Tolerance in stddevs for angles: " << angle_tolerance << "\n"; + } + rep << "Residue properties label: " << label << "\n"; + + return rep.str(); +} + +void lDDTSettings::PrintParameters() { + std::cout << ToString(); +} + +// Default constructor is neccessary for boost exposure +lDDTLocalScore::lDDTLocalScore(): cname(""), + rname(""), + rnum(-1), + is_assessed(""), + quality_problems(""), + local_lddt(-1.0), + conserved_dist(-1), + total_dist(-1) {} + +lDDTLocalScore::lDDTLocalScore(String init_cname, + String init_rname, + int init_rnum, + String init_is_assessed, + String init_quality_problems, + Real init_local_lddt, + int init_conserved_dist, + int init_total_dist): + cname(init_cname), + rname(init_rname), + rnum(init_rnum), + is_assessed(init_is_assessed), + quality_problems(init_quality_problems), + local_lddt(init_local_lddt), + conserved_dist(init_conserved_dist), + total_dist(init_total_dist) {} + +String lDDTLocalScore::ToString(bool structural_checks) const { + std::stringstream stkeylddt; + std::stringstream outstr; + stkeylddt << "(" << conserved_dist << "/" << total_dist << ")"; + String dist_string = stkeylddt.str(); + if (structural_checks) { + outstr << cname << "\t" << rname << "\t" << rnum << '\t' << is_assessed << '\t' << quality_problems << '\t' << local_lddt << "\t" << dist_string; + } else { + outstr << cname << "\t" << rname << "\t" << rnum << '\t' << is_assessed << '\t' << local_lddt << "\t" << dist_string; + } + return outstr.str(); +} + +String lDDTLocalScore::GetHeader(bool structural_checks, int cutoffs_length) { + std::stringstream outstr; + if (structural_checks) { + outstr << "Chain\tResName\tResNum\tAsses.\tQ.Prob.\tScore\t(Conserved/Total, over " << cutoffs_length << " thresholds)"; + } else { + outstr << "Chain\tResName\tResNum\tAsses.\tScore\t(Conserved/Total, over " << cutoffs_length << " thresholds)"; + } + return outstr.str(); +} GlobalRDMap CreateDistanceList(const EntityView& ref,Real max_dist) { @@ -473,6 +662,41 @@ Real LocalDistDiffTest(const EntityView& mdl, const EntityView& target, Real cut return static_cast<Real>(total_ov.first)/(static_cast<Real>(total_ov.second) ? static_cast<Real>(total_ov.second) : 1); } +Real LocalDistDiffTest(const EntityView& v, + std::vector<EntityView>& ref_list, + const GlobalRDMap& glob_dist_list, + lDDTSettings& settings) { + for (std::vector<EntityView>::const_iterator ref_list_it = ref_list.begin(); + ref_list_it != ref_list.end(); ++ref_list_it) { + bool cons_check = ResidueNamesMatch(v,*ref_list_it,settings.consistency_checks); + if (cons_check==false) { + if (settings.consistency_checks==true) { + throw std::runtime_error("Residue names in model and in reference structure(s) are inconsistent."); + } else { + LOG_WARNING("Residue names in model and in reference structure(s) are inconsistent."); + } + } + } + + std::pair<int,int> cov = ComputeCoverage(v,glob_dist_list); + if (cov.second == -1) { + std::cout << "Coverage: 0 (0 out of 0 residues)" << std::endl; + } else { + std::cout << "Coverage: " << (float(cov.first)/float(cov.second)) << " (" << cov.first << " out of " << cov.second << " residues)" << std::endl; + } + + if (cov.first==0) { + std::cout << "Global LDDT score: 0.0" << std::endl; + return 0.0; + } + + std::pair<int,int> total_ov=alg::LocalDistDiffTest(v, glob_dist_list, settings.cutoffs, settings.sequence_separation, settings.label); + Real lddt = static_cast<Real>(total_ov.first)/(static_cast<Real>(total_ov.second) ? static_cast<Real>(total_ov.second) : 1); + std::cout << "Global LDDT score: " << std::setprecision(4) << lddt << std::endl; + std::cout << "(" << std::fixed << total_ov.first << " conserved distances out of " << total_ov.second + << " checked, over " << settings.cutoffs.size() << " thresholds)" << std::endl; + return lddt; +} Real LocalDistDiffTest(const ost::seq::AlignmentHandle& aln, Real cutoff, Real max_dist, int ref_index, int mdl_index) @@ -536,6 +760,160 @@ Real LDDTHA(EntityView& v, const GlobalRDMap& global_dist_list, int sequence_sep return static_cast<Real>(total_ov.first)/(static_cast<Real>(total_ov.second) ? static_cast<Real>(total_ov.second) : 1); } + +void CleanlDDTReferences(std::vector<EntityView>& ref_list){ + for (unsigned int i=0;i<ref_list.size();i++) { + if (ref_list[0].GetChainList()[0].GetName()!=ref_list[i].GetChainList()[0].GetName()) { + std::cout << "ERROR: First chains in the reference structures have different names" << std::endl; + exit(-1); + } + ref_list[i] = ref_list[i].GetChainList()[0].Select("peptide=true"); + } +} + +GlobalRDMap PreparelDDTGlobalRDMap(const std::vector<EntityView>& ref_list, + lDDTSettings& settings){ + GlobalRDMap glob_dist_list; + if (ref_list.size()==1) { + std::cout << "Multi-reference mode: Off" << std::endl; + glob_dist_list = CreateDistanceList(ref_list[0], settings.radius); + } else { + std::cout << "Multi-reference mode: On" << std::endl; + glob_dist_list = CreateDistanceListFromMultipleReferences(ref_list, + settings.cutoffs, + settings.sequence_separation, + settings.radius); + } + + return glob_dist_list; +} + +void CheckStructure(EntityView& ent, + StereoChemicalParams& bond_table, + StereoChemicalParams& angle_table, + ClashingDistances& nonbonded_table, + Real bond_tolerance, + Real angle_tolerance){ + // performs structural checks and filters the structure + StereoChemistryInfo stereo_chemistry_info; + try { + std::pair<EntityView,StereoChemistryInfo> csc_result = alg::CheckStereoChemistry(ent,bond_table,angle_table,bond_tolerance,angle_tolerance); + ent = csc_result.first; + stereo_chemistry_info = csc_result.second; + } catch (std::exception& e) { + std::cout << "An error occurred during the structure quality checks, stage 1:" << std::endl; + std::cout << e.what() << std::endl; + exit(-1); + } + std::cout << "Average Z-Score for bond lengths: " << std::fixed << std::setprecision(5) << stereo_chemistry_info.GetAvgZscoreBonds() << std::endl; + std::cout << "Bonds outside of tolerance range: " << stereo_chemistry_info.GetBadBondCount() << " out of " << stereo_chemistry_info.GetBondCount() << std::endl; + std::cout << "Bond\tAvg Length\tAvg zscore\tNum Bonds" << std::endl; + std::map<String,BondLengthInfo> avg_bond_length_info = stereo_chemistry_info.GetAvgBondLengthInfo(); + for (std::map<String,BondLengthInfo>::const_iterator abli_it=avg_bond_length_info.begin();abli_it!=avg_bond_length_info.end();++abli_it) { + String key = (*abli_it).first; + BondLengthInfo bond_length_info = (*abli_it).second; + std::cout << key << "\t" << std::fixed << std::setprecision(5) << std::left << std::setw(10) << + bond_length_info.GetAvgLength() << "\t" << std::left << std::setw(10) << bond_length_info.GetAvgZscore() << "\t" << bond_length_info.GetCount() << std::endl; + } + std::cout << "Average Z-Score angle widths: " << std::fixed << std::setprecision(5) << stereo_chemistry_info.GetAvgZscoreAngles() << std::endl; + std::cout << "Angles outside of tolerance range: " << stereo_chemistry_info.GetBadAngleCount() << " out of " << stereo_chemistry_info.GetAngleCount() << std::endl; + ClashingInfo clash_info; + try { + std::pair<EntityView,ClashingInfo> fc_result = alg::FilterClashes(ent,nonbonded_table); + ent = fc_result.first; + clash_info = fc_result.second; + } catch (std::exception& e) { + std::stringstream serr; + serr << "An error occurred during the structure quality checks, stage 2: " << e.what(); + throw ost::Error(serr.str()); + } + std::cout << clash_info.GetClashCount() << " non-bonded short-range distances shorter than tolerance distance" << std::endl; + std::cout << "Distances shorter than tolerance are on average shorter by: " << std::fixed << std::setprecision(5) << clash_info.GetAverageOffset() << std::endl; +} + +std::vector<lDDTLocalScore> GetlDDTPerResidueStats(EntityHandle& model, GlobalRDMap& glob_dist_list, lDDTSettings& settings){ + std::vector<lDDTLocalScore> scores; + EntityView outv = model.GetChainList()[0].Select("peptide=true"); + for (ChainViewList::const_iterator ci = outv.GetChainList().begin(), + ce = outv.GetChainList().end(); ci != ce; ++ci) { + for (ResidueViewList::const_iterator rit = ci->GetResidueList().begin(), + re = ci->GetResidueList().end(); rit != re; ++rit) { + + ResidueView ritv=*rit; + ResNum rnum = ritv.GetNumber(); + bool assessed = false; + String assessed_string="No"; + String quality_problems_string; + if (settings.structural_checks) { + quality_problems_string="No"; + } else { + quality_problems_string="NA"; + } + Real lddt_local = -1; + String lddt_local_string="-"; + int conserved_dist = -1; + int total_dist = -1; + if (IsResnumInGlobalRDMap(rnum,glob_dist_list)) { + assessed = true; + assessed_string="Yes"; + } + if (ritv.HasProp("stereo_chemical_violation_sidechain") || + ritv.HasProp("steric_clash_sidechain")) { + quality_problems_string="Yes"; + } + if (ritv.HasProp("stereo_chemical_violation_backbone") || + ritv.HasProp("steric_clash_backbone")) { + quality_problems_string="Yes+"; + } + + if (assessed==true) { + if (ritv.HasProp(settings.label)) { + lddt_local=ritv.GetFloatProp(settings.label); + std::stringstream stkeylddt; + stkeylddt << std::fixed << std::setprecision(4) << lddt_local; + lddt_local_string=stkeylddt.str(); + conserved_dist=ritv.GetIntProp(settings.label+"_conserved"); + total_dist=ritv.GetIntProp(settings.label+"_total"); + } else { + std::cout << settings.label << std::endl; + lddt_local = 0; + lddt_local_string="0.0000"; + conserved_dist = 0; + total_dist = 0; + } + } + // std::tuple<String, String, int, String, String, Real, int, int> + lDDTLocalScore row(ritv.GetChain().GetName(), + ritv.GetName(), + ritv.GetNumber().GetNum(), + assessed_string, + quality_problems_string, + lddt_local, + conserved_dist, + total_dist); + scores.push_back(row); + } + } + + return scores; +} + +void PrintlDDTPerResidueStats(std::vector<lDDTLocalScore>& scores, lDDTSettings& settings){ + if (settings.structural_checks) { + std::cout << "Local LDDT Scores:" << std::endl; + std::cout << "(A 'Yes' in the 'Quality Problems' column stands for problems" << std::endl; + std::cout << "in the side-chain of a residue, while a 'Yes+' for problems" << std::endl; + std::cout << "in the backbone)" << std::endl; + } else { + std::cout << "Local LDDT Scores:" << std::endl; + } + std::cout << lDDTLocalScore::GetHeader(settings.structural_checks, settings.cutoffs.size()) << std::endl; + for (std::vector<lDDTLocalScore>::const_iterator sit = scores.begin(); sit != scores.end(); ++sit) { + std::cout << sit->ToString(settings.structural_checks) << std::endl; + } + std::cout << std::endl; +} + // debugging code /* Real OldStyleLDDTHA(EntityView& v, const GlobalRDMap& global_dist_list) diff --git a/modules/mol/alg/src/local_dist_diff_test.hh b/modules/mol/alg/src/local_dist_diff_test.hh index 9dd2f230617ac1b0b795ec6a7e467a80c1485655..21aee5a1d7a035ae21450b05c84212bc103a1b9d 100644 --- a/modules/mol/alg/src/local_dist_diff_test.hh +++ b/modules/mol/alg/src/local_dist_diff_test.hh @@ -22,8 +22,67 @@ #include <ost/mol/alg/module_config.hh> #include <ost/seq/alignment_handle.hh> #include <ost/mol/alg/distance_test_common.hh> +#include <ost/mol/alg/filter_clashes.hh> namespace ost { namespace mol { namespace alg { + +struct lDDTSettings { + Real bond_tolerance; + Real angle_tolerance; + Real radius; + int sequence_separation; + String sel; + String parameter_file_path; + bool structural_checks; + bool consistency_checks; + std::vector<Real> cutoffs; + String label; + + lDDTSettings(); + lDDTSettings(Real init_bond_tolerance, + Real init_angle_tolerance, + Real init_radius, + int init_sequence_separation, + String init_sel, + String init_parameter_file_path, + bool init_structural_checks, + bool init_consistency_checks, + std::vector<Real>& init_cutoffs, + String init_label); + void SetStereoChemicalParamsPath(const String& path=""); + void PrintParameters(); + std::string ToString(); +}; + +struct lDDTLocalScore { + String cname; + String rname; + int rnum; + String is_assessed; + String quality_problems; + Real local_lddt; + int conserved_dist; + int total_dist; + + lDDTLocalScore(); + + lDDTLocalScore(String init_cname, + String init_rname, + int init_rnum, + String init_is_assessed, + String init_quality_problems, + Real init_local_lddt, + int init_conserved_dist, + int init_total_dist); + + String ToString(bool structural_checks) const; + + static String GetHeader(bool structural_checks, int cutoffs_length); +}; + +std::pair<int,int> DLLEXPORT_OST_MOL_ALG ComputeCoverage(const EntityView& v,const GlobalRDMap& glob_dist_list); + +bool DLLEXPORT_OST_MOL_ALG IsResnumInGlobalRDMap(const ResNum& resnum, const GlobalRDMap& glob_dist_list); /// \brief Calculates number of distances conserved in a model, given a list of distances to check and a model /// @@ -73,6 +132,12 @@ Real DLLEXPORT_OST_MOL_ALG LocalDistDiffTest(const EntityView& mdl, Real cutoff, Real max_dist, const String& local_ldt_property_string=""); +/// TODO document me +Real DLLEXPORT_OST_MOL_ALG LocalDistDiffTest(const EntityView& v, + std::vector<EntityView>& ref_list, + const GlobalRDMap& glob_dist_list, + lDDTSettings& settings); + /// \brief Calculates the Local Distance Difference Test score for a given model starting from an alignment between a reference structure and the model. /// /// Calculates the Local Distance Difference Test score given an alignment between a model and a taget structure. @@ -135,6 +200,27 @@ void DLLEXPORT_OST_MOL_ALG PrintResidueRDMap(const ResidueRDMap& res_dist_list); // circular dependencies bool DLLEXPORT_OST_MOL_ALG IsStandardResidue(String rn); +void DLLEXPORT_OST_MOL_ALG CleanlDDTReferences(std::vector<EntityView>& ref_list); + +// Prepare GlobalRDMap from reference list +GlobalRDMap DLLEXPORT_OST_MOL_ALG PreparelDDTGlobalRDMap( + const std::vector<EntityView>& ref_list, + lDDTSettings& settings); + +void DLLEXPORT_OST_MOL_ALG CheckStructure(EntityView& ent, + StereoChemicalParams& bond_table, + StereoChemicalParams& angle_table, + ClashingDistances& nonbonded_table, + Real bond_tolerance, + Real angle_tolerance); + +std::vector<lDDTLocalScore> DLLEXPORT_OST_MOL_ALG GetlDDTPerResidueStats(EntityHandle& model, + GlobalRDMap& glob_dist_list, + lDDTSettings& settings); + +void DLLEXPORT_OST_MOL_ALG PrintlDDTPerResidueStats(std::vector<lDDTLocalScore>& scores, + lDDTSettings& settings); + }}} #endif