diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 24961624505b023beaf1aef720920578dc9558b2..a74f28d4d27440806392edbeba1ee36255b67351 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,5 +1,11 @@ Changes in Release <RELEASE NUMBER> -------------------------------------------------------------------------------- + + * nonstandard C++ module was moved from ost.conop to ost.mol.alg. This implies + change in the API. Mapping functions CopyResidue, CopyConserved and + CopyNonConserved that were previousely imported from ost.conop are now + to be imported from ost.mol.alg. + * Added Molck API to the ost.mol.alg module. * Removed habit of changing secondary structure of entities when loading from mmCIF PDB files. Before, OST would turn secondary structure 'EEH' into 'ECH' to make it look nicer in DNG. Now, 'EEH' stays 'EEH'. diff --git a/modules/conop/doc/aminoacid.rst b/modules/conop/doc/aminoacid.rst index de15eb98a91c3fe53a9008c20d314d4aa8954edc..a33104ca6b91085b63a3a0df02b100ebd1d9c2d7 100644 --- a/modules/conop/doc/aminoacid.rst +++ b/modules/conop/doc/aminoacid.rst @@ -79,63 +79,4 @@ Converter functions .. method:: Empty() - Whether the set is empty, i.e. doesn't contain any amino acids. - - -Mapping functions --------------------------------------------------------------------------------- - -The following functions help to convert one residue into another by reusing as -much as possible from the present atoms. They are mainly meant to map from -standard amino acid to other standard amino acids or from modified amino acids -to standard amino acids. - -.. function:: CopyResidue(src_res, dst_res, editor) - - Copies the atoms of ``src_res`` to ``dst_res`` using the residue names - as guide to decide which of the atoms should be copied. If ``src_res`` and - ``dst_res`` have the same name, or ``src_res`` is a modified version of - ``dst_res`` (i.e. have the same single letter code), CopyConserved will be - called, otherwise CopyNonConserved will be called. - - :param src_res: The source residue - :type src_res: :class:`~ost.mol.ResidueHandle` - :param dst_res: The destination residue - :type dst_res: :class:`~ost.mol.ResidueHandle` - - :returns: true if the residue could be copied, false if not. - -.. function:: CopyConserved(src_res, dst_res, editor) - - Copies the atoms of ``src_res`` to ``dst_res`` assuming that the parent - amino acid of ``src_res`` (or ``src_res`` itself) are identical to ``dst_res``. - - If ``src_res`` and ``dst_res`` are identical, all heavy atoms are copied - to ``dst_res``. If ``src_res`` is a modified version of ``dst_res`` and the - modification is a pure addition (e.g. the phosphate group of phosphoserine), - the modification is stripped off and all other heavy atoms are copied to - ``dst_res``. If the modification is not a pure addition, only the backbone - heavy atoms are copied to ``dst_res``. - - Additionally, the selenium atom of ``MSE`` is converted to sulphur. - - :param src_res: The source residue - :type src_res: :class:`~ost.mol.ResidueHandle` - :param dst_res: The destination residue - :type dst_res: :class:`~ost.mol.ResidueHandle` - - :returns: a tuple of bools stating whether the residue could be copied and - whether the Cbeta atom was inserted into the ``dst_res``. - -.. function:: CopyNonConserved(src_res, dst_res, editor) - - Copies the heavy backbone atoms and Cbeta (except for ``GLY``) of ``src_res`` - to ``dst_res``. - - :param src_res: The source residue - :type src_res: :class:`~ost.mol.ResidueHandle` - :param dst_res: The destination residue - :type dst_res: :class:`~ost.mol.ResidueHandle` - - :returns: a tuple of bools stating whether the residue could be copied and - whether the Cbeta atom was inserted into the ``dst_res``. + Whether the set is empty, i.e. doesn't contain any amino acids. \ No newline at end of file diff --git a/modules/conop/pymod/CMakeLists.txt b/modules/conop/pymod/CMakeLists.txt index 9c09906222fb97e1c935590ec22e81b536c94891..23dff26a0022fa9c312c9bfd1acda033d1c304b7 100644 --- a/modules/conop/pymod/CMakeLists.txt +++ b/modules/conop/pymod/CMakeLists.txt @@ -8,7 +8,6 @@ set(OST_CONOP_PYMOD_SOURCES export_conop.cc export_diag.cc export_rule_based.cc - export_non_standard.cc export_ring_finder.cc ) diff --git a/modules/conop/pymod/wrap_conop.cc b/modules/conop/pymod/wrap_conop.cc index 119c86c325f3ce6fc48cf88298838f441cb04a36..04bcfce147b3ebf986cee454c2594e242ade7059 100644 --- a/modules/conop/pymod/wrap_conop.cc +++ b/modules/conop/pymod/wrap_conop.cc @@ -25,7 +25,6 @@ void export_Sanitizer(); void export_Conop(); void export_RingFinder(); void export_AminoAcids(); -void export_NonStandard(); void export_processor(); void export_rule_based(); void export_heuristic(); @@ -41,6 +40,5 @@ BOOST_PYTHON_MODULE(_ost_conop) export_Compound(); export_RingFinder(); export_AminoAcids(); - export_NonStandard(); export_diag(); } diff --git a/modules/conop/src/CMakeLists.txt b/modules/conop/src/CMakeLists.txt index 0185618ea4a9ebb02d917393670ce3f401ed494c..c2f1f4c6d0148c62461be5852bd33872ad39f903 100644 --- a/modules/conop/src/CMakeLists.txt +++ b/modules/conop/src/CMakeLists.txt @@ -9,7 +9,6 @@ compound.hh compound_lib.hh module_config.hh rule_based.hh -nonstandard.hh minimal_compound_lib.hh compound_lib_base.hh ring_finder.hh @@ -27,12 +26,11 @@ rule_based.cc model_check.cc compound.cc compound_lib.cc -nonstandard.cc ring_finder.cc ) module(NAME conop SOURCES ${OST_CONOP_SOURCES} - HEADERS ${OST_CONOP_HEADERS} DEPENDS_ON ost_mol ost_mol_alg ost_geom ost_db) + HEADERS ${OST_CONOP_HEADERS} DEPENDS_ON ost_mol ost_geom ost_db) if (WIN32) diff --git a/modules/conop/tests/CMakeLists.txt b/modules/conop/tests/CMakeLists.txt index 96400650977655f3efed4108de874bf9e1168bee..44fd58acfc7036926e3e5b31ebe51db7129291c8 100644 --- a/modules/conop/tests/CMakeLists.txt +++ b/modules/conop/tests/CMakeLists.txt @@ -9,8 +9,7 @@ set(OST_CONOP_UNIT_TESTS if (COMPOUND_LIB) list(APPEND OST_CONOP_UNIT_TESTS test_compound.py - test_cleanup.py - test_nonstandard.py) + test_cleanup.py) endif() ost_unittest(MODULE conop diff --git a/modules/mol/alg/doc/molalg.rst b/modules/mol/alg/doc/molalg.rst index 17c38aa50e5e8ba7e606e0faeac4f9c73943cc76..d9ba6a6a60588da14ff02de9793aa46d899bd69a 100644 --- a/modules/mol/alg/doc/molalg.rst +++ b/modules/mol/alg/doc/molalg.rst @@ -1283,3 +1283,240 @@ used to skip frames in the analysis. .. automodule:: ost.mol.alg.structure_analysis :members: + +.. _mapping-functions: + +Mapping functions +-------------------------------------------------------------------------------- + +The following functions help to convert one residue into another by reusing as +much as possible from the present atoms. They are mainly meant to map from +standard amino acid to other standard amino acids or from modified amino acids +to standard amino acids. + +.. function:: CopyResidue(src_res, dst_res, editor) + + Copies the atoms of ``src_res`` to ``dst_res`` using the residue names + as guide to decide which of the atoms should be copied. If ``src_res`` and + ``dst_res`` have the same name, or ``src_res`` is a modified version of + ``dst_res`` (i.e. have the same single letter code), CopyConserved will be + called, otherwise CopyNonConserved will be called. + + :param src_res: The source residue + :type src_res: :class:`~ost.mol.ResidueHandle` + :param dst_res: The destination residue + :type dst_res: :class:`~ost.mol.ResidueHandle` + + :returns: true if the residue could be copied, false if not. + +.. function:: CopyConserved(src_res, dst_res, editor) + + Copies the atoms of ``src_res`` to ``dst_res`` assuming that the parent + amino acid of ``src_res`` (or ``src_res`` itself) are identical to ``dst_res``. + + If ``src_res`` and ``dst_res`` are identical, all heavy atoms are copied + to ``dst_res``. If ``src_res`` is a modified version of ``dst_res`` and the + modification is a pure addition (e.g. the phosphate group of phosphoserine), + the modification is stripped off and all other heavy atoms are copied to + ``dst_res``. If the modification is not a pure addition, only the backbone + heavy atoms are copied to ``dst_res``. + + Additionally, the selenium atom of ``MSE`` is converted to sulphur. + + :param src_res: The source residue + :type src_res: :class:`~ost.mol.ResidueHandle` + :param dst_res: The destination residue + :type dst_res: :class:`~ost.mol.ResidueHandle` + + :returns: a tuple of bools stating whether the residue could be copied and + whether the Cbeta atom was inserted into the ``dst_res``. + +.. function:: CopyNonConserved(src_res, dst_res, editor) + + Copies the heavy backbone atoms and Cbeta (except for ``GLY``) of ``src_res`` + to ``dst_res``. + + :param src_res: The source residue + :type src_res: :class:`~ost.mol.ResidueHandle` + :param dst_res: The destination residue + :type dst_res: :class:`~ost.mol.ResidueHandle` + + :returns: a tuple of bools stating whether the residue could be copied and + whether the Cbeta atom was inserted into the ``dst_res``. + + +Molecular Checker (Molck) +-------------------------------------------------------------------------------- + +Programmatic usage +################## + +Molecular Checker (Molck) could be called directly from the code using Molck +function: + +.. code-block:: python + + #! /bin/env python + + """Run Molck with Python API. + + + This is an exemplary procedure on how to run Molck using Python API which is + equivalent to the command line: + + molck <PDB PATH> --rm=hyd,oxt,nonstd,unk \ + --fix-ele --out=<OUTPUT PATH> \ + --complib=<PATH TO compounds.chemlib> + """ + + from ost.io import LoadPDB, SavePDB + from ost.mol.alg import MolckSettings, Molck + + from ost.conop import CompoundLib + + + pdbid = "<PDB PATH>" + lib = CompoundLib.Load("<PATH TO compounds.chemlib>") + + # Using Molck function + ent = LoadPDB(pdbid) + ms = MolckSettings(rm_unk_atoms=True, + rm_non_std=True, + rm_hyd_atoms=True, + rm_oxt_atoms=True, + rm_zero_occ_atoms=False, + colored=False, + map_nonstd_res=False, + assign_elem=True) + Molck(ent, lib, ms) + SavePDB(ent, "<OUTPUT PATH>", profile="SLOPPY") + +It can also be split into subsequent commands for greater controll: + +.. code-block:: python + + #! /bin/env python + + """Run Molck with Python API. + + + This is an exemplary procedure on how to run Molck using Python API which is + equivalent to the command line: + + molck <PDB PATH> --rm=hyd,oxt,nonstd,unk \ + --fix-ele --out=<OUTPUT PATH> \ + --complib=<PATH TO compounds.chemlib> + """ + + from ost.io import LoadPDB, SavePDB + from ost.mol.alg import (RemoveAtoms, MapNonStandardResidues, + CleanUpElementColumn) + from ost.conop import CompoundLib + + + pdbid = "<PDB PATH>" + lib = CompoundLib.Load("<PATH TO compounds.chemlib>") + map_nonstd = False + + # Using function chain + ent = LoadPDB(pdbid) + if map_nonstd: + MapNonStandardResidues(lib=lib, ent=ent) + + RemoveAtoms(lib=lib, + ent=ent, + rm_unk_atoms=True, + rm_non_std=True, + rm_hyd_atoms=True, + rm_oxt_atoms=True, + rm_zero_occ_atoms=False, + colored=False) + + CleanUpElementColumn(lib=lib, ent=ent) + SavePDB(ent, "<OUTPUT PATH>", profile="SLOPPY") + +API +### + + +.. class:: MolckSettings + + Stores settings used for Molecular Checker. + + .. method:: __init__(rm_unk_atoms=False,rm_non_std=False,rm_hyd_atoms=True,rm_oxt_atoms=False, rm_zero_occ_atoms=False,colored=False,map_nonstd_res=True, assign_elem=True) + + Initializes MolckSettings. + + :param rm_unk_atoms: Remove unknown and atoms not following the nomenclature + :type rm_unk_atoms: :class:`bool` + :param rm_non_std: Remove all residues not one of the 20 standard amino acids + :type rm_non_std: :class:`bool` + :param rm_hyd_atoms: Remove hydrogen atoms + :type rm_hyd_atoms: :class:`bool` + :param rm_oxt_atoms: Remove terminal oxygens + :type rm_oxt_atoms: :class:`bool` + :param rm_zero_occ_atoms: Remove atoms with zero occupancy + :type rm_zero_occ_atoms: :class:`bool` + :param colored: Whether output should be colored + :type colored: :class:`bool` + :param map_nonstd_res: Maps modified residues back to the parent amino acid, for example + MSE -> MET, SEP -> SER + :type map_nonstd_res: :class:`bool` + :param assign_elem: Clean up element column + :type assign_elem: :class:`bool` + + .. method:: ToString() + + String representation of the MolckSettings. + + +.. function:: Molck(ent, lib, settings) + + Runs Molck on provided entity. + + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityHandle` + :param lib: Compound library + :type lib: :class:`~ost.conop.CompoundLib` + :param settings: Molck settings + :type settings: :class:`MolckSettings` + + +.. function:: MapNonStandardResidues(ent, lib) + + Maps modified residues back to the parent amino acid, for example MSE -> MET. + + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityHandle` + :param lib: Compound library + :type lib: :class:`~ost.conop.CompoundLib` + +.. function:: RemoveAtoms(ent,lib,rm_unk_atoms=False,rm_non_std=False,rm_hyd_atoms=True,rm_oxt_atoms=False,rm_zero_occ_atoms=False,colored=False) + + Removes atoms and residues according to some criteria. + + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityHandle` + :param lib: Compound library + :type lib: :class:`~ost.conop.CompoundLib` + :param rm_unk_atoms: Remove unknown and atoms not following the nomenclature + :type rm_unk_atoms: :class:`bool` + :param rm_non_std: Remove all residues not one of the 20 standard amino acids + :type rm_non_std: :class:`bool` + :param rm_hyd_atoms: Remove hydrogen atoms + :type rm_hyd_atoms: :class:`bool` + :param rm_oxt_atoms: Remove terminal oxygens + :type rm_oxt_atoms: :class:`bool` + :param rm_zero_occ_atoms: Remove atoms with zero occupancy + :type rm_zero_occ_atoms: :class:`bool` + :param colored: Whether output should be colored + :type colored: :class:`bool` + +.. function:: CleanUpElementColumn(ent, lib) + + Clean up element column. + + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityHandle` + :param lib: Compound library + :type lib: :class:`~ost.conop.CompoundLib` \ No newline at end of file diff --git a/modules/mol/alg/doc/molck.rst b/modules/mol/alg/doc/molck.rst new file mode 100644 index 0000000000000000000000000000000000000000..8dd1d1f39ed9ae3ebb1c8bba5126701c58f85476 --- /dev/null +++ b/modules/mol/alg/doc/molck.rst @@ -0,0 +1,59 @@ +========================= +Molecular Checker (Molck) +========================= + +-------------------------------------- +Where can I find the Molck executable? +-------------------------------------- + +The Molck executable can be found at <YOUR-OST-STAGE-DIR>/bin + +----------- +Basic Usage +----------- + +To check one PDB file (struc1.pdb) with Molck, use the following command: + +.. code-block:: bash + + molck --complib <PATH TO COMPOUND LIB> struc1.pdb + +The checked and cleaned file will be saved by default ad struc1-molck.pdb. + +Similarly it is possible to check a list of PDB files: + +.. code-block:: bash + + molck --complib <PATH TO COMPOUND LIB> struc1.pdb struc2.pdb struc3.pdb + + +----------- +All Options +----------- + +The molck executable supports several other command line options, +please find them following: + +.. code-block:: bash + + usage: molck [options] file1.pdb [file2.pdb [...]] + options + --complib=path location of the compound library file. If not provided, the + following locations are searched in this order: + 1. Working directory, + 2. OpenStructure standard library location (if the + executable is part of a standard OpenStructure installation) + --rm=<a>,<b> remove atoms and residues matching some criteria: + - zeroocc - Remove atoms with zero occupancy + - hyd - Remove hydrogen atoms + - oxt - Remove terminal oxygens + - nonstd - Remove all residues not one of the 20 standard amino acids + - unk - Remove unknown and atoms not following the nomenclature + --fix-ele clean up element column + --stdout write cleaned file(s) to stdout + --out=filename write cleaned file(s) to disk. % characters in the filename are + replaced with the basename of the input file without extension. + Default: %-molcked.pdb + --color=auto|on|off whether output should be colored + --map-nonstd maps modified residues back to the parent amino acid, for example + MSE -> MET, SEP -> SER. \ No newline at end of file diff --git a/modules/mol/alg/pymod/CMakeLists.txt b/modules/mol/alg/pymod/CMakeLists.txt index 36d1ad97dab69f62304e100f158353c134d12b68..f4d1fdb71b68519b303d5df13dea34a2d529c230 100644 --- a/modules/mol/alg/pymod/CMakeLists.txt +++ b/modules/mol/alg/pymod/CMakeLists.txt @@ -7,6 +7,8 @@ set(OST_MOL_ALG_PYMOD_SOURCES export_contact_overlap.cc export_accessibility.cc export_sec_structure.cc + export_non_standard.cc + export_molck.cc export_membrane.cc ) diff --git a/modules/mol/alg/pymod/export_molck.cc b/modules/mol/alg/pymod/export_molck.cc new file mode 100644 index 0000000000000000000000000000000000000000..e5e27dac593ebaf321c1b2fd5558ff53f209f543 --- /dev/null +++ b/modules/mol/alg/pymod/export_molck.cc @@ -0,0 +1,137 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#include <stdexcept> +#include <boost/python.hpp> +#include <boost/python/raw_function.hpp> + +using namespace boost::python; + +#include <ost/mol/alg/molck.hh> + +using namespace ost::mol::alg; + +namespace { + +object MolckSettingsInitWrapper(tuple args, dict kwargs){ + + object self = args[0]; + args = tuple(args.slice(1,_)); + + bool rm_unk_atoms = false; + if(kwargs.contains("rm_unk_atoms")){ + rm_unk_atoms = extract<bool>(kwargs["rm_unk_atoms"]); + kwargs["rm_unk_atoms"].del(); + } + + bool rm_non_std = false; + if(kwargs.contains("rm_non_std")){ + rm_non_std = extract<bool>(kwargs["rm_non_std"]); + kwargs["rm_non_std"].del(); + } + + bool rm_hyd_atoms = true; + if(kwargs.contains("rm_hyd_atoms")){ + rm_hyd_atoms = extract<bool>(kwargs["rm_hyd_atoms"]); + kwargs["rm_hyd_atoms"].del(); + } + + bool rm_oxt_atoms = false; + if(kwargs.contains("rm_oxt_atoms")){ + rm_oxt_atoms = extract<bool>(kwargs["rm_oxt_atoms"]); + kwargs["rm_oxt_atoms"].del(); + } + + bool rm_zero_occ_atoms = false; + if(kwargs.contains("rm_zero_occ_atoms")){ + rm_zero_occ_atoms = extract<bool>(kwargs["rm_zero_occ_atoms"]); + kwargs["rm_zero_occ_atoms"].del(); + } + + bool colored = false; + if(kwargs.contains("colored")){ + colored = extract<bool>(kwargs["colored"]); + kwargs["colored"].del(); + } + + bool map_nonstd_res = true; + if(kwargs.contains("map_nonstd_res")){ + map_nonstd_res = extract<bool>(kwargs["map_nonstd_res"]); + kwargs["map_nonstd_res"].del(); + } + + bool assign_elem = true; + if(kwargs.contains("assign_elem")){ + assign_elem = extract<bool>(kwargs["assign_elem"]); + kwargs["assign_elem"].del(); + } + + if(len(kwargs) > 0){ + std::stringstream ss; + ss << "Invalid keywords observed when setting up MolckSettings! "; + ss << "Or did you pass the same keyword twice? "; + ss << "Valid keywords are: rm_unk_atoms, rm_non_std, rm_hyd_atoms, "; + ss << "rm_oxt_atoms, rm_zero_occ_atoms, colored, map_nonstd_res, "; + ss << "assign_elem!"; + throw std::invalid_argument(ss.str()); + } + + + return self.attr("__init__")(rm_unk_atoms, + rm_non_std, + rm_hyd_atoms, + rm_oxt_atoms, + rm_zero_occ_atoms, + colored, + map_nonstd_res, + assign_elem); +}} + +void export_Molck() +{ + class_<MolckSettings>("MolckSettings", no_init) + .def("__init__", raw_function(MolckSettingsInitWrapper)) + .def(init<bool , bool, bool, bool, bool, bool, bool, bool>()) + .def("ToString", &MolckSettings::ToString) + .def("__repr__", &MolckSettings::ToString) + .def("__str__", &MolckSettings::ToString) + .def_readwrite("rm_unk_atoms", &MolckSettings::rm_unk_atoms) + .def_readwrite("rm_non_std", &MolckSettings::rm_non_std) + .def_readwrite("rm_hyd_atoms", &MolckSettings::rm_hyd_atoms) + .def_readwrite("rm_oxt_atoms", &MolckSettings::rm_oxt_atoms) + .def_readwrite("rm_zero_occ_atoms", &MolckSettings::rm_zero_occ_atoms) + .def_readwrite("colored", &MolckSettings::colored) + .def_readwrite("map_nonstd_res", &MolckSettings::map_nonstd_res) + .def_readwrite("assign_elem", &MolckSettings::assign_elem); + + def("MapNonStandardResidues", &MapNonStandardResidues, (arg("ent"), + arg("lib"))); + + def("RemoveAtoms", &RemoveAtoms, (arg("ent"), + arg("lib"), + arg("rm_unk_atoms")=false, + arg("rm_non_std")=false, + arg("rm_hyd_atoms")=true, + arg("rm_oxt_atoms")=false, + arg("rm_zero_occ_atoms")=false, + arg("colored")=false)); + + def("CleanUpElementColumn", &CleanUpElementColumn, (arg("ent"), arg("lib"))); + + def("Molck", &Molck, (arg("ent"), arg("lib"), arg("settings"))); +} diff --git a/modules/conop/pymod/export_non_standard.cc b/modules/mol/alg/pymod/export_non_standard.cc similarity index 96% rename from modules/conop/pymod/export_non_standard.cc rename to modules/mol/alg/pymod/export_non_standard.cc index 50b1fd2d6e169e31ff5e192e03b19b98541bf89d..8d485b169ecbd290483daf5f0ab5ca3e06e8da80 100644 --- a/modules/conop/pymod/export_non_standard.cc +++ b/modules/mol/alg/pymod/export_non_standard.cc @@ -18,11 +18,11 @@ //------------------------------------------------------------------------------ #include <boost/python.hpp> #include <ost/mol/mol.hh> -#include <ost/conop/nonstandard.hh> +#include <ost/mol/alg/nonstandard.hh> using namespace boost::python; -using namespace ost::conop; +using namespace ost::mol::alg; using namespace ost::mol; object copy_conserved_handle(ResidueHandle src_res, ResidueHandle dst_res, diff --git a/modules/mol/alg/pymod/wrap_mol_alg.cc b/modules/mol/alg/pymod/wrap_mol_alg.cc index ec9d4439a930cac8a8fbf245b80db6e3dd951264..1642c2cafe124f4eb189d2ddb8d4d308458e0309 100644 --- a/modules/mol/alg/pymod/wrap_mol_alg.cc +++ b/modules/mol/alg/pymod/wrap_mol_alg.cc @@ -39,6 +39,8 @@ void export_svdSuperPose(); void export_TrajectoryAnalysis(); void export_StructureAnalysis(); void export_Clash(); +void export_NonStandard(); +void export_Molck(); void export_contact_overlap(); void export_accessibility(); void export_sec_struct(); @@ -111,6 +113,8 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) export_TrajectoryAnalysis(); export_StructureAnalysis(); export_Clash(); + export_NonStandard(); + export_Molck(); export_contact_overlap(); export_accessibility(); export_sec_struct(); diff --git a/modules/mol/alg/src/CMakeLists.txt b/modules/mol/alg/src/CMakeLists.txt index e7404f6494664599a2284e35841d206ea677693d..42c571d79444add71b47a8da9916a106a2e1caf1 100644 --- a/modules/mol/alg/src/CMakeLists.txt +++ b/modules/mol/alg/src/CMakeLists.txt @@ -20,6 +20,8 @@ set(OST_MOL_ALG_HEADERS similarity_matrix.hh accessibility.hh sec_struct.hh + nonstandard.hh + molck.hh find_membrane.hh ) @@ -44,6 +46,8 @@ set(OST_MOL_ALG_SOURCES similarity_matrix.cc accessibility.cc sec_struct.cc + nonstandard.cc + molck.cc find_membrane.cc ) @@ -60,7 +64,7 @@ if (ENABLE_IMG) entity_to_density.cc ) - set(MOL_ALG_DEPS ${MOL_ALG_DEPS} ost_img ost_img_alg ost_seq_alg) + set(MOL_ALG_DEPS ${MOL_ALG_DEPS} ost_img ost_img_alg ost_seq_alg ost_conop) endif() executable(NAME lddt SOURCES lddt.cc diff --git a/modules/mol/alg/src/molck.cc b/modules/mol/alg/src/molck.cc new file mode 100644 index 0000000000000000000000000000000000000000..e1c35083fca0a424e1b05a7eba5973e8d5e7ead3 --- /dev/null +++ b/modules/mol/alg/src/molck.cc @@ -0,0 +1,164 @@ +#include <ost/mol/xcs_editor.hh> +#include <ost/mol/alg/nonstandard.hh> +#include <ost/conop/model_check.hh> +#include <ost/conop/amino_acids.hh> +#include <ost/mol/alg/molck.hh> + +using namespace ost::conop; +using namespace ost::mol; + + +void ost::mol::alg::MapNonStandardResidues(EntityHandle& ent, CompoundLibPtr lib) { + // TODO: Maybe it is possible to make it in-place operation + EntityHandle new_ent=CreateEntity(); + ChainHandleList chains=ent.GetChainList(); + XCSEditor new_edi=new_ent.EditXCS(); + for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { + ChainHandle new_chain = new_edi.InsertChain(c->GetName()); + ResidueHandleList residues = c->GetResidueList(); + for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { + AminoAcid aa = ResidueToAminoAcid(*r); + if (aa!=XXX) { + ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); + AtomHandleList atoms = r->GetAtomList(); + for (AtomHandleList::const_iterator a=atoms.begin();a!=atoms.end();++a) { + new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); + } + continue; + } else { + CompoundPtr compound=lib->FindCompound(r->GetName(),Compound::PDB); + if (!compound || !compound->IsPeptideLinking() || compound->GetChemClass()==ChemClass::D_PEPTIDE_LINKING || + OneLetterCodeToAminoAcid(compound->GetOneLetterCode())==XXX) { + ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); + AtomHandleList atoms = r->GetAtomList(); + for (AtomHandleList::const_iterator a=atoms.begin();a!=atoms.end();++a) { + new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); + } + continue; + } + ResidueHandle dest_res = new_edi.AppendResidue(new_chain,OneLetterCodeToResidueName(compound->GetOneLetterCode()),r->GetNumber()); + ost::mol::alg::CopyResidue(*r,dest_res,new_edi,lib); + } + } + } + ent = new_ent; +} + +void ost::mol::alg::RemoveAtoms( + EntityHandle& ent, + CompoundLibPtr lib, + bool rm_unk_atoms, + bool rm_non_std, + bool rm_hyd_atoms, + bool rm_oxt_atoms, + bool rm_zero_occ_atoms, + bool colored /*=true*/){ + XCSEditor edi=ent.EditXCS(); + Diagnostics diags; + Checker checker(lib, ent, diags); + if (rm_zero_occ_atoms) { + std::cerr << "removing atoms with zero occupancy" << std::endl; + int zremoved=0; + AtomHandleList zero_atoms=checker.GetZeroOccupancy(); + for (AtomHandleList::const_iterator i=zero_atoms.begin(), e=zero_atoms.end(); i!=e; ++i) { + edi.DeleteAtom(*i); + zremoved++; + } + std::cerr << " --> removed " << zremoved << " hydrogen atoms" << std::endl; + } + + if (rm_hyd_atoms) { + std::cerr << "removing hydrogen atoms" << std::endl; + int hremoved=0; + AtomHandleList hyd_atoms=checker.GetHydrogens(); + for (AtomHandleList::const_iterator i=hyd_atoms.begin(), e=hyd_atoms.end(); i!=e; ++i) { + edi.DeleteAtom(*i); + hremoved++; + } + std::cerr << " --> removed " << hremoved << " hydrogen atoms" << std::endl; + } + + if (rm_oxt_atoms) { + std::cerr << "removing OXT atoms" << std::endl; + int oremoved=0; + AtomHandleList atoms=ent.GetAtomList(); + for (AtomHandleList::const_iterator i=atoms.begin(), e=atoms.end(); i!=e; ++i) { + if (i->GetName()=="OXT") { + edi.DeleteAtom(*i); + oremoved++; + } + } + std::cerr << " --> removed " << oremoved << " OXT atoms" << std::endl; + } + + checker.CheckForCompleteness(); + checker.CheckForUnknownAtoms(); + checker.CheckForNonStandard(); + for (Diagnostics::const_diag_iterator + j = diags.diags_begin(), e = diags.diags_end(); j != e; ++j) { + const Diag* diag=*j; + std::cerr << diag->Format(colored); + switch (diag->GetType()) { + case DIAG_UNK_ATOM: + if (rm_unk_atoms) { + edi.DeleteAtom(diag->GetAtom(0)); + std::cerr << " --> removed "; + } + break; + case DIAG_NONSTD_RESIDUE: + if (rm_non_std) { + edi.DeleteResidue(diag->GetResidue(0)); + std::cerr << " --> removed "; + } + break; + default: + break; + } + std::cerr << std::endl; + } +} + +void ost::mol::alg::CleanUpElementColumn(EntityHandle& ent, CompoundLibPtr lib){ + ChainHandleList chains=ent.GetChainList(); + for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { + ResidueHandleList residues = c->GetResidueList(); + for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { + CompoundPtr compound=lib->FindCompound(r->GetName(),Compound::PDB); + AtomHandleList atoms=r->GetAtomList(); + if (!compound) { + for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { + j->SetElement(""); + } + continue; + } + for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { + int specindx=compound->GetAtomSpecIndex(j->GetName()); + if (specindx!=-1) { + j->SetElement(compound->GetAtomSpecs()[specindx].element); + } else { + j->SetElement(""); + } + } + } + } +} + +void ost::mol::alg::Molck( + ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib, + const ost::mol::alg::MolckSettings& settings=ost::mol::alg::MolckSettings()){ + if (settings.map_nonstd_res) { + ost::mol::alg::MapNonStandardResidues(ent, lib); + } + ost::mol::alg::RemoveAtoms(ent, + lib, + settings.rm_unk_atoms, + settings.rm_non_std, + settings.rm_hyd_atoms, + settings.rm_oxt_atoms, + settings.rm_zero_occ_atoms, + settings.colored); + if (settings.assign_elem) { + ost::mol::alg::CleanUpElementColumn(ent, lib); + } +} \ No newline at end of file diff --git a/modules/mol/alg/src/molck.hh b/modules/mol/alg/src/molck.hh new file mode 100644 index 0000000000000000000000000000000000000000..d61e0218678bd07fba0e828fdf0f901dd69977e5 --- /dev/null +++ b/modules/mol/alg/src/molck.hh @@ -0,0 +1,104 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#ifndef OST_MOL_ALG_MOLCK_HH +#define OST_MOL_ALG_MOLCK_HH + +#include <string> +#include <ost/mol/entity_handle.hh> +#include <ost/conop/compound_lib.hh> + +namespace { + inline std::string BoolToString(bool b) + { + return b ? "True" : "False"; + } +} + +namespace ost { namespace mol{ namespace alg { + +struct MolckSettings; + +struct MolckSettings{ + + bool rm_unk_atoms; + bool rm_non_std; + bool rm_hyd_atoms; + bool rm_oxt_atoms; + bool rm_zero_occ_atoms; + bool colored; + bool map_nonstd_res; + bool assign_elem; + + MolckSettings(bool init_rm_unk_atoms=false, + bool init_rm_non_std=false, + bool init_rm_hyd_atoms=true, + bool init_rm_oxt_atoms=false, + bool init_rm_zero_occ_atoms=false, + bool init_colored=false, + bool init_map_nonstd_res=true, + bool init_assign_elem=true): + rm_unk_atoms(init_rm_unk_atoms), // Remove unknown and atoms not following the nomenclature + rm_non_std(init_rm_non_std), // Remove all residues not one of the 20 standard amino acids + rm_hyd_atoms(init_rm_hyd_atoms), // Remove hydrogen atoms + rm_oxt_atoms(init_rm_oxt_atoms), // Remove terminal oxygens + rm_zero_occ_atoms(init_rm_zero_occ_atoms), // Remove atoms with zero occupancy + colored(init_colored), // Whether the output should be colored + map_nonstd_res(init_map_nonstd_res), // Map non standard residues back to standard ones (e.g.: MSE->MET,SEP->SER,etc.) + assign_elem(init_assign_elem){} // Clean up element column + + public: + std::string ToString(){ + std::string rep = "MolckSettings(rm_unk_atoms=" + BoolToString(rm_unk_atoms) + + ", rm_unk_atoms=" + BoolToString(rm_unk_atoms) + + ", rm_non_std=" + BoolToString(rm_non_std) + + ", rm_hyd_atoms=" + BoolToString(rm_hyd_atoms) + + ", rm_oxt_atoms=" + BoolToString(rm_oxt_atoms) + + ", rm_zero_occ_atoms=" + BoolToString(rm_zero_occ_atoms) + + ", colored=" + BoolToString(colored) + + ", map_nonstd_res=" + BoolToString(map_nonstd_res) + + ", assign_elem=" + BoolToString(assign_elem) + + ")"; + return rep; + } + +}; + +void MapNonStandardResidues(ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib); + +void RemoveAtoms(ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib, + bool rm_unk_atoms, + bool rm_non_std, + bool rm_hyd_atoms, + bool rm_oxt_atoms, + bool rm_zero_occ_atoms, + bool colored=true); + +void CleanUpElementColumn(ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib); + +void Molck(ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib, + const MolckSettings& settings); + + +}}} // namespace + +#endif diff --git a/modules/conop/src/nonstandard.cc b/modules/mol/alg/src/nonstandard.cc similarity index 99% rename from modules/conop/src/nonstandard.cc rename to modules/mol/alg/src/nonstandard.cc index eaf06680d087c05080f0e578828c7bad3fd68c66..b51c582adcdd81fb5b02cff834c18c218b7b317d 100644 --- a/modules/conop/src/nonstandard.cc +++ b/modules/mol/alg/src/nonstandard.cc @@ -32,7 +32,7 @@ using namespace ost::mol; using namespace ost; using namespace ost::conop; -namespace ost { namespace conop { +namespace ost { namespace mol { namespace alg { bool CopyResidue(ResidueHandle src_res, ResidueHandle dst_res, XCSEditor& edi) @@ -234,4 +234,4 @@ bool CopyNonConserved(ResidueHandle src_res, ResidueHandle dst_res, -}} +}}} diff --git a/modules/conop/src/nonstandard.hh b/modules/mol/alg/src/nonstandard.hh similarity index 78% rename from modules/conop/src/nonstandard.hh rename to modules/mol/alg/src/nonstandard.hh index a57022def8ef7436889491c7ae6714c6e9a69d6e..1dce74fd4c2cf102f985f06d22c2723e93053075 100644 --- a/modules/conop/src/nonstandard.hh +++ b/modules/mol/alg/src/nonstandard.hh @@ -17,30 +17,31 @@ // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA //------------------------------------------------------------------------------ -#ifndef OST_CONOP_NONSTANDARD_HH -#define OST_CONOP_NONSTANDARD_HH +#ifndef OST_MOL_ALG_NONSTANDARD_HH +#define OST_MOL_ALG_NONSTANDARD_HH /* Author: Marco Biasini, Juergen Haas */ #include "module_config.hh" -#include "compound_lib.hh" +#include <ost/conop/compound_lib.hh> -namespace ost { namespace conop { +namespace ost { namespace mol { namespace alg { /// \brief copies all atom of src_res to dst_res, gets compound lib from builder -bool DLLEXPORT_OST_CONOP CopyResidue(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyResidue(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi); /// \brief copies all atom of src_res to dst_res, requires a compound lib -bool DLLEXPORT_OST_CONOP CopyResidue(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyResidue(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, - ost::mol::XCSEditor& edi, CompoundLibPtr lib); + ost::mol::XCSEditor& edi, + ost::conop::CompoundLibPtr lib); /// \brief copies all atom of src_res to dst_res @@ -49,7 +50,7 @@ bool DLLEXPORT_OST_CONOP CopyResidue(ost::mol::ResidueHandle src_res, /// \param edi /// \param has_cbeta will be set to true if the src_res has a cbeta and the /// dst_residue is not a glycine -bool DLLEXPORT_OST_CONOP CopyIdentical(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyIdentical(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, bool& has_cbeta); @@ -63,10 +64,11 @@ bool DLLEXPORT_OST_CONOP CopyIdentical(ost::mol::ResidueHandle src_res, -bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyConserved(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, - bool& has_cbeta, CompoundLibPtr lib); + bool& has_cbeta, + ost::conop::CompoundLibPtr lib); /// \brief copies atoms of src_res to dst_res, requires compound lib /// @@ -77,7 +79,7 @@ bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, -bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyConserved(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, bool& has_cbeta); @@ -89,14 +91,14 @@ bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, /// only copied if dst_res is not equal to glycine. -bool DLLEXPORT_OST_CONOP CopyNonConserved(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyNonConserved(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, bool& has_cbeta); /// \brief construct dst_res from src_res when src_res is an MSE -bool DLLEXPORT_OST_CONOP CopyMSE(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyMSE(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, bool& has_cbeta); @@ -104,13 +106,14 @@ bool DLLEXPORT_OST_CONOP CopyMSE(ost::mol::ResidueHandle src_res, /// \brief construct a dst_res with only atoms matching the standard aminoacid /// from src_res when src_res is an is modified -bool DLLEXPORT_OST_CONOP CopyModified(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyModified(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, - bool& has_cbeta, CompoundLibPtr lib); + bool& has_cbeta, + ost::conop::CompoundLibPtr lib); -}} +}}} #endif diff --git a/modules/mol/alg/tests/CMakeLists.txt b/modules/mol/alg/tests/CMakeLists.txt index 0002d71492e639d21d456a93f94950db59aa1afe..0715910b1b9f9afac4de6e94169456ba21f80a3c 100644 --- a/modules/mol/alg/tests/CMakeLists.txt +++ b/modules/mol/alg/tests/CMakeLists.txt @@ -11,7 +11,8 @@ set(OST_MOL_ALG_UNIT_TESTS ) if (COMPOUND_LIB) - list(APPEND OST_MOL_ALG_UNIT_TESTS test_qsscoring.py) + list(APPEND OST_MOL_ALG_UNIT_TESTS test_qsscoring.py + test_nonstandard.py) endif() ost_unittest(MODULE mol_alg SOURCES "${OST_MOL_ALG_UNIT_TESTS}" LINK ost_io) diff --git a/modules/conop/tests/test_nonstandard.py b/modules/mol/alg/tests/test_nonstandard.py similarity index 78% rename from modules/conop/tests/test_nonstandard.py rename to modules/mol/alg/tests/test_nonstandard.py index 566db9a2b1902339828abaec67e7e35713fdf47a..c81c92479a4150c76aa89ff92409a4dc7656e25f 100644 --- a/modules/conop/tests/test_nonstandard.py +++ b/modules/mol/alg/tests/test_nonstandard.py @@ -1,5 +1,5 @@ import unittest -from ost import conop, io, mol +from ost import io, mol class TestNonStandard(unittest.TestCase): @@ -12,7 +12,7 @@ class TestNonStandard(unittest.TestCase): c=ed.InsertChain('A') ed.AppendResidue(c, 'SER') - err, has_cbeta=conop.CopyConserved(tpl.residues[0], new_hdl.residues[0], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[0], new_hdl.residues[0], ed) self.assertTrue(err) self.assertTrue(has_cbeta) residues=new_hdl.residues @@ -36,16 +36,16 @@ class TestNonStandard(unittest.TestCase): ed.AppendResidue(c, 'GLY') ed.AppendResidue(c, 'GLY') ed.AppendResidue(c, 'HIS') - err, has_cbeta=conop.CopyConserved(tpl.residues[0], new_hdl.residues[0], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[0], new_hdl.residues[0], ed) self.assertTrue(has_cbeta) self.assertTrue(err) - err, has_cbeta=conop.CopyConserved(tpl.residues[1], new_hdl.residues[1], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[1], new_hdl.residues[1], ed) self.assertFalse(has_cbeta) self.assertTrue(err) - err, has_cbeta=conop.CopyConserved(tpl.residues[2], new_hdl.residues[2], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[2], new_hdl.residues[2], ed) self.assertFalse(has_cbeta) self.assertTrue(err) - err, has_cbeta=conop.CopyConserved(tpl.residues[3], new_hdl.residues[3], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[3], new_hdl.residues[3], ed) self.assertTrue(has_cbeta) self.assertTrue(err) @@ -72,28 +72,28 @@ class TestNonStandard(unittest.TestCase): ed.AppendResidue(c, 'MET') # MET to MET - err =conop.CopyResidue(tpl.residues[0], new_hdl.residues[0], ed) + err =mol.alg.CopyResidue(tpl.residues[0], new_hdl.residues[0], ed) self.assertTrue(err) #GLY to GLY - err =conop.CopyResidue(tpl.residues[1], new_hdl.residues[1], ed) + err =mol.alg.CopyResidue(tpl.residues[1], new_hdl.residues[1], ed) self.assertTrue(err) # GLY to GLY - err =conop.CopyResidue(tpl.residues[2], new_hdl.residues[2], ed) + err =mol.alg.CopyResidue(tpl.residues[2], new_hdl.residues[2], ed) self.assertTrue(err) #now we copy a HIS to a HIS - err =conop.CopyResidue(tpl.residues[3], new_hdl.residues[3], ed) + err =mol.alg.CopyResidue(tpl.residues[3], new_hdl.residues[3], ed) self.assertTrue(err) # copy a GLY to a HIS - err, has_cbeta=conop.CopyNonConserved(tpl.residues[1], new_hdl.residues[4], ed) + err, has_cbeta=mol.alg.CopyNonConserved(tpl.residues[1], new_hdl.residues[4], ed) self.assertFalse(has_cbeta) # copy a MET to a GLY - err =conop.CopyResidue(tpl.residues[0], new_hdl.residues[5], ed) + err =mol.alg.CopyResidue(tpl.residues[0], new_hdl.residues[5], ed) self.assertFalse(err) # copy a MET to a HIS - err =conop.CopyResidue(tpl.residues[0], new_hdl.residues[6], ed) + err =mol.alg.CopyResidue(tpl.residues[0], new_hdl.residues[6], ed) self.assertFalse(err) # copy a GLY to a MET with adding CB - err=conop.CopyResidue(tpl.residues[1], new_hdl.residues[7], ed) + err=mol.alg.CopyResidue(tpl.residues[1], new_hdl.residues[7], ed) self.assertFalse(err) residues=new_hdl.residues diff --git a/modules/conop/tests/testfiles/cbeta.pdb b/modules/mol/alg/tests/testfiles/cbeta.pdb similarity index 100% rename from modules/conop/tests/testfiles/cbeta.pdb rename to modules/mol/alg/tests/testfiles/cbeta.pdb diff --git a/modules/conop/tests/testfiles/sep.pdb b/modules/mol/alg/tests/testfiles/sep.pdb similarity index 100% rename from modules/conop/tests/testfiles/sep.pdb rename to modules/mol/alg/tests/testfiles/sep.pdb diff --git a/tools/molck/CMakeLists.txt b/tools/molck/CMakeLists.txt index 4e1dc2fb79b73e90022dd2f546a5011c98eca118..c76f1fd71b4532bf32b2cfcfa5dc34a0b7fb00ff 100644 --- a/tools/molck/CMakeLists.txt +++ b/tools/molck/CMakeLists.txt @@ -1,4 +1,4 @@ if (NOT WIN32) executable(NAME molck SOURCES main.cc - DEPENDS_ON ost_io STATIC) + DEPENDS_ON ost_io ost_mol_alg STATIC) endif(NOT WIN32) diff --git a/tools/molck/main.cc b/tools/molck/main.cc index 2a445620212e97165e53b7f9bf3592ecd5a21387..ef2d77c207213957a20d2d2d460da74fa0280b73 100644 --- a/tools/molck/main.cc +++ b/tools/molck/main.cc @@ -5,25 +5,52 @@ #include <ost/base.hh> #include <ost/boost_filesystem_helper.hh> #include <ost/platform.hh> -#include <ost/conop/model_check.hh> #include <ost/conop/conop.hh> -#include <ost/conop/amino_acids.hh> -#include <ost/io/mol/pdb_reader.hh> #include <ost/io/mol/pdb_writer.hh> +#include <ost/io/mol/pdb_reader.hh> #include <ost/io/mol/mmcif_reader.hh> #include <ost/io/io_exception.hh> -#include <ost/conop/nonstandard.hh> +#include <ost/mol/alg/molck.hh> #if defined(__APPLE__) #include <mach-o/dyld.h> #endif using namespace ost; -using namespace ost::conop; using namespace ost::mol; using namespace ost::io; namespace po=boost::program_options; namespace fs=boost::filesystem; +const char* USAGE= +"this is molck - the molecule checker\n" +"usage: molck [options] file1.pdb [file2.pdb [...]]\n" +"options \n" +" --complib=path location of the compound library file. If not provided, the \n" +" following locations are searched in this order: \n" +" 1. Working directory, 2. OpenStructure standard library location (if the \n" +" executable is part of a standard OpenStructure installation) \n" +" --rm=<a>,<b> remove atoms and residues matching some criteria \n" +" zeroocc - Remove atoms with zero occupancy \n" +" hyd - Remove hydrogen atoms \n" +" oxt - Remove terminal oxygens \n" +" nonstd - Remove all residues not one of the 20 standard amino acids \n" +" unk - Remove unknown and atoms not following the nomenclature\n" +" --fix-ele clean up element column\n" +" --stdout write cleaned file(s) to stdout \n" +" --out=filename write cleaned file(s) to disk. % characters in the filename are \n" +" replaced with the basename of the input file without extension. \n" +" Default: %-molcked.pdb \n" +" --color=auto|on|off \n" +" whether output should be colored\n" +" --map-nonstd maps modified residues back to the parent amino acid, for example\n" +" MSE -> MET, SEP -> SER.\n"; + +void usage() +{ + std::cerr << USAGE << std::endl; + exit(0); +} + EntityHandle load_x(const String& file, const IOProfile& profile) { try { @@ -53,20 +80,20 @@ EntityHandle load_x(const String& file, const IOProfile& profile) } // load compound library, exiting if it could not be found... -CompoundLibPtr load_compound_lib(const String& custom_path) +ost::conop::CompoundLibPtr load_compound_lib(const String& custom_path) { if (custom_path!="") { if (fs::exists(custom_path)) { - return CompoundLib::Load(custom_path); + return ost::conop::CompoundLib::Load(custom_path); } else { std::cerr << "Could not find compounds.chemlib at the provided location, trying other options" << std::endl; } } if (fs::exists("compounds.chemlib")) { - return CompoundLib::Load("compounds.chemlib"); + return ost::conop::CompoundLib::Load("compounds.chemlib"); } char result[ 1024 ]; - CompoundLibPtr lib; + ost::conop::CompoundLibPtr lib; String exe_path; #if defined(__APPLE__) uint32_t size=1023; @@ -89,45 +116,16 @@ CompoundLibPtr load_compound_lib(const String& custom_path) String share_path_string=BFPathToString(share_path); if (fs::exists(share_path_string)) { - return CompoundLib::Load(share_path_string); + return ost::conop::CompoundLib::Load(share_path_string); } } if (!lib) { std::cerr << "Could not load compounds.chemlib" << std::endl; exit(-1); } - return CompoundLibPtr(); + return ost::conop::CompoundLibPtr(); } -const char* USAGE= -"this is molck - the molecule checker\n" -"usage: molck [options] file1.pdb [file2.pdb [...]]\n" -"options \n" -" --complib=path location of the compound library file. If not provided, the \n" -" following locations are searched in this order: \n" -" 1. Working directory, 2. OpenStructure standard library location (if the \n" -" executable is part of a standard OpenStructure installation) \n" -" --rm=<a>,<b> remove atoms and residues matching some criteria \n" -" zeroocc - Remove atoms with zero occupancy \n" -" hyd - Remove hydrogen atoms \n" -" oxt - Remove terminal oxygens \n" -" nonstd - Remove all residues not one of the 20 standard amino acids \n" -" unk - Remove unknown and atoms not following the nomenclature\n" -" --fix-ele clean up element column\n" -" --stdout write cleaned file(s) to stdout \n" -" --out=filename write cleaned file(s) to disk. % characters in the filename are \n" -" replaced with the basename of the input file without extension. \n" -" Default: %-molcked.pdb \n" -" --color=auto|on|off \n" -" whether output should be colored\n" -" --map-nonstd maps modified residues back to the parent amino acid, for example\n" -" MSE -> MET, SEP -> SER.\n"; - -void usage() -{ - std::cerr << USAGE << std::endl; - exit(0); -} int main(int argc, char *argv[]) { @@ -136,19 +134,12 @@ int main(int argc, char *argv[]) } IOProfile prof; prof.fault_tolerant=true; + ost::mol::alg::MolckSettings settings; String rm; String color; - bool colored = false; - bool rm_unk_atoms=false; - bool rm_hyd_atoms=false; - bool rm_non_std=false; - bool rm_oxt_atoms=false; - bool rm_zero_occ_atoms=false; bool write_to_stdout = false; bool write_to_file = false; - bool map_nonstd_res = false; - bool assign_elem = false; String output_blueprint_string; String custom_path=""; @@ -196,24 +187,24 @@ int main(int argc, char *argv[]) output_blueprint_string = vm["out"].as<String>(); } if (vm.count("map-nonstd")) { - map_nonstd_res = true; + settings.map_nonstd_res = true; } if (vm.count("fix-ele")) { - assign_elem = true; + settings.assign_elem = true; } std::vector<StringRef> rms=StringRef(rm.c_str(), rm.size()).split(','); for (size_t i=0; i<rms.size(); ++i) { if (rms[i] == StringRef("unk", 3)) { - rm_unk_atoms = true; + settings.rm_unk_atoms = true; } else if (rms[i] == StringRef("nonstd", 6)) { - rm_non_std = true; + settings.rm_non_std = true; } else if (rms[i] == StringRef("hyd", 3)) { - rm_hyd_atoms = true; + settings.rm_hyd_atoms = true; } else if (rms[i] == StringRef("oxt", 3)) { - rm_oxt_atoms = true; + settings.rm_oxt_atoms = true; } else if (rms[i] == StringRef("zeroocc", 7)) { - rm_zero_occ_atoms = true; + settings.rm_zero_occ_atoms = true; } else { std::cerr << "unknown value to remove '" << rms[i] << "'" << std::endl; usage(); @@ -221,144 +212,23 @@ int main(int argc, char *argv[]) } } if (color=="auto") { - colored = isatty(STDERR_FILENO); + settings.colored = isatty(STDERR_FILENO); } else if (color == "on" || color == "1" || color == "yes") { - colored = true; + settings.colored = true; } else if (color == "off" || color == "0" || color == "no") { - colored = false; + settings.colored = false; } else { usage(); exit(-1); } - CompoundLibPtr lib=load_compound_lib(custom_path); + ost::conop::CompoundLibPtr lib=load_compound_lib(custom_path); for (unsigned int i = 0; i < files.size(); ++i) { EntityHandle ent=load_x(files[i], prof); if (!ent.IsValid()) { continue; } - if (map_nonstd_res) { - EntityHandle new_ent=CreateEntity(); - ChainHandleList chains=ent.GetChainList(); - XCSEditor new_edi=new_ent.EditXCS(); - for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { - ChainHandle new_chain = new_edi.InsertChain(c->GetName()); - ResidueHandleList residues = c->GetResidueList(); - for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { - AminoAcid aa = ResidueToAminoAcid(*r); - if (aa!=XXX) { - ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); - AtomHandleList atoms = r->GetAtomList(); - for (AtomHandleList::const_iterator a=atoms.begin();a!=atoms.end();++a) { - new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); - } - continue; - } else { - CompoundPtr compound=lib->FindCompound(r->GetName(),Compound::PDB); - if (!compound || !compound->IsPeptideLinking() || compound->GetChemClass()==ChemClass::D_PEPTIDE_LINKING || - OneLetterCodeToAminoAcid(compound->GetOneLetterCode())==XXX) { - ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); - AtomHandleList atoms = r->GetAtomList(); - for (AtomHandleList::const_iterator a=atoms.begin();a!=atoms.end();++a) { - new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); - } - continue; - } - ResidueHandle dest_res = new_edi.AppendResidue(new_chain,OneLetterCodeToResidueName(compound->GetOneLetterCode()),r->GetNumber()); - CopyResidue(*r,dest_res,new_edi,lib); - } - } - } - ent=new_ent; - } - - XCSEditor edi=ent.EditXCS(); - Diagnostics diags; - Checker checker(lib, ent, diags); - if (rm_zero_occ_atoms) { - std::cerr << "removing atoms with zero occupancy" << std::endl; - int zremoved=0; - AtomHandleList zero_atoms=checker.GetZeroOccupancy(); - for (AtomHandleList::const_iterator i=zero_atoms.begin(), e=zero_atoms.end(); i!=e; ++i) { - edi.DeleteAtom(*i); - zremoved++; - } - std::cerr << " --> removed " << zremoved << " hydrogen atoms" << std::endl; - } - - if (rm_hyd_atoms) { - std::cerr << "removing hydrogen atoms" << std::endl; - int hremoved=0; - AtomHandleList hyd_atoms=checker.GetHydrogens(); - for (AtomHandleList::const_iterator i=hyd_atoms.begin(), e=hyd_atoms.end(); i!=e; ++i) { - edi.DeleteAtom(*i); - hremoved++; - } - std::cerr << " --> removed " << hremoved << " hydrogen atoms" << std::endl; - } - if (rm_oxt_atoms) { - std::cerr << "removing OXT atoms" << std::endl; - int oremoved=0; - AtomHandleList atoms=ent.GetAtomList(); - for (AtomHandleList::const_iterator i=atoms.begin(), e=atoms.end(); i!=e; ++i) { - if (i->GetName()=="OXT") { - edi.DeleteAtom(*i); - oremoved++; - } - } - std::cerr << " --> removed " << oremoved << " OXT atoms" << std::endl; - } - - checker.CheckForCompleteness(); - checker.CheckForUnknownAtoms(); - checker.CheckForNonStandard(); - for (Diagnostics::const_diag_iterator - j = diags.diags_begin(), e = diags.diags_end(); j != e; ++j) { - const Diag* diag=*j; - std::cerr << diag->Format(colored); - switch (diag->GetType()) { - case DIAG_UNK_ATOM: - if (rm_unk_atoms) { - edi.DeleteAtom(diag->GetAtom(0)); - std::cerr << " --> removed "; - } - break; - case DIAG_NONSTD_RESIDUE: - if (rm_non_std) { - edi.DeleteResidue(diag->GetResidue(0)); - std::cerr << " --> removed "; - } - break; - default: - break; - } - std::cerr << std::endl; - } - - if (assign_elem) { - ChainHandleList chains=ent.GetChainList(); - for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { - ResidueHandleList residues = c->GetResidueList(); - for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { - CompoundPtr compound=lib->FindCompound(r->GetName(),Compound::PDB); - AtomHandleList atoms=r->GetAtomList(); - if (!compound) { - for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { - j->SetElement(""); - } - continue; - } - for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { - int specindx=compound->GetAtomSpecIndex(j->GetName()); - if (specindx!=-1) { - j->SetElement(compound->GetAtomSpecs()[specindx].element); - } else { - j->SetElement(""); - } - } - } - } - } + ost::mol::alg::Molck(ent, lib, settings); if (write_to_stdout) { PDBWriter writer(std::cout, prof);