diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 45ebdf60c4dcd6fdac658c24ad70a10190623430..e7b55a2574e40bd3e792c5746be5ec7e95716d98 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,3 +1,12 @@ +Changes in Release 1.8 +-------------------------------------------------------------------------------- + + * nonstandard C++ module was moved from ost.conop to ost.mol.alg. This implies + change in the API. Mapping functions CopyResidue, CopyConserved and + CopyNonConserved that were previousely imported from ost.conop are now + to be imported from ost.mol.alg. + * Added Molck API to the ost.mol.alg module. + Changes in Release 1.7.1 -------------------------------------------------------------------------------- diff --git a/modules/conop/doc/aminoacid.rst b/modules/conop/doc/aminoacid.rst index de15eb98a91c3fe53a9008c20d314d4aa8954edc..a33104ca6b91085b63a3a0df02b100ebd1d9c2d7 100644 --- a/modules/conop/doc/aminoacid.rst +++ b/modules/conop/doc/aminoacid.rst @@ -79,63 +79,4 @@ Converter functions .. method:: Empty() - Whether the set is empty, i.e. doesn't contain any amino acids. - - -Mapping functions --------------------------------------------------------------------------------- - -The following functions help to convert one residue into another by reusing as -much as possible from the present atoms. They are mainly meant to map from -standard amino acid to other standard amino acids or from modified amino acids -to standard amino acids. - -.. function:: CopyResidue(src_res, dst_res, editor) - - Copies the atoms of ``src_res`` to ``dst_res`` using the residue names - as guide to decide which of the atoms should be copied. If ``src_res`` and - ``dst_res`` have the same name, or ``src_res`` is a modified version of - ``dst_res`` (i.e. have the same single letter code), CopyConserved will be - called, otherwise CopyNonConserved will be called. - - :param src_res: The source residue - :type src_res: :class:`~ost.mol.ResidueHandle` - :param dst_res: The destination residue - :type dst_res: :class:`~ost.mol.ResidueHandle` - - :returns: true if the residue could be copied, false if not. - -.. function:: CopyConserved(src_res, dst_res, editor) - - Copies the atoms of ``src_res`` to ``dst_res`` assuming that the parent - amino acid of ``src_res`` (or ``src_res`` itself) are identical to ``dst_res``. - - If ``src_res`` and ``dst_res`` are identical, all heavy atoms are copied - to ``dst_res``. If ``src_res`` is a modified version of ``dst_res`` and the - modification is a pure addition (e.g. the phosphate group of phosphoserine), - the modification is stripped off and all other heavy atoms are copied to - ``dst_res``. If the modification is not a pure addition, only the backbone - heavy atoms are copied to ``dst_res``. - - Additionally, the selenium atom of ``MSE`` is converted to sulphur. - - :param src_res: The source residue - :type src_res: :class:`~ost.mol.ResidueHandle` - :param dst_res: The destination residue - :type dst_res: :class:`~ost.mol.ResidueHandle` - - :returns: a tuple of bools stating whether the residue could be copied and - whether the Cbeta atom was inserted into the ``dst_res``. - -.. function:: CopyNonConserved(src_res, dst_res, editor) - - Copies the heavy backbone atoms and Cbeta (except for ``GLY``) of ``src_res`` - to ``dst_res``. - - :param src_res: The source residue - :type src_res: :class:`~ost.mol.ResidueHandle` - :param dst_res: The destination residue - :type dst_res: :class:`~ost.mol.ResidueHandle` - - :returns: a tuple of bools stating whether the residue could be copied and - whether the Cbeta atom was inserted into the ``dst_res``. + Whether the set is empty, i.e. doesn't contain any amino acids. \ No newline at end of file diff --git a/modules/conop/pymod/CMakeLists.txt b/modules/conop/pymod/CMakeLists.txt index 9c09906222fb97e1c935590ec22e81b536c94891..23dff26a0022fa9c312c9bfd1acda033d1c304b7 100644 --- a/modules/conop/pymod/CMakeLists.txt +++ b/modules/conop/pymod/CMakeLists.txt @@ -8,7 +8,6 @@ set(OST_CONOP_PYMOD_SOURCES export_conop.cc export_diag.cc export_rule_based.cc - export_non_standard.cc export_ring_finder.cc ) diff --git a/modules/conop/pymod/wrap_conop.cc b/modules/conop/pymod/wrap_conop.cc index 119c86c325f3ce6fc48cf88298838f441cb04a36..04bcfce147b3ebf986cee454c2594e242ade7059 100644 --- a/modules/conop/pymod/wrap_conop.cc +++ b/modules/conop/pymod/wrap_conop.cc @@ -25,7 +25,6 @@ void export_Sanitizer(); void export_Conop(); void export_RingFinder(); void export_AminoAcids(); -void export_NonStandard(); void export_processor(); void export_rule_based(); void export_heuristic(); @@ -41,6 +40,5 @@ BOOST_PYTHON_MODULE(_ost_conop) export_Compound(); export_RingFinder(); export_AminoAcids(); - export_NonStandard(); export_diag(); } diff --git a/modules/conop/src/CMakeLists.txt b/modules/conop/src/CMakeLists.txt index 0185618ea4a9ebb02d917393670ce3f401ed494c..c2f1f4c6d0148c62461be5852bd33872ad39f903 100644 --- a/modules/conop/src/CMakeLists.txt +++ b/modules/conop/src/CMakeLists.txt @@ -9,7 +9,6 @@ compound.hh compound_lib.hh module_config.hh rule_based.hh -nonstandard.hh minimal_compound_lib.hh compound_lib_base.hh ring_finder.hh @@ -27,12 +26,11 @@ rule_based.cc model_check.cc compound.cc compound_lib.cc -nonstandard.cc ring_finder.cc ) module(NAME conop SOURCES ${OST_CONOP_SOURCES} - HEADERS ${OST_CONOP_HEADERS} DEPENDS_ON ost_mol ost_mol_alg ost_geom ost_db) + HEADERS ${OST_CONOP_HEADERS} DEPENDS_ON ost_mol ost_geom ost_db) if (WIN32) diff --git a/modules/conop/tests/CMakeLists.txt b/modules/conop/tests/CMakeLists.txt index 96400650977655f3efed4108de874bf9e1168bee..44fd58acfc7036926e3e5b31ebe51db7129291c8 100644 --- a/modules/conop/tests/CMakeLists.txt +++ b/modules/conop/tests/CMakeLists.txt @@ -9,8 +9,7 @@ set(OST_CONOP_UNIT_TESTS if (COMPOUND_LIB) list(APPEND OST_CONOP_UNIT_TESTS test_compound.py - test_cleanup.py - test_nonstandard.py) + test_cleanup.py) endif() ost_unittest(MODULE conop diff --git a/modules/mol/alg/doc/molalg.rst b/modules/mol/alg/doc/molalg.rst index 028e9dc2147a5a7555b91fe9586b775369547483..a7c9c4c92b52b9f0a7092945a524bddd87f38636 100644 --- a/modules/mol/alg/doc/molalg.rst +++ b/modules/mol/alg/doc/molalg.rst @@ -1467,3 +1467,240 @@ used to skip frames in the analysis. .. automodule:: ost.mol.alg.structure_analysis :members: + +.. _mapping-functions: + +Mapping functions +-------------------------------------------------------------------------------- + +The following functions help to convert one residue into another by reusing as +much as possible from the present atoms. They are mainly meant to map from +standard amino acid to other standard amino acids or from modified amino acids +to standard amino acids. + +.. function:: CopyResidue(src_res, dst_res, editor) + + Copies the atoms of ``src_res`` to ``dst_res`` using the residue names + as guide to decide which of the atoms should be copied. If ``src_res`` and + ``dst_res`` have the same name, or ``src_res`` is a modified version of + ``dst_res`` (i.e. have the same single letter code), CopyConserved will be + called, otherwise CopyNonConserved will be called. + + :param src_res: The source residue + :type src_res: :class:`~ost.mol.ResidueHandle` + :param dst_res: The destination residue + :type dst_res: :class:`~ost.mol.ResidueHandle` + + :returns: true if the residue could be copied, false if not. + +.. function:: CopyConserved(src_res, dst_res, editor) + + Copies the atoms of ``src_res`` to ``dst_res`` assuming that the parent + amino acid of ``src_res`` (or ``src_res`` itself) are identical to ``dst_res``. + + If ``src_res`` and ``dst_res`` are identical, all heavy atoms are copied + to ``dst_res``. If ``src_res`` is a modified version of ``dst_res`` and the + modification is a pure addition (e.g. the phosphate group of phosphoserine), + the modification is stripped off and all other heavy atoms are copied to + ``dst_res``. If the modification is not a pure addition, only the backbone + heavy atoms are copied to ``dst_res``. + + Additionally, the selenium atom of ``MSE`` is converted to sulphur. + + :param src_res: The source residue + :type src_res: :class:`~ost.mol.ResidueHandle` + :param dst_res: The destination residue + :type dst_res: :class:`~ost.mol.ResidueHandle` + + :returns: a tuple of bools stating whether the residue could be copied and + whether the Cbeta atom was inserted into the ``dst_res``. + +.. function:: CopyNonConserved(src_res, dst_res, editor) + + Copies the heavy backbone atoms and Cbeta (except for ``GLY``) of ``src_res`` + to ``dst_res``. + + :param src_res: The source residue + :type src_res: :class:`~ost.mol.ResidueHandle` + :param dst_res: The destination residue + :type dst_res: :class:`~ost.mol.ResidueHandle` + + :returns: a tuple of bools stating whether the residue could be copied and + whether the Cbeta atom was inserted into the ``dst_res``. + + +Molecular Checker (Molck) +-------------------------------------------------------------------------------- + +Programmatic usage +################## + +Molecular Checker (Molck) could be called directly from the code using Molck +function: + +.. code-block:: python + + #! /bin/env python + + """Run Molck with Python API. + + + This is an exemplary procedure on how to run Molck using Python API which is + equivalent to the command line: + + molck <PDB PATH> --rm=hyd,oxt,nonstd,unk \ + --fix-ele --out=<OUTPUT PATH> \ + --complib=<PATH TO compounds.chemlib> + """ + + from ost.io import LoadPDB, SavePDB + from ost.mol.alg import MolckSettings, Molck + + from ost.conop import CompoundLib + + + pdbid = "<PDB PATH>" + lib = CompoundLib.Load("<PATH TO compounds.chemlib>") + + # Using Molck function + ent = LoadPDB(pdbid) + ms = MolckSettings(rm_unk_atoms=True, + rm_non_std=True, + rm_hyd_atoms=True, + rm_oxt_atoms=True, + rm_zero_occ_atoms=False, + colored=False, + map_nonstd_res=False, + assign_elem=True) + Molck(ent, lib, ms) + SavePDB(ent, "<OUTPUT PATH>", profile="SLOPPY") + +It can also be split into subsequent commands for greater controll: + +.. code-block:: python + + #! /bin/env python + + """Run Molck with Python API. + + + This is an exemplary procedure on how to run Molck using Python API which is + equivalent to the command line: + + molck <PDB PATH> --rm=hyd,oxt,nonstd,unk \ + --fix-ele --out=<OUTPUT PATH> \ + --complib=<PATH TO compounds.chemlib> + """ + + from ost.io import LoadPDB, SavePDB + from ost.mol.alg import (RemoveAtoms, MapNonStandardResidues, + CleanUpElementColumn) + from ost.conop import CompoundLib + + + pdbid = "<PDB PATH>" + lib = CompoundLib.Load("<PATH TO compounds.chemlib>") + map_nonstd = False + + # Using function chain + ent = LoadPDB(pdbid) + if map_nonstd: + MapNonStandardResidues(lib=lib, ent=ent) + + RemoveAtoms(lib=lib, + ent=ent, + rm_unk_atoms=True, + rm_non_std=True, + rm_hyd_atoms=True, + rm_oxt_atoms=True, + rm_zero_occ_atoms=False, + colored=False) + + CleanUpElementColumn(lib=lib, ent=ent) + SavePDB(ent, "<OUTPUT PATH>", profile="SLOPPY") + +API +### + + +.. class:: MolckSettings + + Stores settings used for Molecular Checker. + + .. method:: __init__(rm_unk_atoms=False,rm_non_std=False,rm_hyd_atoms=True,rm_oxt_atoms=False, rm_zero_occ_atoms=False,colored=False,map_nonstd_res=True, assign_elem=True) + + Initializes MolckSettings. + + :param rm_unk_atoms: Remove unknown and atoms not following the nomenclature + :type rm_unk_atoms: :class:`bool` + :param rm_non_std: Remove all residues not one of the 20 standard amino acids + :type rm_non_std: :class:`bool` + :param rm_hyd_atoms: Remove hydrogen atoms + :type rm_hyd_atoms: :class:`bool` + :param rm_oxt_atoms: Remove terminal oxygens + :type rm_oxt_atoms: :class:`bool` + :param rm_zero_occ_atoms: Remove atoms with zero occupancy + :type rm_zero_occ_atoms: :class:`bool` + :param colored: Whether output should be colored + :type colored: :class:`bool` + :param map_nonstd_res: Maps modified residues back to the parent amino acid, for example + MSE -> MET, SEP -> SER + :type map_nonstd_res: :class:`bool` + :param assign_elem: Clean up element column + :type assign_elem: :class:`bool` + + .. method:: ToString() + + String representation of the MolckSettings. + + +.. function:: Molck(ent, lib, settings) + + Runs Molck on provided entity. + + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityHandle` + :param lib: Compound library + :type lib: :class:`~ost.conop.CompoundLib` + :param settings: Molck settings + :type settings: :class:`MolckSettings` + + +.. function:: MapNonStandardResidues(ent, lib) + + Maps modified residues back to the parent amino acid, for example MSE -> MET. + + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityHandle` + :param lib: Compound library + :type lib: :class:`~ost.conop.CompoundLib` + +.. function:: RemoveAtoms(ent,lib,rm_unk_atoms=False,rm_non_std=False,rm_hyd_atoms=True,rm_oxt_atoms=False,rm_zero_occ_atoms=False,colored=False) + + Removes atoms and residues according to some criteria. + + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityHandle` + :param lib: Compound library + :type lib: :class:`~ost.conop.CompoundLib` + :param rm_unk_atoms: Remove unknown and atoms not following the nomenclature + :type rm_unk_atoms: :class:`bool` + :param rm_non_std: Remove all residues not one of the 20 standard amino acids + :type rm_non_std: :class:`bool` + :param rm_hyd_atoms: Remove hydrogen atoms + :type rm_hyd_atoms: :class:`bool` + :param rm_oxt_atoms: Remove terminal oxygens + :type rm_oxt_atoms: :class:`bool` + :param rm_zero_occ_atoms: Remove atoms with zero occupancy + :type rm_zero_occ_atoms: :class:`bool` + :param colored: Whether output should be colored + :type colored: :class:`bool` + +.. function:: CleanUpElementColumn(ent, lib) + + Clean up element column. + + :param ent: Structure to check + :type ent: :class:`~ost.mol.EntityHandle` + :param lib: Compound library + :type lib: :class:`~ost.conop.CompoundLib` \ No newline at end of file diff --git a/modules/mol/alg/doc/molck.rst b/modules/mol/alg/doc/molck.rst new file mode 100644 index 0000000000000000000000000000000000000000..8dd1d1f39ed9ae3ebb1c8bba5126701c58f85476 --- /dev/null +++ b/modules/mol/alg/doc/molck.rst @@ -0,0 +1,59 @@ +========================= +Molecular Checker (Molck) +========================= + +-------------------------------------- +Where can I find the Molck executable? +-------------------------------------- + +The Molck executable can be found at <YOUR-OST-STAGE-DIR>/bin + +----------- +Basic Usage +----------- + +To check one PDB file (struc1.pdb) with Molck, use the following command: + +.. code-block:: bash + + molck --complib <PATH TO COMPOUND LIB> struc1.pdb + +The checked and cleaned file will be saved by default ad struc1-molck.pdb. + +Similarly it is possible to check a list of PDB files: + +.. code-block:: bash + + molck --complib <PATH TO COMPOUND LIB> struc1.pdb struc2.pdb struc3.pdb + + +----------- +All Options +----------- + +The molck executable supports several other command line options, +please find them following: + +.. code-block:: bash + + usage: molck [options] file1.pdb [file2.pdb [...]] + options + --complib=path location of the compound library file. If not provided, the + following locations are searched in this order: + 1. Working directory, + 2. OpenStructure standard library location (if the + executable is part of a standard OpenStructure installation) + --rm=<a>,<b> remove atoms and residues matching some criteria: + - zeroocc - Remove atoms with zero occupancy + - hyd - Remove hydrogen atoms + - oxt - Remove terminal oxygens + - nonstd - Remove all residues not one of the 20 standard amino acids + - unk - Remove unknown and atoms not following the nomenclature + --fix-ele clean up element column + --stdout write cleaned file(s) to stdout + --out=filename write cleaned file(s) to disk. % characters in the filename are + replaced with the basename of the input file without extension. + Default: %-molcked.pdb + --color=auto|on|off whether output should be colored + --map-nonstd maps modified residues back to the parent amino acid, for example + MSE -> MET, SEP -> SER. \ No newline at end of file diff --git a/modules/mol/alg/pymod/CMakeLists.txt b/modules/mol/alg/pymod/CMakeLists.txt index fa2942575835fd70e480563148784918bad58122..5519c6adf2741e213eba44cb82fbaf559f4c19e8 100644 --- a/modules/mol/alg/pymod/CMakeLists.txt +++ b/modules/mol/alg/pymod/CMakeLists.txt @@ -7,6 +7,8 @@ set(OST_MOL_ALG_PYMOD_SOURCES export_contact_overlap.cc export_accessibility.cc export_sec_structure.cc + export_non_standard.cc + export_molck.cc ) set(OST_MOL_ALG_PYMOD_MODULES diff --git a/modules/mol/alg/pymod/export_molck.cc b/modules/mol/alg/pymod/export_molck.cc new file mode 100644 index 0000000000000000000000000000000000000000..e5e27dac593ebaf321c1b2fd5558ff53f209f543 --- /dev/null +++ b/modules/mol/alg/pymod/export_molck.cc @@ -0,0 +1,137 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#include <stdexcept> +#include <boost/python.hpp> +#include <boost/python/raw_function.hpp> + +using namespace boost::python; + +#include <ost/mol/alg/molck.hh> + +using namespace ost::mol::alg; + +namespace { + +object MolckSettingsInitWrapper(tuple args, dict kwargs){ + + object self = args[0]; + args = tuple(args.slice(1,_)); + + bool rm_unk_atoms = false; + if(kwargs.contains("rm_unk_atoms")){ + rm_unk_atoms = extract<bool>(kwargs["rm_unk_atoms"]); + kwargs["rm_unk_atoms"].del(); + } + + bool rm_non_std = false; + if(kwargs.contains("rm_non_std")){ + rm_non_std = extract<bool>(kwargs["rm_non_std"]); + kwargs["rm_non_std"].del(); + } + + bool rm_hyd_atoms = true; + if(kwargs.contains("rm_hyd_atoms")){ + rm_hyd_atoms = extract<bool>(kwargs["rm_hyd_atoms"]); + kwargs["rm_hyd_atoms"].del(); + } + + bool rm_oxt_atoms = false; + if(kwargs.contains("rm_oxt_atoms")){ + rm_oxt_atoms = extract<bool>(kwargs["rm_oxt_atoms"]); + kwargs["rm_oxt_atoms"].del(); + } + + bool rm_zero_occ_atoms = false; + if(kwargs.contains("rm_zero_occ_atoms")){ + rm_zero_occ_atoms = extract<bool>(kwargs["rm_zero_occ_atoms"]); + kwargs["rm_zero_occ_atoms"].del(); + } + + bool colored = false; + if(kwargs.contains("colored")){ + colored = extract<bool>(kwargs["colored"]); + kwargs["colored"].del(); + } + + bool map_nonstd_res = true; + if(kwargs.contains("map_nonstd_res")){ + map_nonstd_res = extract<bool>(kwargs["map_nonstd_res"]); + kwargs["map_nonstd_res"].del(); + } + + bool assign_elem = true; + if(kwargs.contains("assign_elem")){ + assign_elem = extract<bool>(kwargs["assign_elem"]); + kwargs["assign_elem"].del(); + } + + if(len(kwargs) > 0){ + std::stringstream ss; + ss << "Invalid keywords observed when setting up MolckSettings! "; + ss << "Or did you pass the same keyword twice? "; + ss << "Valid keywords are: rm_unk_atoms, rm_non_std, rm_hyd_atoms, "; + ss << "rm_oxt_atoms, rm_zero_occ_atoms, colored, map_nonstd_res, "; + ss << "assign_elem!"; + throw std::invalid_argument(ss.str()); + } + + + return self.attr("__init__")(rm_unk_atoms, + rm_non_std, + rm_hyd_atoms, + rm_oxt_atoms, + rm_zero_occ_atoms, + colored, + map_nonstd_res, + assign_elem); +}} + +void export_Molck() +{ + class_<MolckSettings>("MolckSettings", no_init) + .def("__init__", raw_function(MolckSettingsInitWrapper)) + .def(init<bool , bool, bool, bool, bool, bool, bool, bool>()) + .def("ToString", &MolckSettings::ToString) + .def("__repr__", &MolckSettings::ToString) + .def("__str__", &MolckSettings::ToString) + .def_readwrite("rm_unk_atoms", &MolckSettings::rm_unk_atoms) + .def_readwrite("rm_non_std", &MolckSettings::rm_non_std) + .def_readwrite("rm_hyd_atoms", &MolckSettings::rm_hyd_atoms) + .def_readwrite("rm_oxt_atoms", &MolckSettings::rm_oxt_atoms) + .def_readwrite("rm_zero_occ_atoms", &MolckSettings::rm_zero_occ_atoms) + .def_readwrite("colored", &MolckSettings::colored) + .def_readwrite("map_nonstd_res", &MolckSettings::map_nonstd_res) + .def_readwrite("assign_elem", &MolckSettings::assign_elem); + + def("MapNonStandardResidues", &MapNonStandardResidues, (arg("ent"), + arg("lib"))); + + def("RemoveAtoms", &RemoveAtoms, (arg("ent"), + arg("lib"), + arg("rm_unk_atoms")=false, + arg("rm_non_std")=false, + arg("rm_hyd_atoms")=true, + arg("rm_oxt_atoms")=false, + arg("rm_zero_occ_atoms")=false, + arg("colored")=false)); + + def("CleanUpElementColumn", &CleanUpElementColumn, (arg("ent"), arg("lib"))); + + def("Molck", &Molck, (arg("ent"), arg("lib"), arg("settings"))); +} diff --git a/modules/conop/pymod/export_non_standard.cc b/modules/mol/alg/pymod/export_non_standard.cc similarity index 96% rename from modules/conop/pymod/export_non_standard.cc rename to modules/mol/alg/pymod/export_non_standard.cc index 50b1fd2d6e169e31ff5e192e03b19b98541bf89d..8d485b169ecbd290483daf5f0ab5ca3e06e8da80 100644 --- a/modules/conop/pymod/export_non_standard.cc +++ b/modules/mol/alg/pymod/export_non_standard.cc @@ -18,11 +18,11 @@ //------------------------------------------------------------------------------ #include <boost/python.hpp> #include <ost/mol/mol.hh> -#include <ost/conop/nonstandard.hh> +#include <ost/mol/alg/nonstandard.hh> using namespace boost::python; -using namespace ost::conop; +using namespace ost::mol::alg; using namespace ost::mol; object copy_conserved_handle(ResidueHandle src_res, ResidueHandle dst_res, diff --git a/modules/mol/alg/pymod/wrap_mol_alg.cc b/modules/mol/alg/pymod/wrap_mol_alg.cc index d03e2d7b2939b57dd9c244d9776f660823179296..51a8284045df4cc1ee0ef674dca287fe67b0df36 100644 --- a/modules/mol/alg/pymod/wrap_mol_alg.cc +++ b/modules/mol/alg/pymod/wrap_mol_alg.cc @@ -39,6 +39,8 @@ void export_svdSuperPose(); void export_TrajectoryAnalysis(); void export_StructureAnalysis(); void export_Clash(); +void export_NonStandard(); +void export_Molck(); void export_contact_overlap(); void export_accessibility(); void export_sec_struct(); @@ -265,6 +267,8 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) export_TrajectoryAnalysis(); export_StructureAnalysis(); export_Clash(); + export_NonStandard(); + export_Molck(); export_contact_overlap(); export_accessibility(); export_sec_struct(); diff --git a/modules/mol/alg/src/CMakeLists.txt b/modules/mol/alg/src/CMakeLists.txt index d473247e656e17639bc903ce900cfafb42cea83c..ef2ae22ae10ccc9172285b7d79e96b298c7d6fec 100644 --- a/modules/mol/alg/src/CMakeLists.txt +++ b/modules/mol/alg/src/CMakeLists.txt @@ -20,6 +20,8 @@ set(OST_MOL_ALG_HEADERS similarity_matrix.hh accessibility.hh sec_struct.hh + nonstandard.hh + molck.hh ) set(OST_MOL_ALG_SOURCES @@ -43,6 +45,8 @@ set(OST_MOL_ALG_SOURCES similarity_matrix.cc accessibility.cc sec_struct.cc + nonstandard.cc + molck.cc ) set(MOL_ALG_DEPS ost_mol ost_seq) @@ -58,7 +62,7 @@ if (ENABLE_IMG) entity_to_density.cc ) - set(MOL_ALG_DEPS ${MOL_ALG_DEPS} ost_img ost_img_alg ost_seq_alg) + set(MOL_ALG_DEPS ${MOL_ALG_DEPS} ost_img ost_img_alg ost_seq_alg ost_conop) endif() executable(NAME lddt SOURCES lddt.cc diff --git a/modules/mol/alg/src/molck.cc b/modules/mol/alg/src/molck.cc new file mode 100644 index 0000000000000000000000000000000000000000..e1c35083fca0a424e1b05a7eba5973e8d5e7ead3 --- /dev/null +++ b/modules/mol/alg/src/molck.cc @@ -0,0 +1,164 @@ +#include <ost/mol/xcs_editor.hh> +#include <ost/mol/alg/nonstandard.hh> +#include <ost/conop/model_check.hh> +#include <ost/conop/amino_acids.hh> +#include <ost/mol/alg/molck.hh> + +using namespace ost::conop; +using namespace ost::mol; + + +void ost::mol::alg::MapNonStandardResidues(EntityHandle& ent, CompoundLibPtr lib) { + // TODO: Maybe it is possible to make it in-place operation + EntityHandle new_ent=CreateEntity(); + ChainHandleList chains=ent.GetChainList(); + XCSEditor new_edi=new_ent.EditXCS(); + for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { + ChainHandle new_chain = new_edi.InsertChain(c->GetName()); + ResidueHandleList residues = c->GetResidueList(); + for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { + AminoAcid aa = ResidueToAminoAcid(*r); + if (aa!=XXX) { + ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); + AtomHandleList atoms = r->GetAtomList(); + for (AtomHandleList::const_iterator a=atoms.begin();a!=atoms.end();++a) { + new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); + } + continue; + } else { + CompoundPtr compound=lib->FindCompound(r->GetName(),Compound::PDB); + if (!compound || !compound->IsPeptideLinking() || compound->GetChemClass()==ChemClass::D_PEPTIDE_LINKING || + OneLetterCodeToAminoAcid(compound->GetOneLetterCode())==XXX) { + ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); + AtomHandleList atoms = r->GetAtomList(); + for (AtomHandleList::const_iterator a=atoms.begin();a!=atoms.end();++a) { + new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); + } + continue; + } + ResidueHandle dest_res = new_edi.AppendResidue(new_chain,OneLetterCodeToResidueName(compound->GetOneLetterCode()),r->GetNumber()); + ost::mol::alg::CopyResidue(*r,dest_res,new_edi,lib); + } + } + } + ent = new_ent; +} + +void ost::mol::alg::RemoveAtoms( + EntityHandle& ent, + CompoundLibPtr lib, + bool rm_unk_atoms, + bool rm_non_std, + bool rm_hyd_atoms, + bool rm_oxt_atoms, + bool rm_zero_occ_atoms, + bool colored /*=true*/){ + XCSEditor edi=ent.EditXCS(); + Diagnostics diags; + Checker checker(lib, ent, diags); + if (rm_zero_occ_atoms) { + std::cerr << "removing atoms with zero occupancy" << std::endl; + int zremoved=0; + AtomHandleList zero_atoms=checker.GetZeroOccupancy(); + for (AtomHandleList::const_iterator i=zero_atoms.begin(), e=zero_atoms.end(); i!=e; ++i) { + edi.DeleteAtom(*i); + zremoved++; + } + std::cerr << " --> removed " << zremoved << " hydrogen atoms" << std::endl; + } + + if (rm_hyd_atoms) { + std::cerr << "removing hydrogen atoms" << std::endl; + int hremoved=0; + AtomHandleList hyd_atoms=checker.GetHydrogens(); + for (AtomHandleList::const_iterator i=hyd_atoms.begin(), e=hyd_atoms.end(); i!=e; ++i) { + edi.DeleteAtom(*i); + hremoved++; + } + std::cerr << " --> removed " << hremoved << " hydrogen atoms" << std::endl; + } + + if (rm_oxt_atoms) { + std::cerr << "removing OXT atoms" << std::endl; + int oremoved=0; + AtomHandleList atoms=ent.GetAtomList(); + for (AtomHandleList::const_iterator i=atoms.begin(), e=atoms.end(); i!=e; ++i) { + if (i->GetName()=="OXT") { + edi.DeleteAtom(*i); + oremoved++; + } + } + std::cerr << " --> removed " << oremoved << " OXT atoms" << std::endl; + } + + checker.CheckForCompleteness(); + checker.CheckForUnknownAtoms(); + checker.CheckForNonStandard(); + for (Diagnostics::const_diag_iterator + j = diags.diags_begin(), e = diags.diags_end(); j != e; ++j) { + const Diag* diag=*j; + std::cerr << diag->Format(colored); + switch (diag->GetType()) { + case DIAG_UNK_ATOM: + if (rm_unk_atoms) { + edi.DeleteAtom(diag->GetAtom(0)); + std::cerr << " --> removed "; + } + break; + case DIAG_NONSTD_RESIDUE: + if (rm_non_std) { + edi.DeleteResidue(diag->GetResidue(0)); + std::cerr << " --> removed "; + } + break; + default: + break; + } + std::cerr << std::endl; + } +} + +void ost::mol::alg::CleanUpElementColumn(EntityHandle& ent, CompoundLibPtr lib){ + ChainHandleList chains=ent.GetChainList(); + for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { + ResidueHandleList residues = c->GetResidueList(); + for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { + CompoundPtr compound=lib->FindCompound(r->GetName(),Compound::PDB); + AtomHandleList atoms=r->GetAtomList(); + if (!compound) { + for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { + j->SetElement(""); + } + continue; + } + for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { + int specindx=compound->GetAtomSpecIndex(j->GetName()); + if (specindx!=-1) { + j->SetElement(compound->GetAtomSpecs()[specindx].element); + } else { + j->SetElement(""); + } + } + } + } +} + +void ost::mol::alg::Molck( + ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib, + const ost::mol::alg::MolckSettings& settings=ost::mol::alg::MolckSettings()){ + if (settings.map_nonstd_res) { + ost::mol::alg::MapNonStandardResidues(ent, lib); + } + ost::mol::alg::RemoveAtoms(ent, + lib, + settings.rm_unk_atoms, + settings.rm_non_std, + settings.rm_hyd_atoms, + settings.rm_oxt_atoms, + settings.rm_zero_occ_atoms, + settings.colored); + if (settings.assign_elem) { + ost::mol::alg::CleanUpElementColumn(ent, lib); + } +} \ No newline at end of file diff --git a/modules/mol/alg/src/molck.hh b/modules/mol/alg/src/molck.hh new file mode 100644 index 0000000000000000000000000000000000000000..d61e0218678bd07fba0e828fdf0f901dd69977e5 --- /dev/null +++ b/modules/mol/alg/src/molck.hh @@ -0,0 +1,104 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#ifndef OST_MOL_ALG_MOLCK_HH +#define OST_MOL_ALG_MOLCK_HH + +#include <string> +#include <ost/mol/entity_handle.hh> +#include <ost/conop/compound_lib.hh> + +namespace { + inline std::string BoolToString(bool b) + { + return b ? "True" : "False"; + } +} + +namespace ost { namespace mol{ namespace alg { + +struct MolckSettings; + +struct MolckSettings{ + + bool rm_unk_atoms; + bool rm_non_std; + bool rm_hyd_atoms; + bool rm_oxt_atoms; + bool rm_zero_occ_atoms; + bool colored; + bool map_nonstd_res; + bool assign_elem; + + MolckSettings(bool init_rm_unk_atoms=false, + bool init_rm_non_std=false, + bool init_rm_hyd_atoms=true, + bool init_rm_oxt_atoms=false, + bool init_rm_zero_occ_atoms=false, + bool init_colored=false, + bool init_map_nonstd_res=true, + bool init_assign_elem=true): + rm_unk_atoms(init_rm_unk_atoms), // Remove unknown and atoms not following the nomenclature + rm_non_std(init_rm_non_std), // Remove all residues not one of the 20 standard amino acids + rm_hyd_atoms(init_rm_hyd_atoms), // Remove hydrogen atoms + rm_oxt_atoms(init_rm_oxt_atoms), // Remove terminal oxygens + rm_zero_occ_atoms(init_rm_zero_occ_atoms), // Remove atoms with zero occupancy + colored(init_colored), // Whether the output should be colored + map_nonstd_res(init_map_nonstd_res), // Map non standard residues back to standard ones (e.g.: MSE->MET,SEP->SER,etc.) + assign_elem(init_assign_elem){} // Clean up element column + + public: + std::string ToString(){ + std::string rep = "MolckSettings(rm_unk_atoms=" + BoolToString(rm_unk_atoms) + + ", rm_unk_atoms=" + BoolToString(rm_unk_atoms) + + ", rm_non_std=" + BoolToString(rm_non_std) + + ", rm_hyd_atoms=" + BoolToString(rm_hyd_atoms) + + ", rm_oxt_atoms=" + BoolToString(rm_oxt_atoms) + + ", rm_zero_occ_atoms=" + BoolToString(rm_zero_occ_atoms) + + ", colored=" + BoolToString(colored) + + ", map_nonstd_res=" + BoolToString(map_nonstd_res) + + ", assign_elem=" + BoolToString(assign_elem) + + ")"; + return rep; + } + +}; + +void MapNonStandardResidues(ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib); + +void RemoveAtoms(ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib, + bool rm_unk_atoms, + bool rm_non_std, + bool rm_hyd_atoms, + bool rm_oxt_atoms, + bool rm_zero_occ_atoms, + bool colored=true); + +void CleanUpElementColumn(ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib); + +void Molck(ost::mol::EntityHandle& ent, + ost::conop::CompoundLibPtr lib, + const MolckSettings& settings); + + +}}} // namespace + +#endif diff --git a/modules/conop/src/nonstandard.cc b/modules/mol/alg/src/nonstandard.cc similarity index 99% rename from modules/conop/src/nonstandard.cc rename to modules/mol/alg/src/nonstandard.cc index eaf06680d087c05080f0e578828c7bad3fd68c66..b51c582adcdd81fb5b02cff834c18c218b7b317d 100644 --- a/modules/conop/src/nonstandard.cc +++ b/modules/mol/alg/src/nonstandard.cc @@ -32,7 +32,7 @@ using namespace ost::mol; using namespace ost; using namespace ost::conop; -namespace ost { namespace conop { +namespace ost { namespace mol { namespace alg { bool CopyResidue(ResidueHandle src_res, ResidueHandle dst_res, XCSEditor& edi) @@ -234,4 +234,4 @@ bool CopyNonConserved(ResidueHandle src_res, ResidueHandle dst_res, -}} +}}} diff --git a/modules/conop/src/nonstandard.hh b/modules/mol/alg/src/nonstandard.hh similarity index 78% rename from modules/conop/src/nonstandard.hh rename to modules/mol/alg/src/nonstandard.hh index a57022def8ef7436889491c7ae6714c6e9a69d6e..1dce74fd4c2cf102f985f06d22c2723e93053075 100644 --- a/modules/conop/src/nonstandard.hh +++ b/modules/mol/alg/src/nonstandard.hh @@ -17,30 +17,31 @@ // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA //------------------------------------------------------------------------------ -#ifndef OST_CONOP_NONSTANDARD_HH -#define OST_CONOP_NONSTANDARD_HH +#ifndef OST_MOL_ALG_NONSTANDARD_HH +#define OST_MOL_ALG_NONSTANDARD_HH /* Author: Marco Biasini, Juergen Haas */ #include "module_config.hh" -#include "compound_lib.hh" +#include <ost/conop/compound_lib.hh> -namespace ost { namespace conop { +namespace ost { namespace mol { namespace alg { /// \brief copies all atom of src_res to dst_res, gets compound lib from builder -bool DLLEXPORT_OST_CONOP CopyResidue(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyResidue(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi); /// \brief copies all atom of src_res to dst_res, requires a compound lib -bool DLLEXPORT_OST_CONOP CopyResidue(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyResidue(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, - ost::mol::XCSEditor& edi, CompoundLibPtr lib); + ost::mol::XCSEditor& edi, + ost::conop::CompoundLibPtr lib); /// \brief copies all atom of src_res to dst_res @@ -49,7 +50,7 @@ bool DLLEXPORT_OST_CONOP CopyResidue(ost::mol::ResidueHandle src_res, /// \param edi /// \param has_cbeta will be set to true if the src_res has a cbeta and the /// dst_residue is not a glycine -bool DLLEXPORT_OST_CONOP CopyIdentical(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyIdentical(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, bool& has_cbeta); @@ -63,10 +64,11 @@ bool DLLEXPORT_OST_CONOP CopyIdentical(ost::mol::ResidueHandle src_res, -bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyConserved(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, - bool& has_cbeta, CompoundLibPtr lib); + bool& has_cbeta, + ost::conop::CompoundLibPtr lib); /// \brief copies atoms of src_res to dst_res, requires compound lib /// @@ -77,7 +79,7 @@ bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, -bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyConserved(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, bool& has_cbeta); @@ -89,14 +91,14 @@ bool DLLEXPORT_OST_CONOP CopyConserved(ost::mol::ResidueHandle src_res, /// only copied if dst_res is not equal to glycine. -bool DLLEXPORT_OST_CONOP CopyNonConserved(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyNonConserved(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, bool& has_cbeta); /// \brief construct dst_res from src_res when src_res is an MSE -bool DLLEXPORT_OST_CONOP CopyMSE(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyMSE(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, bool& has_cbeta); @@ -104,13 +106,14 @@ bool DLLEXPORT_OST_CONOP CopyMSE(ost::mol::ResidueHandle src_res, /// \brief construct a dst_res with only atoms matching the standard aminoacid /// from src_res when src_res is an is modified -bool DLLEXPORT_OST_CONOP CopyModified(ost::mol::ResidueHandle src_res, +bool DLLEXPORT_OST_MOL_ALG CopyModified(ost::mol::ResidueHandle src_res, ost::mol::ResidueHandle dst_res, ost::mol::XCSEditor& edi, - bool& has_cbeta, CompoundLibPtr lib); + bool& has_cbeta, + ost::conop::CompoundLibPtr lib); -}} +}}} #endif diff --git a/modules/mol/alg/tests/CMakeLists.txt b/modules/mol/alg/tests/CMakeLists.txt index 0002d71492e639d21d456a93f94950db59aa1afe..0715910b1b9f9afac4de6e94169456ba21f80a3c 100644 --- a/modules/mol/alg/tests/CMakeLists.txt +++ b/modules/mol/alg/tests/CMakeLists.txt @@ -11,7 +11,8 @@ set(OST_MOL_ALG_UNIT_TESTS ) if (COMPOUND_LIB) - list(APPEND OST_MOL_ALG_UNIT_TESTS test_qsscoring.py) + list(APPEND OST_MOL_ALG_UNIT_TESTS test_qsscoring.py + test_nonstandard.py) endif() ost_unittest(MODULE mol_alg SOURCES "${OST_MOL_ALG_UNIT_TESTS}" LINK ost_io) diff --git a/modules/conop/tests/test_nonstandard.py b/modules/mol/alg/tests/test_nonstandard.py similarity index 78% rename from modules/conop/tests/test_nonstandard.py rename to modules/mol/alg/tests/test_nonstandard.py index 566db9a2b1902339828abaec67e7e35713fdf47a..c81c92479a4150c76aa89ff92409a4dc7656e25f 100644 --- a/modules/conop/tests/test_nonstandard.py +++ b/modules/mol/alg/tests/test_nonstandard.py @@ -1,5 +1,5 @@ import unittest -from ost import conop, io, mol +from ost import io, mol class TestNonStandard(unittest.TestCase): @@ -12,7 +12,7 @@ class TestNonStandard(unittest.TestCase): c=ed.InsertChain('A') ed.AppendResidue(c, 'SER') - err, has_cbeta=conop.CopyConserved(tpl.residues[0], new_hdl.residues[0], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[0], new_hdl.residues[0], ed) self.assertTrue(err) self.assertTrue(has_cbeta) residues=new_hdl.residues @@ -36,16 +36,16 @@ class TestNonStandard(unittest.TestCase): ed.AppendResidue(c, 'GLY') ed.AppendResidue(c, 'GLY') ed.AppendResidue(c, 'HIS') - err, has_cbeta=conop.CopyConserved(tpl.residues[0], new_hdl.residues[0], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[0], new_hdl.residues[0], ed) self.assertTrue(has_cbeta) self.assertTrue(err) - err, has_cbeta=conop.CopyConserved(tpl.residues[1], new_hdl.residues[1], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[1], new_hdl.residues[1], ed) self.assertFalse(has_cbeta) self.assertTrue(err) - err, has_cbeta=conop.CopyConserved(tpl.residues[2], new_hdl.residues[2], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[2], new_hdl.residues[2], ed) self.assertFalse(has_cbeta) self.assertTrue(err) - err, has_cbeta=conop.CopyConserved(tpl.residues[3], new_hdl.residues[3], ed) + err, has_cbeta=mol.alg.CopyConserved(tpl.residues[3], new_hdl.residues[3], ed) self.assertTrue(has_cbeta) self.assertTrue(err) @@ -72,28 +72,28 @@ class TestNonStandard(unittest.TestCase): ed.AppendResidue(c, 'MET') # MET to MET - err =conop.CopyResidue(tpl.residues[0], new_hdl.residues[0], ed) + err =mol.alg.CopyResidue(tpl.residues[0], new_hdl.residues[0], ed) self.assertTrue(err) #GLY to GLY - err =conop.CopyResidue(tpl.residues[1], new_hdl.residues[1], ed) + err =mol.alg.CopyResidue(tpl.residues[1], new_hdl.residues[1], ed) self.assertTrue(err) # GLY to GLY - err =conop.CopyResidue(tpl.residues[2], new_hdl.residues[2], ed) + err =mol.alg.CopyResidue(tpl.residues[2], new_hdl.residues[2], ed) self.assertTrue(err) #now we copy a HIS to a HIS - err =conop.CopyResidue(tpl.residues[3], new_hdl.residues[3], ed) + err =mol.alg.CopyResidue(tpl.residues[3], new_hdl.residues[3], ed) self.assertTrue(err) # copy a GLY to a HIS - err, has_cbeta=conop.CopyNonConserved(tpl.residues[1], new_hdl.residues[4], ed) + err, has_cbeta=mol.alg.CopyNonConserved(tpl.residues[1], new_hdl.residues[4], ed) self.assertFalse(has_cbeta) # copy a MET to a GLY - err =conop.CopyResidue(tpl.residues[0], new_hdl.residues[5], ed) + err =mol.alg.CopyResidue(tpl.residues[0], new_hdl.residues[5], ed) self.assertFalse(err) # copy a MET to a HIS - err =conop.CopyResidue(tpl.residues[0], new_hdl.residues[6], ed) + err =mol.alg.CopyResidue(tpl.residues[0], new_hdl.residues[6], ed) self.assertFalse(err) # copy a GLY to a MET with adding CB - err=conop.CopyResidue(tpl.residues[1], new_hdl.residues[7], ed) + err=mol.alg.CopyResidue(tpl.residues[1], new_hdl.residues[7], ed) self.assertFalse(err) residues=new_hdl.residues diff --git a/modules/conop/tests/testfiles/cbeta.pdb b/modules/mol/alg/tests/testfiles/cbeta.pdb similarity index 100% rename from modules/conop/tests/testfiles/cbeta.pdb rename to modules/mol/alg/tests/testfiles/cbeta.pdb diff --git a/modules/conop/tests/testfiles/sep.pdb b/modules/mol/alg/tests/testfiles/sep.pdb similarity index 100% rename from modules/conop/tests/testfiles/sep.pdb rename to modules/mol/alg/tests/testfiles/sep.pdb diff --git a/tools/molck/CMakeLists.txt b/tools/molck/CMakeLists.txt index 4e1dc2fb79b73e90022dd2f546a5011c98eca118..c76f1fd71b4532bf32b2cfcfa5dc34a0b7fb00ff 100644 --- a/tools/molck/CMakeLists.txt +++ b/tools/molck/CMakeLists.txt @@ -1,4 +1,4 @@ if (NOT WIN32) executable(NAME molck SOURCES main.cc - DEPENDS_ON ost_io STATIC) + DEPENDS_ON ost_io ost_mol_alg STATIC) endif(NOT WIN32) diff --git a/tools/molck/main.cc b/tools/molck/main.cc index 2a445620212e97165e53b7f9bf3592ecd5a21387..ef2d77c207213957a20d2d2d460da74fa0280b73 100644 --- a/tools/molck/main.cc +++ b/tools/molck/main.cc @@ -5,25 +5,52 @@ #include <ost/base.hh> #include <ost/boost_filesystem_helper.hh> #include <ost/platform.hh> -#include <ost/conop/model_check.hh> #include <ost/conop/conop.hh> -#include <ost/conop/amino_acids.hh> -#include <ost/io/mol/pdb_reader.hh> #include <ost/io/mol/pdb_writer.hh> +#include <ost/io/mol/pdb_reader.hh> #include <ost/io/mol/mmcif_reader.hh> #include <ost/io/io_exception.hh> -#include <ost/conop/nonstandard.hh> +#include <ost/mol/alg/molck.hh> #if defined(__APPLE__) #include <mach-o/dyld.h> #endif using namespace ost; -using namespace ost::conop; using namespace ost::mol; using namespace ost::io; namespace po=boost::program_options; namespace fs=boost::filesystem; +const char* USAGE= +"this is molck - the molecule checker\n" +"usage: molck [options] file1.pdb [file2.pdb [...]]\n" +"options \n" +" --complib=path location of the compound library file. If not provided, the \n" +" following locations are searched in this order: \n" +" 1. Working directory, 2. OpenStructure standard library location (if the \n" +" executable is part of a standard OpenStructure installation) \n" +" --rm=<a>,<b> remove atoms and residues matching some criteria \n" +" zeroocc - Remove atoms with zero occupancy \n" +" hyd - Remove hydrogen atoms \n" +" oxt - Remove terminal oxygens \n" +" nonstd - Remove all residues not one of the 20 standard amino acids \n" +" unk - Remove unknown and atoms not following the nomenclature\n" +" --fix-ele clean up element column\n" +" --stdout write cleaned file(s) to stdout \n" +" --out=filename write cleaned file(s) to disk. % characters in the filename are \n" +" replaced with the basename of the input file without extension. \n" +" Default: %-molcked.pdb \n" +" --color=auto|on|off \n" +" whether output should be colored\n" +" --map-nonstd maps modified residues back to the parent amino acid, for example\n" +" MSE -> MET, SEP -> SER.\n"; + +void usage() +{ + std::cerr << USAGE << std::endl; + exit(0); +} + EntityHandle load_x(const String& file, const IOProfile& profile) { try { @@ -53,20 +80,20 @@ EntityHandle load_x(const String& file, const IOProfile& profile) } // load compound library, exiting if it could not be found... -CompoundLibPtr load_compound_lib(const String& custom_path) +ost::conop::CompoundLibPtr load_compound_lib(const String& custom_path) { if (custom_path!="") { if (fs::exists(custom_path)) { - return CompoundLib::Load(custom_path); + return ost::conop::CompoundLib::Load(custom_path); } else { std::cerr << "Could not find compounds.chemlib at the provided location, trying other options" << std::endl; } } if (fs::exists("compounds.chemlib")) { - return CompoundLib::Load("compounds.chemlib"); + return ost::conop::CompoundLib::Load("compounds.chemlib"); } char result[ 1024 ]; - CompoundLibPtr lib; + ost::conop::CompoundLibPtr lib; String exe_path; #if defined(__APPLE__) uint32_t size=1023; @@ -89,45 +116,16 @@ CompoundLibPtr load_compound_lib(const String& custom_path) String share_path_string=BFPathToString(share_path); if (fs::exists(share_path_string)) { - return CompoundLib::Load(share_path_string); + return ost::conop::CompoundLib::Load(share_path_string); } } if (!lib) { std::cerr << "Could not load compounds.chemlib" << std::endl; exit(-1); } - return CompoundLibPtr(); + return ost::conop::CompoundLibPtr(); } -const char* USAGE= -"this is molck - the molecule checker\n" -"usage: molck [options] file1.pdb [file2.pdb [...]]\n" -"options \n" -" --complib=path location of the compound library file. If not provided, the \n" -" following locations are searched in this order: \n" -" 1. Working directory, 2. OpenStructure standard library location (if the \n" -" executable is part of a standard OpenStructure installation) \n" -" --rm=<a>,<b> remove atoms and residues matching some criteria \n" -" zeroocc - Remove atoms with zero occupancy \n" -" hyd - Remove hydrogen atoms \n" -" oxt - Remove terminal oxygens \n" -" nonstd - Remove all residues not one of the 20 standard amino acids \n" -" unk - Remove unknown and atoms not following the nomenclature\n" -" --fix-ele clean up element column\n" -" --stdout write cleaned file(s) to stdout \n" -" --out=filename write cleaned file(s) to disk. % characters in the filename are \n" -" replaced with the basename of the input file without extension. \n" -" Default: %-molcked.pdb \n" -" --color=auto|on|off \n" -" whether output should be colored\n" -" --map-nonstd maps modified residues back to the parent amino acid, for example\n" -" MSE -> MET, SEP -> SER.\n"; - -void usage() -{ - std::cerr << USAGE << std::endl; - exit(0); -} int main(int argc, char *argv[]) { @@ -136,19 +134,12 @@ int main(int argc, char *argv[]) } IOProfile prof; prof.fault_tolerant=true; + ost::mol::alg::MolckSettings settings; String rm; String color; - bool colored = false; - bool rm_unk_atoms=false; - bool rm_hyd_atoms=false; - bool rm_non_std=false; - bool rm_oxt_atoms=false; - bool rm_zero_occ_atoms=false; bool write_to_stdout = false; bool write_to_file = false; - bool map_nonstd_res = false; - bool assign_elem = false; String output_blueprint_string; String custom_path=""; @@ -196,24 +187,24 @@ int main(int argc, char *argv[]) output_blueprint_string = vm["out"].as<String>(); } if (vm.count("map-nonstd")) { - map_nonstd_res = true; + settings.map_nonstd_res = true; } if (vm.count("fix-ele")) { - assign_elem = true; + settings.assign_elem = true; } std::vector<StringRef> rms=StringRef(rm.c_str(), rm.size()).split(','); for (size_t i=0; i<rms.size(); ++i) { if (rms[i] == StringRef("unk", 3)) { - rm_unk_atoms = true; + settings.rm_unk_atoms = true; } else if (rms[i] == StringRef("nonstd", 6)) { - rm_non_std = true; + settings.rm_non_std = true; } else if (rms[i] == StringRef("hyd", 3)) { - rm_hyd_atoms = true; + settings.rm_hyd_atoms = true; } else if (rms[i] == StringRef("oxt", 3)) { - rm_oxt_atoms = true; + settings.rm_oxt_atoms = true; } else if (rms[i] == StringRef("zeroocc", 7)) { - rm_zero_occ_atoms = true; + settings.rm_zero_occ_atoms = true; } else { std::cerr << "unknown value to remove '" << rms[i] << "'" << std::endl; usage(); @@ -221,144 +212,23 @@ int main(int argc, char *argv[]) } } if (color=="auto") { - colored = isatty(STDERR_FILENO); + settings.colored = isatty(STDERR_FILENO); } else if (color == "on" || color == "1" || color == "yes") { - colored = true; + settings.colored = true; } else if (color == "off" || color == "0" || color == "no") { - colored = false; + settings.colored = false; } else { usage(); exit(-1); } - CompoundLibPtr lib=load_compound_lib(custom_path); + ost::conop::CompoundLibPtr lib=load_compound_lib(custom_path); for (unsigned int i = 0; i < files.size(); ++i) { EntityHandle ent=load_x(files[i], prof); if (!ent.IsValid()) { continue; } - if (map_nonstd_res) { - EntityHandle new_ent=CreateEntity(); - ChainHandleList chains=ent.GetChainList(); - XCSEditor new_edi=new_ent.EditXCS(); - for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { - ChainHandle new_chain = new_edi.InsertChain(c->GetName()); - ResidueHandleList residues = c->GetResidueList(); - for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { - AminoAcid aa = ResidueToAminoAcid(*r); - if (aa!=XXX) { - ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); - AtomHandleList atoms = r->GetAtomList(); - for (AtomHandleList::const_iterator a=atoms.begin();a!=atoms.end();++a) { - new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); - } - continue; - } else { - CompoundPtr compound=lib->FindCompound(r->GetName(),Compound::PDB); - if (!compound || !compound->IsPeptideLinking() || compound->GetChemClass()==ChemClass::D_PEPTIDE_LINKING || - OneLetterCodeToAminoAcid(compound->GetOneLetterCode())==XXX) { - ResidueHandle dest_res = new_edi.AppendResidue(new_chain,r->GetName(),r->GetNumber()); - AtomHandleList atoms = r->GetAtomList(); - for (AtomHandleList::const_iterator a=atoms.begin();a!=atoms.end();++a) { - new_edi.InsertAtom(dest_res,a->GetName(),a->GetPos(),a->GetElement(),a->GetOccupancy(),a->GetBFactor(),a->IsHetAtom()); - } - continue; - } - ResidueHandle dest_res = new_edi.AppendResidue(new_chain,OneLetterCodeToResidueName(compound->GetOneLetterCode()),r->GetNumber()); - CopyResidue(*r,dest_res,new_edi,lib); - } - } - } - ent=new_ent; - } - - XCSEditor edi=ent.EditXCS(); - Diagnostics diags; - Checker checker(lib, ent, diags); - if (rm_zero_occ_atoms) { - std::cerr << "removing atoms with zero occupancy" << std::endl; - int zremoved=0; - AtomHandleList zero_atoms=checker.GetZeroOccupancy(); - for (AtomHandleList::const_iterator i=zero_atoms.begin(), e=zero_atoms.end(); i!=e; ++i) { - edi.DeleteAtom(*i); - zremoved++; - } - std::cerr << " --> removed " << zremoved << " hydrogen atoms" << std::endl; - } - - if (rm_hyd_atoms) { - std::cerr << "removing hydrogen atoms" << std::endl; - int hremoved=0; - AtomHandleList hyd_atoms=checker.GetHydrogens(); - for (AtomHandleList::const_iterator i=hyd_atoms.begin(), e=hyd_atoms.end(); i!=e; ++i) { - edi.DeleteAtom(*i); - hremoved++; - } - std::cerr << " --> removed " << hremoved << " hydrogen atoms" << std::endl; - } - if (rm_oxt_atoms) { - std::cerr << "removing OXT atoms" << std::endl; - int oremoved=0; - AtomHandleList atoms=ent.GetAtomList(); - for (AtomHandleList::const_iterator i=atoms.begin(), e=atoms.end(); i!=e; ++i) { - if (i->GetName()=="OXT") { - edi.DeleteAtom(*i); - oremoved++; - } - } - std::cerr << " --> removed " << oremoved << " OXT atoms" << std::endl; - } - - checker.CheckForCompleteness(); - checker.CheckForUnknownAtoms(); - checker.CheckForNonStandard(); - for (Diagnostics::const_diag_iterator - j = diags.diags_begin(), e = diags.diags_end(); j != e; ++j) { - const Diag* diag=*j; - std::cerr << diag->Format(colored); - switch (diag->GetType()) { - case DIAG_UNK_ATOM: - if (rm_unk_atoms) { - edi.DeleteAtom(diag->GetAtom(0)); - std::cerr << " --> removed "; - } - break; - case DIAG_NONSTD_RESIDUE: - if (rm_non_std) { - edi.DeleteResidue(diag->GetResidue(0)); - std::cerr << " --> removed "; - } - break; - default: - break; - } - std::cerr << std::endl; - } - - if (assign_elem) { - ChainHandleList chains=ent.GetChainList(); - for (ChainHandleList::const_iterator c=chains.begin();c!=chains.end();++c) { - ResidueHandleList residues = c->GetResidueList(); - for (ResidueHandleList::const_iterator r=residues.begin();r!=residues.end();++r) { - CompoundPtr compound=lib->FindCompound(r->GetName(),Compound::PDB); - AtomHandleList atoms=r->GetAtomList(); - if (!compound) { - for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { - j->SetElement(""); - } - continue; - } - for (AtomHandleList::iterator j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { - int specindx=compound->GetAtomSpecIndex(j->GetName()); - if (specindx!=-1) { - j->SetElement(compound->GetAtomSpecs()[specindx].element); - } else { - j->SetElement(""); - } - } - } - } - } + ost::mol::alg::Molck(ent, lib, settings); if (write_to_stdout) { PDBWriter writer(std::cout, prof);