From 6347e9bfeba9765fb51e563546862a02c2433c4a Mon Sep 17 00:00:00 2001 From: Marco Biasini <marco.biasini@unibas.ch> Date: Sun, 3 Feb 2013 12:05:45 +0100 Subject: [PATCH] move the bulk of PDBize to C++ --- modules/geom/pymod/export_mat4.cc | 7 ++ modules/geom/src/mat4.hh | 3 + modules/io/pymod/__init__.py | 107 ++---------------- modules/io/tests/test_io_mmcif.py | 6 +- modules/mol/alg/pymod/wrap_mol_alg.cc | 3 + modules/mol/alg/src/CMakeLists.txt | 2 + modules/mol/alg/src/pdbize.cc | 157 ++++++++++++++++++++++++++ modules/mol/alg/src/pdbize.hh | 40 +++++++ 8 files changed, 224 insertions(+), 101 deletions(-) create mode 100644 modules/mol/alg/src/pdbize.cc create mode 100644 modules/mol/alg/src/pdbize.hh diff --git a/modules/geom/pymod/export_mat4.cc b/modules/geom/pymod/export_mat4.cc index ec003a0e3..c457eb261 100644 --- a/modules/geom/pymod/export_mat4.cc +++ b/modules/geom/pymod/export_mat4.cc @@ -18,10 +18,12 @@ //------------------------------------------------------------------------------ #define BOOST_PYTHON_MAX_ARITY 20 #include <boost/python.hpp> +#include <boost/python/suite/indexing/vector_indexing_suite.hpp> #include <boost/python/slice.hpp> using namespace boost::python; #include <ost/geom/geom.hh> +#include <ost/geom/export_helper/vector.hh> using namespace geom; @@ -138,4 +140,9 @@ void export_Mat4() .def("PasteTranslation",&Mat4::PasteTranslation) .add_property("data",mat4_data) ; + + class_<Mat4List>("Mat4List", init<>()) + .def(vector_indexing_suite<Mat4List>()) + .def(geom::VectorAdditions<Mat4List>()) + ; } diff --git a/modules/geom/src/mat4.hh b/modules/geom/src/mat4.hh index 998bbc485..95ca3d632 100644 --- a/modules/geom/src/mat4.hh +++ b/modules/geom/src/mat4.hh @@ -22,6 +22,7 @@ #include <cassert> #include <cstddef> // for size_t #include <ostream> +#include <vector> #include <boost/operators.hpp> @@ -136,6 +137,8 @@ double i32, double i33); }; +typedef std::vector<Mat4> Mat4List; + DLLEXPORT_OST_GEOM std::ostream& operator<<(std::ostream& os, const Mat4& m); } // ns geom diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py index 2d402eeac..741b9572f 100644 --- a/modules/io/pymod/__init__.py +++ b/modules/io/pymod/__init__.py @@ -19,7 +19,7 @@ import os, tempfile, ftplib, httplib from _ost_io import * -from ost import mol, geom, conop +from ost import mol, geom, conop, seq profiles=None @@ -358,29 +358,12 @@ def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=Non # MMCifInfoBioUnit.PDBize, since this function is not included in SPHINX. def _PDBize(biounit, asu, seqres=None, min_polymer_size=10, transformation=False): - def _CopyAtoms(src_res, dst_res, edi, trans=geom.Mat4()): - atom_pos_wrong = False - for atom in src_res.atoms: - tmp_pos = geom.Vec4(atom.pos) - new_atom=edi.InsertAtom(dst_res, atom.name, geom.Vec3(trans*tmp_pos), - element=atom.element, - occupancy=atom.occupancy, - b_factor=atom.b_factor, - is_hetatm=atom.is_hetatom) - for p in range(0,3): - if new_atom.pos[p] <= -1000: - atom_pos_wrong = True - elif new_atom.pos[p] >= 10000: - atom_pos_wrong = True - return atom_pos_wrong - - chain_names='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz' # create list of operations # for cartesian products, operations are stored in a list, multiplied with # the next list of operations and re-stored... until all lists of operations # are multiplied in an all-against-all manner. operations = biounit.GetOperations() - trans_matrices = list() + trans_matrices = geom.Mat4List() if len(operations) > 0: for op in operations[0]: rot = geom.Mat4() @@ -407,86 +390,14 @@ def _PDBize(biounit, asu, seqres=None, min_polymer_size=10, assu = asu.Select('cname=' + ','.join(biounit.GetChainList())) # use each transformation on the view, store as entity and transform, PDBize # the result while adding everything to one large entity - pdb_bu = mol.CreateEntity() - edi = pdb_bu.EditXCS(mol.BUFFERED_EDIT) - cur_chain_name = 0 - water_chain = mol.ChainHandle() - ligand_chain = mol.ChainHandle() - a_pos_wrong = False - for tr in trans_matrices: - # do a PDBize, add each new entity to the end product - for chain in assu.chains: - residue_count = len(chain.residues) - if seqres: - seqres_chain = seqres.FindSequence(chain.name) - if seqres_chain.IsValid(): - residue_count = len(seqres_chain) - if chain.is_polymer and residue_count >= min_polymer_size: - if len(chain_names) == cur_chain_name: - raise RuntimeError('Running out of chain names') - new_chain = edi.InsertChain(chain_names[cur_chain_name]) - cur_chain_name += 1 - edi.SetChainDescription(new_chain, chain.description) - edi.SetChainType(new_chain, chain.type) - new_chain.SetStringProp('original_name', chain.name) - if chain.HasProp("pdb_auth_chain_name"): - new_chain.SetStringProp("pdb_auth_chain_name", - chain.GetStringProp("pdb_auth_chain_name")) - for res in chain.residues: - new_res = edi.AppendResidue(new_chain, res.name, res.number) - a_b = _CopyAtoms(res, new_res, edi, tr) - if not a_pos_wrong: - a_pos_wrong = a_b - elif chain.type == mol.CHAINTYPE_WATER: - if not water_chain.IsValid(): - # water gets '-' as name - water_chain = edi.InsertChain('-') - edi.SetChainDescription(water_chain, chain.description) - edi.SetChainType(water_chain, chain.type) - for res in chain.residues: - new_res = edi.AppendResidue(water_chain, res.name) - new_res.SetStringProp('type', mol.StringFromChainType(chain.type)) - new_res.SetStringProp('description', chain.description) - a_b = _CopyAtoms(res, new_res, edi, tr) - if not a_pos_wrong: - a_pos_wrong = a_b - else: - if not ligand_chain.IsValid(): - # all ligands, put in one chain, are named '_' - ligand_chain = edi.InsertChain('_') - last_rnum = 0 - else: - last_rnum = ligand_chain.residues[-1].number.num - residues=chain.residues - ins_code='\0' - if len(residues)>1: - ins_code='A' - for res in chain.residues: - new_res = edi.AppendResidue(ligand_chain, res.name, - mol.ResNum(last_rnum+1, ins_code)) - new_res.SetStringProp('description', chain.description) - new_res.SetStringProp('type', mol.StringFromChainType(chain.type)) - new_res.SetStringProp("original_name", chain.name) - if chain.HasProp("pdb_auth_chain_name"): - new_res.SetStringProp("pdb_auth_chain_name", - chain.GetStringProp("pdb_auth_chain_name")) - ins_code = chr(ord(ins_code)+1) - _CopyAtoms(res, new_res, edi, tr) - a_b = _CopyAtoms(res, new_res, edi, tr) - if not a_pos_wrong: - a_pos_wrong = a_b - move_to_origin = None - if a_pos_wrong: - start = pdb_bu.bounds.min - move_to_origin = geom.Mat4(1,0,0,(-999 - start[0]), - 0,1,0,(-999 - start[1]), - 0,0,1,(-999 - start[2]), - 0,0,0,1) - edi = pdb_bu.EditXCS(mol.UNBUFFERED_EDIT) - edi.ApplyTransform(move_to_origin) - conop.ConnectAll(pdb_bu) + ss = seqres + if not ss: + ss = seq.SequenceList() + + pdb_bu = mol.alg.PDBize(assu, trans_matrices, ss, min_polymer_size=min_polymer_size, + shift_to_fit=transformation) if transformation: - return pdb_bu, move_to_origin + return pdb_bu, pdb_bu.GetTransformationMatrix() return pdb_bu MMCifInfoBioUnit.PDBize = _PDBize diff --git a/modules/io/tests/test_io_mmcif.py b/modules/io/tests/test_io_mmcif.py index bc8a35aa2..6753b48e2 100644 --- a/modules/io/tests/test_io_mmcif.py +++ b/modules/io/tests/test_io_mmcif.py @@ -171,9 +171,9 @@ class TestMMCifInfo(unittest.TestCase): seqres=True, info=True) pdb_ent, t = info.GetBioUnits()[0].PDBize(ent, transformation=True) - self.assertAlmostEquals(pdb_ent.GetCenterOfAtoms()[0], -915.759, 2) - self.assertAlmostEquals(pdb_ent.GetCenterOfAtoms()[1], -952.345, 2) - self.assertAlmostEquals(pdb_ent.GetCenterOfAtoms()[2], 3221.75, 2) + self.assertAlmostEquals(pdb_ent.GetCenterOfAtoms()[0], -915.759, 1) + self.assertAlmostEquals(pdb_ent.GetCenterOfAtoms()[1], -952.345, 1) + self.assertAlmostEquals(pdb_ent.GetCenterOfAtoms()[2], 3221.75, 1) self.assertEquals(geom.Equal(t, geom.Mat4(1,0,0,-920.462, 0,1,0,-966.654, diff --git a/modules/mol/alg/pymod/wrap_mol_alg.cc b/modules/mol/alg/pymod/wrap_mol_alg.cc index bdf141aa5..f4bbb5f29 100644 --- a/modules/mol/alg/pymod/wrap_mol_alg.cc +++ b/modules/mol/alg/pymod/wrap_mol_alg.cc @@ -24,6 +24,7 @@ #include <ost/mol/alg/superpose_frames.hh> #include <ost/mol/alg/filter_clashes.hh> #include <ost/mol/alg/consistency_checks.hh> +#include <ost/mol/alg/pdbize.hh> #include <ost/export_helper/pair_to_tuple_conv.hh> using namespace boost::python; @@ -168,6 +169,8 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) def("PrintResidueRDMap",&mol::alg::PrintResidueRDMap); def("ResidueNamesMatch",&mol::alg::ResidueNamesMatch); + def("PDBize", &mol::alg::PDBize, (arg("asu"), arg("transformations"), arg("seqres"), + arg("min_polymer_size")=10, arg("shift_to_fit")=false)); } diff --git a/modules/mol/alg/src/CMakeLists.txt b/modules/mol/alg/src/CMakeLists.txt index eca9aa6e8..8f3450e88 100644 --- a/modules/mol/alg/src/CMakeLists.txt +++ b/modules/mol/alg/src/CMakeLists.txt @@ -10,6 +10,7 @@ set(OST_MOL_ALG_HEADERS trajectory_analysis.hh structure_analysis.hh consistency_checks.hh + pdbize.hh ) set(OST_MOL_ALG_SOURCES @@ -23,6 +24,7 @@ set(OST_MOL_ALG_SOURCES trajectory_analysis.cc structure_analysis.cc consistency_checks.cc + pdbize.cc ) set(MOL_ALG_DEPS ost_mol ost_seq) diff --git a/modules/mol/alg/src/pdbize.cc b/modules/mol/alg/src/pdbize.cc new file mode 100644 index 000000000..093abe629 --- /dev/null +++ b/modules/mol/alg/src/pdbize.cc @@ -0,0 +1,157 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#include "pdbize.hh" +#include <ost/mol/residue_handle.hh> +#include <ost/mol/residue_view.hh> +#include <ost/mol/atom_handle.hh> +#include <ost/mol/atom_view.hh> +#include <ost/mol/transfer_connectivity.hh> +#include <ost/mol/chain_handle.hh> +#include <ost/mol/chain_view.hh> +#include <ost/mol/xcs_editor.hh> +#include <ost/seq/sequence_handle.hh> + +namespace ost { namespace mol { namespace alg { + +const char* POLYPEPTIDE_CHAIN_NAMES="ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz"; +const char* LIGAND_CHAIN_NAME="_"; +const char* WATER_CHAIN_NAME="-"; + + +namespace { + +bool copy_atoms(ResidueView src_res, ResidueHandle dst_res, XCSEditor& edi, + const geom::Mat4 transform) { + bool needs_adjustment = false; + for (AtomViewList::const_iterator + i = src_res.GetAtomList().begin(), + e = src_res.GetAtomList().end(); i != e; ++i) { + + AtomHandle new_atom = edi.InsertAtom(dst_res, i->GetName(), + geom::Vec3(transform*i->GetPos()), + i->GetName(), i->GetOccupancy(), + i->GetBFactor(), i->IsHetAtom()); + geom::Vec3 pos = new_atom.GetPos(); + for (int j = 0; j<3; ++j) { + needs_adjustment|= pos[j]<=-1000 || pos[j]>=10000; + } + } + return needs_adjustment; +} + +} +EntityHandle PDBize(EntityView asu, const geom::Mat4List& transforms, + seq::SequenceList seqres, int min_polymer_size, + bool shift_to_fit) +{ + EntityHandle pdb_bu = CreateEntity(); + XCSEditor edi = pdb_bu.EditXCS(BUFFERED_EDIT); + + ChainHandle ligand_chain; + ChainHandle water_chain; + const char* curr_chain_name = POLYPEPTIDE_CHAIN_NAMES; + bool needs_adjustment = false; + int last_rnum = 0; + std::map<ResidueHandle, ResidueHandle> dst_to_src_map; + for (geom::Mat4List::const_iterator i = transforms.begin(), + e = transforms.end(); i != e; ++i) { + for (ChainViewList::const_iterator j = asu.GetChainList().begin(), + e2 =asu.GetChainList().end(); j != e2; ++j) { + ChainView chain = *j; + int chain_length = chain.GetResidueCount(); + if (chain_length < min_polymer_size && seqres.IsValid()) { + seq::SequenceHandle s = seqres.FindSequence(chain.GetName()); + if (s.IsValid()) + chain_length = s.GetLength(); + } + if (chain.IsPolymer() && chain_length >= min_polymer_size) { + if (*curr_chain_name == 0) { + throw std::runtime_error("running out of chain names"); + } + ChainHandle new_chain = edi.InsertChain(String(1, curr_chain_name[0])); + curr_chain_name++; + edi.SetChainDescription(new_chain, chain.GetDescription()); + edi.SetChainType(new_chain, chain.GetType()); + new_chain.SetStringProp("original_name", chain.GetName()); + if (chain.HasProp("pdb_auth_chain_name")) + new_chain.SetStringProp("pdb_auth_chain_name", + chain.GetStringProp("pdb_auth_chain_name")); + + for (ResidueViewList::const_iterator k = chain.GetResidueList().begin(), + e3 = chain.GetResidueList().end(); k != e3; ++k) { + ResidueHandle new_res = edi.AppendResidue(new_chain, k->GetName(), + k->GetNumber()); + dst_to_src_map[new_res] = k->GetHandle(); + needs_adjustment |= copy_atoms(*k, new_res, edi, *i); + } + continue; + } + if (chain.GetType() == CHAINTYPE_WATER) { + if (!water_chain) { + water_chain = edi.InsertChain(WATER_CHAIN_NAME); + edi.SetChainDescription(water_chain, chain.GetDescription()); + edi.SetChainType(water_chain, chain.GetType()); + } + for (ResidueViewList::const_iterator k = chain.GetResidueList().begin(), + e3 = chain.GetResidueList().end(); k != e3; ++k) { + ResidueHandle new_res = edi.AppendResidue(water_chain, k->GetName()); + new_res.SetStringProp("type", StringFromChainType(chain.GetType())); + new_res.SetStringProp("description", chain.GetDescription()); + dst_to_src_map[new_res] = k->GetHandle(); + needs_adjustment |= copy_atoms(*k, new_res, edi, *i); + } + continue; + } + // deal with ligands... + if (!ligand_chain) { + ligand_chain = edi.InsertChain(LIGAND_CHAIN_NAME); + last_rnum = 0; + } + char ins_code = chain.GetResidueCount()>1 ? 'A' : '\0'; + + for (ResidueViewList::const_iterator k = chain.GetResidueList().begin(), + e3 = chain.GetResidueList().end(); k != e3; ++k) { + ResidueHandle new_res = edi.AppendResidue(ligand_chain, k->GetName(), + ResNum(last_rnum+1, ins_code)); + new_res.SetStringProp("type", StringFromChainType(chain.GetType())); + new_res.SetStringProp("description", chain.GetDescription()); + new_res.SetStringProp("original_name", chain.GetName()); + if (chain.HasProp("pdb_auth_chain_name")) + new_res.SetStringProp("pdb_auth_chain_name", + chain.GetStringProp("pdb_auth_chain_name")); + dst_to_src_map[new_res] = k->GetHandle(); + ins_code += 1; + needs_adjustment |= copy_atoms(*k, new_res, edi, *i); + } + last_rnum+=1; + } + } + if (needs_adjustment && shift_to_fit) { + geom::Vec3 min_coord = pdb_bu.GetBounds().GetMin(); + geom::Mat4 tf(1, 0, 0, -999 - min_coord[0], + 0, 1, 0, -999 - min_coord[1], + 0, 0, 1, -999 - min_coord[2], + 0, 0, 0, 1); + edi.ApplyTransform(tf); + } + TransferConnectivity(pdb_bu, dst_to_src_map); + return pdb_bu; +} + +}}} diff --git a/modules/mol/alg/src/pdbize.hh b/modules/mol/alg/src/pdbize.hh new file mode 100644 index 000000000..d4120de28 --- /dev/null +++ b/modules/mol/alg/src/pdbize.hh @@ -0,0 +1,40 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#ifndef OST_MOL_ALG_PDBIZE_HH +#define OST_MOL_ALG_PDBIZE_HH + +#include <ost/mol/entity_view.hh> +#include <ost/mol/entity_handle.hh> +#include <ost/seq/sequence_list.hh> +#include "module_config.hh" + +namespace ost { namespace mol { namespace alg { + + +extern const char* POLYPEPTIDE_CHAIN_NAMES; +extern const char* LIGAND_CHAIN_NAME; +extern const char* WATER_CHAIN_NAME; + +mol::EntityHandle DLLEXPORT_OST_MOL_ALG +PDBize(mol::EntityView asu, const geom::Mat4List& transforms, seq::SequenceList seqres, + int min_polymer_size=10, bool shift_to_fit=true); + +}}} +#endif + -- GitLab